<resource schema="glots">
<!-- run
  CREATE EXTENSION pg_trgm;
before importing this for the first time -->

	<meta name="description" format="rst">
		The global TAP schema collects information on
		tables and columns from known TAP servers.  This facilitates locating
		queriable data by physics (via UCD) or keywords (via description).

		Note that this shouldn't really be necessary as all information
		present here should be exposed through Registry records.  However,
		in reality data providers currently are much more liable to give
		column metadata in their tap_schema than in their Registry records.
		Hence, for the time being, we maintain this service by harvesting
		tap_schemas about monthly.
	</meta>
	<meta name="schema-rank">500</meta>
	<meta name="creationDate">2010-07-26T13:39:00</meta>
	<meta name="subject">virtual-observatories</meta>
	<meta name="subject">astronomy-databases</meta>

	<meta name="title">GloTS, the Global TAP Schema</meta>

	<meta name="_intro" format="rst">
		This service lets you find columns within VO
		\RSTservicelink{/__system__/tap/run/info}{TAP} services by
		\RSTservicelink{/ucds/ui/ui/form}{UCD} or
		description words.
		
		For UCD, you will frequently want to use regular expression matches (e.g., ``~
		pos.eq*``).  Note that, unfortunately, some services still use
		old-style UCDs (see `Moving to UCD1+`_).  Thus, to find all interesting
		columns, you may need to search multiple times.

		The table can also be queried using TAP or on our
		\RSTservicelink{/adql}{ad-hoc ADQL service}.

		.. _Moving to UCD1+: http://www.ivoa.net/Documents/WD/UCD/UCD-20040521.html
	</meta>

	<execute title="Update TAP service list" at="3:30,15:30">
		<job>
			<code>
				execDef.spawnPython("bin/retrieveTAPServices.py")
				execDef.spawnPython("bin/ingest.py")
			</code>
		</job>
	</execute>

	<table id="services" onDisk="True" adql="True"
			primary="ivoid">

		<property key="supportsModel">GloTS 1.0</property>
		<property key="supportsModelURI"
				>ivo://org.gavo.dc/std/glots#tables-1.0</property>

		<meta name="description">A table of TAP services harvested from the
			registry (and some spoon-fed).</meta>
		<column name="ivoid" type="text" tablehead="id" verbLevel="5"
			description="IVOA identifier of providing service"/>
		<column name="accessurl" type="text" tablehead="Srv. URL"
			verbLevel="15"
			description="Base URL of TAP endpoint"/>
		<column name="nextharvest" type="timestamp" verbLevel="30"
			description="Next scheduled harvest of data"
			xtype="timestamp"/>
		<column name="harvestinterval" unit="d" type="integer" verbLevel="30"
			description="Approximate interval of harvest"><values
			nullLiteral="-1"/></column>
		<column name="lastsuccess" type="timestamp" verbLevel="30"
			description="Last successful harvest of this service"
			xtype="timestamp"/>
	</table>

	<table id="tables" onDisk="True" adql="True"
			primary="ivoid,table_name">
		<meta name="description">A table of tables accesible through the TAP
			services known to glots.services.</meta>

		<foreignKey dest="ivoid" source="ivoid" inTable="services"/>

		<index columns="table_name" method="gin"
			>lower(table_name) gin_trgm_ops</index>
		<index columns="table_desc" method="gin"
			>to_tsvector('english', table_desc)</index>

		<column original="services.ivoid"/>
		<column name="table_name" type="text" tablehead="Table name"
			verbLevel="5" description="Fully qualified table name"/>
		<column name="table_desc" type="unicode" tablehead="Table description"
			description="Brief description of the table" verbLevel="15"/>
		<column name="utype" type="text"
			description="utype if the table corresponds to a data model"/>
	</table>

	<table id="columns" onDisk="True" adql="True"
			primary="ivoid,table_name,column_name"
			dupePolicy="drop">
		<meta name="description">A table of columns within the tables listed
			in glots.tables.</meta>
		<foreignKey dest="ivoid,table_name" source="ivoid,table_name"
			inTable="tables"/>

		<index columns="table_name" method="gin"
			>lower(table_name) gin_trgm_ops</index>
		<index columns="column_name" method="gin"
			>lower(column_name) gin_trgm_ops</index>
		<index columns="ucd"/>
		<index columns="description" method="gin"
			>to_tsvector('english', description)</index>

		<column original="services.ivoid"/>
		<column original="tables.table_name"/>
		<column name="column_name" type="text" tablehead="Column"
			description="Column name" verbLevel="1"/>
		<column name="description" type="text" tablehead="Description"
			description="Brief description of column" verbLevel="5"/>
		<column name="unit" type="text" tablehead="Unit"
			description="Unit in VO standard format" verbLevel="15"/>
		<column name="ucd" type="text" tablehead="UCD" verbLevel="5"
			description="UCD of column (some services still have 1.0 UCDs)."/>
		<column name="utype" type="text"
			description="Utype of column"/>
		<column name="datatype" type="text"
			description="ADQL datatype"/>
		<column name="quoted/size" type="integer"
			description="Length of variable length datatypes">
			<values nullLiteral="-1"/>
		</column>
		<column name="principal" type="text"
			description="Is column principal?">
			<values nullLiteral="-1"/>
		</column>
		<column name="indexed" type="integer"
			tablehead="Index?"
			description="Is there an index on this column?" verbLevel="15">
			<values nullLiteral="-1"/>
		</column>
		<column name="std" type="text"
			description="Is this a standard column?"/>
	</table>

	<table id="allcols" onDisk="True">
		<column original="columns.column_name"/>
		<column original="columns.description"/>
		<column original="columns.ucd"/>
		<column original="columns.unit"/>
		<column original="columns.indexed"/>
		<column original="tables.table_name"/>
		<column original="tables.table_desc"/>
		<column original="services.ivoid"/>
		<column original="services.accessurl"/>
		<viewStatement>
			CREATE VIEW glots.allcols AS (
				SELECT column_name, columns.description, ucd, unit, indexed,
					table_name, table_desc,
					services.ivoid,
					services.accessurl
				FROM (
					glots.tables JOIN glots.columns USING (ivoid, table_name))
				JOIN glots.services USING (ivoid))
		</viewStatement>
	</table>

	<data id="createTables" auto="False" dependents="createView">
		<make table="services"/>
		<make table="tables"/>
		<make table="columns"/>
		<publish/>
	</data>

	<data id="createView" auto="False">
		<make table="allcols"/>
	</data>

	<!-- the following data items are for bin/ingest.py exclusively -->
	<data id="ingestService" auto="False" updating="True">
		<dictlistGrammar/>
		<make table="services"/>
	</data>

	<data id="ingestTables" updating="True">
		<dictlistGrammar/>
		<make table="tables"/>
	</data>

	<data id="ingestColumns" updating="True">
		<dictlistGrammar/>
		<make table="columns">
			<rowmaker idmaps="*" id="colmaker">
				<map dest="quoted/size" nullExcs="ValueError,TypeError">int(@size)</map>
			</rowmaker>
		</make>
	</data>

	<dbCore id="plaincore" queriedTable="allcols">
		<condDesc buildFrom="ucd"/>
		<condDesc>
			<inputKey name="tablewords"
				description="Terms in table description (case does not matter, patterns not allowed)"
				type="text" tablehead="Table words"/>
			<phraseMaker>
				<code>
					yield "to_tsvector(table_desc) @@ plainto_tsquery(%%(%s)s)"%(
						base.getSQLKey("tablewords", inPars["tablewords"], outPars))
				</code>
			</phraseMaker>
		</condDesc>
		<condDesc>
			<inputKey name="columnwords"
				description="Terms in column description (case does not
					matter, patterns not allowed)"
				type="text" tablehead="Column words"/>
			<phraseMaker>
				<code>
					yield "to_tsvector(description) @@ plainto_tsquery(%%(%s)s)"%(
						base.getSQLKey("columnwords", inPars["columnwords"], outPars))
				</code>
			</phraseMaker>
		</condDesc>

		<outputTable>
			<outputField original="column_name"/>
			<outputField original="description"/>
			<outputField original="ucd"/>
			<outputField original="unit"/>
			<outputField original="indexed"/>
			<outputField original="table_desc"/>
			<outputField original="table_name" select="array[table_name,ivoid]">
				<formatter>
					tableName, ivoId = data
					return T.a(href="/glots/q/showcols/form?__nevow_form__=genForm"
						"&amp;table_name=%s&amp;ivoid=%s"%(
							urllib.parse.quote(tableName), urllib.parse.quote(ivoId)))[tableName]
				</formatter>
			</outputField>
			<outputField original="ivoid">
				<formatter>
					return T.a(href="/glots/q/showtables/qp/%s"%urllib.parse.quote(data))[
						data[6:]]
				</formatter>
			</outputField>
			<outputField original="accessurl"/>
		</outputTable>
	</dbCore>

	<service id="plain" core="plaincore">
		<meta name="shortName">GloTS plain</meta>
		<meta name="title">Global TAP Schema Simple Search</meta>
		<meta name="_related" title="TAP servers">
			\internallink{glots/q/servers/form}</meta>
		<publish render="form" sets="local,ivo_managed"/>
	</service>

	<dbCore id="serverscore" queriedTable="services">
		<condDesc buildFrom="ivoid"/>
		<condDesc buildFrom="nextharvest"/>
		<outputTable>
			<outputField original="ivoid">
				<formatter>
					return T.a(href="/glots/q/showtables/qp/%s"%urllib.parse.quote(data))[
						data[6:]]
				</formatter>
			</outputField>
			<outputField original="accessurl"/>
			<outputField original="nextharvest"/>
			<outputField original="harvestinterval"/>
		</outputTable>
	</dbCore>

	<service id="servers" core="serverscore">
		<meta name="shortName">GloTS servers list</meta>
		<meta name="title">Global TAP Servers List</meta>
		<meta name="description">A list of TAP servers in the VO, mostly
			taken from the registry.</meta>
		<meta name="_related" title="Column search">
			\internallink{glots/q/plain/form}</meta>
		<meta name="_intro">Just sending an empty form will give you
		a list of all TAP servers we know of.  Note that some of them actually
		are not TAP servers at all or have some functional deficiencies.  Trying
		to follow their table links will yield 404 ("Not found") errors.
		</meta>
	</service>

	<dbCore id="showtablescore" queriedTable="tables" limit="5000">
		<condDesc buildFrom="ivoid"/>
		<outputTable autoCols="table_name,table_desc"/>
	</dbCore>

	<service id="showtables" core="showtablescore" allowed="qp,form">
		<meta name="shortName">GloTS tables</meta>
		<meta name="title">GloTS tables for service</meta>
		<meta name="description">A list of tables for a TAP service specified
			in the URL by its ivo-id.
		</meta>
		<template key="response">res/tablestemplate.html</template>
		<template key="resulttable">res/tablestemplate.html</template>
		<template key="resultline">res/tablestemplate.html</template>
		<property name="queryField">ivoid</property>

		<customRF name="tableshead">
			# wants n:data="result"
			ctx.tag.clear()
			if data:
				return ctx.tag["GloTS tables for service(s) '%s'"%(
					self.queryMeta["inputTable"].getParam("ivoid"))]
			else:
				return ctx.tag["GloTS tables for service X"]
		</customRF>

		<outputTable namePath="tables">
			<outputField original="table_name" select="array[table_name,ivoid]"
					type="text" name="tableLink" displayHint="noxml=true">
				<formatter>
					tableName, ivoId = data
					return T.a(href="/glots/q/showcols/form?__nevow_form__=genForm"
						"&amp;table_name=%s&amp;ivoid=%s"%(
							urllib.parse.quote(tableName), urllib.parse.quote(ivoId)))[tableName]
				</formatter>
			</outputField>
			<outputField original="table_desc"/>
		</outputTable>

	</service>

	<dbCore id="showcolumnscore" queriedTable="columns" limit="5000"
			sortKey="table_name, column_name">
		<condDesc buildFrom="ivoid"/>
		<condDesc buildFrom="table_name"/>

		<outputTable namePath="columns">
			<outputField original="table_name"/>
			<outputField original="column_name"/>
			<outputField original="description"/>
			<outputField original="ucd"/>
			<outputField original="datatype"/>
			<outputField original="size"/>
			<outputField original="indexed"/>
			<outputField original="unit"/>
			<outputField original="utype"/>
		</outputTable>
	</dbCore>

	<service id="showcols" core="showcolumnscore">
		<meta name="shortName">GloTS tabcol</meta>
		<meta name="title">GloTS columns for table</meta>
		<meta name="description">Columns within TAP-accessible tables.</meta>
		<template key="response">res/columnstemplate.html</template>

		<customRF name="coltitle">
			# wants n:data="result"
			ctx.tag.clear()
			return ctx.tag["GloTS columns for table %s"%(
				self.queryMeta["formal_data"]["table_name"])]
		</customRF>
		
		<!-- this should really be done in the template,
			but I'd need a n:data="querydict" for that, and I can't be bothered
			to add it now -->
		<customRF name="infotext">
			# wants n:data=result
			fd = self.queryMeta["formal_data"]
			return ctx.tag["Table %s on service %s contains the following columns:"%(
				fd["table_name"], fd["ivoid"])]
		</customRF>

	</service>

	<regSuite>
		<regTest title="GloTS ad-hoc service returns sensible data.">
			<url parSet="form" ivoId="ivo://org.gavo.dc/tap"
				table_name="ppmxl.main" _FORMAT="VOTable">showcols/form</url>
			<code>
				row = self.getFirstVOTableRow(rejectExtras=False)
				self.assertEqual(row["description"], "B mag from USNO-B, first epoch")
				self.assertEqual(row["datatype"], "float")
				self.assertEqual(row["table_name"], "ppmxl.main")
			</code>
		</regTest>

		<regTest title="GloTS accessible through TAP">
			<url QUERY="select * from glots.tables natural join glots.columns where
				1=ivo_hasword(table_desc, 'FK6 union') and ucd='pos.parallax'"
				LANG="ADQL" REQUEST="doQuery">/tap/sync</url>
			<code>
				row = self.getFirstVOTableRow()
				self.assertEqual(row["description"], "'Resulting' parallax p_res"
					" of the star. This is either the HIPPARCOS parallax or a"
					" photometric/spectroscopic parallax.")
				self.assertEqual(row["table_desc"], "The union of all published parts"
					" of FK6, comprising only the common\\nfields.")
			</code>
		</regTest>

		<regTest title="GloTS shows tables per service">
			<url parSet="form" ivoid="~ *gavo*">showtables/form</url>
			<code><![CDATA[
				self.assertHasStrings("GloTS tables for service(s) '~ *gavo*'",
					'<a href="/glots/q/showcols/form?__nevow_form__=genForm&amp;'
					'table_name=rr.res_role&amp;ivoid=ivo%3A//aip.gavo.org/tap"'
					'>rr.res_role')
			]]></code>
		</regTest>
	</regSuite>
</resource>
