"""
Query TAP servers for various metadata and ingest the info into local
tables.
"""

import argparse
import datetime
import gzip
import io
import os
import socket
import sys
import urllib.request, urllib.parse, urllib.error
import urllib.parse

from gavo import api
from gavo import base
from gavo import utils
from gavo import votable
from gavo.formats import texttable
from gavo.user import logui
from gavo.user import plainui
from gavo.votable import simple
from gavo.votable import tapquery
from gavo.votable import V


# Id of the RD this script works with; the data descriptors and the
# table definition used below are fetched from it via RD.getById.
RDID = "glots/q"
# Note that this loads the RD when the module is imported.
RD = api.getRD(RDID)


# What we identify as towards the services we harvest.
USER_AGENT = f"GloTS-harvester (IVOA-copy) DaCHS/{api.__version__}"
# Override the module-level setting in tapquery, which we use for running
# the TAP queries...
tapquery.USER_AGENT = USER_AGENT
# ...and, where the running DaCHS offers it, set it through the api, too.
try:
	api.setUserAgent(USER_AGENT)
except AttributeError:
	# compatibility for DaCHS <2.3.2
	pass


def _queryServer(service, query, useResponse=None):
	"""yields the rows of the first table in a TAP response as dicts.

	The dicts are keyed by the lowercased field names of the table.

	By default, query is run as a synchronous ADQL job on
	service["accessurl"].  For debugging, pass the name of a file
	containing a saved VOTable response in useResponse; the service
	is not contacted then.

	This is a generator, so nothing (in particular, no remote query)
	happens before the first row is requested.

	A tapquery.WrongStatus raised while creating the job is reported on
	stderr and re-raised.  A ValueError is raised if the response does
	not contain any table.
	"""
	if useResponse is None:
		try:
			job = votable.ADQLSyncJob(service["accessurl"], query,
				userParams={"MAXREC": "4000000"})
		except tapquery.WrongStatus as ex:
			sys.stderr.write("Failed to create job on %s: %s\n"%(
				service["ivoid"], ex.payload))
			raise

		job.run()
		data = job.openResult(simple=False).read()

	else:
		# binary mode: both the gzip sniffing and the parsing below
		# work on bytes.
		with open(useResponse, "rb") as f:
			data = f.read()

	# ESAVO TAP returned gzip unsolicitedly 9/2016
	if data.startswith(b"\x1f\x8b"):
		data = gzip.GzipFile(fileobj=io.BytesIO(data)).read()

	# To debug a misbehaving service, dump data into a file here and
	# feed that file back in through useResponse.

	# we only look at the first table in the response.
	for element in votable.parse(io.BytesIO(data)):
		metadata = simple.TableMetadata(element.tableDefinition, None)
		break
	else:
		raise ValueError("No table in TAP response from %s"%
			service["ivoid"])

	names = [field.name.lower() for field in metadata]
	for row in element:
		yield dict(zip(names, row))


class VOSITablesParser(utils.StartEndHandler):
	"""A parser turning a VOSI tables document into table and column records.

	serviceRec is the record of the service the document comes from; its
	ivoid is copied into every record produced.

	After parsing, the tables attribute has one dict per table element
	(keys ivoid, table_name, and, where given, table_desc), the columns
	attribute one dict per column element (keys ivoid, table_name, indexed,
	column_name and, where given, description, ucd, datatype, unit, utype,
	size).

	The _start_X and _end_X methods are the handlers for the X elements
	(presumably dispatched by utils.StartEndHandler).
	"""
	def __init__(self, serviceRec):
		self.serviceRec = serviceRec
		# true while we are between a column's opening and closing tag
		self.parsingColumn = False
		utils.StartEndHandler.__init__(self)

	def _initialize(self):
		self.tables = []
		self.columns = []

	def _start_table(self, name, attrs):
		self.tables.append({"ivoid": self.serviceRec["ivoid"]})

	def _start_column(self, name, attrs):
		self.parsingColumn = True
		# this relies on the table's name element coming before its columns.
		self.columns.append({"ivoid": self.serviceRec["ivoid"],
			"table_name": self.tables[-1]["table_name"],
			"indexed": 0})

	def _end_column(self, name, attrs, content):
		self.parsingColumn = False

	def _end_name(self, name, attrs, content):
		parent = self.getParentTag()
		if parent=='table':
			self.tables[-1]["table_name"] = _makeTextFrom(content)
		elif parent=='column':
			self.columns[-1]["column_name"] = _makeTextFrom(content)
		# else it's probably a schema and we don't want to know

	def _end_description(self, name, attrs, content):
		parent = self.getParentTag()
		if parent=='table':
			self.tables[-1]["table_desc"] = _makeTextFrom(content)
		elif parent=='column':
			self.columns[-1]["description"] = _makeTextFrom(content)
		# Descriptions of anything else (schemas, foreign keys...) are
		# ignored.  In particular, a foreign key's description must not
		# end up as the description of the table's last column.

	def end_column_attribute(self, name, attrs, content):
		"""stores the content of a child of column under the lowercased
		element name.

		Outside of columns (e.g., for a table's utype), this does nothing.
		"""
		if not self.parsingColumn:
			return
		self.columns[-1][name.lower()] = _makeTextFrom(content)

	_end_ucd = _end_dataType = _end_unit = _end_utype = _end_size = \
		end_column_attribute


def _makeTextFrom(data):
	if data is None:
		return None
	if isinstance(data, bytes):
		try:
			data = data.decode("utf-8")
		except UnicodeDecodeError:
			data = data.decode("iso-8859-1")

	return data.strip()


def getMetaFromVOSI(service):
	"""returns a pair of tables, columns lists of metadata gleaned from
	service's VOSI tables endpoint.

	The endpoint is assumed to be the service's access URL with /tables
	appended.  The items of the lists are the dicts built by
	VOSITablesParser; they stand in for what would otherwise come
	from the service's TAP_SCHEMA.
	"""
	# DSA once used to require this:
#		tablesURL = service["accessurl"].replace("/TAP", "/VOSI/tables")
	tablesURL = service["accessurl"].rstrip("/")+"/tables"
	handler = VOSITablesParser(service)

	inF = api.urlopenRemote(tablesURL)
	try:
		handler.parse(inF)
	finally:
		# don't leave the connection dangling, whether or not parsing worked
		inF.close()

	return handler.tables, handler.columns


class NamedIterator:
	"""An iterator over dicts that stamps each dict with an ivoid.

	name and ivoId are only used to label the iterator (in its repr) and
	to fill the ivoid key of each item; iterator is where the items
	come from.  The items are changed in place.
	"""
	def __init__(self, name, ivoId, iterator):
		self.name, self.ivoId, self.iterator = name, ivoId, iterator

	def __iter__(self):
		return self

	def __next__(self):
		rec = next(self.iterator)
		rec["ivoid"] = self.ivoId
		return rec

	def __repr__(self):
		return "%s on %s"%(self.name, self.ivoId)


class TableIterator(NamedIterator):
	"""A NamedIterator for table records.

	In addition to setting ivoid, this copies each item's description
	to the table_desc key.
	"""
	def __next__(self):
		row = super().__next__()
		row["table_desc"] = row["description"]
		return row


class ColumnIterator(NamedIterator):
	"""A NamedIterator for column records.

	In addition to setting ivoid, this replaces None values of the
	indexed, std, and principal keys with False.
	"""
	def __next__(self):
		row = super().__next__()
		for flagName in ("indexed", "std", "principal"):
			if row[flagName] is None:
				row[flagName] = False
		return row


def getMetaFromTAP_SCHEMA(service):
	"""returns a pair of tables, columns iterators of metadata for service.

	The records come from querying the service's TAP_SCHEMA.tables
	and TAP_SCHEMA.columns.  Since _queryServer is a generator, the
	queries are only run once the iterators are being consumed.
	"""
	ivoid = service["ivoid"]

	tables = TableIterator(
		"TAP_SCHEMA.tables",
		ivoid,
		_queryServer(service, "SELECT * FROM TAP_SCHEMA.tables"))
	columns = ColumnIterator(
		"TAP_SCHEMA.columns",
		ivoid,
		_queryServer(service, "SELECT * FROM TAP_SCHEMA.columns"))

	return tables, columns


def updateService(conn, service, tables, columns):
	"""replaces the rows for service in the database with the contents
	of tables and columns.

	conn is the database connection all changes are made through; nothing
	is committed here.  service is the service's record; it is changed
	in place in that its lastsuccess is set to now and its nextharvest to
	harvestinterval days from now.  tables and columns are iterables
	of the records to ingest.

	This returns the total number of records the three ingestions report
	as affected.
	"""
	# completely delete the old service record to clean
	# columns and tables from data originating from this service.
	conn.execute("DELETE FROM glots.services"
		" WHERE ivoid=%(ivoid)s",
		{"ivoid": service["ivoid"]})

	recsTouched = 0
	for ddId, source in [
			("ingestService", [service]),
			("ingestTables", tables),
			("ingestColumns", columns)]:
		recsTouched += api.makeData(RD.getById(ddId),
			forceSource=source,
			connection=conn).nAffected

	# Use a single timestamp so lastsuccess and nextharvest are exactly
	# harvestinterval apart.  This is a naive UTC datetime because main
	# compares nextharvest against a naive utcnow().
	now = datetime.datetime.utcnow()
	service["lastsuccess"] = now
	service["nextharvest"] = (now+
		datetime.timedelta(days=service["harvestinterval"]))
	conn.execute("UPDATE glots.services SET nextharvest=%(nextharvest)s,"
		" lastsuccess=%(lastsuccess)s"
		" WHERE ivoid=%(ivoid)s", service)

	return recsTouched


def ingestOne(service, conn, useVOSI=False, verbose=False):
	"""harvests the table metadata of service and writes it through conn.

	The metadata is taken from the service's TAP_SCHEMA unless useVOSI
	is true, in which case the VOSI tables endpoint is used.  verbose
	is accepted but not evaluated here.

	This returns the number of records touched as reported by
	updateService.
	"""
	base.ui.notifyNewSource(service["ivoid"])

	fetchMeta = getMetaFromVOSI if useVOSI else getMetaFromTAP_SCHEMA
	tables, columns = fetchMeta(service)
	nTouched = updateService(conn, service, tables, columns)

	# NOTE(review): this is not reached when fetching or ingesting raises;
	# confirm the UI is fine with sources that are opened but never finished.
	base.ui.notifySourceFinished()
	return nTouched


def getServices(conn, constraint, fillers):
	"""returns a pair of the services table definition and a list of the
	services matching a (where clause) constraint using fillers.

	The list is fully materialised from a query on conn before
	this returns.
	"""
	tableDef = RD.getById("services")
	table = api.TableForDef(tableDef, connection=conn)
	matches = list(table.iterQuery(tableDef, constraint, fillers))
	return tableDef, matches


def parseCommandLine():
	"""returns the parsed command line arguments.

	The result has the attributes force, suppressVOSI, verbose (booleans)
	and onlyHarvest (a string or None).
	"""
	# (flag, add_argument keyword arguments), in the order they should
	# show up in the help.
	optionSpecs = [
		("-f", {
			"dest": "force",
			"action": "store_true",
			"help": "re-harvest even when it's not yet time"}),
		("-i", {
			"dest": "onlyHarvest",
			"action": "store",
			"metavar": "IVOID",
			"help": "only harvest services matching the SQL"
				" pattern IVOID, if at all"}),
		("-n", {
			"dest": "suppressVOSI",
			"action": "store_true",
			"help": "Do not fall back to VOSI if TAP_SCHEMA"
				" cannot be queried."}),
		("-v", {
			"dest": "verbose",
			"action": "store_true",
			"help": "verbose (talk while working; else"
				" info goes into the log.)"}),
	]

	parser = argparse.ArgumentParser()
	for flag, settings in optionSpecs:
		parser.add_argument(flag, **settings)
	return parser.parse_args()


def _ingestWithFallback(service, conn, args):
	"""harvests service from its TAP_SCHEMA, falling back to VOSI if that
	fails and args.suppressVOSI is not set.

	This returns the number of records touched.  The exception of the
	last attempt made is propagated if harvesting did not work out.
	"""
	try:
		return ingestOne(service, conn, verbose=args.verbose)
	except Exception:
		if args.verbose:
			base.ui.notifyError("Failed to harvest TAP_SCHEMA")
		if args.suppressVOSI:
			raise

	# The failed attempt may have left partial data or a failed transaction
	# behind; start the second attempt from a clean slate.
	conn.rollback()

	try:
		return ingestOne(service, conn, useVOSI=True, verbose=args.verbose)
	except Exception:
		if args.verbose:
			base.ui.notifyError("Failed to harvest VOSI endpoint")
		raise


def main():
	"""harvests the services that are due according to the services table.

	With -i, only services with matching ivoids are considered.  With -f,
	services are harvested even if their nextharvest is still in the
	future.  Services without a nextharvest are never harvested.

	Each service is harvested in a transaction of its own; failures are
	reported through base.ui and do not keep the remaining services from
	being harvested.
	"""
	socket.setdefaulttimeout(60)

	args = parseCommandLine()
	if args.verbose:
		plainui.PlainUI(base.ui)
		base.DEBUG = True
	harvestStamp = datetime.datetime.utcnow()

	with api.base.getWritableAdminConn() as conn:
		if args.onlyHarvest:
			constraint, fillers = \
				"ivoid ILIKE %(idpat)s", {"idpat": args.onlyHarvest}
		else:
			constraint, fillers = "", {}

		_, services = getServices(conn, constraint, fillers)

		for service in services:
			nextHarvest = service["nextharvest"]
			if not nextHarvest:
				continue
			if not (nextHarvest<harvestStamp or args.force):
				continue

			base.ui.notifyInfo("Updating %s..."%service["ivoid"])
			sys.stdout.flush()
			try:
				recsTouched = _ingestWithFallback(service, conn, args)
				base.ui.notifyInfo("Success on %s (%d recs)."%(
					service["ivoid"],
					recsTouched))
				conn.commit()
			except Exception as msg:
				# We do not reschedule failed services, so the database keeps
				# their previous nextharvest.
				conn.rollback()
				base.ui.notifyInfo("GloTS failed to harvest %s: %s"%(
					service["ivoid"], str(msg)))


if __name__=="__main__":
	# When run as a script: send UI events to the log, harvest, and then
	# have postgres clean up and re-analyze the tables we have written to.
	from gavo.user import logui, plainui
	logui.LoggingUI(base.ui)
	main()
	with base.getAdminConn() as conn:
		for statement in (
				"VACUUM ANALYZE glots.columns",
				"VACUUM ANALYZE glots.tables"):
			conn.execute(statement)
