"""
Some routines to resolve dataset identifiers.

For context, see Demleitner et al: Identifiers 2.0, IVOA Recommendation,
http://ivoa.net/documents/Identifiers

This depends on GAVO's VO packages; it also doubles as a core within
GAVO DaCHS.
"""

import requests

from gavo import votable


# DEBUG is None in normal operation; code elsewhere only records
# instrumentation data when it is truthy.
DEBUG = None

if False:
	# set to true to collect instrumentation data for testing
	class _DebugAccumulator(object):
		"""Collects the raw responses seen while resolving, grouped by
		where they came from, so they can be dumped afterwards.
		"""
		def __init__(self):
			self.registry_responses, self.requests_responses = [], []
			self.obscore_responses = []

		def add_registry(self, res):
			"""records a registry query result.
			"""
			self.registry_responses += [res]

		def add_requests(self, res):
			"""records the payload of a plain HTTP request.
			"""
			self.requests_responses += [res]

		def add_obscore(self, res):
			"""records an obscore query result.
			"""
			self.obscore_responses += [res]

		def print_report(self):
			"""prints the repr of everything collected, one line per category.
			"""
			for template, collected in (
					("\tregistry_responses = %s", self.registry_responses),
					("\trequests_get_responses = %s", self.requests_responses),
					("\tobscore_responses = %s", self.obscore_responses)):
				print(template%repr(collected))

	DEBUG = _DebugAccumulator()


class Results(list):
	"""A shallow wrapper for list to keep the results of the resolution.

	Each item is a dict with the keys rec_type ("ERROR", "WARNING", or
	"RESULT"), message, and access_uri (None for errors and warnings).
	"""
	# Seconds to wait for a probed service.  requests does not time out
	# unless told to, so without this a single unresponsive service would
	# block the resolution indefinitely.  Override on a subclass or instance
	# if slower services need to be accommodated.
	REQUEST_TIMEOUT = 60

	def add_error(self, err_msg):
		"""adds a result record for an error.
		"""
		self.append({"rec_type": "ERROR", "message": err_msg, "access_uri": None})

	def add_warning(self, msg):
		"""adds a result record for a warning.
		"""
		self.append({"rec_type": "WARNING", "message": msg, "access_uri": None})

	def add_result(self, msg, uri):
		"""adds a result record with a comment and an access URL.

		This probably shouldn't be called directly by "user" code; rather
		use add_<protocol>.
		"""
		self.append({"rec_type": "RESULT", "message": msg,
			"access_uri": uri})

	def _fetch(self, protocol, access_url, params):
		"""returns the requests response for a GET of access_url with params,
		or None if the request failed on the transport level.

		Failures raised by requests (connection problems, timeouts, invalid
		URLs) are turned into a warning record mentioning protocol instead of
		being propagated, so one unreachable service does not abort the
		resolution as a whole.  The HTTP status is not inspected here.
		"""
		try:
			return requests.get(access_url, params=params,
				timeout=self.REQUEST_TIMEOUT)
		except requests.RequestException as exc:
			self.add_warning("While trying to use %s service at %s: %s"%(
				protocol, access_url, exc))
			return None

	def add_datalink(self, dataset_id, access_url):
		"""tries a datalink service at access_url to see if dataset_id
		matches there.

		"match" means a record for the given dataset_id with semantics #this.
		If the service cannot be reached, a warning is added instead.
		"""
		res = self._fetch("datalink", access_url, {'ID': dataset_id})
		# compare against None explicitly: a response's truth value reflects
		# its HTTP status, which is not what is being tested here.
		if res is None:
			return
		if DEBUG:
			DEBUG.add_requests(res.content)

		data, metadata = votable.loads(res.content)
		for row in metadata.iterDicts(data):
			if row["semantics"]=="#this":
				self.add_result("From a datalink service at %s"%access_url,
					row["access_url"])

	def add_ssap(self, dataset_id, access_url):
		"""tries an SSA service at access_url to see if dataset_id matches
		there.

		Every row the service returns for a PUBDID query is taken as a match;
		the access URL is read from the column utyped ssa:access.reference.
		If there is no such column or the service cannot be reached, a warning
		is added instead.
		"""
		res = self._fetch("SSAP", access_url,
			{'PUBDID': dataset_id, "REQUEST": "queryData"})
		if res is None:
			return
		if DEBUG:
			DEBUG.add_requests(res.text)

		data, metadata = votable.loads(res.content)
		for field in metadata:
			if field.utype and field.utype.lower()=='ssa:access.reference':
				url_label = field.name
				break
		else:
			self.add_warning("While trying to use SSAP service at %s:"
				" service doesn't have an ssa:access.reference-utyped column."%
				access_url)
			return

		for row in metadata.iterDicts(data):
			self.add_result("From an SSAP service at %s"%access_url,
				row[url_label])

	def add_obscore(self, dataset_id, access_url):
		"""tries to add matches from an obscore service at access_url.

		We try to unconditionally access ivoa.obscore.  Perhaps we should
		check for its presence first?

		Also see query_obscore for cautioning words.
		"""
		for row in query_obscore(dataset_id, access_url):
			self.add_result("From an obscore service at %s"%access_url,
				row["access_url"])


def query_obscore(dataset_id, access_url):
	"""returns a list of dicts with the keys access_url and access_format
	for the rows matching dataset_id in the obscore table of the TAP service
	at access_url.

	An empty list is returned if the query raises a TAPQueryError.

	Matching is a plain string comparison on obs_publisher_did.  PubDIDs
	are supposed to be partly case-insensitive, but that cannot currently
	be expressed sanely in ADQL, so this relies on nobody having changed
	the case of the pubDID.
	"""
	# double embedded single quotes so the id is a valid ADQL string literal
	escaped_did = dataset_id.replace("'", "''")
	adql = ("select access_url, access_format from ivoa.obscore"
		" where obs_publisher_did='%s'"%escaped_did)

	try:
		job = votable.ADQLSyncJob(access_url, adql).run()
	except votable.TAPQueryError:
		return []

	table_data, table_meta = votable.load(job.openResult())
	matches = list(table_meta.iterDicts(table_data))
	if DEBUG:
		DEBUG.add_obscore(matches)
	return matches


def query_registry(query, tap_url="http://reg.g-vo.org/tap"):
	"""executes query on a TAP service and returns the result in a list
	of dicts.

	query is an ADQL string; tap_url is the access URL of the TAP service
	to run it on.  It defaults to the registry at reg.g-vo.org, but any
	other TAP endpoint can be passed in.

	So, yes, it's not registry-specific, it's just a TAP wrapper.
	"""
	job = votable.ADQLSyncJob(tap_url, query)
	job.run()
	data, metadata = votable.load(job.openResult())
	res = list(metadata.iterDicts(data))
	if DEBUG:
		DEBUG.add_registry(res)
	return res


def get_capabilities(ivoid):
	"""returns a list of pairs (standard_id, access_url) for the service ivoid.

	Only vs:paramhttp interfaces are considered.  Capabilities without a
	standard_id are left out: they cannot be matched to a protocol anyway,
	and fetching one row per interface keeps each standard_id paired with
	its own access_url.

	An empty list is returned if the registry has no matching rows.
	"""
	res = query_registry(
		"select standard_id, access_url"
		" from rr.resource"
		"   natural join rr.interface"
		"   natural join rr.capability"
		# no risk of SQL injection here, as we're querying a public
		# ADQL resource in the first place
		" where ivoid='%s'"
		"   and intf_type='vs:paramhttp'"
		"   and standard_id is not null"%ivoid.replace("'", "''"))

	# the truth tests guard against empty or missing values coming back
	# despite the constraint above.
	return [(row["standard_id"], row["access_url"])
		for row in res
		if row["standard_id"] and row["access_url"]]


def get_services_for(ivoid):
	"""returns the ivoids of services in a served-by relationship to ivoid.

	Rows without a related_id are skipped.
	"""
	# 'served-by' is the legacy term; records following VOResource 1.1 use
	# IsServedBy, which is expected to show up lower-cased in RegTAP.
	# NOTE(review): confirm the exact spelling against the registry in use;
	# accepting both is harmless if one of them never matches.
	res = query_registry("select related_id"
		" from rr.relationship"
		" where relationship_type in ('served-by', 'isservedby')"
		" and ivoid='%s'"%ivoid.replace("'", "''"))
	return [r["related_id"] for r in res if r["related_id"]]


def resolve_in_service(service_id, dataset_id, results):
	"""probes every supported capability of service_id for dataset_id,
	adding whatever is found to results.

	Capabilities are dispatched on the start of their standard id;
	datalink, SSA, and TAP (for obscore) are supported, everything else
	is silently ignored.
	"""
	# standard id prefix -> name of the results method probing that protocol
	probes = (
		("ivo://ivoa.net/std/datalink", "add_datalink"),
		("ivo://ivoa.net/std/ssa", "add_ssap"),
		("ivo://ivoa.net/std/tap", "add_obscore"),
	)

	for standard_id, endpoint in get_capabilities(service_id):
		for prefix, method_name in probes:
			if standard_id.startswith(prefix):
				getattr(results, method_name)(dataset_id, endpoint)
				# at most one probe per capability
				break


def _has_access_links(results):
	"""returns True if results contains at least one RESULT record.
	"""
	return any(rec["rec_type"]=="RESULT" for rec in results)


def resolve_did(pub_DID, force_related=False):
	"""returns a Results list of messages and results for resolving pub_DID.

	Each item is a dict with the keys

		* rec_type (ERROR, WARNING, RESULT)
		* message (human-readable)
		* access_uri (for results; None otherwise)

	pub_DID is first looked up in the service named by its registry part.
	The services that resource declares itself served by are only consulted
	if that did not yield any RESULT record, or if force_related is true.
	Warnings alone do not count as a successful resolution.

	Input that is not a string, has no local part, or is not in the ivo://
	scheme yields a single ERROR record.

	Yes, that's mainly convenient for DaCHS's core, and yet, this is the
	primary entry point of this module.

	This implements the scheme proposed in Identifiers 2.0, Sect. 4.1
	"""
	results = Results()
	try:
		regpart, _ = pub_DID.split("?", 1)
	except (AttributeError, TypeError, ValueError):
		# AttributeError/TypeError: pub_DID is None or not a string;
		# ValueError: there is no "?" and hence nothing to unpack into
		# the local part.
		results.add_error("No PubDID (no local part?)")
		return results

	regpart = regpart.lower()
	if not regpart.startswith("ivo://"):
		results.add_error("No PubDID (not in ivo:// scheme)")
		return results

	resolve_in_service(regpart, pub_DID, results)

	if force_related or not _has_access_links(results):
		# resolution directly in the service failed (or the caller wants
		# everything), try in served-by services
		for ivoid in get_services_for(regpart):
			resolve_in_service(ivoid, pub_DID, results)

	if not _has_access_links(results):
		results.add_error("Could not resolve %s"%pub_DID)

	return results


#################### GAVO DaCHS interface
try:
	from gavo import rsc
	from gavo.svcs import core

	class Core(core.Core):
		"""A DaCHS core exposing resolve_did.

		It reads the parameters pub_did and force_related from the input
		table and feeds the resolution records to the data descriptor
		with the id import_dids in the service's RD.
		"""
		def run(self, service, inputTable, queryMeta):
			pub_did = inputTable.getParam("pub_did")
			force_related = inputTable.getParam("force_related")
			records = resolve_did(pub_did, force_related)
			import_dd = service.rd.getById("import_dids")
			return rsc.makeData(import_dd, forceSource=records)

except ImportError:
	# the DaCHS server parts cannot be imported; no Core is defined then.
	pass

if __name__=="__main__":
	# when run as a script, resolve a sample PubDID, also consulting
	# related services, and dump instrumentation data if it was collected.
	sample_did = (
		"ivo://org.gavo.dc/apo/res/apo/frames?apo/cd/9506/L2237_950602_r_01.fits")
	print(resolve_did(sample_did, force_related=True))
	if DEBUG:
		DEBUG.print_report()
