"""
Quick hack to enrich the Maidanak FITS headers with information on the observed
object and frame type.
"""

from __future__ import with_statement

import sys
import os
import re
import gzip

from gavo import base
from gavo import utils
from gavo.utils import fitstools


class Error(Exception):
	"""Base class for the failures this script reports per file."""


class CrappyStuff(Exception):
	"""Raised for files that should not be processed; the argument is the
	path of the offending file.
	"""


# Directory layout: everything lives below <inputsDir>/maidanak; the FITS
# files to process are in reduced/, auxiliary files (like the target name
# map) in res/.
projRoot = os.path.join(base.getConfig("inputsDir"), "maidanak")
srcPath, resDir = (
	os.path.join(projRoot, "reduced"),
	os.path.join(projRoot, "res"))


class ObjectNotFound(Error):
	"""Raised when the object in the payload cannot be processed; such
	objects should be ignored from then on.
	"""


class NameMap:
	"""A case-insensitive mapping from alias names to object names.

	The mapping is read from the file src.  Each non-empty line not starting
	with # consists of an object name, one or more tabs, and a
	whitespace-separated list of aliases.  Aliases are lowercased and then
	decoded from quoted-printable (which allows, e.g., blanks in aliases to be
	written as =20).

	The result is available in the namesDict attribute (alias -> object name).
	"""
	def __init__(self, src):
		self._parseSrc(src)

	def _parseSrc(self, src):
		"""fills namesDict from the file src.

		Raises a ValueError naming the file and line for lines that do not
		consist of exactly two tab-separated fields.
		"""
		self.namesDict = {}
		with open(src) as f:
			for lineNo, ln in enumerate(f):
				if ln.startswith("#") or not ln.strip():
					continue
				fields = re.split("\t+", ln)
				if len(fields)!=2:
					raise ValueError("%s, line %d: expected <object>TAB<names>,"
						" got %r"%(src, lineNo+1, ln))
				ob, names = fields
				for name in names.lower().split():
					self.namesDict[name.decode("quoted-printable")] = ob

	def resolve(self, name):
		"""returns the object name for the alias name (compared
		case-insensitively); raises a KeyError for unknown aliases.
		"""
		return self.namesDict[name.lower()]


class Processor:
	"""Adds ARI_TYPE, ARI_RAW, and ARI_OBJC cards to maidanak FITS headers.

	For each file, an internal id is guessed from the directory path and the
	header's object card (see _guessId).  Depending on that id, the file is
	marked up as a flat, bias, calibration, or science frame.

	nameMap must have a resolve(rawId) method returning the object name for an
	internal id and raising a KeyError for unknown ids.
	"""
	def __init__(self, nameMap):
		self.nameMap = nameMap

	def _readHeader(self, fName):
		"""returns the primary header of the gzipped FITS file fName.

		The file is closed again before returning; this assumes
		utils.readPrimaryHeaderQuick has parsed everything it needs by the time
		it returns.
		"""
		f = gzip.open(fName)
		try:
			return utils.readPrimaryHeaderQuick(f)
		finally:
			f.close()

	def _fixHeader(self, fPath, headerList):
		"""updates the header of fPath from headerList and makes the file
		group-writable (mode 0664).

		headerList contains triples like
		("ARI_TYPE", "FLAT", "type of observation"); they are handed on to
		fitstools.HeaderManipulator.updateFromList.
		"""
		hm = fitstools.HeaderManipulator(fPath)
		hm.updateFromList(headerList)
		hm.close()
		os.chmod(fPath, 0o664)

	def _processInstrumentFrame(self, id, path, fName, type="FLAT"):
		"""marks up a frame without an observed object (ARI_OBJC is empty)."""
		self._fixHeader(os.path.join(path, fName), [
			("ARI_TYPE", type, "type of observation"),
			("ARI_RAW", id, "internal object reference"),
			("ARI_OBJC", "", "No object observed"),
		])

	def _processFlat(self, id, path, fName):
		self._processInstrumentFrame(id, path, fName, "FLAT")

	def _processBias(self, id, path, fName):
		self._processInstrumentFrame(id, path, fName, "BIAS")

	def _processCalibration(self, id, path, fName):
		"""marks up a calibration frame; the internal id doubles as the name
		of the calibration field.
		"""
		self._fixHeader(os.path.join(path, fName), [
			("ARI_TYPE", "CALIB", "type of observation (calibration stars)"),
			("ARI_RAW", str(id), "internal reference to calibration field",),
			("ARI_OBJC", str(id), "name of calibration field"),
		])

	def _lookupRawId(self, rawId):
		"""returns the object name for rawId from the name map (KeyError if
		unknown).
		"""
		return self.nameMap.resolve(rawId)

	def _processScience(self, id, path, fName):
		"""marks up a science frame with the object name for id.

		Raises CrappyStuff (with the file's path) if the name map resolves id to
		"CRAP".
		"""
		objCname = self._lookupRawId(id)
		if objCname=="CRAP":
			raise CrappyStuff(os.path.join(path, fName))
		self._fixHeader(os.path.join(path, fName), [
			("ARI_TYPE", "SCIENCE", "type of observation (science frame)"),
			("ARI_RAW", str(id), "internal object reference"),
			("ARI_OBJC", str(objCname), "Simbad-compliant name of object observed"),
		])

	def _getIdFromPath(self, path):
		"""returns the internal id encoded in the directory name path.

		This is the last (lowercased) path segment that is at least three
		characters long and does not start with a month name.  An Error is
		raised if a segment starting with "cd" is hit first or if there is no
		suitable segment at all.
		"""
		parts = path.rstrip("/").lower().split("/")
		while parts:
			curPart = parts.pop()
			if len(curPart)<3:
				continue
			elif curPart[:3] in self.monthNames:
				continue
			elif curPart.startswith("cd"):
				break
			return curPart
		raise Error("path %s not understood."%path)

	def _getIdFromHeader(self, header):
		return header["object"]

	# header id -> path ids that are accepted as denoting the same thing
	# (mostly typos in directory names).  Every header id must occur only once
	# here; in a dict literal, a repeated key silently replaces the earlier
	# entry.
	knownConfusers = {
		"2237+030": set(["q2237_ogak"]),
		"apm0827": set(["apm08279"]),
		"be56": set(["berk56"]),
		"ev_flat": set(["flat1"]),
		"h1422+231": set(["b1422"]),
		"pj1115+080": set(["pg1115"]),
		"q2237+030": set(["q227_ogak"]),
		"qso 2237+030": set(["q2237", "q2237_ogak"]),
		"q227+030": set(["q2237_ogak"]),
		"qso2237": set(["q2237_ogak"]),
		"rxj0921+4529": set(["rjx0921"]),
		"rxt0921+4529": set(["rxj0921"]),
		"sbs1520+530": set(["sbs5120"]),
		"sdss0924+0219": set(["sdds0924"]),
		"sdss 1155+6346": set(["dss1155"]),
		"sdss1335+0118": set(["sds1335"]),
		"sdss j1650+4251": set(["dss1650"]),
		"ugc9958": set(["9995", "99958"]),
		"99958": set(["ugc9958"]),
		"um673": set(["um763"]),
	}

	# (header-name, path-name) -> "P" to use the path name, "H" to use the
	# header name
	resolvedConflicts = {
		("di tau", "sbs0909"): "P",
		("di tau", "sdss1004"): "P",
		("h1413+117", "b1422"): "P",
		("h1413+117", "sbs1520"): "P",
		("lq69=1351+6400", "h1413"): "P",
		("ngc3077", "sbs0909"): "P",
		("ngc7479", "um673"): "P",
		("pg1115+080", "h1413"): "P",
		("pg2331", "um673"): "P",
		("q2237", "sbs1520"): "P",
		("sa94", "sa95"): "P",
		("sa94", "um673"): "P",
		("sa95", "sa98"): "P",
		("sa98-653/667", "sbs0909"): "P",
		("sbs1520+530", "240"): "H",
		("sbs1520+530", "300"): "H",
		("sdss1004", "q0957"): "P",
		("sdss 1650+4251", "dss1650"): "P",
		("string1223-1238", "h1413"): "P",
		("um673=q0142-100", "1"): "H",
		("xte j1118+480", "h1413"): "P",
		("lq73=1411+4414", "lq74"): "H",
		("ngc1275", "sa98"): "P",
		("ngc2339", "sa98"): "P",
		("m39=ngc7092_x1", "ngc7092"): "P",
		("ugc9958", "galaxy"): "H",
		("q138=2211-1915", "q2237"): "P",
		("rotaetetest", "center"): "P",
	}

	# _getIdFromPath skips path segments whose first three characters are in
	# here (so "jul_2" is already covered by "jul").
	monthNames = set(["aug", "jul_2", "jul", "jun", "sep", "oct", "feb",
		"jan"])

	def _guessId(self, path, fName, header=None):
		"""returns a guess for the maidanak internal id of the object from
		both path and header, trying to reconcile the two in case of differences.

		Since we usually identify the path ids, they take precedence.

		header can be the already-parsed primary header of fName; if it is None,
		the header is read from fName.  A missing (None) object value in the
		header is treated like an empty one.

		An Error is raised if path and header disagree and no rule says which
		of the two to believe.
		"""
		pathId = self._getIdFromPath(path)
		if header is None:
			header = self._readHeader(fName)
		rawHeaderId = self._getIdFromHeader(header)
		if rawHeaderId is None:
			rawHeaderId = ""
		headerId = rawHeaderId.lower().strip("'")
		squeezedHeaderId = re.sub(r"\s", "", headerId)
		alphaPath = re.match("[a-z0-9]*", pathId).group()
		alphaHeader = re.match("[a-z0-9]*", squeezedHeaderId).group()

		# Should've gone for rule engine -- next time around...
		# The order of the rules matters; the first one matching wins.
		if pathId==headerId:
			return headerId
		if pathId.startswith("flat") and headerId=="bias":
			return headerId
		if pathId.startswith("fla"):
			return "flat"
		if headerId.strip()=="":
			return pathId
		if headerId.startswith(pathId):
			return pathId
		if len(alphaPath)>4 and alphaPath==alphaHeader:
			return pathId
		if squeezedHeaderId==pathId:
			return pathId
		if headerId.startswith("flat"):
			return "flat"
		if pathId in self.knownConfusers.get(headerId, set()):
			return pathId
		if (headerId, pathId) in self.resolvedConflicts:
			if self.resolvedConflicts[headerId, pathId]=="P":
				return pathId
			return headerId
		raise Error("Not reconcilable: %s, %s"%(headerId, pathId))

	def _getProductHandler(self, productId):
		"""returns the method that marks up files with the internal id
		productId.

		Ids that are neither flats, biases, nor calibration fields ("sa...")
		must be known to the name map; otherwise, an Error is raised.
		"""
		if productId.startswith("flat"):
			return self._processFlat
		if productId=="bias":
			return self._processBias
		if productId.startswith("sa"):
			return self._processCalibration
		try:
			# we only want to know whether the id resolves at all here
			self._lookupRawId(productId)
		except KeyError:
			raise Error("Unknown object %s ignored"%productId)
		return self._processScience

	def process(self, path, fName):
		"""marks up the gzipped FITS file fName in the directory path.

		Files that already have a non-empty ARI_TYPE card are left alone.
		"""
		fName = os.path.join(path, fName)
		hdr = self._readHeader(fName)
		if hdr.get("ARI_TYPE"):
			return
		targetId = self._guessId(path, fName, hdr)
		productHandler = self._getProductHandler(targetId)
		productHandler(targetId, path, fName)


def _renameIfUppercase(fName):
	disambiguator = ""
	if fName.endswith(".GZ"):
		basename = fName[:-3]
		while os.path.exists(basename+disambiguator+".gz"):
			disambiguator += "x"
		newName = basename+disambiguator+".gz"
		os.rename(fName, newName)
		return fName
	return fName


def _compressIfFits(fName):
	magic = open(fName).read(20)
	if magic.startswith("SIMPLE "):
		if os.path.exists(fName+".gz"):
			print ("File %s already exists compressed -- resolve manually."%
				fName)
#			raise Error("File %s already exists compressed -- resolve manually."%
#				fName)
		print "Compressing %s"%fName
		os.system("gzip '%s'"%fName)
		fName = fName+".gz"
	return fName


def processAll(processor):
	"""runs processor.process on all gzipped files below srcPath.

	Before that, files ending in .GZ are renamed to .gz, and uncompressed FITS
	files are gzipped.  Files not ending in .gz after that are not touched.

	Files for which processing raises an Error are reported and counted as
	ignored.  The paths of files raising CrappyStuff are collected and
	written, one per line, to the file crap.files in the current directory.
	For any other exception, the name of the offending file is printed, and
	the exception is re-raised.

	A progress counter (files seen, files ignored) is kept on stdout.
	"""
	processed, ignored = 0, 0
	crapFiles = []
	for path, _, fNames in os.walk(srcPath):
		for fName in fNames:
			fName = _renameIfUppercase(os.path.join(path, fName))
			if not fName.endswith(".gz"):
				fName = _compressIfFits(fName)
			if not fName.endswith(".gz"):
				continue

			try:
				processor.process(path, fName)
			except Error as msg:
				print("Skipping %s (%s, %s)"%(fName, msg.__class__.__name__, msg))
				ignored += 1
			except CrappyStuff as ex:
				crapFiles.append(str(ex))
			except Exception:
				# make sure the operator sees which file broke the run
				print(">>>>>>>>>>>>> %s"%fName)
				raise
			# processed counts ignored and crap files, too
			processed += 1
			sys.stdout.write("%6d (-%5d)\r"%(processed, ignored))
			sys.stdout.flush()

	if crapFiles:
		with open("crap.files", "w") as f:
			f.write("\n".join(crapFiles)+"\n")


def processSome(processor, files):
	"""runs processor.process on each of the paths in files, printing each
	path before it is processed.

	Other than with processAll, no renaming or compressing takes place, and
	exceptions are not caught.
	"""
	for f in files:
		print(f)
		processor.process(os.path.dirname(f), os.path.basename(f))


def main():
	"""marks up all files below srcPath, using the name map in
	res/maydanak_targets.txt.
	"""
	nameMap = NameMap(os.path.join(resDir, "maydanak_targets.txt"))
	processor = Processor(nameMap)
	processAll(processor)
	# To debug the processing of individual files, use processSome instead:
	#processSome(processor,['/data/gavo/inputs/maidanak/reduced/cd042/2004/q2237/f3/aug/nh160103.fits.gz'])


# Only start processing when run as a script, not when imported.
if __name__=="__main__":
	main()

# vim:ts=2: