"""
Flatten the directory hierarchy from "reduced" (see ../README), doing all
kinds of sanity checks.

This only needs to be run once; when you read this, chances are the "reduced"
hierarchy is long gone.

The "flat" hierarchy is

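<object>/Johnson_(U, B, V, R, I) -- science images in the respective bands
flat/Johnson_(...) -- flatfields
bias/Johnson_(...) -- bias exposures
calib/<object>/Johnson_(...) -- calibration images in the respective bands

Here, <object> is the value of the image's ARI_OBJC header card; blanks
in path names are replaced by underscores, plus signs by "p".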

The file names are sanitized to always have the extension .fits.gz.
"""

from __future__ import with_statement

import gzip
import os
import pyfits
import sys
import traceback

from gavo import base
from gavo import utils

# Directory "maidanak" below the configured inputsDir; all other paths
# here are relative to it.
resdir = os.path.join(base.getConfig("inputsDir"), "maidanak")

# The hierarchy that gets scanned when this module is run as a script.
SRC_DIR = os.path.join(resdir, "reduced")
# NOTE(review): not referenced in the code visible here; presumably the
# directory the generated scripts are meant to be run in -- confirm.
DST_DIR = os.path.join(resdir, "data")
# Name map built from res/filtermap.txt; classify() resolves the FILTER
# header value through it.
FILTER_MAP = utils.NameMap(os.path.join(resdir, "res", "filtermap.txt"))


class LogThis(Exception):
	"""is raised for files that should merely be noted in the problem log.
	"""

class ClobberDisaster(Exception):
	"""is raised when two source files that are not the same exposure
	would end up under the same destination path.

	f1 and f2 are the paths of the two clashing source files; they are
	available as attributes and are part of the exception's message.
	"""
	def __init__(self, f1, f2):
		# Pass the paths on to Exception so args gets filled in; without
		# this, Python 2 leaves args empty.
		Exception.__init__(self, f1, f2)
		self.f1, self.f2 = f1, f2

	def __str__(self):
		return "%s and %s would clobber each other"%(self.f1, self.f2)


def classify(hdr):
	"""returns a relative path that the image should go to.

	hdr is the image's primary header; it only needs to support hdr[key].
	The result is <toplevel>/<filter>, where toplevel is chosen from the
	ARI_TYPE card (see the module docstring for the hierarchy) and filter
	is the FILTER card resolved through FILTER_MAP.  Blanks in the result
	are replaced by underscores, plus signs by "p".

	Raises LogThis if hdr has no ARI_TYPE card.  An unknown ARI_TYPE dumps
	the header to stdout and aborts the whole run through SystemExit.
	"""
	try:
		expType = hdr["ARI_TYPE"]
	except KeyError:  # Check out these guys...
		# Give the log entry a reason; an empty LogThis logs nothing useful.
		raise LogThis("no ARI_TYPE card in header")

	if expType=="SCIENCE":
		# strip as in the CALIB branch so padding does not turn into
		# underscores below.
		toplevel = hdr["ARI_OBJC"].strip()
	elif expType=="FLAT":
		toplevel = "flat"
	elif expType=="BIAS":
		toplevel = "bias"
	elif expType=="CALIB":
		toplevel = os.path.join("calib", hdr["ARI_OBJC"].strip())
	else:  # Fallthrough for weird images; doesn't happen for current data
		print(hdr)
		# A string argument makes the exit status non-zero and puts the
		# reason on stderr.
		sys.exit("Unknown exposure type %r"%(expType,))

	filterName = FILTER_MAP.resolve(str(hdr["FILTER"]))
	# Defuse "difficult" characters ("+" is a pain in HTTP URLs)
	return os.path.join(toplevel, filterName
		).replace(" ", "_").replace("+", "p")


class Checker(object):
	"""collects the planned moves and guards against clobbering.

	files maps each destination path to the source path that is to be
	moved there; dupes lists source paths found to duplicate a source
	already registered for their destination.
	"""
	def __init__(self):
		self.files = {}
		self.dupes = []

	def _readHeader(self, path):
		"""returns the primary header of the gzipped file at path.

		The file is closed again whether or not parsing succeeds.
		"""
		f = gzip.open(path)
		try:
			return utils.readPrimaryHeaderQuick(f)
		finally:
			f.close()

	def assertSameThing(self, src1, src2):
		"""records src1 as a duplicate of src2 if both are the same exposure.

		Raises ClobberDisaster(src1, src2) if their headers disagree.
		"""
		hdr1 = self._readHeader(src1)
		hdr2 = self._readHeader(src2)
		# Let's assume files are identical if TM_START and DATE-OBS
		# are identical
		if (hdr1["TM_START"]!=hdr2["TM_START"]
				or hdr1["DATE-OBS"]!=hdr2["DATE-OBS"]):
			raise ClobberDisaster(src1, src2)
		self.dupes.append(src1)

	def move(self, src, dst):
		"""registers that src should be moved to dst.

		If dst is already taken, src must be the same exposure as the file
		registered for it (see assertSameThing); it is then noted as a
		duplicate and the existing registration is kept.
		"""
		if dst in self.files:
			self.assertSameThing(src, self.files[dst])
			return
		self.files[dst] = src

	def writeScript(self, destF):
		"""writes one "mv -i" shell line per registered move to destF.
		"""
		for dst, src in self.files.items():
			destF.write("mv -i '%s' '%s'\n"%(src, dst))


def traverse(root):
	checker = Checker()
	destdirs = set()
	with open("sortproblems.log", "w") as logFile:
		for dir, children, names in os.walk(root):
			for n in names:
				srcPath = os.path.join(dir, n)
				try:
					f = gzip.open(srcPath)
					hdr = utils.readPrimaryHeaderQuick(f)
					f.close()
					destDir = classify(hdr)
					destdirs.add(destDir)
					destPath = os.path.join(destDir, n)
					checker.move(srcPath, destPath)
				except (KeyboardInterrupt, SystemExit):
					raise
				except IOError, ex:
					logFile.write("%s (io): %s\n"%(srcPath, str(ex)))
				except LogThis, ex:
					logFile.write("%s (log): %s\n"%(srcPath, str(ex)))
				except:
					sys.stderr.write("Ignoring %s, traceback follows.\n"%srcPath)
					traceback.print_exc()
	with open("sortscript.sh", "w") as script:
		checker.writeScript(script)
	with open("makedirs.sh", "w") as script:
		for d in destdirs:
			script.write("mkdir -p '%s'\n"%d)
	with open("rmdupes.sh", "w") as script:
		for d in checker.dupes:
			script.write("rm '%s'\n"%d)


if __name__=="__main__":
	# This only scans SRC_DIR and writes the log and the helper scripts
	# to the current directory; no image is moved or removed here.
	traverse(SRC_DIR)
