"""
A grammar that takes upstream's TeX files and spits out HTML and other stuff
for ingestion into the appendix table.

This requires TeX and tex4ht.
"""

import csv
import os
import re
import shutil
import subprocess
import tempfile

from BeautifulSoup import BeautifulSoup

from gavo import base
from gavo.grammars.customgrammar import CustomRowIterator


def runTeX(teXSource, filesToLinkBackTo=None, extractFunc=None):
	"""Runs plain TeX on teXSource in a scratch directory and returns
	extractFunc()'s result.

	teXSource is a unicode string; it is written utf-8 encoded (with
	"replace" for unencodable characters) to tmp.tex.  filesToLinkBackTo
	is a sequence of absolute paths symlinked into the scratch directory
	(e.g., style files the source \\inputs).  extractFunc, if given, is
	called after a successful TeX run with the scratch directory as the
	current working directory; its return value becomes runTeX's.

	If TeX exits with an error, a base.ValidationError on "description"
	is raised, carrying the TeX log file as its hint.  The scratch
	directory is removed in any case.
	"""
	# None-sentinel instead of a mutable [] default (shared across calls).
	if filesToLinkBackTo is None:
		filesToLinkBackTo = []

	owd = os.getcwd()
	wdName = tempfile.mkdtemp("runtex")
	teXSource = teXSource.encode("utf-8", "replace")
	res = None

	try:
		os.chdir(wdName)
		for fname in filesToLinkBackTo:
			os.symlink(fname, os.path.basename(fname))
		with open("tmp.tex", "w") as f:
			f.write(teXSource)
		try:
			# check_output swallows TeX's chatter; on failure the real
			# error is in the log file, not on stdout.
			subprocess.check_output(["tex", "--interaction", "batchmode",
				"tmp.tex"], stderr=subprocess.STDOUT)
		except subprocess.CalledProcessError:
			with open("tmp.log") as f:
				raise base.ValidationError("Building description failed",
					"description", hint=f.read())
		if extractFunc:
			res = extractFunc()
	finally:
		os.chdir(owd)
		# shutil.rmtree rather than os.system("rm -rf ..."): no shell,
		# no quoting trouble with odd tempdir names.
		shutil.rmtree(wdName, ignore_errors=True)

	return res


def getHTMLFromTeX(rd, sourceToken, citations):
	"""Returns a unicode HTML fragment rendered from the TeX file
	sourceToken.

	rd is the resource descriptor (used to locate res/appstyle.tex),
	sourceToken the path of the TeX source, and citations a dict mapping
	citation keys to BeautifulSoup elements (as built by makeDataPack);
	the citations actually referenced are appended as a reference list.
	Raises base.ValidationError (via runTeX) if TeX fails.
	"""
	def makeHTML():
		# Called by runTeX inside its scratch directory after the TeX run:
		# converts tmp.dvi with tex4ht and returns tmp.html, which tex4ht
		# writes in latin-1.
		with open("tex4ht.log", "w") as output:
			subprocess.check_call(["tex4ht", "tmp.dvi"],
				stdout=output, stderr=subprocess.STDOUT)
		with open("tmp.html") as f:
			return f.read().decode("iso-8859-1")

	with open(sourceToken) as f:
		# Wrap the upstream source in our plain-TeX prologue/epilogue.
		src = "\\input appstyle\n"+f.read()+"\n\\bye\n"
		# I don't have the time to figure out why tex4ht bombs out for those
		# and rather hack it:
		src = re.sub(r"(\\mathrm\{[^}]*\})", r"{\1}", src)
		rawHTML = runTeX(src,
			filesToLinkBackTo=[rd.getAbsPath("res/appstyle.tex")],
			extractFunc=makeHTML)

	parsed = BeautifulSoup(rawHTML)
	refsToInclude = set()

	# Turn each <cite> into an intra-document link and remember its key
	# so we can emit the matching reference entry below.
	for el in parsed.findAll("cite"):
		# Strip the cite markup itself; what remains is the citation key.
		key = re.sub("<[^>]*>", "", str(el))
		el.replaceWith(BeautifulSoup('<a href="#%s">%s</a>'%(key, key)))
		refsToInclude.add(key)

	refElement = BeautifulSoup("<ul class='references'/>")
	for key in sorted(refsToInclude):
		# NOTE(review): an unknown key raises KeyError here — presumably
		# citations.bbl is expected to be complete; confirm.
		refElement.contents[0].append(citations[key])

	# Splice the reference list into the document body and serialize
	# everything inside <body> back to one unicode string.
	stuff = parsed.findAll("body")[0].contents
	stuff.append(refElement.prettify().decode("utf-8"))
	return "".join([unicode(el) for el in stuff])


class RowIterator(CustomRowIterator):
	"""A row iterator producing one row per TeX source file: the object's
	translated name plus its HTML-rendered description.
	"""
	def _iterRows(self):
		pack = self.grammar.dataPack
		# file name sans directory and extension is the lookup key
		stem = self.sourceToken.split("/")[-1].split(".")[0]
		row = {
			'name': pack.nameTrans[stem],
			'description': getHTMLFromTeX(
				self.grammar.rd, self.sourceToken, pack.citations),
		}
		yield row


def makeDataPack(grammar):
	"""Returns a container with the auxiliary data the grammar needs:

	* nameTrans -- a dict mapping file name stems to display names,
	  read from data/Name_conversions.txt (semicolon-separated);
	* citations -- a dict mapping citation keys to BeautifulSoup
	  elements cut from bibextract/citations.bbl.
	"""
	class Datapack(object):
		pass
	pack = Datapack()

	with open(grammar.rd.getAbsPath("data/Name_conversions.txt")) as src:
		pack.nameTrans = {}
		for left, right in csv.reader(src, delimiter=";"):
			pack.nameTrans[left.strip()] = right.strip()

	with open(grammar.rd.getAbsPath("bibextract/citations.bbl")) as src:
		# Wrap in a div so BeautifulSoup has a single root to parse into.
		soup = BeautifulSoup("<div>"+src.read().decode("utf-8")+"</div>")
		pack.citations = {}
		# The grandparent of each anchor is the whole citation entry.
		for anchor in soup.findAll("a"):
			pack.citations[anchor.get("name")] = anchor.parent.parent

	return pack


def test():
	"""A manual smoke test: renders a hard-coded sample TeX description
	and prints the HTML (or the TeX log on failure).

	Only useful on the maintainer's machine -- note the absolute input
	path below.
	"""
	from gavo import api
	rd = api.getRD("sasmirala/q")
	dataPack = makeDataPack(rd.getById("texgrammar"))
	for src in [
		"/home/msdemlei/gavo/inputs/sasmirala/data/descriptions/Circinus.tex",
		]:
		try:
			res = getHTMLFromTeX(rd, src, dataPack.citations)
		except base.ValidationError, ex:
			# TeX failed; the hint carries the TeX log (see runTeX).
			print ">>>>>>>>>>>", src
			print(ex.hint)
		else:
			print("Ok")
			print(res.encode("utf-8"))


if __name__=="__main__":
	test()
