"""
A one-time hack to copy catalog inputs from the arigfh archive files.

This uses data from proj-data/s07/archiv.linux.

You need res/mapping as written by katkatgrammar.
"""

from __future__ import with_statement

import glob
import re
import os
import sys
import tarfile


def getMapping():
	"""returns a dictionary built from the tab-separated file res/mapping.

	The first field of each line becomes the key, the second field the
	value; any further fields are ignored.  Lines starting with # and
	lines consisting only of whitespace are skipped.  When a key occurs
	more than once, the last occurrence wins.

	The path is relative to the current working directory.  A non-blank
	line with fewer than two fields raises an IndexError.
	"""
	mapping = {}
	# the with block makes sure the file is closed, even when a malformed
	# line raises an exception halfway through.
	with open("res/mapping") as f:
		for ln in f:
			if ln.startswith("#"):
				continue
			fields = ln.strip().split("\t")
			if fields==[""]:
				# blank line; nothing to map here.
				continue
			mapping[fields[0]] = fields[1]
	return mapping


def getData(tarName):
	"""returns the content of the member "katalog" of the tar archive tarName.

	The archive is opened with transparent compression detection ("r:*").
	tarfile raises a KeyError when the archive has no member called
	"katalog".
	"""
	tf = tarfile.open(tarName, "r:*")
	# try/finally rather than a plain close() so the archive does not stay
	# open when the member is missing or cannot be read.
	try:
		return tf.extractfile("katalog").read()
	finally:
		tf.close()


# matches the characters (parentheses and slashes) that the script below
# replaces with dashes when it turns a mapped name into a directory name.
fsClean = re.compile(r"[()/]")


if __name__=="__main__":
	# Deliberate safety catch: this script was meant to be run once.
	# Everything below is unreachable until this line is removed.
	sys.exit("You probably don't want to run this.")
	mapping = getMapping()
	for name in glob.glob("proj-data/s07/archiv.linux/*.tar.gz"):
		# the mapping key is the archive's base name without the ".tar.gz"
		# extension (7 characters), uppercased.
		key = os.path.basename(name)[:-7].upper()
		if key not in mapping:
			# this happens when multiple t/catpvs are assigned to a single
			# file -- katkatgrammar only emits one of these t/catpvs.  The
			# "katalog" we are interested in here is the same for all, so
			# we can skip those.
			continue
		# parentheses and slashes in the mapped name become dashes so the
		# name is usable as a single directory name.
		destDir = "cats/katalog/%s"%(fsClean.sub("-", mapping[key]))
		data = getData(name)
		if not os.path.isdir(destDir):
			# create the target directory, including missing parents (the
			# original had an undefined name here).
			os.makedirs(destDir)
		# binary mode: data is the raw content of the archive member and
		# must be written without newline translation or text encoding.
		with open(os.path.join(destDir, "data"), "wb") as f:
			f.write(data)
