"""
A rough guess at how we might parse what I suppose is IDL grammar.
"""

import gzip
import itertools
import re

from gavo.grammars.customgrammar import CustomRowIterator

# The input files come in a totally wonky format with lots of
# semistructured metadata at the top that we ignore for now.
# Then, there's a data section with one array per line.  There's a
# label at the start, a colon, and then something.
#
# As data comes in here, the first three items of the arrays contain
# non-array data.  For the 3D spectra, however, some of them need to be
# added to the label.  It's all too insane to consider.

def parseASCII(inName):
	"""iterates over the data rows of a gzipped IDL(?) ASCII dump.

	Lines starting with "*" (metadata) and lines not starting with "!"
	are skipped.  Every remaining line is taken as one column: its label
	is a fixed-width slice following the "!", with all non-word characters
	removed, and its values are the whitespace-separated floats from
	character 49 on.

	The columns are then transposed, and one dictionary mapping labels to
	floats is yielded per row.  As with zip, the number of rows is the
	length of the shortest column; if two columns end up with the same
	label, the later one wins.

	inName is the path of the gzipped input file.  If it contains "/3D",
	a longer label slice is used.

	This works on both Python 2 and Python 3.
	"""
	# 1D spectra have temperature (?) as first array element (chuck),
	# 3D spectra use it to distinguish disk positions; hence the longer
	# label slice for the latter.  This only depends on the file name,
	# so decide once rather than per line.
	labelEnd = 25 if "/3D" in inName else 10
	nonWord = re.compile(r"[^\w]+")

	labels, data = [], []
	with gzip.open(inName) as f:
		for ln in f:
			if not isinstance(ln, str):
				# Python 3 hands out bytes here (on Python 2, lines already
				# are str).  Undecodable bytes become U+FFFD, which is not
				# a word character and is therefore dropped from labels.
				ln = ln.decode("ascii", "replace")
			if ln.startswith("*"): # metadata evaluation?
				continue
			if not ln.startswith("!"):
				# who knows?  emit a diagnostic here?
				continue

			label, content = ln[1:labelEnd], ln[49:]
			labels.append(nonWord.sub("", label))
			data.append([float(v) for v in content.split()])

	# itertools.izip is gone in Python 3, where the builtin zip is lazy.
	izip = getattr(itertools, "izip", zip)
	for values in izip(*data):
		yield dict(zip(labels, values))


class RowIterator(CustomRowIterator):
	"""a row iterator feeding the rows parseASCII produces into the grammar.

	The source token is handed to parseASCII unchanged, which opens it
	with gzip.open.
	"""
	def _iterRows(self):
		parsedRows = parseASCII(self.sourceToken)
		for record in parsedRows:
			yield record


if __name__=="__main__":
	# Debugging aid: dump every row parsed from the file named as the
	# first command line argument.
	import sys

	from gavo.grammars.customgrammar import CustomGrammar

	# NOTE(review): CustomGrammar(None) presumably is a parent-less dummy
	# grammar that is good enough for a stand-alone run -- confirm.
	ri = RowIterator(CustomGrammar(None), sys.argv[1])
	for row in ri:
		# With a single parenthesised argument, this prints the same under
		# the Python 2 print statement and the Python 3 print function.
		print(row)

