"""
A custom grammar for parsing the "gbin" files.

See README for where this fits in the general picture.

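To make these actually useful, the grammar needs a property named gbinmap,
which must be in the form of a python dict "body", i.e., the key: value
pairs that go between the braces of a dict literal.  Keys are the rawdict
keys, values are expressions accessing attributes of the deserialized
object, which is available under the name obj.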

These expressions come out of bin/getschema.py
"""

import math
import re
import struct
import sys
import zipfile
from cStringIO import StringIO

import numpy

from gavo import api
from gavo.grammars.customgrammar import CustomRowIterator

javaobj, _ = api.loadPythonModule("javaobj", relativeTo=__file__)


class RowIterator(CustomRowIterator):
	"""A row iterator producing one rawdict per object serialized in a
	gbin file.

	The rawdicts are built by evaluating the grammar's gbinmap property,
	wrapped into braces so it becomes a dict literal, once per object.
	"""
	def _iterRows(self):
		"""yields one rawdict per object found in the file self.sourceToken.

		Raises an api.ReportableError if the grammar has no gbinmap property,
		if a chunk of the input is not a zip archive, or if a zip archive
		does not contain exactly one member.
		"""
		if not self.grammar.hasProperty("gbinmap"):
			raise api.ReportableError("jsogrammar needs a gbinmap property.")

		# NOTE(review): gbinmap is evaluated as python code below; it must
		# only ever come from a trusted resource descriptor.
		recordFactory = compile("{%s}"%
			self.grammar.getProperty("gbinmap"), self.grammar.rd.sourceId,
			"eval")

		# gbin files are binary, so don't let the platform translate
		# line endings.
		with open(self.sourceToken, "rb") as f:
			for zipstream in iterContainedFiles(f):
				try:
					zf = zipfile.ZipFile(file=StringIO(zipstream))
				except zipfile.BadZipfile as ex:
					# Going on here would either fail on an unbound zf or silently
					# re-process the previous chunk's archive, so bail out.
					raise api.ReportableError(
						"Bad zip archive in GBIN file %s: %s"%(self.sourceToken, ex))

				try:
					names = zf.namelist()
					if len(names)!=1:
						raise api.ReportableError(
							"Expected exactly one member per zip archive in GBIN file"
							" %s, found %d"%(self.sourceToken, len(names)))
					data = zf.open(names[0]).read()
				finally:
					zf.close()

				stuff = javaobj.loads(data)
				for obj in stuff:
					# The function's locals (in particular obj) are passed as eval's
					# globals, the module namespace as its locals.  That is the
					# reverse of the usual order, but it is kept because existing
					# gbinmap expressions may depend on what nested scopes see
					# this way.
					yield eval(recordFactory, locals(), globals())


def iterContainedFiles(input):
	"""iterates over byte sequences corresponding to the gbin zip files
	within the file input.

	GBIN is a JSO sequence of strings which contain zipped JSOs, which
	supposedly was done this way to enable incremental reading.

	That sounds good, but javaobj doesn't support incremental reading from
	JSOs.  Hence, we do a little bit of parsing of our own to fiddle out the
	strings.

	input must be a file-like object opened for reading bytes.  Each chunk
	is expected to consist of the four-byte JSO magic, 19 bytes that are
	skipped, a big-endian signed 32-bit payload length, and the payload
	itself.  Iteration ends when input is exhausted at a chunk boundary.

	An api.ReportableError is raised if a chunk does not start with the
	JSO magic, if the length is negative, or if input ends in the middle
	of a chunk.
	"""
	def readExactly(nBytes, what):
		# A short read means the file is truncated; fail right here rather
		# than hand a partial chunk to the caller.
		data = input.read(nBytes)
		if len(data)!=nBytes:
			raise api.ReportableError("Truncated GBIN: expected %d bytes of %s,"
				" got %d"%(nBytes, what, len(data)))
		return data

	# read jso header (and verify a bit)
	while True:
		nextMagic = input.read(4)
		if not nextMagic:
			# clean EOF at a chunk boundary
			return
		elif nextMagic!=b"\xac\xed\x00\x05":
			raise api.ReportableError("Bad GBIN magic: %s"%repr(nextMagic))

		# Skip JSO junk; presumably this is the class description of the
		# byte array holding the zip data -- TODO confirm.
		readExactly(19, "JSO header")

		nBytes = struct.unpack("!i", readExactly(4, "length field"))[0]
		if nBytes<0:
			# read() with a negative size would slurp the rest of the file.
			raise api.ReportableError("Bad GBIN chunk length: %d"%nBytes)
		yield readExactly(nBytes, "payload")


if __name__=="__main__":
	# Smoke test: walk through all zip chunks of the gbin file named in the
	# first command line argument and decompress their single member.
	# Nothing is printed unless something is wrong.
	with open(sys.argv[1], "rb") as f:  # gbin is binary data
		for zipstream in iterContainedFiles(f):
			try:
				zf = zipfile.ZipFile(file=StringIO(zipstream))
			except zipfile.BadZipfile as ex:
				sys.stderr.write(
					"Skipping chunk that is not a zip archive: %s\n"%ex)
				continue
			names = zf.namelist()
			if len(names)!=1:
				sys.exit("Expected exactly one member per zip chunk, got %d"%
					len(names))
			# Reading decompresses the member.  To inspect the deserialized
			# objects, pass data to javaobj.loads.
			data = zf.open(names[0]).read()
