"""
A DaCHS grammar to parse simple ODS (open document spreadsheet) files.

It would probably be wiser to use one of the actual open document libraries,
but then we'd depend on them; this thing here just works with the standard
library and DaCHS.

Oh, and disclaimer: I've not actually read the ODS spec.  I just worked from
the openngc file.

Note that we pull all the records into RAM.  For tables that are fun
to change in libreoffice, I suppose that's all right.
"""

from xml.sax import parseString
from zipfile import ZipFile

from gavo.grammars.customgrammar import CustomRowIterator
from gavo.utils import plainxml


class ODSParser(plainxml.StartEndHandler):
	"""A SAX content handler collecting the table rows of an ODS content.xml.

	The first non-empty table row seen is used as the list of column labels.
	Every later row is turned into a dictionary mapping these labels to
	the cell contents; these dictionaries are collected in self.rows.

	The _start_*/_end_* methods are presumably called by
	plainxml.StartEndHandler according to the element name with the
	namespace prefix stripped and dashes turned into underscores -- see
	the base class for the exact protocol.
	"""
	def __init__(self):
		# column labels; None until the header row has been seen
		self.labels = None
		# the parsed data rows, one dictionary each
		self.rows = []
		# row and content are reset whenever a row or cell opens; they
		# are initialised here so cells or paragraphs turning up outside
		# of a row do not fail with an AttributeError.
		self.row = []
		self.content = None
		plainxml.StartEndHandler.__init__(self)

	def _start_table_row(self, name, attribute):
		# start collecting the cells of a new row
		self.row = []

	def _end_table_row(self, name, attributes, content):
		if self.labels:
			# zip stops at the shorter sequence, so cells beyond the last
			# label are dropped, and short rows lack the trailing keys.
			self.rows.append(dict(zip(self.labels, self.row)))
		else:
			# no (non-empty) header seen so far: this row provides the labels
			self.labels = self.row

	def _start_table_cell(self, name, attributes):
		# cells without a paragraph child end up as None
		self.content = None

	def _end_table_cell(self, name, attributes, content):
		# TBD: we're in trouble if libreoffice starts playing games with
		# their namespace mappings; the parser unfortunately doesn't
		# expand the prefixes.
		repeatCount = attributes.get("table:number-columns-repeated")
		if repeatCount:
			# a single element stands for several adjacent identical cells
			self.row.extend([self.content]*int(repeatCount))
		else:
			self.row.append(self.content)

	def _end_p(self, name, attributes, content):
		# remember the paragraph text as the value of the enclosing cell;
		# if a cell has several paragraphs, the last one wins.
		self.content = content


class RowIterator(CustomRowIterator):
	"""A row iterator producing one dictionary per data row of an ODS file.

	self.sourceToken is passed to ZipFile as is, so it presumably is the
	path of the ODS file to parse.
	"""
	def _iterRows(self):
		"""returns a list of row dictionaries parsed from the source's
		content.xml.

		The entire table is parsed into memory before anything is returned.
		"""
		src = ZipFile(self.sourceToken)
		try:
			content = src.read("content.xml")
		finally:
			# release the file handle on the archive even if reading fails
			src.close()

		parser = ODSParser()
		parseString(content, parser)
		return parser.rows
		

def _test():
	"""parses the sample spreadsheet data/NGC.ods as a manual smoke test.

	The path is relative to the current working directory.  The parser
	instance is not kept, so this only shows whether parsing runs through
	without an exception.
	"""
	src = ZipFile("data/NGC.ods")
	try:
		content = src.read("content.xml")
	finally:
		# don't leave the archive open after reading
		src.close()
	parseString(content, ODSParser())


# Run the manual smoke test when this module is executed as a script.
if __name__ == "__main__":
	_test()

