"""
A module for generating natural-language expressions from UCD strings.

This is code salvaged from the old, abandoned ucdresolve module,

https://svn.ari.uni-heidelberg.de/svn/gavo/ucdresolve/trunk

The API simply is the explain(ucd) function below.
"""


import os
import re

from gavo import api

# The "ucds" directory below the configured inputsDir; the word list read
# at the bottom of this module is located relative to it.
RESDIR = os.path.join(api.getConfig("inputsDir"), "ucds")
# When true, TransformationRule.action prints every rule it performs.
DEBUG = False


class InvalidUCD(Exception):
	"""signals that a composed UCD uses a word that is unknown or that
	stands in a position it must not occupy.
	"""


############### Ex-ivoucd: A class for representing UCDs.

class IvoaUCD(object):
	"""A model for an IVOA-defined single UCD word.

	Instances have the UCD string itself (ucd), the lowercased word tokens
	of the description (tokens), and a "role" attribute that can be
	P, S, Q, E, C, or V according to the [UCDs].

	Also, these might be used for the generation of explanations, so a
	human-digestible explanation is also given (description).

	Instances made through fromUCDLiteral may hold a composed UCD, i.e.,
	several words joined by semicolons; they have neither role nor
	description.
	"""
	role = None
	description = ""

	def __init__(self, tokens, ucd, role, description):
		self.tokens = list(tokens)
		self.ucd = ucd
		self.role = role
		self.description = description

	def __iter__(self):
		# iterating over a UCD yields its description tokens
		return iter(self.tokens)

	def __repr__(self):
		return "(%s)"%(self.ucd)

	def getSingleWords(self):
		"""returns a tuple of words making up a composed word.

		The words are the semicolon-separated parts of self.ucd with
		surrounding whitespace removed.  For instances without a ucd, an
		empty tuple is returned, so the result can always be iterated over.
		"""
		if not self.ucd:
			return ()
		return tuple(s.strip() for s in self.ucd.split(";"))

	@classmethod
	def fromUCDLiteral(cls, ucd):
		"""constructs an instance having just the ucd string set.
		"""
		return cls([], ucd, None, None)

	@classmethod
	def fromIVOALine(cls, iLine):
		"""constructs a ucd from the vertical-bar separated format given in [UCDs].

		iLine must consist of role, UCD word, and description, in that order.
		A ValueError is raised if there are fewer than three fields.
		"""
		try:
			# maxsplit=2 yields at most three fields, so vertical bars within
			# the description remain part of the description.
			role, ucd, description = [s.strip() for s in iLine.split("|", 2)]
		except ValueError:
			raise ValueError("Invalid input for IvoaUCD: %s"%repr(iLine))

		tokens = re.findall(r"\w+", description.lower())

		return cls(tokens, ucd, role, description)

	@classmethod
	def fromIVOAUCD(cls, ivoaUCD):
		"""constructs a copy of ivoaUCD having a token list of its own.
		"""
		return cls(tokens=ivoaUCD.tokens[:], ucd=ivoaUCD.ucd,
			role=ivoaUCD.role, description=ivoaUCD.description)

	def _getSelfPositions(self, splitUCD):
		"""returns a set of positions of self's ucd in splitUCD.
		"""
		return set(ind for ind, ucd in enumerate(splitUCD) if ucd==self.ucd)

	def _validate_P(self, splitUCD):
		# primary words must occur exactly once, and then at index 0
		pos = self._getSelfPositions(splitUCD)
		if len(pos)!=1 or pos.pop()!=0:
			raise InvalidUCD(
				"Primary word not in primary position: %s"%repr(splitUCD))

	def _validate_S(self, splitUCD):
		# secondary words must not occur at index 0
		pos = self._getSelfPositions(splitUCD)
		if 0 in pos:
			raise InvalidUCD(
				"Secondary word in primary position: %s"%repr(splitUCD))

	def _validate_Q(self, splitUCD):
		# Nothing to be done here
		pass

	def _validate_C(self, splitUCD):
		# no checks implemented for this role
		pass

	def _validate_V(self, splitUCD):
		# no checks implemented for this role
		pass

	def _validate_E(self, splitUCD):
		# no checks implemented for this role
		pass

	def validate(self, splitUCD):
		"""raises a InvalidUCD exception if self's role in
		splitUCD violates [UCDs] syntactic rules.

		splitUCD is a sequence of single UCD words.  The check is dispatched
		to the _validate_<role> method; roles without such a method lead
		to an AttributeError.
		"""
		getattr(self, "_validate_%s"%self.role)(splitUCD)


############### Ex-nlgener: Rules for description generation

class Cut(Exception):
	"""signals that the rule currently being applied should not be
	applied any further.
	"""


class PrepNode(object):
	"""one node of the binary tree explanations are generated from.

	content is either an IvoaUCD (a phrase) or a string (a preposition or
	similar glue between the children).  left and right are PrepNodes or
	None; assigning a PrepNode to one of them makes this node its parent.
	"""
	def __init__(self, content="", left=None, right=None):
		self.left = left
		self.right = right
		self.content = content
		self.parent = None

	def _adopt(self, node):
		# Becomes the parent of node if node is a PrepNode; anything else
		# is only acceptable if it is false (usually None).  Returns node.
		if isinstance(node, PrepNode):
			node.parent = self
		else:
			assert not node
		return node

	def setLeft(self, node):
		self.__left = self._adopt(node)

	def setRight(self, node):
		self.__right = self._adopt(node)

	def getLeft(self):
		return self.__left

	def getRight(self):
		return self.__right

	left = property(getLeft, setLeft)
	right = property(getRight, setRight)

	def __repr__(self):
		leftRepr = "<node>" if isinstance(self.left, PrepNode) else "<empty>"
		rightRepr = "<node>" if self.right else "<empty>"
		label = self.content
		if isinstance(label, IvoaUCD):
			label = label.ucd
		return "PrepNode(%s)"%(leftRepr+label+rightRepr)

	def isPrep(self):
		"""returns true if this node's content is a string.
		"""
		return isinstance(self.content, str)

	def isUCD(self):
		"""returns true if this node's content is an IvoaUCD.
		"""
		return isinstance(self.content, IvoaUCD)

	def printTree(self, depth=0):
		"""prints the tree below this node in-order, one node per line,
		indented according to the node's depth.
		"""
		below = depth+1
		if self.left:
			self.left.printTree(below)
		print(" "*depth, "."+str(self.content))
		if self.right:
			self.right.printTree(below)

	wantsNPat = re.compile("(?i)[aeiou]")

	def _getPrep(self, flatLeft, flatRight):
		# To be called on preposition nodes only.  Content ending in " a"
		# gets an "n" appended when the text generated to the right starts
		# with a vowel; all other content is returned as is.  flatLeft is
		# not inspected so far.
		if not self.content.endswith(" a"):
			return self.content
		if not self.wantsNPat.match(flatRight):
			return self.content
		return self.content+"n"

	def generate(self):
		"""returns the text for the subtree rooted in this node.

		This is the text of the left child, this node's own text (the
		description for UCD nodes, the possibly adapted content otherwise)
		and the text of the right child, joined by single blanks; empty
		parts are left out.
		"""
		flatLeft = self.left.generate() if self.left else ""
		flatRight = self.right.generate() if self.right else ""
		if self.isUCD():
			middle = self.content.description
		else:
			middle = self._getPrep(flatLeft, flatRight)
		return " ".join(filter(None, (flatLeft, middle, flatRight)))


class TransformationRule(object):
	"""a rule to transform a tree of PrepNodes in place.

	Rules currently are applied to entire trees.  To build concrete
	rules, fill out the visit method.  The visit method may return "cut"
	to cut the traversal on this branch.  It may also return a node
	to continue traversal on (which is usually necessary when you
	changed to topology of the tree).  Returning None continues the
	traversal below the node visited.

	apply traverses the tree in preorder, such that you get to change
	nodes before anything below changes.

	Concrete rules usually keep a callable in self._action; the action
	method calls it.
	"""
	def __str__(self):
		try:
			return "%s -> %s"%(self.__class__.__name__, self._action)
		except AttributeError:
			# rules without an _action are identified by class name only
			return "%s"%(self.__class__.__name__)

	def apply(self, node):
		"""applies this rule to node and, in preorder, everything below it.

		Passing None for node is a no-op, so children can be handed in
		without checking.
		"""
		# Identity tests rather than == keep node classes' notion of
		# equality out of the traversal logic.
		if node is None:
			return
		newNode = self.visit(node)
		if newNode=="cut":
			return
		if newNode is not None:
			node = newNode
		self.apply(node.left)
		self.apply(node.right)

	def visit(self, node):
		"""does whatever the rule does on node; to be overridden.
		"""
		pass

	def action(self, *args):
		"""calls self._action with args and returns its result.

		If the module-level DEBUG is true, the rule is printed first.
		"""
		if DEBUG:
			print(f"Performing {self}")
		return self._action(*args)


class TextMatchingRule(TransformationRule):
	"""a rule firing on UCD nodes the description of which matches a
	regular expression.

	It is constructed with the regular expression (which is searched for,
	i.e., it may match anywhere in the description) and an action
	receiving the matching node.  Whatever the action returns is handed
	on to the traversal, so it may return "cut" to keep the rule from
	descending below the node.
	"""
	def __init__(self, expr, action):
		compiled = re.compile(expr)
		self.expr = compiled
		self._action = action

	def visit(self, node):
		if not node.isUCD():
			return None
		if self.expr.search(node.content.description) is None:
			return None
		return self.action(node)
	

class UCDMatchingRule(TransformationRule):
	"""a rule firing on nodes depending on what their immediate children
	carry.

	leftPattern and rightPattern are compared against the start of the
	UCDs in the left and right child, respectively; rightText must equal
	the content of the right child if that is a preposition node.
	Criteria left at None are not checked.

	On a match, the action is called with the node and the UCDs of the
	left and right child.  These are empty strings for children that are
	missing or do not carry UCDs.  Without an explicit action, matching
	nodes just cut the traversal.
	"""
	def __init__(self, leftPattern=None, rightPattern=None, rightText=None,
			action=lambda *args: "cut"):
		self.leftPattern = leftPattern
		self.rightPattern = rightPattern
		self._action = action
		self.rightText = rightText

	def __str__(self):
		return "Match UCD(%s, %s)"%(self.leftPattern, self.rightPattern)

	def visit(self, node):
		leftUCD = rightUCD = rightText = ""

		leftChild = node.left
		if leftChild and leftChild.isUCD():
			leftUCD = leftChild.content.ucd

		rightChild = node.right
		if rightChild:
			if rightChild.isUCD():
				rightUCD = rightChild.content.ucd
			if rightChild.isPrep():
				rightText = rightChild.content

		# Each criterion that is given must hold, or the rule does not fire.
		if (self.leftPattern is not None
				and not leftUCD.startswith(self.leftPattern)):
			return None
		if (self.rightPattern is not None
				and not rightUCD.startswith(self.rightPattern)):
			return None
		if self.rightText is not None and rightText!=self.rightText:
			return None
		return self.action(node, leftUCD, rightUCD)


class PrepHasEmptyChildRule(TransformationRule):
	"""a rule firing on preposition nodes lacking a left or a right child.

	The action is called with the node; its return value is ignored, so
	the traversal always continues.
	"""
	def __init__(self, action):
		self._action = action

	def visit(self, node):
		if not node.isPrep():
			return
		if node.right and node.left:
			# both constituents are there, nothing to do
			return
		self.action(node)


class CoordinateConstRule(TransformationRule):
	"""a rule marking coordination of phrases.

	It fires on a preposition node if the UCD in its left child and the
	UCD in the left child of its right child (which must be a preposition
	node, too) share a prefix starting with one of coordinatePrefixes.
	As this only looks at UCD prefixes, this rule should probably be
	applied conditionally only.

	The rule fires on the first constituent; the action is called with
	that node, and its return value is ignored.
	"""
	def __init__(self, action):
		self._action = action

	coordinatePrefixes = {"em."}

	def visit(self, node):
		if not node.isPrep():
			return
		follower = node.right
		if not follower or not follower.isPrep():
			return

		thisUCD = rightUCD = None
		first = node.left
		if first and first.isUCD():
			thisUCD = first.content.ucd
		second = follower.left
		if second and second.isUCD():
			rightUCD = second.content.ucd
		if not thisUCD or not rightUCD:
			return

		shared = os.path.commonprefix([thisUCD, rightUCD])
		if any(shared.startswith(p) for p in self.coordinatePrefixes):
			self.action(node)

	
class LowerCaseRule(TransformationRule):
	"""a rule that lowercases the first character of the description of
	every UCD node.

	The remainder of the descriptions is left alone, so capitals further
	into the text are kept.
	"""
	def visit(self, node):
		if node.isUCD():
			description = node.content.description
			# Slicing (rather than indexing) the first character keeps
			# empty descriptions from raising an IndexError.
			node.content.description = (
				description[:1].lower()+description[1:])


class FirstUpperCaseRule(TransformationRule):
	"""a rule that uppercases the first character of the leftmost node's
	text.

	The leftmost node is found by following left children from the node
	apply is called with.  For UCD nodes, the description is changed, for
	all others the content itself.
	"""
	@staticmethod
	def _upperFirst(text):
		# str.capitalize is not used here because it also lowercases the
		# remainder of text, which would ruin acronyms, units, and the like.
		return text[:1].upper()+text[1:]

	def apply(self, node):
		if node is None:
			return
		if node.left:
			self.apply(node.left)
		elif node.isUCD():
			node.content.description = self._upperFirst(
				node.content.description)
		else:
			node.content = self._upperFirst(node.content)


def _generifyDescription(part, prep):
# remove part in UCD descriptions and set parent's content to prep
	def action(node, *args):
		node.content.description = node.content.description[:-len(part)]
		if node.parent:
			node.parent.content = prep
	return action
		
def _modifyFirstPartOfDescr(part):
# remove first part in UCD descriptions and set parent's content to ""
# because no preposition is needed
	def action(node, *args):
		node.content.description = node.content.description[len(part):]
		if node.parent:
			node.parent.content = ""
	return action


def _firstIsParticipleAction(node):
	"""empties the content of the root of the tree node is in.

	This removes the preposition when what follows it starts with the
	participle "related".
	"""
	root = getRoot(node)
	root.content = ""


def _twoQuantitiesAction(node):
# changes "two quantities" into "two  values":
	node.content.description = node.content.description[:node.content.description.find("quantities")] + "values"


def _swapAndInAction(node, leftUCD, rightUCD):
# swap left and right, make node's preposition "in the"
	node.left, node.right = node.right, node.left
	node.content = "in the"


def _swapParentAndOfAction(node, leftUCD, rightUCD):
# swap parent's left an right nodes and make it an "of the" node
	if not node.parent:
		return
	node.parent.right, node.parent.left = node.parent.left, node.parent.right
	node.parent.content = "of the"


def _swapParentAndInAction(node, leftUCD, rightUCD):
# swap parent's left an right nodes and make it an "in" node
	if not node.parent:
		return
	node.parent.right, node.parent.left = node.parent.left, node.parent.right
	node.parent.content = "in"
	

def _swapParentAndAboutAction(node, leftUCD, rightUCD):
# swap parent's left an right nodes and make it an "about" node
	if not node.parent:
		return
	node.parent.right, node.parent.left = node.parent.left, node.parent.right
	node.parent.content = "about"


def _setFirstSetPrepAction(prep):
	"""returns an action that makes the node it is called with the root
	of its tree and sets the node's content to prep.

	For nodes that already are the root, only the content is set.  In all
	other cases, the action raises Cut after restructuring the tree.
	"""
	def newRootAction(node, *args):
		if not node.parent:
			# We're already at root
			node.content = prep
			return
		# Take node out of the tree: its right subtree takes its place
		# below the parent.
		if node.parent.right==node:
			node.parent.right = node.right
		else:
			node.parent.left = node.right
		# The previous root becomes node's right subtree.  This has to
		# happen before node.parent is cleared, as getRoot follows the
		# parent links.
		node.right = getRoot(node)			
		node.parent = None
		node.content = prep
		# The tree has a new shape now, so the traversal is abandoned.
		raise Cut("Stop application of rule")
	return newRootAction


def _setLastSetPrepAction(node):
# sets node as the right most node in the current subtree; we assert
# node is the right child of parent
	if not node.right:  # we already are rightmost
			return
# If we're not the right child of parent, refuse
	assert node.parent.right==node
	assert node.parent
# Find new parent node (current rightmost downtree)...
	newParent = node.right
	while newParent.right:
		newParent = newParent.right
# ...and fix the tree.
	node.parent.right = node.right
	node.right = None
	newParent.right = node
	newParent.right


def _setPrepAction(prep):
	def setPrep(node, *args):
		node.content = prep
	return setPrep


def _setPrepInParentAction(prep):
	def setPrep(node, *args):
		if node.parent:
			node.parent.content = prep
	return setPrep


def _swapSetPrepAction(prep):
	def swapAndPrepAction(node, *args):
		node.left, node.right = node.right, node.left
		node.content = prep
	return swapAndPrepAction


def _setConjunctionAction(prep):
	def setConj(node, *args):
		node.content = prep
	return setConj


def _betweenAndAction(node, *args):
# is an action for two-argument heads like phot.color
	if not node.right.left: # Make sure there's a second argument.
		return
	node.content = "between the"
	node.right.content = "and the"


def applyToRightChild(rule):
	"""returns an action applying rule to the right child of the node the
	action is called with.

	Arguments beyond the node are ignored.
	"""
	def action(node, *args):
		target = node.right
		rule.apply(target)
	return action


def getRoot(node):
	"""returns the root of the tree node is in.

	That is the node reached by following parent links from node until
	there is no parent any more.
	"""
	current = node
	while True:
		above = current.parent
		if not above:
			return current
		current = above
	

# The rules WordDirectory.explain applies, one after the other and in this
# order, to the tree built by _buildTreeFromUCD.
_rules = [
	# Cut generic tails off descriptions and move a matching preposition
	# into the parent node.
	TextMatchingRule(" of something",
		_generifyDescription(" of something", "of")),
	TextMatchingRule("two quantities", _twoQuantitiesAction),
	TextMatchingRule(" of a generic event or phenomenon",
		_generifyDescription(" of a generic event or phenomenon", "of")),
	TextMatchingRule(" related to a generic event",
		_generifyDescription(" related to a generic event (epoch, date, julian"
			" date, time stamp/tag,...)", "related to")),

	# Choose prepositions by the start of the UCD in a node's left child;
	# the _setFirstSetPrepAction rules also move the matching node to the
	# root of the tree.
	UCDMatchingRule(leftPattern="src.", action=_setPrepAction("in")),	

	UCDMatchingRule(leftPattern="instr.", action=_setPrepInParentAction("of")),	
			
	UCDMatchingRule(leftPattern="em.", action=_setPrepInParentAction("in the")),
	UCDMatchingRule(leftPattern="pos.", action=_setPrepAction("in")),	
	UCDMatchingRule(leftPattern="phys.", action=_setFirstSetPrepAction("of")),
	UCDMatchingRule(leftPattern="phys.", action=_setPrepAction("in")),
	
	UCDMatchingRule(leftPattern="arith.", action=_setFirstSetPrepAction("of")),
	UCDMatchingRule(leftPattern="time", action=_setFirstSetPrepAction("of")),
	UCDMatchingRule(leftPattern="stat.", action=_setFirstSetPrepAction("of")),	
	UCDMatchingRule(leftPattern="meta.id", action=_setFirstSetPrepAction("of a")),
	UCDMatchingRule(leftPattern="meta.main", action=_setFirstSetPrepAction("of")),
	UCDMatchingRule(leftPattern="meta.note", action=_setFirstSetPrepAction("on")),
	UCDMatchingRule(leftPattern="meta.code",
		action=_setPrepAction("describing a")),
	UCDMatchingRule(leftPattern="meta.modelled",
		action=_swapSetPrepAction("")),
	UCDMatchingRule(leftPattern="meta.ref.ivoid",
		action=_setPrepAction("for an")),
		
	TextMatchingRule("Quantity was produced by a model",
		_modifyFirstPartOfDescr("Quantity")),
	UCDMatchingRule(leftPattern="phot.color",
		action=_betweenAndAction),

	# Descriptions starting with "related" get the root's content emptied.
	TextMatchingRule("^[rR]elated", _firstIsParticipleAction),	
	
	# Empty the content of preposition nodes lacking a child.
	PrepHasEmptyChildRule(_setPrepAction("")),

	# Fix capitalisation last: lowercase the first character of all
	# descriptions, then uppercase the start of the leftmost node's text.
	LowerCaseRule(),
	FirstUpperCaseRule(),
]

def _buildTreeFromUCD(ucd, wordDir):
	"""returns a first rough tree of PrepNodes from ucd.

	The strategy is to build a completely skewed tree: The root is a node
	with empty content having the first word of ucd as its left child.
	Each further word gets a node of the same kind, which becomes the
	right child of the node made for the word before it.  Anything else
	must be done through rules.

	ucd is a (possibly composed) IvoaUCD, wordDir the WordDirectory the
	single words are looked up in.  InvalidUCD is raised for ucds without
	any words and, by wordDir.get, for words wordDir does not know.
	"""
	words = ucd.getSingleWords()
	if not words:
		raise InvalidUCD("Cannot build a tree for an empty UCD")

	root = cur = None
	for word in words:
		new = PrepNode()
		# Nodes get a copy of the directory's entry, since rules change
		# descriptions in place.
		new.left = PrepNode(
			IvoaUCD.fromIVOAUCD(wordDir.get(word)))
		if cur is None:
			root = new
		else:
			cur.right = new
		cur = new
	return root


####################### Ex-ivoawords: A representation of the official UCD list

class WordDirectory(object):
	"""A collection of IvoaUCDs.

	The feed input is hardcoded to be in the format of [UCDs].

	Words are collected in self.ucds through feedItem.  Once all items
	are in, notifyDataComplete builds self.ucdIndex, a mapping from
	lowercased UCD words to IvoaUCD instances; get and validate work
	on that index.
	"""
	def __init__(self):
		self.ucds = []
		# (Re-)built by notifyDataComplete.  It is created here already so
		# get and validate report unknown words rather than failing with an
		# AttributeError on directories not finalized yet.
		self.ucdIndex = {}

	def __iter__(self):
		return iter(self.ucds)

	def feedItem(self, rawItem):
		"""adds the word defined by the input line rawItem.

		Lines starting with a hash and lines only containing whitespace
		are ignored.
		"""
		if rawItem.startswith('#') or not rawItem.strip():
			return
		self.ucds.append(IvoaUCD.fromIVOALine(rawItem))

	@classmethod
	def fromSource(cls, input, *consArgs, **consKw):
		"""returns a finalized corpus from all items of the sequence input.

		consArgs and consKw are passed on to the constructor.
		"""
		corpus = cls(*consArgs, **consKw)
		for raw in input:
			corpus.feedItem(raw)
		corpus.notifyDataComplete()
		return corpus

	@classmethod
	def fromFile(cls, srcName):
		"""returns a finalized corpus from the input of a file.

		Each line of the file named srcName is fed as an item.
		"""
		with open(srcName) as f:
			return cls.fromSource(f)

	def notifyDataComplete(self):
		"""builds the index from lowercased UCD words to the words fed so
		far.

		For words fed more than once, the last definition wins.
		"""
		self.ucdIndex = {ucd.ucd.lower(): ucd for ucd in self}

	def explain(self, ucd):
		"""returns a string with an explanation of ucd.

		ucd can be an IvoaUCD or a UCD literal, i.e., a string consisting
		of one or more semicolon-separated words.  An InvalidUCD is raised
		by get if a word is not in this directory.
		"""
		if not isinstance(ucd, IvoaUCD):
			ucd = IvoaUCD.fromUCDLiteral(ucd)

		tree = _buildTreeFromUCD(ucd, self)
		for r in _rules:
			try:
				r.apply(tree)
				# rules may have put a different node on top
				tree = getRoot(tree)
			except Cut:
				# NOTE(review): when a rule raises Cut, the re-rooting above
				# is skipped, so the next rule still starts from the previous
				# root.  Confirm that this is intended before changing it;
				# the generated texts depend on it.
				pass
		return tree.generate()

	def validate(self, ucd):
		"""raises a InvalidUCD exception if ucd is syntactically
		invalid.

		ucd is an IvoaUCD.  Each of its single words must be known to this
		directory and must pass the check for its role.
		"""
		splitUCD = ucd.getSingleWords()
		for u in splitUCD:
			try:
				ivoaWord = self.ucdIndex[u.lower()]
			except KeyError:
				raise InvalidUCD("Word %s doesn't exist in %s"%(
					u, splitUCD))
			ivoaWord.validate(splitUCD)

	def get(self, literal):
		"""returns the IvoaUCD instance belonging to literal or raises
		an InvalidUCD error.

		The lookup ignores case.
		"""
		try:
			return self.ucdIndex[literal.lower()]
		except KeyError:
			raise InvalidUCD("%s is not an IVOA UCD word"%literal)


####################### Top-Level logic: Prepare cached objects

# The word list is read once, when the module is imported.  explain is the
# bound method of that shared directory and makes up the module's API.
_WD = WordDirectory.fromFile(
	os.path.join(RESDIR, "res", "ivoa_ucd.txt"))
explain = _WD.explain


if __name__=="__main__":
	# a quick manual check when this is run as a script
	print(explain("phot.flux;stat.interval;em.gamma.hard"))