Source code for ecstasy.parser

# -*- coding: utf-8 -*-

"""
The heart of the ecstasy package, containing the main *Parser* class.
"""

import re
import warnings
import collections

import ecstasy.flags as flags
import ecstasy.errors as errors

def beautify(string, *args, **kwargs):
	"""
	Beautify a string in a single call.

	Builds a Parser from the supplied styles and immediately runs
	its beautify() method on the string.

	Arguments:
		string (str): The string to beautify with ecstasy.
		args (list): The positional arguments.
		kwargs (dict): The keyword ('always') arguments.

	Returns:
		Whatever Parser.beautify() returns for the string.
	"""

	return Parser(args, kwargs).beautify(string)

class Phrase(object):
	"""
	Class describing a single parsed phrase.

	When a string is parsed in ecstasy, specially-marked regions of
	text are taken note of and converted into Phrase objects, which
	are later used to replace the parsed strings (including any tags
	or arguments) with the string itself as well as the formatting
	codes specified by the arguments passed to Parser.beautify(), which
	are then interpreted by the command line.

	Attributes:
		string (str): The text of the phrase (between opening and closing tags).
		opening (int): The index of the opening tag.
		closing (int): The index of the closing tag.
		style (int): The formatting/style flag-combination of the phrase.
		arguments (list): The positional-argument indices given in the
			phrase's argument sequence.
		nested (list): A list of nested Phrase objects (children).
		override (bool): The phrase's override specification.
		increment (bool): The phrase's increment specification.
	"""

	def __init__(self,
				 opening=None,
				 closing=None,
				 string="",
				 style=0,
				 args=None,
				 nested=None,
				 override=False,
				 increment=False):

		self.string = string

		self.opening = opening
		self.closing = closing

		self.style = style

		# Fresh lists per instance, never a shared default
		self.arguments = args if args else []

		self.nested = nested if nested else []

		self.override = override

		self.increment = increment

	def __str__(self):
		return self.string

	def __repr__(self):
		return ("Phrase(opening={0!r}, closing={1!r}, string={2!r}, "
				"style={3!r}, args={4!r}, nested={5!r}, override={6!r}, "
				"increment={7!r})".format(self.opening,
										  self.closing,
										  self.string,
										  self.style,
										  self.arguments,
										  self.nested,
										  self.override,
										  self.increment))

	def __eq__(self, other):
		# Let Python fall back to its default handling for foreign types
		# instead of failing with an AttributeError on a missing attribute.
		if not isinstance(other, Phrase):
			return NotImplemented

		return (self.string == other.string and
				self.opening == other.opening and
				self.closing == other.closing and
				self.style == other.style and
				self.arguments == other.arguments and
				self.nested == other.nested and
				self.override == other.override and
				self.increment == other.increment)

	def __ne__(self, other):
		# Python 2 does not derive != from ==, so spell it out.
		result = self.__eq__(other)

		if result is NotImplemented:
			return result

		return not result

[docs]class Parser(object):
	"""
	Handles parsing and beautification of a string.

	This is the main class of the entire ecstasy package. It is
	initialized with a set of positional and keyword arguments that
	determine which styles (flag-combinations) are used for which
	phrases (tag-marked regions of text) found during parsing. Its
	beautify() method is then used to beautify a string according
	to the arguments passed to the constructor.

	Note:
		From the outside, the package-level beautify() method should
		handle the construction and beautify()-call process all-in-one
		(for convenience).

	Attributes:
		always: The dictionary of 'always' (keyword) arguments.
		positional: The list of positional arguments.
		meta: A compiled regex matching any meta character
			  (opening/closing tags and parentheses).
		arguments: A compiled regex matching well-formed phrase arguments.
		counter: A counter for positional arguments.
	"""

	def __init__(self, args, kwargs):

		"""
		Initializes a Parser instance.

		Arguments:
			args (list): The positional arguments.
			kwargs (dict): The 'always' (keyword) arguments.
		"""

		self.always = kwargs

		self.positional = self.get_flags(args) if args else []

		self.meta = re.compile(r"[()<>]")

		self.arguments = re.compile(r"^(-?\d,?)+!?$|"
			 		 			    r"^!?(-?\d,?)+$|"
			 					    r"^(!\+?|\+!?)$")

		# Used in self.stringify to auto-increment
		# positional argument positions
		self.counter = 0

[docs]	def get_flags(self, args):

		"""
		Checks and retrieves positional and 'always' (keyword) flags
		from the many ways in which they may be passed to the
		constructor (or the beautify() method on package-level).

		Positional arguments can be passed either:

		* Individually, where each flag-combination is one positional argument.
		* Packaged inside a list, which is then expanded. There can be
		  multiple of such lists passed as arguments because it facilitates
		  interaction with the ecastasy module (one may want to organize
		  and update styles in certain ways depending on one's program), but
		  each list will be expanded and all flag-combinations found inside
		  each list will be interpreted as a single style argument, as if it
		  had been passed in the way desribed above (individually).

		'Always' arguments can be passed either:

		* Individually, with keyword-argument syntax, i.e. <word>=<style>
		* In a dictionary, which is expanded exactly like positional
		  arguments passed in lists (i.e. each key/value pair in the
		  dictionary is interpreted as if it had been passed individually,
		  as key=value to the constructor/the external beautify() method).

		Note:
			self.always is set equal to the keyword arguments passed to the
			constructor and then modified directly (when 'always'-arguments
			are found), while the positional arguments are put into a list
			here and returned (i.e. no interaction with self.positional).

		Arguments:
			args (list): The positional arguments passed to the constructor.

		Returns:
			The positional arguments.

		Raises:
			errors.FlagError: If an invalid (out-of-range)
							  flag combination was passed.

			errors.EcstasyError: If one of the arguments is of invalid type.

		"""

		positional = []

		for argument in args:
			# A flag is an instance of a subclass of
			# flags.Flags if it was passed alone
			if isinstance(argument, flags.Flags):
				positional.append(argument)

			# or is an integer if it was (bitwise) OR'd
			# with another flag (a "flag combination")
			elif isinstance(argument, int):
				if argument < 0 or argument >= flags.LIMIT:
					raise errors.FlagError("Flag value '{0}' is out of range "
										   "!".format(argument))
				positional.append(argument)

			# Dictionaries store 'always'-arguments
			elif isinstance(argument, dict):
				for key, value in argument.items():
					# Simple 'always'-argument where one string
					# is mapped to one formatting flag-combination
					if isinstance(key, str):
						self.always[key] = value

					# Complex 'always'-argument with a
					# tuple containing strings, each with the same
					# flag-combination (same value)
					elif isinstance(key, tuple):
						for i in key:
							self.always[i] = value
					else:
						raise errors.EcstasyError("Key '{0}' in dictionary "
												  "argument passed is neither "
												  "a string nor a tuple "
												  "of strings!".format(key))

			elif isinstance(argument, collections.Iterable):
				positional += self.get_flags(argument)

			else:
				raise errors.EcstasyError("Argument '{0}' is neither a flag, a "
										  "(bitwise) OR'd flag-combination, a "
										  "dictionary nor an  iterable of "
										  "positional arguments "
										  "!".format(argument))

		return positional

[docs]	def beautify(self, string):
		"""
		Wraps together all actions needed to beautify a string, i.e.
		parse the string and then stringify the phrases (replace tags
		with formatting codes).

		Arguments:
			string (str): The string to beautify/parse.

		Returns:
			The parsed, stringified and ultimately beautified string.

		Raises:
			errors.ArgumentError if phrases were found, but not a single style
			(flag combination) was supplied.
		"""

		if not string:
			return string

		# string may differ because of escaped characters
		string, phrases = self.parse(string)

		if not phrases:
			return string

		if not self.positional and not self.always:
			raise errors.ArgumentError("Found phrases, but no styles "
									   "were supplied!")

		return self.stringify(string, phrases)

[docs]	def parse(self, string, root=None):

		"""
		Parses a string to handle escaped tags and retrieve phrases.

		This method works recursively to parse nested tags. When escaped
		tags are found, those are removed from the string. Also argument
		sequences are removed from the string. The string returned can
		thus be quite different from the string passed.

		Arguments:
			string (str): The string to parse.
			root (Phrase): If in a recursive call, the root/parent phrase.

		Returns:
			For one, the escaped string (without escape characters and
			phrase arguments). For the other, it depends on the stack-depth.
			If this is the lowest recursion depth/level (i.e. the stack
			call resulting from the first function call in self.beautify()),
			it will return a list of phrases. For higher stack levels (
			i.e. resulting from recursive function calls from with
			self.parse(), for nested phrases), it returns exactly one
			Phrase instance.

		Raises:
			errors.ParseError: If no closing tag could be
							   found for an opening tag.
		"""

		phrases = []

		meta = self.meta.search(string)

		while meta:

			# Save some function calls
			pos = meta.start()

			if meta.group() == "<":
				string, child, meta = self.open_phrase(string, pos)

				if child and root:
					root.nested.append(child)
				elif child:
					phrases.append(child)

				# else it was escaped (+ new meta)
				continue

			elif root:

				if meta.group() == "(":
					meta = self.meta.search(string, pos + 1)
					if meta.group() == ")":
						string, root, meta = self.handle_arguments(string,
																   root,
																   pos,
																   meta.start())
						continue

				elif meta.group() == ">":
					string, phrase, meta = self.close_phrase(string,
															 root,
															 pos)
					if phrase:
						return string, phrase

					# else was escaped (+ new meta)
					continue

			string, meta = self.escape_meta(string, pos)

		if not root:
			return string, phrases

		# If this is not the first stack-depth the function should
		# have returned upon finding a closing tag,
		# i.e. we should never have gotten here.
		word = re.search(r"([\w\s]+)(?![\d]*>[\w\s]+>)", string)

		what = "No closing tag found for opening tag"

		if word:
			what += " after expression '{0}'".format(word.group())

		raise errors.ParseError(what + "!")

[docs]	def escape_meta(self, string, pos):

		"""
		Checks if a meta character is escaped or else warns about it.

		If the meta character has an escape character ('\') preceding it,
		the meta character is escaped. If it does not, a warning is emitted
		that the user should escape it.

		Arguments:
			string (str): The relevant string in which the character was found.
			pos (int): The index of the meta character within the string.

		Returns:
			The possibly escaped string and the next meta match.
		"""

		# Replace escape character
		if pos > 0 and string[pos - 1] == "\\":
			string = string[:pos - 1] + string[pos:]
		else:
			warnings.warn("Un-escaped meta-character: '{0}' (Escape"
						  " it with a '\\')".format(string[pos]),
						  Warning)
			pos += 1

		meta = self.meta.search(string, pos)

		return string, meta


[docs]	def open_phrase(self, string, pos):

		"""
		Helper function of self.parse() handling opening tags.

		Arguments:
			string (str): The string being parsed.
			pos (int): The index/position of the opening tag in the string.

		Returns:
			The (possibly) escaped string, a child phrase if the opening tag
			was not escaped and otherwise None, and a new tag match, either
			starting at one index passed the escaped tag or one index passed
			the closing tag of the child.
		"""

		# Check for escaping
		if string[pos - 1] == "\\":
			# Remove the escape character
			string = string[:pos - 1] + string[pos:]

			# When removing the escape character, the
			# pos tag index is pushed one back
			pos -= 1

			# If the escape character was not itself (double)
			# escaped we can look for the next tag
			if pos == 0 or string[pos - 1] != "\\":
				tag = self.meta.search(string, pos + 1)

				return string, None, tag

		child = Phrase(pos)

		escaped, child = self.parse(string[pos + 1:], child)

		string = string[:pos + 1] + escaped

		tag = self.meta.search(string, child.closing + 1)

		return string, child, tag

[docs]	def close_phrase(self, string, root, pos):

		"""
		Helper function of self.parse() handling closing tags.

		Arguments:
			string (str): The string being parsed.
			root (Phrase): The current root phrase.
			pos (int): The index/position of the closing tag in the string.

		Returns:
			Always the (possibly) escaped string, then either the fully
			formed phrase if the closing tag was not escaped (with its
			'closing' and 'string' attributes set) and otherwise None,
			and lastly the next tag if the closing tag was indeed escaped
			and otherwise None -- i.e. either the tuple
			(string, phrase, None) or (string, None, tag).
		"""

		# Whatever is between the opening tag and this closing tag
		substring = string[:pos]

		# Escape-character to escape the closing tag (/>)
		if substring.endswith("\\"):

			# Get rid of the escape character either way
			string = string[:pos - 1] + string[pos:]

			# Check if not double-escaped
			if not substring[:-1].endswith("\\"):
				# pos is now one index passed the closing tag
				tag = self.meta.search(string, pos)

				return string, None, tag

			# Double-escape means this is really supposed to be a
			# closing tag and thus we can return the phrase.
			else:
				# The closing position should be in the same scope
				# as the scope of the opening position (scope in
				# the sense of to which phrase the positions are
				# relative to). -1 due to the escaped character but
				# + 1 because index 0 is phrase.opening + 1
				root.closing = root.opening + pos
				root.string = string[:pos - 1]
		else:
			root.closing = root.opening + 1 + pos
			root.string = string[:pos]

		return string, root, None


[docs]	def handle_arguments(self, string, root, opening, closing):

		"""
		Handles phrase-arguments.

		Sets the override and increment flags if found. Also makes
		sure that the argument sequence is at the start of the phrase
		and else warns about the unescaped meta characters. If the
		arguments are indeed at the start but do not match the arguments
		regular expression, an error is raised.

		Arguments:
			string (str): The string being parsed.
			root (str): The current root phrase.
			opening (int): The index of the opening paranthese.
			closing (int): The index of the closing paranthese.

		Returns:
			The (possibly escaped) string, the root phrase (if no escaping,
			then with arguments and flags) and the next meta match.

		Raises:
			errors.ParseError: If the arguments are invalid.
		"""

		# The actual argument string (ignore whitespace)
		args = string[opening + 1 : closing].replace(" ", "")

		# The argument sequence must be at the start of the phrase
		# and must match the allowed argument regular expression
		if opening > 0 or not self.arguments.match(args):

			if opening == 0:
				raise errors.ParseError("Invalid argument sequence!")

			# If escape_meta does indeed escape a character and removes
			# a backward slash, the positions 'opening' and 'closing' are no
			# longer valid. escape_meta does a search for the next meta
			# character though, which is then the closing parantheses,
			# so we can use its index value (in the now escaped string)
			string, meta = self.escape_meta(string, opening)
			string, meta = self.escape_meta(string, meta.start())

			return string, root, meta

		if "!" in args:
			root.override = True
			args = args.replace("!", "")

		if "+" in args:
			root.increment = True
			args = args.replace("+", "")

		root.arguments = [int(i) for i in args.split(",") if i]

		# Remove the argument string including parantheses
		string = string[closing + 1:]

		meta = self.meta.search(string)

		return string, root, meta

[docs]	def stringify(self, string, phrases, parent=None):

		"""
		Stringifies phrases.

		After parsing of the string via self.parse(), this method takes the
		escaped string and the list of phrases returned by self.parse() and
		replaces the original phrases (with tags) with the Phrase-objects in
		the list and adds the appropriate flag-combinations as determined by
		the string or the position of the phrase (the string if it's in
		self.always, i.e. an 'always' argument). This method also works
		recursively to handle nested phrases (and resetting of parent-phrase
		styles).

		Arguments:
			string (str): The escaped string returned by self.parse().
			phrases (list): The list of Phrase-objects returned by self.parse().
			parent (Phrase): For recursive calls, the current parent Phrase.

		Returns:
			The finished, beautifully beautified string.

		Raises:
			errors.ArgumentError: If more positional arguments are requested
								  than were supplied.
		"""

		last_tag = 0

		beauty = ""

		for phrase in phrases:

			beauty += string[last_tag : phrase.opening]

			if phrase.string in self.always and not phrase.override:
				phrase.style = self.always[phrase.string]

			if phrase.arguments:
				combination = 0
				for i in phrase.arguments:
					try:
						combination |= self.positional[i]
					except IndexError:
						raise errors.ArgumentError("Positional argument '{0}' "
							 					   "is out of range"
							 					   "!".format(i))

				phrase.style |= combination

			elif (phrase.string not in self.always or
				  phrase.increment or phrase.override):
				try:
					combination = self.positional[self.counter]

					if phrase.increment or not phrase.override:
						self.counter += 1
				except IndexError:
					self.raise_not_enough_arguments(phrase.string)

				phrase.style |= combination

			phrase.style = flags.codify(phrase.style)

			if phrase.nested:
				phrase.string = self.stringify(phrase.string,
											   phrase.nested,
											   phrase)

			# After a nested phrase is over, we reset the style to the
			# parent style, this gives the notion of nested styles.
			reset = parent.style if parent else ""

			# \033[ signifies the start of a command-line escape-sequence
			beauty += "\033[{0}m{1}\033[0;{2}m".format(phrase.style,
													   phrase,
													   reset)
			last_tag = phrase.closing + 1

		beauty += string[last_tag:]

		return beauty

[docs]	def raise_not_enough_arguments(self, string):

		"""
		Raises an errors.ArgumentError if not enough arguments were supplied.

		Takes care of formatting for detailed error messages.

		Arguments:
			string (str): The string of the phrase for which there weren't enough
						  arguments.

		Raises:
			errors.ArgumentError with a detailed error message.
		"""

		requested = errors.number(self.counter + 1)

		number = len(self.positional)

		verb = "was" if number == 1 else "were"

		what = "Requested {} formatting argument for "\
			   "'{}' but only {} {} supplied!"

		what = what.format(requested, string, number, verb)

		raise errors.ArgumentError(what)