Source code for ecstasy.parser

# -*- coding: utf-8 -*-

"""
The heart of the ecstasy package, containing the main *Parser* class.
"""

import re
import warnings
import collections

import ecstasy.flags as flags
import ecstasy.errors as errors

def beautify(string, *args, **kwargs):
    """
    Convenient one-call interface to the ecstasy package.

    Builds a Parser from the given style arguments and immediately
    uses it to beautify the string.

    Arguments:
        string (str): The string to beautify with ecstasy.
        args (list): The positional arguments.
        kwargs (dict): The keyword ('always') arguments.

    Returns:
        Whatever Parser.beautify() returns for the string.
    """

    return Parser(args, kwargs).beautify(string)
class Phrase(object):
    """
    Class describing a single parsed phrase.

    When a string is parsed in ecstasy, specially-marked regions of text
    are taken note of and converted into Phrase objects, which are later
    used to replace the parsed strings (including any tags or arguments)
    with the string itself as well as the formatting codes specified by
    the arguments passed to Parser.beautify(), which are then interpreted
    by the command line.

    Attributes:
        string (str): The text of the phrase (between opening and
                      closing tags).
        opening (int): The index of the opening tag.
        closing (int): The index of the closing tag.
        style (int): The formatting/style flag-combination of the phrase.
        arguments (list): The phrase arguments (empty list if none given).
        nested (list): A list of nested Phrase objects (children).
        override (bool): The phrase's override specification.
        increment (bool): The phrase's increment specification.
    """

    def __init__(self,
                 opening=None,
                 closing=None,
                 string="",
                 style=0,
                 args=None,
                 nested=None,
                 override=False,
                 increment=False):

        self.string = string

        self.opening = opening
        self.closing = closing

        self.style = style

        # Falsy values (None, empty sequences) are normalized to a fresh
        # list so that instances never share a mutable default.
        self.arguments = args if args else []
        self.nested = nested if nested else []

        self.override = override
        self.increment = increment

    def __str__(self):
        return self.string

    def __repr__(self):
        return ("Phrase(opening={0!r}, closing={1!r}, string={2!r}, "
                "style={3!r}, args={4!r}, nested={5!r}, override={6!r}, "
                "increment={7!r})".format(self.opening,
                                          self.closing,
                                          self.string,
                                          self.style,
                                          self.arguments,
                                          self.nested,
                                          self.override,
                                          self.increment))

    def __eq__(self, other):
        # Comparing against a foreign type must not blow up with an
        # AttributeError; let Python fall back to its default handling.
        if not isinstance(other, Phrase):
            return NotImplemented

        return (self.string == other.string and
                self.opening == other.opening and
                self.closing == other.closing and
                self.style == other.style and
                self.arguments == other.arguments and
                self.nested == other.nested and
                self.override == other.override and
                self.increment == other.increment)

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so spell it out.
        result = self.__eq__(other)

        return result if result is NotImplemented else not result
class Parser(object):
    """
    Handles parsing and beautification of a string.

    This is the main class of the entire ecstasy package. It is
    initialized with a set of positional and keyword arguments that
    determine which styles (flag-combinations) are used for which phrases
    (tag-marked regions of text) found during parsing. Its beautify()
    method is then used to beautify a string according to the arguments
    passed to the constructor.

    Note:
        From the outside, the package-level beautify() function should
        handle the construction and beautify()-call process all-in-one
        (for convenience).

    Attributes:
        always (dict): The 'always' (keyword) arguments, mapping phrase
                       strings to styles.
        positional (list): The positional arguments.
        meta: A compiled regex matching a single meta character, i.e.
              an opening/closing tag or an opening/closing parenthesis.
        arguments: A compiled regex matching well-formed phrase arguments.
        counter (int): A counter for positional arguments.
    """

    def __init__(self, args, kwargs):
        """
        Initializes a Parser instance.

        Arguments:
            args (list): The positional arguments.
            kwargs (dict): The 'always' (keyword) arguments.
        """

        # get_flags() adds 'always'-arguments found inside dictionary
        # arguments to self.always, so work on a copy to avoid writing
        # into a dictionary owned by the caller.
        self.always = dict(kwargs) if kwargs else {}

        self.positional = self.get_flags(args) if args else []

        self.meta = re.compile(r"[()<>]")

        self.arguments = re.compile(r"^(-?\d,?)+!?$|"
                                    r"^!?(-?\d,?)+$|"
                                    r"^(!\+?|\+!?)$")

        # Used in self.stringify to auto-increment
        # positional argument positions
        self.counter = 0

    def get_flags(self, args):
        """
        Checks and retrieves positional and 'always' (keyword) flags
        from the many ways in which they may be passed to the
        constructor (or the beautify() function on package-level).

        Positional arguments can be passed either:

        * Individually, where each flag-combination is one positional
          argument.
        * Packaged inside an iterable (e.g. a list), which is then
          expanded recursively. Each flag-combination found inside is
          interpreted as if it had been passed individually.

        'Always' arguments can be passed either:

        * Individually, with keyword-argument syntax,
          i.e. <word>=<style>
        * In a dictionary, where each key is either a string or a tuple
          of strings that all share the same style (the value).

        Note:
            'Always'-arguments found in dictionaries are written directly
            into self.always, while the positional arguments are
            collected in a list and returned (i.e. no interaction with
            self.positional).

        Arguments:
            args (list): The positional arguments passed to the
                         constructor.

        Returns:
            The positional arguments.

        Raises:
            errors.FlagError: If an invalid (out-of-range)
                              flag combination was passed.

            errors.EcstasyError: If one of the arguments is of
                                 invalid type (this includes strings).
        """

        # collections.Iterable was removed in Python 3.10, the ABCs
        # live in collections.abc (which does not exist on Python 2).
        try:
            from collections.abc import Iterable
        except ImportError:
            from collections import Iterable

        positional = []

        for argument in args:
            # A flag is an instance of a subclass of
            # flags.Flags if it was passed alone
            if isinstance(argument, flags.Flags):
                positional.append(argument)

            # or is an integer if it was (bitwise) OR'd
            # with another flag (a "flag combination")
            elif isinstance(argument, int):
                if argument < 0 or argument >= flags.LIMIT:
                    raise errors.FlagError("Flag value '{0}' is out of range "
                                           "!".format(argument))
                positional.append(argument)

            # Dictionaries store 'always'-arguments
            elif isinstance(argument, dict):
                for key, value in argument.items():
                    # Simple 'always'-argument where one string
                    # is mapped to one formatting flag-combination
                    if isinstance(key, str):
                        self.always[key] = value

                    # Complex 'always'-argument with a
                    # tuple containing strings, each with the same
                    # flag-combination (same value)
                    elif isinstance(key, tuple):
                        for i in key:
                            self.always[i] = value
                    else:
                        raise errors.EcstasyError("Key '{0}' in dictionary "
                                                  "argument passed is neither "
                                                  "a string nor a tuple "
                                                  "of strings!".format(key))

            # Strings are iterable too, but iterating one yields more
            # (one-character) strings, which would recurse forever.
            # They are never valid flags, so they fall through to the error.
            elif (isinstance(argument, Iterable) and
                  not isinstance(argument, (str, bytes))):
                positional += self.get_flags(argument)

            else:
                raise errors.EcstasyError("Argument '{0}' is neither a flag, a "
                                          "(bitwise) OR'd flag-combination, a "
                                          "dictionary nor an iterable of "
                                          "positional arguments "
                                          "!".format(argument))

        return positional

    def beautify(self, string):
        """
        Wraps together all actions needed to beautify a string, i.e.
        parse the string and then stringify the phrases (replace tags
        with formatting codes).

        Arguments:
            string (str): The string to beautify/parse.

        Returns:
            The parsed, stringified and ultimately beautified string.
            Empty input is returned unchanged.

        Raises:
            errors.ArgumentError: If phrases were found, but not a single
                                  style (flag combination) was supplied.
        """

        if not string:
            return string

        # string may differ because of escaped characters
        string, phrases = self.parse(string)

        if not phrases:
            return string

        if not self.positional and not self.always:
            raise errors.ArgumentError("Found phrases, but no styles "
                                       "were supplied!")

        return self.stringify(string, phrases)

    def parse(self, string, root=None):
        """
        Parses a string to handle escaped tags and retrieve phrases.

        This method works recursively to parse nested tags. When escaped
        tags are found, the escape characters are removed from the
        string. Argument sequences are removed from the string as well.
        The string returned can thus be quite different from the string
        passed.

        Arguments:
            string (str): The string to parse.
            root (Phrase): If in a recursive call, the root/parent phrase.

        Returns:
            A tuple whose first element is the escaped string (without
            escape characters and phrase arguments). The second element
            depends on the recursion depth: for the outermost call (no
            root) it is the list of phrases found, for recursive calls
            (with a root) it is the completed root Phrase.

        Raises:
            errors.ParseError: If no closing tag could be
                               found for an opening tag.
        """

        phrases = []

        meta = self.meta.search(string)

        while meta:

            # Save some function calls
            pos = meta.start()

            if meta.group() == "<":
                string, child, meta = self.open_phrase(string, pos)

                if child and root:
                    root.nested.append(child)

                elif child:
                    phrases.append(child)

                # else it was escaped (+ new meta)
                continue

            elif root:

                if meta.group() == "(":
                    meta = self.meta.search(string, pos + 1)

                    # There may be no further meta character at all, in
                    # which case the parenthesis is handled as a stray
                    # meta character below.
                    if meta and meta.group() == ")":
                        string, root, meta = self.handle_arguments(
                            string, root, pos, meta.start())
                        continue

                elif meta.group() == ">":
                    string, phrase, meta = self.close_phrase(string,
                                                             root,
                                                             pos)
                    if phrase:
                        return string, phrase

                    # else was escaped (+ new meta)
                    continue

            string, meta = self.escape_meta(string, pos)

        if not root:
            return string, phrases

        # If this is not the first stack-depth the function should
        # have returned upon finding a closing tag,
        # i.e. we should never have gotten here.
        word = re.search(r"([\w\s]+)(?![\d]*>[\w\s]+>)", string)

        what = "No closing tag found for opening tag"

        if word:
            what += " after expression '{0}'".format(word.group())

        raise errors.ParseError(what + "!")

    def escape_meta(self, string, pos):
        """
        Checks if a meta character is escaped or else warns about it.

        If the meta character has an escape character (a backslash)
        preceding it, the escape character is removed from the string.
        If it does not, a warning is emitted that the user should
        escape it.

        Arguments:
            string (str): The relevant string in which the character
                          was found.
            pos (int): The index of the meta character within the string.

        Returns:
            The possibly escaped string and the next meta match
            (None if there is none).
        """

        # Replace escape character
        if pos > 0 and string[pos - 1] == "\\":
            # Removing the backslash shifts the meta character to
            # pos - 1, so pos already points one past it.
            string = string[:pos - 1] + string[pos:]
        else:
            warnings.warn("Un-escaped meta-character: '{0}' (Escape"
                          " it with a '\\')".format(string[pos]),
                          Warning)
            pos += 1

        meta = self.meta.search(string, pos)

        return string, meta

    def open_phrase(self, string, pos):
        """
        Helper function of self.parse() handling opening tags.

        Arguments:
            string (str): The string being parsed.
            pos (int): The index/position of the opening tag in the
                       string.

        Returns:
            The (possibly) escaped string, a child phrase if the opening
            tag was not escaped and otherwise None, and a new meta
            match, searched for either one index past the escaped tag
            or one index past the closing tag of the child.
        """

        # Check for escaping. At index 0 there is no preceding character
        # (string[-1] would wrap around to the end of the string).
        if pos > 0 and string[pos - 1] == "\\":
            # Remove the escape character
            string = string[:pos - 1] + string[pos:]

            # When removing the escape character, the
            # pos tag index is pushed one back
            pos -= 1

            # If the escape character was not itself (double)
            # escaped we can look for the next tag
            if pos == 0 or string[pos - 1] != "\\":
                tag = self.meta.search(string, pos + 1)

                return string, None, tag

        child = Phrase(pos)

        escaped, child = self.parse(string[pos + 1:], child)

        string = string[:pos + 1] + escaped

        tag = self.meta.search(string, child.closing + 1)

        return string, child, tag

    def close_phrase(self, string, root, pos):
        """
        Helper function of self.parse() handling closing tags.

        Arguments:
            string (str): The string being parsed.
            root (Phrase): The current root phrase.
            pos (int): The index/position of the closing tag in the
                       string.

        Returns:
            Always the (possibly) escaped string, then either the fully
            formed phrase if the closing tag was not escaped (with its
            'closing' and 'string' attributes set) and otherwise None,
            and lastly the next meta match if the closing tag was indeed
            escaped and otherwise None -- i.e. either the tuple
            (string, phrase, None) or (string, None, tag).
        """

        # Whatever is between the opening tag and this closing tag
        substring = string[:pos]

        # Escape-character to escape the closing tag (\>)
        if substring.endswith("\\"):

            # Get rid of the escape character either way
            string = string[:pos - 1] + string[pos:]

            # Check if not double-escaped
            if not substring[:-1].endswith("\\"):
                # pos is now one index past the closing tag
                tag = self.meta.search(string, pos)

                return string, None, tag

            # Double-escape means this is really supposed to be a
            # closing tag and thus we can return the phrase.
            else:
                # The closing position should be in the same scope
                # as the scope of the opening position (scope in
                # the sense of to which phrase the positions are
                # relative to). -1 due to the escaped character but
                # +1 because index 0 is phrase.opening + 1
                root.closing = root.opening + pos
                root.string = string[:pos - 1]
        else:
            root.closing = root.opening + 1 + pos
            root.string = string[:pos]

        return string, root, None

    def handle_arguments(self, string, root, opening, closing):
        """
        Handles phrase-arguments.

        Sets the override and increment flags if found. Also makes sure
        that the argument sequence is at the start of the phrase and
        else warns about the unescaped meta characters. If the arguments
        are indeed at the start but do not match the arguments regular
        expression, an error is raised.

        Arguments:
            string (str): The string being parsed.
            root (Phrase): The current root phrase.
            opening (int): The index of the opening parenthesis.
            closing (int): The index of the closing parenthesis.

        Returns:
            The (possibly escaped) string, the root phrase (if no
            escaping, then with arguments and flags) and the next meta
            match.

        Raises:
            errors.ParseError: If the arguments are invalid.
        """

        # The actual argument string (ignore whitespace)
        args = string[opening + 1:closing].replace(" ", "")

        # The argument sequence must be at the start of the phrase
        # and must match the allowed argument regular expression
        if opening > 0 or not self.arguments.match(args):

            if opening == 0:
                raise errors.ParseError("Invalid argument sequence!")

            # If escape_meta does indeed escape a character and removes
            # a backward slash, the positions 'opening' and 'closing' are
            # no longer valid. escape_meta does a search for the next
            # meta character though, which is then the closing
            # parenthesis, so we can use its index value (in the now
            # escaped string)
            string, meta = self.escape_meta(string, opening)
            string, meta = self.escape_meta(string, meta.start())

            return string, root, meta

        if "!" in args:
            root.override = True
            args = args.replace("!", "")

        if "+" in args:
            root.increment = True
            args = args.replace("+", "")

        root.arguments = [int(i) for i in args.split(",") if i]

        # Remove the argument string including parentheses
        string = string[closing + 1:]

        meta = self.meta.search(string)

        return string, root, meta

    def stringify(self, string, phrases, parent=None):
        """
        Stringifies phrases.

        After parsing of the string via self.parse(), this method takes
        the escaped string and the list of phrases returned by
        self.parse() and replaces the original phrases (with tags) with
        the Phrase-objects in the list and adds the appropriate
        flag-combinations as determined by the string or the position of
        the phrase (the string if it's in self.always, i.e. an 'always'
        argument). This method also works recursively to handle nested
        phrases (and resetting of parent-phrase styles).

        Arguments:
            string (str): The escaped string returned by self.parse().
            phrases (list): The list of Phrase-objects returned by
                            self.parse().
            parent (Phrase): For recursive calls, the current parent
                             Phrase.

        Returns:
            The finished, beautifully beautified string.

        Raises:
            errors.ArgumentError: If more positional arguments are
                                  requested than were supplied.
        """

        last_tag = 0

        beauty = ""

        for phrase in phrases:

            # Text between the previous phrase and this one is copied
            # over verbatim
            beauty += string[last_tag:phrase.opening]

            if phrase.string in self.always and not phrase.override:
                phrase.style = self.always[phrase.string]

            if phrase.arguments:
                # Explicit positional indices: OR all requested styles
                combination = 0
                for i in phrase.arguments:
                    try:
                        combination |= self.positional[i]
                    except IndexError:
                        raise errors.ArgumentError("Positional argument '{0}' "
                                                   "is out of range"
                                                   "!".format(i))

                phrase.style |= combination

            elif (phrase.string not in self.always or
                  phrase.increment or phrase.override):
                # No explicit indices: take the style the counter
                # currently points at
                try:
                    combination = self.positional[self.counter]

                    if phrase.increment or not phrase.override:
                        self.counter += 1
                except IndexError:
                    self.raise_not_enough_arguments(phrase.string)

                phrase.style |= combination

            phrase.style = flags.codify(phrase.style)

            if phrase.nested:
                phrase.string = self.stringify(phrase.string,
                                               phrase.nested,
                                               phrase)

            # After a nested phrase is over, we reset the style to the
            # parent style, this gives the notion of nested styles.
            reset = parent.style if parent else ""

            # \033[ signifies the start of a command-line escape-sequence
            beauty += "\033[{0}m{1}\033[0;{2}m".format(phrase.style,
                                                       phrase,
                                                       reset)
            last_tag = phrase.closing + 1

        beauty += string[last_tag:]

        return beauty

    def raise_not_enough_arguments(self, string):
        """
        Raises an errors.ArgumentError if not enough arguments were
        supplied.

        Takes care of formatting for detailed error messages.

        Arguments:
            string (str): The string of the phrase for which there
                          weren't enough arguments.

        Raises:
            errors.ArgumentError: Always, with a detailed error message.
        """

        requested = errors.number(self.counter + 1)

        number = len(self.positional)

        verb = "was" if number == 1 else "were"

        what = "Requested {} formatting argument for "\
               "'{}' but only {} {} supplied!"

        what = what.format(requested, string, number, verb)

        raise errors.ArgumentError(what)