Source code for certbot_nginx.nginxparser

"""Very low-level nginx config parser based on pyparsing."""
# Forked from https://github.com/fatiherikli/nginxparser (MIT Licensed)
import copy
import logging
import string

from pyparsing import (
    Literal, White, Word, alphanums, CharsNotIn, Combine, Forward, Group,
    Optional, OneOrMore, Regex, ZeroOrMore)
from pyparsing import stringEnd
from pyparsing import restOfLine

logger = logging.getLogger(__name__)

class RawNginxParser(object):
    # pylint: disable=expression-not-assigned
    """A class that parses nginx configuration with pyparsing.

    The grammar is declared as class attributes, so it is built once when the
    class body executes and shared by every instance.  An instance only
    stores the text to parse; :meth:`parse` runs the shared ``script``
    grammar over it.
    """

    # constants
    # Optional run of whitespace; used explicitly throughout the grammar
    # because the composed rules call leaveWhitespace().
    space = Optional(White())
    nonspace = Regex(r"\S+")
    # Braces and semicolons are matched but suppressed.
    left_bracket = Literal("{").suppress()
    right_bracket = space.leaveWhitespace() + Literal("}").suppress()
    semicolon = Literal(";").suppress()
    key = Word(alphanums + "_/+-.")
    dollar_var = Combine(Literal('$') + Regex(r"[^\{\};,\s]+"))
    condition = Regex(r"\(.+\)")
    # Matches anything that is not a special character, and ${SHELL_VARS}, AND
    # any chars in single or double quotes
    # All of these COULD be upgraded to something like
    # https://stackoverflow.com/a/16130746
    dquoted = Regex(r'(\".*\")')
    squoted = Regex(r"(\'.*\')")
    nonspecial = Regex(r"[^\{\};,]")
    varsub = Regex(r"(\$\{\w+\})")
    # nonspecial nibbles one character at a time, but the other objects take
    # precedence.  We use ZeroOrMore to allow entries like "break ;" to be
    # parsed as assignments
    value = Combine(ZeroOrMore(dquoted | squoted | varsub | nonspecial))

    location = CharsNotIn("{};," + string.whitespace)
    # modifier for location uri [ = | ~ | ~* | ^~ ]
    # NOTE(review): "~*" is listed before "~" -- presumably so the longer
    # literal is tried first; confirm before reordering.
    modifier = Literal("=") | Literal("~*") | Literal("~") | Literal("^~")

    # rules
    comment = space + Literal('#') + restOfLine

    # "key [value];" -- the value part is optional and defaults to None.
    assignment = space + key + Optional(space + value, default=None) + semicolon
    location_statement = space + Optional(modifier) + Optional(space + location + space)
    if_statement = space + Literal("if") + space + condition + space
    charset_map_statement = space + Literal("charset_map") + space + value + space + value

    map_statement = space + Literal("map") + space + nonspace + space + dollar_var + space
    # This is NOT an accurate way to parse nginx map entries; it's almost
    # certainly too permissive and may be wrong in other ways, but it should
    # preserve things correctly in most or all cases.
    #
    #    - I can neither prove nor disprove that it is correct wrt all escaped
    #      semicolon situations
    # Addresses https://github.com/fatiherikli/nginxparser/issues/19
    map_pattern = Regex(r'".*"') | Regex(r"'.*'") | nonspace
    map_entry = space + map_pattern + space + value + space + semicolon
    map_block = Group(
        Group(map_statement).leaveWhitespace() +
        left_bracket +
        Group(ZeroOrMore(Group(comment | map_entry)) + space).leaveWhitespace() +
        right_bracket)

    # Forward declaration: block_innards below refers to block, and block is
    # only given its definition (via <<) after block_innards exists.
    block = Forward()

    # key could for instance be "server" or "http", or "location" (in which case
    # location_statement needs to have a non-empty location)

    block_begin = (Group(space + key + location_statement) ^
                   Group(if_statement) ^
                   Group(charset_map_statement)).leaveWhitespace()

    block_innards = Group(ZeroOrMore(Group(comment | assignment) | block | map_block)
                          + space).leaveWhitespace()

    block << Group(block_begin + left_bracket + block_innards + right_bracket)

    # Top-level rule: one or more comments/assignments/blocks/map blocks,
    # optional trailing whitespace, then end of input.
    script = OneOrMore(Group(comment | assignment) ^ block ^ map_block) + space + stringEnd
    # Bare expression statement (hence the pylint disable above): configures
    # the script grammar in place.
    script.parseWithTabs().leaveWhitespace()

    def __init__(self, source):
        """Store the configuration text to be parsed.

        :param source: the text later handed to ``script.parseString``

        """
        self.source = source

    def parse(self):
        """Returns the parsed tree."""
        return self.script.parseString(self.source)

    def as_list(self):
        """Returns the parsed tree as a list."""
        return self.parse().asList()
class RawNginxDumper(object):
    # pylint: disable=too-few-public-methods
    """Serializes a (whitespace-preserving) parse tree back into nginx text."""

    def __init__(self, blocks):
        self.blocks = blocks

    def __iter__(self, blocks=None):
        """Yield the string fragments that make up the dumped configuration.

        :param blocks: entries to dump; a falsy value (``None`` or an empty
            list) means "use ``self.blocks``"

        """
        blocks = blocks or self.blocks
        for original in blocks:
            # Bare strings are emitted verbatim.
            if isinstance(original, str):
                yield original
                continue

            # Work on a copy: entries are consumed destructively with pop().
            entry = copy.deepcopy(original)
            if spacey(entry[0]):
                yield entry.pop(0)  # indentation
                if not entry:
                    continue

            key = entry.pop(0)
            values = entry.pop(0)

            if isinstance(key, list):
                # A block: "key parts {" + dumped children + "}"
                yield "".join(key) + '{'
                for parameter in values:
                    # Wrap in a one-element list to negate "for ... in blocks"
                    for line in self.__iter__([parameter]):
                        yield line
                yield '}'
                continue

            if isinstance(key, str) and key.strip() == '#':
                # A comment: marker followed by the rest of the line.
                yield key + values
                continue

            # An assignment.  Sometimes the parser has stuck some gap
            # whitespace in here; if so rotate it into gap.
            gap = ""
            if values and spacey(values):
                gap, values = values, entry.pop(0)
            yield key + gap + values + ';'

    def __str__(self):
        """Return the parsed block as a string."""
        return ''.join(self)
# Shortcut functions to respect Python's serialization interface
# (like pyyaml, pickle or json)
def loads(source):
    """Parse nginx configuration held in a string.

    :param str source: The string to parse

    :returns: The parsed tree, wrapped so whitespace entries are hidden
    :rtype: list

    """
    raw_tree = RawNginxParser(source).as_list()
    return UnspacedList(raw_tree)
def load(_file):
    """Parse nginx configuration read from a file object.

    :param file _file: The file to parse (anything with a ``read()`` method)

    :returns: The parsed tree
    :rtype: list

    """
    contents = _file.read()
    return loads(contents)
def dumps(blocks):
    """Serialize a parsed tree to a string.

    :param UnspacedList blocks: The parsed tree; its ``spaced`` attribute
        (the whitespace-preserving view) is what gets dumped

    :returns: The dumped configuration text
    :rtype: str

    """
    dumper = RawNginxDumper(blocks.spaced)
    return str(dumper)
def dump(blocks, _file):
    """Serialize a parsed tree and write it to a file object.

    :param UnspacedList blocks: The parsed tree
    :param file _file: The file to dump to (anything with a ``write()`` method)

    :returns: Whatever ``_file.write`` returns

    """
    serialized = dumps(blocks)
    return _file.write(serialized)
def spacey(x):
    """Tell whether *x* is the empty string or a whitespace-only string."""
    if isinstance(x, str) and x.isspace():
        return True
    return x == ''
class UnspacedList(list):
    """Wrap a list [of lists], making any whitespace entries magically invisible

    The instance itself is the whitespace-free view.  ``self.spaced`` holds
    the corresponding whitespace-preserving list, and ``self.dirty`` records
    whether this list has been mutated since construction.
    """

    def __init__(self, list_source):
        """Build the unspaced view and its spaced counterpart.

        :param list_source: any iterable of entries (strings, None, or
            nested lists); generators and other one-shot iterators are fine

        """
        # Materialize the argument exactly once.  It may be a generator, and
        # iterating it a second time would leave this list empty while
        # self.spaced is fully populated.
        source_items = list(list_source)
        # duplicate any sublists
        self.spaced = copy.deepcopy(source_items)
        self.dirty = False

        # Turn self into a version of the source list that has spaces removed
        # and all sub-lists also UnspacedList()ed
        list.__init__(self, source_items)
        # Walk backwards over a snapshot so deletions do not shift the
        # indexes still to be visited.
        for i, entry in reversed(list(enumerate(self))):
            if isinstance(entry, list):
                sublist = UnspacedList(entry)
                list.__setitem__(self, i, sublist)
                # Share the sublist's spaced view so both stay in step.
                self.spaced[i] = sublist.spaced
            elif spacey(entry):
                # don't delete comments
                if "#" not in self[:i]:
                    list.__delitem__(self, i)

    def _coerce(self, inbound):
        """
        Coerce some inbound object to be appropriately usable in this object

        :param inbound: string or None or list or UnspacedList
        :returns: (coerced UnspacedList or string or None, spaced equivalent)
        :rtype: tuple

        """
        if not isinstance(inbound, list):  # str or None
            return (inbound, inbound)
        else:
            if not hasattr(inbound, "spaced"):
                inbound = UnspacedList(inbound)
            return (inbound, inbound.spaced)

    def insert(self, i, x):
        """Insert *x* before unspaced index *i* in both views."""
        item, spaced_item = self._coerce(x)
        # Inserting at or past the end appends to the spaced list as well.
        slicepos = self._spaced_position(i) if i < len(self) else len(self.spaced)
        self.spaced.insert(slicepos, spaced_item)
        list.insert(self, i, item)
        self.dirty = True

    def append(self, x):
        """Append *x* to both views."""
        item, spaced_item = self._coerce(x)
        self.spaced.append(spaced_item)
        list.append(self, item)
        self.dirty = True

    def extend(self, x):
        """Extend both views with the entries of *x*."""
        item, spaced_item = self._coerce(x)
        self.spaced.extend(spaced_item)
        list.extend(self, item)
        self.dirty = True

    def __add__(self, other):
        """Return a dirty deep copy of this list extended with *other*."""
        l = copy.deepcopy(self)
        l.extend(other)
        l.dirty = True
        return l

    def pop(self, _i=None):
        raise NotImplementedError("UnspacedList.pop() not yet implemented")

    def remove(self, _):
        raise NotImplementedError("UnspacedList.remove() not yet implemented")

    def reverse(self):
        raise NotImplementedError("UnspacedList.reverse() not yet implemented")

    def sort(self, _cmp=None, _key=None, _Rev=None):
        raise NotImplementedError("UnspacedList.sort() not yet implemented")

    def __setslice__(self, _i, _j, _newslice):
        raise NotImplementedError("Slice operations on UnspacedLists not yet implemented")

    def __setitem__(self, i, value):
        """Replace the entry at unspaced index *i* in both views.

        :raises NotImplementedError: if *i* is a slice

        """
        if isinstance(i, slice):
            raise NotImplementedError("Slice operations on UnspacedLists not yet implemented")
        item, spaced_item = self._coerce(value)
        self.spaced.__setitem__(self._spaced_position(i), spaced_item)
        list.__setitem__(self, i, item)
        self.dirty = True

    def __delitem__(self, i):
        """Delete the entry at unspaced index *i* from both views."""
        self.spaced.__delitem__(self._spaced_position(i))
        list.__delitem__(self, i)
        self.dirty = True

    def __deepcopy__(self, memo):
        """Return a new UnspacedList with a deep-copied spaced view and the same dirty flag."""
        l = UnspacedList(self[:])
        l.spaced = copy.deepcopy(self.spaced, memo=memo)
        l.dirty = self.dirty
        return l

    def is_dirty(self):
        """Recurse through the parse tree to figure out if any sublists are dirty"""
        if self.dirty:
            return True
        return any((isinstance(x, list) and x.is_dirty() for x in self))

    def _spaced_position(self, idx):
        """Convert from indexes in the unspaced list to positions in the spaced one

        :param int idx: index into the unspaced list; negative values count
            from the end
        :returns: the matching position in ``self.spaced``
        :rtype: int
        :raises IndexError: if *idx* is out of range for the unspaced list

        """
        pos = spaces = 0
        # Normalize indexes like list[-1] etc, and save the result
        if idx < 0:
            idx = len(self) + idx
        if not 0 <= idx < len(self):
            raise IndexError("list index out of range")
        idx0 = idx
        # Count the number of spaces in the spaced list before idx in the unspaced one
        while idx != -1:
            if spacey(self.spaced[pos]):
                spaces += 1
            else:
                idx -= 1
            pos += 1
        return idx0 + spaces