You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
733 lines
24 KiB
733 lines
24 KiB
# -*- coding: utf-8 -*-
|
|
"""Productions parser used by css and stylesheets classes to parse
|
|
text into a cssutils.util.Seq and at the same time retrieving
|
|
additional specific cssutils.util.Item objects for later use.
|
|
|
|
TODO:
|
|
- ProdsParser
|
|
- handle EOF or STOP?
|
|
- handle unknown @rules
|
|
- handle S: maybe save to Seq? parameterized?
|
|
- store['_raw']: always?
|
|
|
|
- Sequence:
|
|
- opt first(), naive impl for now
|
|
|
|
"""
|
|
__all__ = ['ProdParser', 'Sequence', 'Choice', 'Prod', 'PreDef']
|
|
__docformat__ = 'restructuredtext'
|
|
__version__ = '$Id: parse.py 1418 2008-08-09 19:27:50Z cthedot $'
|
|
|
|
from helper import pushtoken
|
|
import cssutils
|
|
import re
|
|
import string
|
|
import sys
|
|
|
|
|
|
class ParseError(Exception):
    """Root of all exceptions ProdParser uses internally while parsing."""
|
|
|
|
class Done(ParseError):
    """Signals that a Sequence or Choice is finished with no Prods left."""
|
|
|
|
class Exhausted(ParseError):
    """Signals that a token was given to an already finished Sequence or Choice."""
|
|
|
|
class Missing(ParseError):
    """Signals an unfinished Sequence or Choice which got no matching token."""
|
|
|
|
class NoMatch(ParseError):
    """Signals that nothing in a Sequence or Choice matches the token."""
|
|
|
|
|
|
class Choice(object):
    """A Choice of productions (Sequence or single Prod).

    The first production matching a token is selected; after that the
    Choice counts as exhausted until ``reset()`` is called.
    """

    def __init__(self, *prods, **options):
        """
        *prods
            Prod or Sequence objects
        options:
            optional=False
                explicit flag for this Choice. If it is not given the
                Choice is optional as soon as any of ``prods`` is optional.
        """
        self._prods = prods

        if 'optional' in options:
            self.optional = options['optional']
        else:
            # no explicit setting: derive it from the alternatives
            self.optional = any(p.optional for p in self._prods)

        self.reset()

    def reset(self):
        """Start Choice from zero"""
        self._exhausted = False

    def matches(self, token):
        """Check if token matches any of the productions."""
        for prod in self._prods:
            if prod.matches(token):
                return True
        return False

    def nextProd(self, token):
        """
        Return:

        - next matching Prod or Sequence (which is ``reset()`` first)
        - ``None`` if any Prod or Sequence is optional and no token matched
        - ``None`` if the choice is already done and no token is given
        - raise ParseError if nothing matches and all are mandatory
        - raise Exhausted if choice already done but a token is given

        ``token`` may be None but this occurs when no tokens left."""
        if self._exhausted:
            if token:
                raise Exhausted(u'Extra token')
            return None

        optional = False
        for x in self._prods:
            if x.matches(token):
                self._exhausted = True
                x.reset()
                return x
            elif x.optional:
                optional = True

        if not optional:
            # None matched but also None is optional
            raise ParseError(u'No match in %s' % self)
        return None

    def __str__(self):
        return u'Choice(%s)' % u', '.join([str(x) for x in self._prods])
|
|
|
|
|
|
class Sequence(object):
    """An ordered run of productions (Choice or single Prod).

    The whole run may repeat; how often is given by the ``minmax`` option.
    """

    def __init__(self, *prods, **options):
        """
        *prods
            Prod or Sequence objects
        **options:
            minmax = lambda: (1, 1)
                callback returning ``(min, max)``, the number of times this
                sequence may run. A ``max`` of ``None`` means unlimited.
        """
        self._prods = prods

        if 'minmax' in options:
            bounds = options['minmax']
        else:
            bounds = lambda: (1, 1)
        self._min, self._max = bounds()

        if self._max is None:
            # unlimited: sys.maxsize on py2.6/3, sys.maxint before that
            self._max = sys.maxsize if hasattr(sys, 'maxsize') else sys.maxint

        self._prodcount = len(self._prods)
        self.reset()

    @property
    def optional(self):
        """True if the sequence need not run at all (minimum of 0 rounds)."""
        return self._min == 0

    def matches(self, token):
        """Called by Choice to try to find if Sequence matches.

        Productions are tried in order; the search ends at the first
        production which is not optional.
        """
        for candidate in self._prods:
            if candidate.matches(token):
                return True
            try:
                mandatory = not candidate.optional
            except AttributeError:
                # no ``optional`` attribute: just try the next production
                continue
            if mandatory:
                return False
        return False

    def reset(self):
        """Reset this Sequence if it is nested."""
        self._round = 0
        self._i = 0
        self._roundstarted = False

    def _currentName(self):
        """Return current element of Sequence, used by name"""
        # TODO: current impl first only if 1st if an prod!
        remaining = self._prods[self._i:]
        for candidate in remaining:
            if not candidate.optional:
                return str(candidate)
        return 'Sequence'

    def nextProd(self, token):
        """Return

        - next matching Prod or Choice (which is ``reset()`` first)
        - ``None`` if all rounds are used up and no token is given
        - raises Missing if a mandatory production got no matching token
        - raises Done if no token is left and no round is in progress
        - raises NoMatch if the token matches nothing
        - raises Exhausted if sequence already done but a token is given
        """
        while self._round < self._max:
            # position and round the candidate belongs to
            position = self._i
            current_round = self._round
            candidate = self._prods[position]
            if position == 0:
                self._roundstarted = False

            # advance the cursor for the next call, wrapping into a new round
            self._i += 1
            if self._i == self._prodcount:
                self._round += 1
                self._i = 0

            if candidate.matches(token):
                self._roundstarted = True
                # reset nested Choice or Prod to use from start
                candidate.reset()
                return candidate

            if candidate.optional:
                continue

            if current_round < self._min:
                raise Missing(u'Missing token for production %s' % candidate)

            if token:
                raise NoMatch(u'No matching production for token')

            if self._roundstarted:
                raise Missing(u'Missing token for production %s' % candidate)

            raise Done()

        if token:
            raise Exhausted(u'Extra token')

    def __str__(self):
        return u'Sequence(%s)' % u', '.join([str(x) for x in self._prods])
|
|
|
|
|
|
class Prod(object):
    """Single Prod in Sequence or Choice."""

    def __init__(self, name, match, optional=False,
                 toSeq=None, toStore=None,
                 stop=False, stopAndKeep=False,
                 nextSor=False, mayEnd=False,
                 storeToken=None,
                 exception=None):
        """
        name
            name used for error reporting
        match callback
            function called with parameters tokentype and tokenvalue
            returning True, False or raising ParseError
        toSeq callback (optional) or False
            calling toSeq(token, tokens) returns (type_, val) which is
            appended to seq. If not given the unaltered
            (token[0], token[1]) is used.

            if False nothing is added
        toStore (optional)
            key to save util.Item to store or callback(store, util.Item)
        optional = False
            whether Prod is optional or not
        stop = False
            if True stop parsing of tokens here
        stopAndKeep
            if True stop parsing of tokens here but return stopping
            token in unused tokens
        nextSor=False
            next is S or other like , or / (CSSValue)
        mayEnd = False
            no token must follow even defined by Sequence.
            Used for operator ',/ ' currently only

        storeToken = None
            if True toStore saves the simple token tuple and not an Item
            object to store. Old style processing, TODO: resolve

        exception = None
            exception to be raised in case of error, normally SyntaxErr
        """
        self._name = name
        self.match = match
        self.optional = optional
        self.exception = exception
        self.storeToken = storeToken
        self.mayEnd = mayEnd
        self.nextSor = nextSor
        self.stopAndKeep = stopAndKeep
        self.stop = stop

        def store_under(key):
            "Build the toStore callback saving items under ``key``."
            def toStore(store, item):
                "Set store[key], turning it into a list on repeated use."
                if key not in store:
                    store[key] = item
                    return
                existing = store[key]
                if not isinstance(existing, list):
                    store[key] = [existing]
                store[key].append(item)
            return toStore

        if toSeq is False or toSeq:
            # a callback, or False to add nothing to seq
            self.toSeq = toSeq
        else:
            def passthrough(token, tokens):
                "Default: use type and value of the token unaltered."
                return (token[0], token[1])
            self.toSeq = passthrough

        # toStore is always set, to a callable or None
        if hasattr(toStore, '__call__'):
            self.toStore = toStore
        elif toStore:
            self.toStore = store_under(toStore)
        else:
            self.toStore = None

    def matches(self, token):
        """Return if token matches; a missing token never does."""
        if not token:
            return False
        tokentype, value, _line, _col = token
        return self.match(tokentype, value)

    def reset(self):
        """Nothing to reset for a single Prod."""
        pass

    def __str__(self):
        return self._name

    def __repr__(self):
        details = (self.__class__.__name__, self._name, id(self))
        return "<cssutils.prodsparser.%s object name=%r at 0x%x>" % details
|
|
|
|
|
|
# global tokenizer as there is only one! ProdParser clears it on
# construction (unless ``clear=False``) and ``ProdParser.parse`` pushes a
# ``stopAndKeep`` token back onto it.
tokenizer = cssutils.tokenize2.Tokenizer()
|
|
|
|
class ProdParser(object):
    """Productions parser.

    Feeds a stream of tokens through a tree of Sequence, Choice and Prod
    objects and collects the results in a ``cssutils.util.Seq`` and a
    ``store`` dict.
    """

    # ``basestring`` exists on Python 2 only; fall back to ``str`` so that
    # text input is also recognized where ``basestring`` is not defined.
    try:
        _string_types = basestring
    except NameError:
        _string_types = str

    def __init__(self, clear=True):
        """
        clear
            if True (default) the shared module level ``tokenizer`` is
            cleared
        """
        self.types = cssutils.cssproductions.CSSProductions
        self._log = cssutils.log
        if clear:
            tokenizer.clear()

    def _texttotokens(self, text):
        """Build a generator which is the only thing that is parsed!
        old classes may use lists etc

        ``text`` may be a string (stripped and tokenized), a tuple of
        ``(token, tokens)``, a single token tuple, a list of tokens or
        anything else, which is returned unchanged and assumed to be a
        tokens generator already.
        """
        if isinstance(text, self._string_types):
            # DEFAULT, to tokenize strip space
            return tokenizer.tokenize(text.strip())

        elif isinstance(text, tuple):
            # OLD: (token, tokens) or a single token
            if len(text) == 2:
                # (token, tokens): put the single token in front again
                from itertools import chain
                token, tokens = text
                return chain([token], tokens)
            else:
                # single token
                return iter([text])

        elif isinstance(text, list):
            # OLD: generator from list
            return iter(text)

        else:
            # DEFAULT, already tokenized, assume generator
            return text

    def _SorTokens(self, tokens, until=',/'):
        """New tokens generator which has S tokens removed,
        if followed by anything in ``until``, normally a ``,``.

        An S token is also dropped if a COMMENT follows it. The special
        handling ends with the first token which is neither S nor COMMENT,
        all following tokens are passed through unchanged.
        """
        for token in tokens:
            if token[0] == self.types.S:
                try:
                    next_ = next(tokens)
                except StopIteration:
                    # S was the very last token
                    yield token
                else:
                    if next_[1] in until:
                        # omit S as e.g. ``,`` has been found
                        yield next_
                    elif next_[0] == self.types.COMMENT:
                        # pass COMMENT
                        yield next_
                    else:
                        yield token
                        yield next_

            elif token[0] == self.types.COMMENT:
                # pass COMMENT
                yield token
            else:
                yield token
                break
        # normal mode again
        for token in tokens:
            yield token

    def parse(self, text, name, productions, keepS=False, store=None):
        """
        text (or token generator)
            to parse, will be tokenized if not a generator yet

            may be:
            - a string to be tokenized
            - a single token, a tuple
            - a tuple of (token, tokensGenerator)
            - already tokenized so a tokens generator

        name
            used for logging
        productions
            used to parse tokens
        keepS
            if WS should be added to Seq or just be ignored
        store  UPDATED
            If a Prod defines ``toStore`` the key defined there
            is a key in store to be set or if store[key] is a list
            the next Item is appended here.

            TODO: NEEDED? :
            Key ``raw`` is always added and holds all unprocessed
            values found

        returns
            :wellformed: True or False
            :seq: a filled cssutils.util.Seq object which is NOT readonly yet
            :store: filled keys defined by Prod.toStore
            :unusedtokens: token generator containing tokens not used yet
        """
        tokens = self._texttotokens(text)
        if not tokens:
            self._log.error(u'No content to parse.')
            # TODO: return???

        seq = cssutils.util.Seq(readonly=False)
        # store for specific values
        # NOTE: any falsy ``store`` (also an empty dict) is replaced by a
        # new dict, so use the returned store
        if not store:
            store = {}
        prods = [productions]  # stack of productions
        wellformed = True

        # while no real token is found any S are ignored
        started = False
        stopall = False
        prod = None
        # flag if default S handling should be done
        defaultS = True
        while True:
            try:
                token = next(tokens)
            except StopIteration:
                break
            type_, val, line, col = token

            # default productions
            if type_ == self.types.COMMENT:
                # always append COMMENT
                seq.append(cssutils.css.CSSComment(val),
                           cssutils.css.CSSComment, line, col)
            elif defaultS and type_ == self.types.S:
                # append S (but ignore starting ones)
                if not keepS or not started:
                    continue
                else:
                    seq.append(val, type_, line, col)
            elif type_ == self.types.INVALID:
                # invalidate parse
                wellformed = False
                self._log.error(u'Invalid token: %r' % (token,))
                break
            elif type_ == 'EOF':
                # do nothing? (self.types.EOF == True!)
                pass
            else:
                started = True  # check S now

                try:
                    while True:
                        # find next matching production
                        try:
                            prod = prods[-1].nextProd(token)
                        except (Exhausted, NoMatch):
                            # try next
                            prod = None
                        if isinstance(prod, Prod):
                            # found actual Prod, not a Choice or Sequence
                            break
                        elif prod:
                            # nested Sequence, Choice
                            prods.append(prod)
                        else:
                            # nested exhausted, try in parent
                            if len(prods) > 1:
                                prods.pop()
                            else:
                                raise ParseError('No match')
                except ParseError as e:
                    wellformed = False
                    self._log.error(u'%s: %s: %r' % (name, e, token))
                    break
                else:
                    # process prod
                    if prod.toSeq and not prod.stopAndKeep:
                        type_, val = prod.toSeq(token, tokens)
                        if val is not None:
                            seq.append(val, type_, line, col)
                            if prod.toStore:
                                if not prod.storeToken:
                                    # store the item just appended to seq
                                    prod.toStore(store, seq[-1])
                                else:
                                    # workaround for now for old style token
                                    # parsing!
                                    # TODO: remove when all new style
                                    prod.toStore(store, token)

                    if prod.stop:  # EOF?
                        # stop here and ignore following tokens
                        break

                    if prod.stopAndKeep:  # e.g. ;
                        # stop here and ignore following tokens
                        # but keep this token for next run
                        tokenizer.push(token)
                        stopall = True
                        break

                    if prod.nextSor:
                        # following is S or other token (e.g. ",")?
                        # remove S if
                        tokens = self._SorTokens(tokens, ',/')
                        defaultS = False
                    else:
                        defaultS = True

        lastprod = prod

        if not stopall:
            # not stopped by a stopAndKeep Prod: check that no mandatory
            # production is left over
            while True:
                # all productions exhausted?
                try:
                    prod = prods[-1].nextProd(token=None)
                except Done:
                    # ok
                    prod = None

                except Missing as e:
                    prod = None
                    # last was a S operator which may End a Sequence, then ok
                    if hasattr(lastprod, 'mayEnd') and not lastprod.mayEnd:
                        wellformed = False
                        self._log.error(u'%s: %s' % (name, e))

                except ParseError as e:
                    prod = None
                    wellformed = False
                    self._log.error(u'%s: %s' % (name, e))

                else:
                    if prods[-1].optional:
                        prod = None
                    elif prod and prod.optional:
                        # ignore optional
                        continue

                if prod and not prod.optional:
                    wellformed = False
                    self._log.error(u'%s: Missing token for production %r'
                                    % (name, str(prod)))
                    break
                elif len(prods) > 1:
                    # nested exhausted, next in parent
                    prods.pop()
                else:
                    break

        # trim S from end
        seq.rstrip()
        return wellformed, seq, store, tokens
|
|
|
|
|
|
class PreDef(object):
    """Collection of factories for Prod objects which are needed again
    and again in productions definitions for ProdParser instances.
    """
    types = cssutils.cssproductions.CSSProductions
    reHexcolor = re.compile(r'^\#(?:[0-9abcdefABCDEF]{3}|[0-9abcdefABCDEF]{6})$')

    @staticmethod
    def calc(toSeq=None, nextSor=False):
        "token with the normalized value ``calc(``"
        def is_calc(tokentype, value):
            return u'calc(' == cssutils.helper.normalize(value)

        return Prod(name=u'calcfunction',
                    match=is_calc,
                    toSeq=toSeq,
                    nextSor=nextSor)

    @staticmethod
    def char(name='char', char=u',', toSeq=None,
             stop=False, stopAndKeep=False,
             optional=True, nextSor=False):
        "any CHAR, matched by its value only; optional by default"
        def is_char(tokentype, value):
            return value == char

        return Prod(name=name,
                    match=is_char,
                    optional=optional,
                    toSeq=toSeq,
                    stop=stop,
                    stopAndKeep=stopAndKeep,
                    nextSor=nextSor)

    @staticmethod
    def comma():
        "a ``,`` with all the defaults of ``char``"
        return PreDef.char(u'comma', u',')

    @staticmethod
    def dimension(nextSor=False, stop=False):
        "DIMENSION token, the value is put through cssutils.helper.normalize"
        def is_dimension(tokentype, value):
            return tokentype == PreDef.types.DIMENSION

        def normalized(token, tokens):
            return (token[0], cssutils.helper.normalize(token[1]))

        return Prod(name=u'dimension',
                    match=is_dimension,
                    toSeq=normalized,
                    stop=stop,
                    nextSor=nextSor)

    @staticmethod
    def function(toSeq=None, nextSor=False):
        "any FUNCTION token"
        def is_function(tokentype, value):
            return tokentype == PreDef.types.FUNCTION

        return Prod(name=u'function',
                    match=is_function,
                    toSeq=toSeq,
                    nextSor=nextSor)

    @staticmethod
    def funcEnd(stop=False):
        ")"
        return PreDef.char(u'end FUNC ")"', u')', stop=stop)

    @staticmethod
    def hexcolor(stop=False, nextSor=False):
        "#123 or #123456"
        def is_hexcolor(tokentype, value):
            # result is only used for its truth value
            return (tokentype == PreDef.types.HASH and
                    PreDef.reHexcolor.match(value))

        return Prod(name='HEX color',
                    match=is_hexcolor,
                    stop=stop,
                    nextSor=nextSor)

    @staticmethod
    def ident(stop=False, toStore=None, nextSor=False):
        "any IDENT token"
        def is_ident(tokentype, value):
            return tokentype == PreDef.types.IDENT

        return Prod(name=u'ident',
                    match=is_ident,
                    toStore=toStore,
                    stop=stop,
                    nextSor=nextSor)

    @staticmethod
    def number(stop=False, toSeq=None, nextSor=False):
        "any NUMBER token"
        def is_number(tokentype, value):
            return tokentype == PreDef.types.NUMBER

        return Prod(name=u'number',
                    match=is_number,
                    toSeq=toSeq,
                    stop=stop,
                    nextSor=nextSor)

    @staticmethod
    def percentage(stop=False, toSeq=None, nextSor=False):
        "any PERCENTAGE token"
        def is_percentage(tokentype, value):
            return tokentype == PreDef.types.PERCENTAGE

        return Prod(name=u'percentage',
                    match=is_percentage,
                    toSeq=toSeq,
                    stop=stop,
                    nextSor=nextSor)

    @staticmethod
    def string(stop=False, nextSor=False):
        "string delimiters are removed by default"
        def is_string(tokentype, value):
            return tokentype == PreDef.types.STRING

        def undelimited(token, tokens):
            return (token[0], cssutils.helper.stringvalue(token[1]))

        return Prod(name=u'string',
                    match=is_string,
                    toSeq=undelimited,
                    stop=stop,
                    nextSor=nextSor)

    @staticmethod
    def S(name=u'whitespace', toSeq=None, optional=False):
        "S token, always defined with ``mayEnd=True``"
        def is_space(tokentype, value):
            return tokentype == PreDef.types.S

        return Prod(name=name,
                    match=is_space,
                    optional=optional,
                    toSeq=toSeq,
                    mayEnd=True)

    @staticmethod
    def unary(stop=False, toSeq=None, nextSor=False):
        "+ or -, always optional"
        def is_sign(tokentype, value):
            return value in (u'+', u'-')

        return Prod(name=u'unary +-',
                    match=is_sign,
                    optional=True,
                    toSeq=toSeq,
                    stop=stop,
                    nextSor=nextSor)

    @staticmethod
    def uri(stop=False, nextSor=False):
        "'url(' and ')' are removed and URI is stripped"
        def is_uri(tokentype, value):
            return tokentype == PreDef.types.URI

        def plain_uri(token, tokens):
            return (token[0], cssutils.helper.urivalue(token[1]))

        return Prod(name=u'URI',
                    match=is_uri,
                    toSeq=plain_uri,
                    stop=stop,
                    nextSor=nextSor)

    @staticmethod
    def unicode_range(stop=False, nextSor=False):
        "u+123456-abc, the value is put into seq in lower case"
        def is_unicode_range(tokentype, value):
            return tokentype == PreDef.types.UNICODE_RANGE

        def lowered(token, tokens):
            return (token[0], token[1].lower())

        return Prod(name='unicode-range',
                    match=is_unicode_range,
                    toSeq=lowered,
                    stop=stop,
                    nextSor=nextSor)

    @staticmethod
    def variable(toSeq=None, stop=False, nextSor=False):
        "token with the normalized value ``var(``"
        def is_variable(tokentype, value):
            return u'var(' == cssutils.helper.normalize(value)

        return Prod(name=u'variable',
                    match=is_variable,
                    toSeq=toSeq,
                    stop=stop,
                    nextSor=nextSor)

    # used for MarginRule for now:
    @staticmethod
    def unknownrule(name=u'@', toStore=None):
        """@rule dummy (matches ATKEYWORD to remove unknown rule tokens from
        stream::

            @x;
            @x {...}

        no nested yet!

        All tokens up to and including the first ``;`` or ``}`` are used
        to build a ``cssutils.css.CSSUnknownRule``. If the tokens end
        before one of these is found the value is ``None``.
        """
        def rule(tokens):
            collected = []
            for current in tokens:
                collected.append(current)
                if current[1] in (u'}', u';'):
                    return cssutils.css.CSSUnknownRule(collected)

        def is_atkeyword(tokentype, value):
            return tokentype == u'ATKEYWORD'

        def to_unknownrule(token, tokens):
            # the ATKEYWORD token itself is part of the rule
            return (u'CSSUnknownRule', rule(pushtoken(token, tokens)))

        return Prod(name=name,
                    match=is_atkeyword,
                    toSeq=to_unknownrule,
                    toStore=toStore)
|
|
|