Package pyparsing ::
Module pyparsing
|
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 __doc__ = \
27 """
28 pyparsing module - Classes and methods to define and execute parsing grammars
29
30 The pyparsing module is an alternative approach to creating and executing simple grammars,
31 vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
32 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
33 provides a library of classes that you use to construct the grammar directly in Python.
34
35 Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"})::
36
37 from pyparsing import Word, alphas
38
39 # define grammar of a greeting
40 greet = Word( alphas ) + "," + Word( alphas ) + "!"
41
42 hello = "Hello, World!"
43 print hello, "->", greet.parseString( hello )
44
45 The program outputs the following::
46
47 Hello, World! -> ['Hello', ',', 'World', '!']
48
49 The Python representation of the grammar is quite readable, owing to the self-explanatory
50 class names, and the use of '+', '|' and '^' operators.
51
52 The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an
53 object with named attributes.
54
55 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
56 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
57 - quoted strings
58 - embedded comments
59 """
60
61 __version__ = "1.5.5"
62 __versionTime__ = "12 Aug 2010 03:56"
63 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
64
65 import string
66 from weakref import ref as wkref
67 import copy
68 import sys
69 import warnings
70 import re
71 import sre_constants
72
73
74 __all__ = [
75 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
76 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
77 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
78 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
79 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
80 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
81 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
82 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
83 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
84 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums',
85 'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
86 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
87 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
88 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
89 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
90 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
91 'indentedBlock', 'originalTextFor',
92 ]
93
94 """
95 Detect if we are running version 3.X and make appropriate changes
96 Robert A. Clark
97 """
_PY3K = sys.version_info[0] > 2
if _PY3K:
    # Python 3: bind the Python 2 names onto their Python 3 equivalents.
    _MAX_INT = sys.maxsize
    basestring = str
    unichr = chr
    _ustr = str
    _str2dict = set
    alphas = string.ascii_lowercase + string.ascii_uppercase
else:
    _MAX_INT = sys.maxint
    range = xrange

    def _ustr(obj):
        """Drop-in replacement for str(obj) that tries to be Unicode friendly.

        A unicode object is returned unchanged.  Otherwise str(obj) is tried
        first; if that raises UnicodeEncodeError, unicode(obj) is returned
        instead.
        """
        if isinstance(obj, unicode):
            return obj
        try:
            return str(obj)
        except UnicodeEncodeError:
            return unicode(obj)

    def _str2dict(strg):
        """Return a dict that maps every character of strg to 0."""
        return dict([(c, 0) for c in strg])

    alphas = string.lowercase + string.uppercase
141
142
# Builtins collected here are treated as one-argument callables when they are
# used as parse actions.
singleArgBuiltins = []
try:
    import __builtin__
except ImportError:
    # Python 3 renamed the module; keep the old local name so the loop below
    # is identical on both major versions.
    import builtins as __builtin__
for fname in "sum len enumerate sorted reversed list tuple set any all".split():
    try:
        singleArgBuiltins.append(getattr(__builtin__, fname))
    except AttributeError:
        # skip names that this interpreter's builtins module does not provide
        continue
150
def _xml_escape(data):
    """Escape &, <, >, ", ', etc. in a string of data.

    data is the string to escape; the escaped copy is returned.
    """
    # '&' comes first in from_symbols so that the ampersands introduced by the
    # later entity replacements are not escaped a second time.
    from_symbols = '&><"\''
    to_symbols = ['&' + s + ';' for s in "amp gt lt quot apos".split()]
    for from_, to_ in zip(from_symbols, to_symbols):
        data = data.replace(from_, to_)
    return data
160
163
# Character-class strings offered for building grammars.
nums = string.digits
hexnums = string.digits + "ABCDEFabcdef"
alphanums = alphas + string.digits
_bslash = chr(92)
# every printable character that is not whitespace
printables = "".join(filter(lambda ch: ch not in string.whitespace, string.printable))
169
171 """base exception class for all parsing runtime exceptions"""
172
173
def __init__( self, pstr, loc=0, msg=None, elem=None ):
    """Record where and why a parse failed.

    When msg is omitted, the first argument is taken as the message and the
    stored input string is empty.
    """
    if msg is None:
        # single-argument form: pstr actually carries the message text
        pstr, msg = "", pstr
    self.loc = loc
    self.msg = msg
    self.pstr = pstr
    self.parserElement = elem
183
185 """supported attributes by name are:
186 - lineno - returns the line number of the exception text
187 - col - returns the column number of the exception text
188 - line - returns the line containing the exception text
189 """
190 if( aname == "lineno" ):
191 return lineno( self.loc, self.pstr )
192 elif( aname in ("col", "column") ):
193 return col( self.loc, self.pstr )
194 elif( aname == "line" ):
195 return line( self.loc, self.pstr )
196 else:
197 raise AttributeError(aname)
198
200 return "%s (at char %d), (line:%d, col:%d)" % \
201 ( self.msg, self.loc, self.lineno, self.column )
215 return "loc msg pstr parserElement lineno col line " \
216 "markInputLine __str__ __repr__".split()
217
219 """exception thrown when parse expressions don't match class;
220 supported attributes by name are:
221 - lineno - returns the line number of the exception text
222 - col - returns the column number of the exception text
223 - line - returns the line containing the exception text
224 """
225 pass
226
228 """user-throwable exception thrown when inconsistent parse content
229 is found; stops all parsing immediately"""
230 pass
231
233 """just like C{ParseFatalException}, but thrown internally when an
234 C{ErrorStop} ('-' operator) indicates that parsing is to stop immediately because
235 an unbacktrackable syntax error has been found"""
239
240
241
242
243
244
245
246
247
248
249
250
251
252
254 """exception thrown by C{validate()} if the grammar could be improperly recursive"""
def __init__( self, parseElementList ):
    """Keep the given list of parse elements as parseElementTrace."""
    trace = parseElementList
    self.parseElementTrace = trace
257
259 return "RecursiveGrammarException: %s" % self.parseElementTrace
260
267 return repr(self.tup)
269 self.tup = (self.tup[0],i)
270
272 """Structured parse results, to provide multiple means of access to the parsed data:
273 - as a list (C{len(results)})
274 - by list index (C{results[0], results[1]}, etc.)
275 - by attribute (C{results.<resultsName>})
276 """
277
def __new__(cls, toklist, name=None, asList=True, modal=True ):
    """Return toklist itself when it already is an instance of cls.

    Otherwise a new object is allocated and flagged so that __init__ builds
    its state.
    """
    if not isinstance(toklist, cls):
        fresh = object.__new__(cls)
        fresh.__doinit = True
        return fresh
    return toklist
284
285
286
def __init__( self, toklist, name=None, asList=True, modal=True, isinstance=isinstance ):
    """Set up the list view and the named view of the parsed tokens.

    The list/dict state is created only while the flag set by __new__ is
    still on, so an existing object handed back by __new__ is not reset.
    When a non-empty name is given, the token(s) are also stored under that
    name.  The isinstance parameter defaults to the builtin of the same name.
    """
    if self.__doinit:
        self.__doinit = False
        self.__name = None
        self.__parent = None
        self.__accumNames = {}
        self.__tokdict = dict()
        if isinstance(toklist, list):
            # keep a private copy of the caller's list
            self.__toklist = toklist[:]
        else:
            self.__toklist = [toklist]

    if not name:
        # None, '' or any other false name: nothing to register
        return

    if not modal:
        self.__accumNames[name] = 0
    if isinstance(name, int):
        name = _ustr(name)
    self.__name = name

    if toklist in (None, '', []):
        return
    if isinstance(toklist, basestring):
        toklist = [ toklist ]

    if not asList:
        # store the first token; fall back to the whole value when it cannot
        # be indexed or stored that way
        try:
            self[name] = toklist[0]
        except (KeyError, TypeError, IndexError):
            self[name] = toklist
        return

    if isinstance(toklist, ParseResults):
        named = toklist.copy()
    else:
        named = ParseResults(toklist[0])
    self[name] = _ParseResultsWithOffset(named, 0)
    self[name].__name = name
319
321 if isinstance( i, (int,slice) ):
322 return self.__toklist[i]
323 else:
324 if i not in self.__accumNames:
325 return self.__tokdict[i][-1][0]
326 else:
327 return ParseResults([ v[0] for v in self.__tokdict[i] ])
328
330 if isinstance(v,_ParseResultsWithOffset):
331 self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
332 sub = v[0]
333 elif isinstance(k,int):
334 self.__toklist[k] = v
335 sub = v
336 else:
337 self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
338 sub = v
339 if isinstance(sub,ParseResults):
340 sub.__parent = wkref(self)
341
343 if isinstance(i,(int,slice)):
344 mylen = len( self.__toklist )
345 del self.__toklist[i]
346
347
348 if isinstance(i, int):
349 if i < 0:
350 i += mylen
351 i = slice(i, i+1)
352
353 removed = list(range(*i.indices(mylen)))
354 removed.reverse()
355
356 for name in self.__tokdict:
357 occurrences = self.__tokdict[name]
358 for j in removed:
359 for k, (value, position) in enumerate(occurrences):
360 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
361 else:
362 del self.__tokdict[i]
363
365 return k in self.__tokdict
366
def __len__( self ):
    """Number of tokens in the list view."""
    return len( self.__toklist )

def __bool__(self):
    """True when the list view holds at least one token."""
    return len( self.__toklist ) > 0

# Python 2 spelling of the truth-value hook
__nonzero__ = __bool__

def __iter__( self ):
    """Iterate over the tokens of the list view, first to last."""
    return iter( self.__toklist )

def __reversed__( self ):
    """Iterate over a reversed copy of the list view."""
    backwards = self.__toklist[:]
    backwards.reverse()
    return iter( backwards )
373 """Returns all named result keys."""
374 return self.__tokdict.keys()
375
def pop( self, index=-1 ):
    """Remove the item at index and hand it back to the caller.

    index defaults to -1 (the last item); it is used both to read the item
    (self[index]) and to delete it (del self[index]), so it may be a numeric
    position or a dict key.
    """
    removed = self[index]
    del self[index]
    return removed
382
def get(self, key, defaultValue=None):
    """Look up key among the named results.

    Returns self[key] when key is present; otherwise returns defaultValue,
    which is None unless the caller supplies one.
    """
    if key not in self:
        return defaultValue
    return self[key]
391
def insert( self, index, insStr ):
    """Insert insStr at position index in the list of parsed tokens."""
    self.__toklist.insert(index, insStr)
    # Named results remember token positions: every recorded position that is
    # greater than index moves up by one.
    for entries in self.__tokdict.values():
        for slot, (value, position) in enumerate(entries):
            entries[slot] = _ParseResultsWithOffset(value, position + int(position > index))
400
402 """Returns all named result keys and values as a list of tuples."""
403 return [(k,self[k]) for k in self.__tokdict]
404
406 """Returns all named result values."""
407 return [ v[-1][0] for v in self.__tokdict.values() ]
408
410 if True:
411 if name in self.__tokdict:
412 if name not in self.__accumNames:
413 return self.__tokdict[name][-1][0]
414 else:
415 return ParseResults([ v[0] for v in self.__tokdict[name] ])
416 else:
417 return ""
418 return None
419
421 ret = self.copy()
422 ret += other
423 return ret
424
426 if other.__tokdict:
427 offset = len(self.__toklist)
428 addoffset = ( lambda a: (a<0 and offset) or (a+offset) )
429 otheritems = other.__tokdict.items()
430 otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
431 for (k,vlist) in otheritems for v in vlist]
432 for k,v in otherdictitems:
433 self[k] = v
434 if isinstance(v[0],ParseResults):
435 v[0].__parent = wkref(self)
436
437 self.__toklist += other.__toklist
438 self.__accumNames.update( other.__accumNames )
439 return self
440
442 if isinstance(other,int) and other == 0:
443 return self.copy()
444
446 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
447
449 out = "["
450 sep = ""
451 for i in self.__toklist:
452 if isinstance(i, ParseResults):
453 out += sep + _ustr(i)
454 else:
455 out += sep + repr(i)
456 sep = ", "
457 out += "]"
458 return out
459
461 out = []
462 for item in self.__toklist:
463 if out and sep:
464 out.append(sep)
465 if isinstance( item, ParseResults ):
466 out += item._asStringList()
467 else:
468 out.append( _ustr(item) )
469 return out
470
472 """Returns the parse results as a nested list of matching tokens, all converted to strings."""
473 out = []
474 for res in self.__toklist:
475 if isinstance(res,ParseResults):
476 out.append( res.asList() )
477 else:
478 out.append( res )
479 return out
480
482 """Returns the named parse results as dictionary."""
483 return dict( self.items() )
484
486 """Returns a new copy of a C{ParseResults} object."""
487 ret = ParseResults( self.__toklist )
488 ret.__tokdict = self.__tokdict.copy()
489 ret.__parent = self.__parent
490 ret.__accumNames.update( self.__accumNames )
491 ret.__name = self.__name
492 return ret
493
def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
    """Render the parse results as an XML string.

    Tokens and nested results that have a results name are tagged with that
    name; unnamed ones are tagged ITEM, or left out when namedItemsOnly is
    true.  doctag, when given, is used as the enclosing tag.  With
    formatted false, no newlines or indentation are emitted.
    """
    # token position -> results name
    namedItems = {}
    for (k, vlist) in self.__tokdict.items():
        for v in vlist:
            namedItems[v[1]] = k

    if formatted:
        nl = "\n"
        nextLevelIndent = indent + " "
    else:
        nl = ""
        indent = ""
        nextLevelIndent = ""

    if doctag is not None:
        selfTag = doctag
    else:
        selfTag = self.__name
    if not selfTag:
        if namedItemsOnly:
            return ""
        selfTag = "ITEM"

    # flag handed down to nested results
    childNamedOnly = namedItemsOnly and doctag is None

    out = [ nl, indent, "<", selfTag, ">" ]
    for i, res in enumerate(self.__toklist):
        resTag = namedItems.get(i)
        if isinstance(res, ParseResults):
            out.append( res.asXML(resTag, childNamedOnly, nextLevelIndent, formatted) )
            continue
        # plain token
        if not resTag:
            if namedItemsOnly:
                continue
            resTag = "ITEM"
        xmlBodyText = _xml_escape(_ustr(res))
        out.extend([ nl, nextLevelIndent, "<", resTag, ">",
                     xmlBodyText,
                     "</", resTag, ">" ])

    out.extend([ nl, indent, "</", selfTag, ">" ])
    return "".join(out)
553
555 for k,vlist in self.__tokdict.items():
556 for v,loc in vlist:
557 if sub is v:
558 return k
559 return None
560
562 """Returns the results name for this token expression."""
563 if self.__name:
564 return self.__name
565 elif self.__parent:
566 par = self.__parent()
567 if par:
568 return par.__lookup(self)
569 else:
570 return None
571 elif (len(self) == 1 and
572 len(self.__tokdict) == 1 and
573 self.__tokdict.values()[0][0][1] in (0,-1)):
574 return self.__tokdict.keys()[0]
575 else:
576 return None
577
def dump(self,indent='',depth=0):
    """Build a diagnostic listing of this C{ParseResults}.

    The first line is the token list; each named result follows on its own
    line, in sorted order.  Named values that themselves have keys are dumped
    recursively one level deeper.  indent is prefixed to the lines so the
    text can be embedded in a nested display of other data.
    """
    pieces = [ indent + _ustr(self.asList()) ]
    named = self.items()
    named.sort()
    for key, val in named:
        pieces.append('\n')
        pieces.append( "%s%s- %s: " % (indent,(' '*depth), key) )
        if isinstance(val, ParseResults) and val.keys():
            pieces.append( val.dump(indent, depth+1) )
        else:
            pieces.append( _ustr(val) )
    return "".join(pieces)
598
599
601 return ( self.__toklist,
602 ( self.__tokdict.copy(),
603 self.__parent is not None and self.__parent() or None,
604 self.__accumNames,
605 self.__name ) )
606
608 self.__toklist = state[0]
609 self.__tokdict, \
610 par, \
611 inAccumNames, \
612 self.__name = state[1]
613 self.__accumNames = {}
614 self.__accumNames.update(inAccumNames)
615 if par is not None:
616 self.__parent = wkref(par)
617 else:
618 self.__parent = None
619
622
def col( loc, strg ):
    """Returns current column within a string, counting newlines as line separators.
    The first column is number 1.

    loc is the character offset into strg.  When the character at loc is
    itself a newline, 1 is returned.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
    on parsing strings containing <TAB>s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.
    """
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # rfind gives -1 when no newline precedes loc, which makes this loc + 1
    return loc - strg.rfind("\n", 0, loc)
634
def lineno( loc, strg ):
    """Returns current line number within a string, counting newlines as line separators.
    The first line is number 1.

    loc is the character offset into strg; only newlines before loc are
    counted.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
    on parsing strings containing <TAB>s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.
    """
    return strg.count("\n", 0, loc) + 1
646
def line( loc, strg ):
    """Return the line of strg that contains offset loc (newlines separate lines).

    The returned text excludes the surrounding newline characters.
    """
    start = strg.rfind("\n", 0, loc) + 1
    stop = strg.find("\n", loc)
    if stop < 0:
        # no newline at or after loc: the line runs to the end of the string
        return strg[start:]
    return strg[start:stop]
656
658 print ("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
659
661 print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
662
664 print ("Exception raised:" + _ustr(exc))
665
667 """'Do-nothing' debug action, to suppress debugging output during parsing."""
668 pass
669
671 """Abstract base level parser element class."""
672 DEFAULT_WHITE_CHARS = " \n\t\r"
673 verbose_stacktrace = False
674
679 setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
680
682 self.parseAction = list()
683 self.failAction = None
684
685 self.strRepr = None
686 self.resultsName = None
687 self.saveAsList = savelist
688 self.skipWhitespace = True
689 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
690 self.copyDefaultWhiteChars = True
691 self.mayReturnEmpty = False
692 self.keepTabs = False
693 self.ignoreExprs = list()
694 self.debug = False
695 self.streamlined = False
696 self.mayIndexError = True
697 self.errmsg = ""
698 self.modalResults = True
699 self.debugActions = ( None, None, None )
700 self.re = None
701 self.callPreparse = True
702 self.callDuringTry = False
703
705 """Make a copy of this C{ParserElement}. Useful for defining different parse actions
706 for the same parsing pattern, using copies of the original parse element."""
707 cpy = copy.copy( self )
708 cpy.parseAction = self.parseAction[:]
709 cpy.ignoreExprs = self.ignoreExprs[:]
710 if self.copyDefaultWhiteChars:
711 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
712 return cpy
713
715 """Define name for this expression, for use in debugging."""
716 self.name = name
717 self.errmsg = "Expected " + self.name
718 if hasattr(self,"exception"):
719 self.exception.msg = self.errmsg
720 return self
721
723 """Define name for referencing matching tokens as a nested attribute
724 of the returned parse results.
725 NOTE: this returns a *copy* of the original C{ParserElement} object;
726 this is so that the client can define a basic element, such as an
727 integer, and reference it in multiple places with different names.
728
729 You can also set results names using the abbreviated syntax,
730 C{expr("name")} in place of C{expr.setResultsName("name")} -
731 see L{I{__call__}<__call__>}.
732 """
733 newself = self.copy()
734 newself.resultsName = name
735 newself.modalResults = not listAllMatches
736 return newself
737
739 """Method to invoke the Python pdb debugger when this element is
740 about to be parsed. Set C{breakFlag} to True to enable, False to
741 disable.
742 """
743 if breakFlag:
744 _parseMethod = self._parse
745 def breaker(instring, loc, doActions=True, callPreParse=True):
746 import pdb
747 pdb.set_trace()
748 return _parseMethod( instring, loc, doActions, callPreParse )
749 breaker._originalParseMethod = _parseMethod
750 self._parse = breaker
751 else:
752 if hasattr(self._parse,"_originalParseMethod"):
753 self._parse = self._parse._originalParseMethod
754 return self
755
757 """Internal method used to decorate parse actions that take fewer than 3 arguments,
758 so that all parse actions can be called as C{f(s,l,t)}."""
759 STAR_ARGS = 4
760
761
762 if (f in singleArgBuiltins):
763 numargs = 1
764 else:
765 try:
766 restore = None
767 if isinstance(f,type):
768 restore = f
769 f = f.__init__
770 if not _PY3K:
771 codeObj = f.func_code
772 else:
773 codeObj = f.code
774 if codeObj.co_flags & STAR_ARGS:
775 return f
776 numargs = codeObj.co_argcount
777 if not _PY3K:
778 if hasattr(f,"im_self"):
779 numargs -= 1
780 else:
781 if hasattr(f,"__self__"):
782 numargs -= 1
783 if restore:
784 f = restore
785 except AttributeError:
786 try:
787 if not _PY3K:
788 call_im_func_code = f.__call__.im_func.func_code
789 else:
790 call_im_func_code = f.__code__
791
792
793
794 if call_im_func_code.co_flags & STAR_ARGS:
795 return f
796 numargs = call_im_func_code.co_argcount
797 if not _PY3K:
798 if hasattr(f.__call__,"im_self"):
799 numargs -= 1
800 else:
801 if hasattr(f.__call__,"__self__"):
802 numargs -= 0
803 except AttributeError:
804 if not _PY3K:
805 call_func_code = f.__call__.func_code
806 else:
807 call_func_code = f.__call__.__code__
808
809 if call_func_code.co_flags & STAR_ARGS:
810 return f
811 numargs = call_func_code.co_argcount
812 if not _PY3K:
813 if hasattr(f.__call__,"im_self"):
814 numargs -= 1
815 else:
816 if hasattr(f.__call__,"__self__"):
817 numargs -= 1
818
819
820
821 if numargs == 3:
822 return f
823 else:
824 if numargs > 3:
825 def tmp(s,l,t):
826 return f(f.__call__.__self__, s,l,t)
827 if numargs == 2:
828 def tmp(s,l,t):
829 return f(l,t)
830 elif numargs == 1:
831 def tmp(s,l,t):
832 return f(t)
833 else:
834 def tmp(s,l,t):
835 return f()
836 try:
837 tmp.__name__ = f.__name__
838 except (AttributeError,TypeError):
839
840 pass
841 try:
842 tmp.__doc__ = f.__doc__
843 except (AttributeError,TypeError):
844
845 pass
846 try:
847 tmp.__dict__.update(f.__dict__)
848 except (AttributeError,TypeError):
849
850 pass
851 return tmp
852 _normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)
853
855 """Define action to perform when successfully matching parse element definition.
856 Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
857 C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
858 - s = the original string being parsed (see note below)
859 - loc = the location of the matching substring
860 - toks = a list of the matched tokens, packaged as a ParseResults object
861 If the functions in fns modify the tokens, they can return them as the return
862 value from fn, and the modified list of tokens will replace the original.
863 Otherwise, fn does not need to return any value.
864
865 Note: the default parsing behavior is to expand tabs in the input string
866 before starting the parsing process. See L{I{parseString}<parseString>} for more information
867 on parsing strings containing <TAB>s, and suggested methods to maintain a
868 consistent view of the parsed string, the parse location, and line and column
869 positions within the parsed string.
870 """
871 self.parseAction = list(map(self._normalizeParseActionArgs, list(fns)))
872 self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"])
873 return self
874
876 """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
877 self.parseAction += list(map(self._normalizeParseActionArgs, list(fns)))
878 self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"])
879 return self
880
882 """Define action to perform if parsing fails at this expression.
883 Fail action fn is a callable function that takes the arguments
884 C{fn(s,loc,expr,err)} where:
885 - s = string being parsed
886 - loc = location where expression match was attempted and failed
887 - expr = the parse expression that failed
888 - err = the exception thrown
889 The function returns no value. It may throw C{ParseFatalException}
890 if it is desired to stop parsing immediately."""
891 self.failAction = fn
892 return self
893
895 exprsFound = True
896 while exprsFound:
897 exprsFound = False
898 for e in self.ignoreExprs:
899 try:
900 while 1:
901 loc,dummy = e._parse( instring, loc )
902 exprsFound = True
903 except ParseException:
904 pass
905 return loc
906
908 if self.ignoreExprs:
909 loc = self._skipIgnorables( instring, loc )
910
911 if self.skipWhitespace:
912 wt = self.whiteChars
913 instrlen = len(instring)
914 while loc < instrlen and instring[loc] in wt:
915 loc += 1
916
917 return loc
918
919 - def parseImpl( self, instring, loc, doActions=True ):
921
922 - def postParse( self, instring, loc, tokenlist ):
924
925
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Match this element against instring at loc.

    Steps: optional start debug action, optional preParse, parseImpl,
    postParse, wrap the tokens in ParseResults, then run the parse actions
    (when doActions or self.callDuringTry is set).  Returns the tuple
    (loc, retTokens).  An IndexError from parseImpl is turned into a
    ParseException positioned at len(instring), except on the path where
    neither tracing nor self.mayIndexError nor loc >= len(instring) applies,
    where it propagates unchanged.
    """
    debugging = ( self.debug )
    tracing = debugging or self.failAction

    # the start debug action runs before any pre-parsing
    if tracing and self.debugActions[0]:
        self.debugActions[0]( instring, loc, self )

    if callPreParse and self.callPreparse:
        preloc = self.preParse( instring, loc )
    else:
        preloc = loc
    tokensStart = preloc

    if tracing:
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException:
            # report the failure to the exception debug action and to the
            # fail action, then let it propagate
            err = sys.exc_info()[1]
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    elif self.mayIndexError or loc >= len(instring):
        try:
            loc,tokens = self.parseImpl( instring, preloc, doActions )
        except IndexError:
            raise ParseException( instring, len(instring), self.errmsg, self )
    else:
        loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        try:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                if tokens is not None:
                    # a parse action that returns a value replaces the results
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )
        except ParseBaseException:
            # only the debugging mode reports parse-action failures
            if debugging and self.debugActions[2]:
                err = sys.exc_info()[1]
                self.debugActions[2]( instring, tokensStart, self, err )
            raise

    if debugging and self.debugActions[1]:
        self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
1002
1008
1009
1010
1011 - def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
1027
1028 _parse = _parseNoCache
1029
1030
1031 _exprArgCache = {}
1034 resetCache = staticmethod(resetCache)
1035
1036 _packratEnabled = False
1038 """Enables "packrat" parsing, which adds memoizing to the parsing logic.
1039 Repeated parse attempts at the same string location (which happens
1040 often in many complex grammars) can immediately return a cached value,
1041 instead of re-executing parsing/validating code. Memoizing is done of
1042 both valid results and parsing exceptions.
1043
1044 This speedup may break existing programs that use parse actions that
1045 have side-effects. For this reason, packrat parsing is disabled when
1046 you first import pyparsing. To activate the packrat feature, your
1047 program must call the class method C{ParserElement.enablePackrat()}. If
1048 your program uses C{psyco} to "compile as you go", you must call
1049 C{enablePackrat} before calling C{psyco.full()}. If you do not do this,
1050 Python will crash. For best results, call C{enablePackrat()} immediately
1051 after importing pyparsing.
1052 """
1053 if not ParserElement._packratEnabled:
1054 ParserElement._packratEnabled = True
1055 ParserElement._parse = ParserElement._parseCache
1056 enablePackrat = staticmethod(enablePackrat)
1057
1059 """Execute the parse expression with the given string.
1060 This is the main interface to the client code, once the complete
1061 expression has been built.
1062
1063 If you want the grammar to require that the entire input string be
1064 successfully parsed, then set C{parseAll} to True (equivalent to ending
1065 the grammar with C{StringEnd()}).
1066
1067 Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
1068 in order to report proper column numbers in parse actions.
1069 If the input string contains tabs and
1070 the grammar uses parse actions that use the C{loc} argument to index into the
1071 string being parsed, you can ensure you have a consistent view of the input
1072 string by:
1073 - calling C{parseWithTabs} on your grammar before calling C{parseString}
1074 (see L{I{parseWithTabs}<parseWithTabs>})
1075 - define your parse action using the full C{(s,loc,toks)} signature, and
1076 reference the input string using the parse action's C{s} argument
1077 - explicitly expand the tabs in your input string before calling
1078 C{parseString}
1079 """
1080 ParserElement.resetCache()
1081 if not self.streamlined:
1082 self.streamline()
1083
1084 for e in self.ignoreExprs:
1085 e.streamline()
1086 if not self.keepTabs:
1087 instring = instring.expandtabs()
1088 try:
1089 loc, tokens = self._parse( instring, 0 )
1090 if parseAll:
1091
1092 se = StringEnd()
1093 se._parse( instring, loc )
1094 except ParseBaseException:
1095 if ParserElement.verbose_stacktrace:
1096 raise
1097 else:
1098
1099 exc = sys.exc_info()[1]
1100 raise exc
1101 else:
1102 return tokens
1103
1105 """Scan the input string for expression matches. Each match will return the
1106 matching tokens, start location, and end location. May be called with optional
1107 C{maxMatches} argument, to clip scanning after 'n' matches are found.
1108
1109 Note that the start and end locations are reported relative to the string
1110 being parsed. See L{I{parseString}<parseString>} for more information on parsing
1111 strings with embedded tabs."""
1112 if not self.streamlined:
1113 self.streamline()
1114 for e in self.ignoreExprs:
1115 e.streamline()
1116
1117 if not self.keepTabs:
1118 instring = _ustr(instring).expandtabs()
1119 instrlen = len(instring)
1120 loc = 0
1121 preparseFn = self.preParse
1122 parseFn = self._parse
1123 ParserElement.resetCache()
1124 matches = 0
1125 try:
1126 while loc <= instrlen and matches < maxMatches:
1127 try:
1128 preloc = preparseFn( instring, loc )
1129 nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
1130 except ParseException:
1131 loc = preloc+1
1132 else:
1133 if nextLoc > loc:
1134 matches += 1
1135 yield tokens, preloc, nextLoc
1136 loc = nextLoc
1137 else:
1138 loc = preloc+1
1139 except ParseBaseException:
1140 if ParserElement.verbose_stacktrace:
1141 raise
1142 else:
1143
1144 exc = sys.exc_info()[1]
1145 raise exc
1146
1179
1181 """Another extension to C{scanString}, simplifying the access to the tokens found
1182 to match the given parse expression. May be called with optional
1183 C{maxMatches} argument, to clip searching after 'n' matches are found.
1184 """
1185 try:
1186 return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
1187 except ParseBaseException:
1188 if ParserElement.verbose_stacktrace:
1189 raise
1190 else:
1191
1192 exc = sys.exc_info()[1]
1193 raise exc
1194
1196 """Implementation of + operator - returns And"""
1197 if isinstance( other, basestring ):
1198 other = Literal( other )
1199 if not isinstance( other, ParserElement ):
1200 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1201 SyntaxWarning, stacklevel=2)
1202 return None
1203 return And( [ self, other ] )
1204
1206 """Implementation of + operator when left operand is not a C{ParserElement}"""
1207 if isinstance( other, basestring ):
1208 other = Literal( other )
1209 if not isinstance( other, ParserElement ):
1210 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1211 SyntaxWarning, stacklevel=2)
1212 return None
1213 return other + self
1214
1216 """Implementation of - operator, returns C{And} with error stop"""
1217 if isinstance( other, basestring ):
1218 other = Literal( other )
1219 if not isinstance( other, ParserElement ):
1220 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1221 SyntaxWarning, stacklevel=2)
1222 return None
1223 return And( [ self, And._ErrorStop(), other ] )
1224
1226 """Implementation of - operator when left operand is not a C{ParserElement}"""
1227 if isinstance( other, basestring ):
1228 other = Literal( other )
1229 if not isinstance( other, ParserElement ):
1230 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1231 SyntaxWarning, stacklevel=2)
1232 return None
1233 return other - self
1234
1236 """Implementation of * operator, allows use of C{expr * 3} in place of
1237 C{expr + expr + expr}. Expressions may also be multiplied by a 2-integer
1238 tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples
1239 may also include C{None} as in:
1240 - C{expr*(n,None)} or C{expr*(n,)} is equivalent
1241 to C{expr*n + ZeroOrMore(expr)}
1242 (read as "at least n instances of C{expr}")
1243 - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
1244 (read as "0 to n instances of C{expr}")
1245 - C{expr*(None,None)} is equivalent to C{ZeroOrMore(expr)}
1246 - C{expr*(1,None)} is equivalent to C{OneOrMore(expr)}
1247
1248 Note that C{expr*(None,n)} does not raise an exception if
1249 more than n exprs exist in the input stream; that is,
1250 C{expr*(None,n)} does not enforce a maximum number of expr
1251 occurrences. If this behavior is desired, then write
1252 C{expr*(None,n) + ~expr}
1253
1254 """
1255 if isinstance(other,int):
1256 minElements, optElements = other,0
1257 elif isinstance(other,tuple):
1258 other = (other + (None, None))[:2]
1259 if other[0] is None:
1260 other = (0, other[1])
1261 if isinstance(other[0],int) and other[1] is None:
1262 if other[0] == 0:
1263 return ZeroOrMore(self)
1264 if other[0] == 1:
1265 return OneOrMore(self)
1266 else:
1267 return self*other[0] + ZeroOrMore(self)
1268 elif isinstance(other[0],int) and isinstance(other[1],int):
1269 minElements, optElements = other
1270 optElements -= minElements
1271 else:
1272 raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
1273 else:
1274 raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
1275
1276 if minElements < 0:
1277 raise ValueError("cannot multiply ParserElement by negative value")
1278 if optElements < 0:
1279 raise ValueError("second tuple value must be greater or equal to first tuple value")
1280 if minElements == optElements == 0:
1281 raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
1282
1283 if (optElements):
1284 def makeOptionalList(n):
1285 if n>1:
1286 return Optional(self + makeOptionalList(n-1))
1287 else:
1288 return Optional(self)
1289 if minElements:
1290 if minElements == 1:
1291 ret = self + makeOptionalList(optElements)
1292 else:
1293 ret = And([self]*minElements) + makeOptionalList(optElements)
1294 else:
1295 ret = makeOptionalList(optElements)
1296 else:
1297 if minElements == 1:
1298 ret = self
1299 else:
1300 ret = And([self]*minElements)
1301 return ret
1302
1305
1307 """Implementation of | operator - returns C{MatchFirst}"""
1308 if isinstance( other, basestring ):
1309 other = Literal( other )
1310 if not isinstance( other, ParserElement ):
1311 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1312 SyntaxWarning, stacklevel=2)
1313 return None
1314 return MatchFirst( [ self, other ] )
1315
1317 """Implementation of | operator when left operand is not a C{ParserElement}"""
1318 if isinstance( other, basestring ):
1319 other = Literal( other )
1320 if not isinstance( other, ParserElement ):
1321 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1322 SyntaxWarning, stacklevel=2)
1323 return None
1324 return other | self
1325
1327 """Implementation of ^ operator - returns C{Or}"""
1328 if isinstance( other, basestring ):
1329 other = Literal( other )
1330 if not isinstance( other, ParserElement ):
1331 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1332 SyntaxWarning, stacklevel=2)
1333 return None
1334 return Or( [ self, other ] )
1335
1337 """Implementation of ^ operator when left operand is not a C{ParserElement}"""
1338 if isinstance( other, basestring ):
1339 other = Literal( other )
1340 if not isinstance( other, ParserElement ):
1341 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1342 SyntaxWarning, stacklevel=2)
1343 return None
1344 return other ^ self
1345
1347 """Implementation of & operator - returns C{Each}"""
1348 if isinstance( other, basestring ):
1349 other = Literal( other )
1350 if not isinstance( other, ParserElement ):
1351 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1352 SyntaxWarning, stacklevel=2)
1353 return None
1354 return Each( [ self, other ] )
1355
1357 """Implementation of & operator when left operand is not a C{ParserElement}"""
1358 if isinstance( other, basestring ):
1359 other = Literal( other )
1360 if not isinstance( other, ParserElement ):
1361 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1362 SyntaxWarning, stacklevel=2)
1363 return None
1364 return other & self
1365
1367 """Implementation of ~ operator - returns C{NotAny}"""
1368 return NotAny( self )
1369
1371 """Shortcut for C{setResultsName}, with C{listAllMatches=default}::
1372 userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
1373 could be written as::
1374 userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
1375 """
1376 return self.setResultsName(name)
1377
1379 """Suppresses the output of this C{ParserElement}; useful to keep punctuation from
1380 cluttering up returned output.
1381 """
1382 return Suppress( self )
1383
1385 """Disables the skipping of whitespace before matching the characters in the
1386 C{ParserElement}'s defined pattern. This is normally only used internally by
1387 the pyparsing module, but may be needed in some whitespace-sensitive grammars.
1388 """
1389 self.skipWhitespace = False
1390 return self
1391
1393 """Overrides the default whitespace chars
1394 """
1395 self.skipWhitespace = True
1396 self.whiteChars = chars
1397 self.copyDefaultWhiteChars = False
1398 return self
1399
1401 """Overrides default behavior to expand <TAB>s to spaces before parsing the input string.
1402 Must be called before C{parseString} when the input grammar contains elements that
1403 match <TAB> characters."""
1404 self.keepTabs = True
1405 return self
1406
1408 """Define expression to be ignored (e.g., comments) while doing pattern
1409 matching; may be called repeatedly, to define multiple comment or other
1410 ignorable patterns.
1411 """
1412 if isinstance( other, Suppress ):
1413 if other not in self.ignoreExprs:
1414 self.ignoreExprs.append( other.copy() )
1415 else:
1416 self.ignoreExprs.append( Suppress( other.copy() ) )
1417 return self
1418
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Install the start/success/exception debug callbacks and turn debugging on.

    Any callback given as a false value (such as C{None}) is replaced by the
    module's corresponding default debug action.  Returns C{self} so calls
    can be chained.
    """
    if not startAction:
        startAction = _defaultStartDebugAction
    if not successAction:
        successAction = _defaultSuccessDebugAction
    if not exceptionAction:
        exceptionAction = _defaultExceptionDebugAction
    self.debugActions = ( startAction, successAction, exceptionAction )
    self.debug = True
    return self
1426
1428 """Enable display of debugging messages while doing pattern matching.
1429 Set C{flag} to True to enable, False to disable."""
1430 if flag:
1431 self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
1432 else:
1433 self.debug = False
1434 return self
1435
1438
1441
1443 self.streamlined = True
1444 self.strRepr = None
1445 return self
1446
1449
def validate( self, validateTrace=None ):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

    C{validateTrace} is accepted but not read by this implementation; the
    recursion check is always started with a fresh, empty list.
    """
    # The default is None instead of a list literal, so no mutable object is
    # shared between calls through the function's default.
    self.checkRecursion( [] )
1453
def parseFile( self, file_or_filename, parseAll=False ):
    """Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    The contents are handed to C{parseString} together with C{parseAll},
    and its result is returned.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # no usable read() - treat the argument as a filename
        f = open(file_or_filename, "rb")
        try:
            file_contents = f.read()
        finally:
            # release the handle even if the read itself fails
            f.close()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException:
        # re-raise the caught exception object from this frame
        exc = sys.exc_info()[1]
        raise exc
1471
1474
1476 if aname == "myException":
1477 self.myException = ret = self.getException();
1478 return ret;
1479 else:
1480 raise AttributeError("no such attribute " + aname)
1481
1483 if isinstance(other, ParserElement):
1484 return self is other or self.__dict__ == other.__dict__
1485 elif isinstance(other, basestring):
1486 try:
1487 self.parseString(_ustr(other), parseAll=True)
1488 return True
1489 except ParseBaseException:
1490 return False
1491 else:
1492 return super(ParserElement,self)==other
1493
1495 return not (self == other)
1496
1498 return hash(id(self))
1499
1501 return self == other
1502
1504 return not (self == other)
1505
1506
1507 -class Token(ParserElement):
1508 """Abstract C{ParserElement} subclass, for defining atomic matching patterns."""
1511
1512
1514 s = super(Token,self).setName(name)
1515 self.errmsg = "Expected " + self.name
1516
1517 return s
1518
1519
1521 """An empty token, will always match."""
1523 super(Empty,self).__init__()
1524 self.name = "Empty"
1525 self.mayReturnEmpty = True
1526 self.mayIndexError = False
1527
1528
1530 """A token that will never match."""
1532 super(NoMatch,self).__init__()
1533 self.name = "NoMatch"
1534 self.mayReturnEmpty = True
1535 self.mayIndexError = False
1536 self.errmsg = "Unmatchable token"
1537
1538
def parseImpl( self, instring, loc, doActions=True ):
    """Never match: raise this element's exception, tagged with C{loc} and C{instring}."""
    failure = self.myException
    failure.pstr = instring
    failure.loc = loc
    raise failure
1544
1545
1547 """Token to exactly match a specified string."""
1549 super(Literal,self).__init__()
1550 self.match = matchString
1551 self.matchLen = len(matchString)
1552 try:
1553 self.firstMatchChar = matchString[0]
1554 except IndexError:
1555 warnings.warn("null string passed to Literal; use Empty() instead",
1556 SyntaxWarning, stacklevel=2)
1557 self.__class__ = Empty
1558 self.name = '"%s"' % _ustr(self.match)
1559 self.errmsg = "Expected " + self.name
1560 self.mayReturnEmpty = False
1561
1562 self.mayIndexError = False
1563
1564
1565
1566
1567
def parseImpl( self, instring, loc, doActions=True ):
    """Match the literal text at C{loc}.

    Returns C{(loc + matchLen, match)} on success; otherwise raises this
    element's exception with C{loc} and C{pstr} filled in.
    """
    # cheap first-character test before the full comparison
    matched = instring[loc] == self.firstMatchChar
    if matched and self.matchLen != 1:
        matched = instring.startswith( self.match, loc )
    if matched:
        return loc+self.matchLen, self.match

    err = self.myException
    err.loc = loc
    err.pstr = instring
    raise err
1577 _L = Literal
1578
1580 """Token to exactly match a specified string as a keyword, that is, it must be
1581 immediately followed by a non-keyword character. Compare with C{Literal}::
1582 Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
1583 Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
1584 Accepts two optional constructor arguments in addition to the keyword string:
1585 C{identChars} is a string of characters that would be valid identifier characters,
1586 defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive
1587 matching, default is False.
1588 """
1589 DEFAULT_KEYWORD_CHARS = alphanums+"_$"
1590
1592 super(Keyword,self).__init__()
1593 self.match = matchString
1594 self.matchLen = len(matchString)
1595 try:
1596 self.firstMatchChar = matchString[0]
1597 except IndexError:
1598 warnings.warn("null string passed to Keyword; use Empty() instead",
1599 SyntaxWarning, stacklevel=2)
1600 self.name = '"%s"' % self.match
1601 self.errmsg = "Expected " + self.name
1602 self.mayReturnEmpty = False
1603
1604 self.mayIndexError = False
1605 self.caseless = caseless
1606 if caseless:
1607 self.caselessmatch = matchString.upper()
1608 identChars = identChars.upper()
1609 self.identChars = _str2dict(identChars)
1610
def parseImpl( self, instring, loc, doActions=True ):
    """Match the keyword at C{loc}, requiring non-identifier characters on both sides.

    In caseless mode the comparison and the neighbour checks are done on
    upper-cased text.  Returns C{(loc + matchLen, match)}; on failure raises
    this element's exception with C{loc} and C{pstr} filled in.
    """
    if self.caseless:
        found = ( instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch )
        if found:
            # the character after the keyword (if any) must not be an identifier char
            found = ( loc >= len(instring)-self.matchLen or
                      instring[loc+self.matchLen].upper() not in self.identChars )
        if found:
            # likewise for the character before the keyword
            found = ( loc == 0 or instring[loc-1].upper() not in self.identChars )
    else:
        found = ( instring[loc] == self.firstMatchChar )
        if found and self.matchLen != 1:
            found = instring.startswith( self.match, loc )
        if found:
            found = ( loc >= len(instring)-self.matchLen or
                      instring[loc+self.matchLen] not in self.identChars )
        if found:
            found = ( loc == 0 or instring[loc-1] not in self.identChars )

    if found:
        return loc+self.matchLen, self.match

    err = self.myException
    err.loc = loc
    err.pstr = instring
    raise err
1628
1633
1638 setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
1639
1641 """Token to match a specified string, ignoring case of letters.
1642 Note: the matched results will always be in the case of the given
1643 match string, NOT the case of the input text.
1644 """
1646 super(CaselessLiteral,self).__init__( matchString.upper() )
1647
1648 self.returnString = matchString
1649 self.name = "'%s'" % self.returnString
1650 self.errmsg = "Expected " + self.name
1651
1652
def parseImpl( self, instring, loc, doActions=True ):
    """Match the stored text at C{loc}, ignoring case.

    The slice of input is upper-cased and compared with C{self.match}; on
    success the result is C{(loc + matchLen, returnString)}.  Otherwise this
    element's exception is raised with C{loc} and C{pstr} filled in.
    """
    stop = loc+self.matchLen
    candidate = instring[ loc:stop ].upper()
    if candidate != self.match:
        err = self.myException
        err.loc = loc
        err.pstr = instring
        raise err
    return stop, self.returnString
1661
1665
def parseImpl( self, instring, loc, doActions=True ):
    """Match the keyword at C{loc}, ignoring case.

    The upper-cased input slice must equal C{self.caselessmatch}, and the
    character following it (if there is one) must not be an identifier
    character.  Returns C{(loc + matchLen, match)}, or raises this element's
    exception with C{loc} and C{pstr} filled in.
    """
    found = ( instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch )
    if found:
        found = ( loc >= len(instring)-self.matchLen or
                  instring[loc+self.matchLen].upper() not in self.identChars )
    if found:
        return loc+self.matchLen, self.match

    err = self.myException
    err.loc = loc
    err.pstr = instring
    raise err
1675
1677 """Token for matching words composed of allowed character sets.
1678 Defined with string containing all allowed initial characters,
1679 an optional string containing allowed body characters (if omitted,
1680 defaults to the initial character set), and an optional minimum,
1681 maximum, and/or exact length. The default value for C{min} is 1 (a
1682 minimum value < 1 is not valid); the default values for C{max} and C{exact}
1683 are 0, meaning no maximum or exact length restriction.
1684 """
def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ):
    """Define a word from its allowed initial and body character sets.

    C{bodyChars} falls back to C{initChars} when omitted or empty.  C{min}
    must be at least 1 (C{ValueError} otherwise); C{max} and C{exact} of 0
    mean "no limit"; a positive C{exact} overrides both C{min} and C{max}.
    When neither character set contains a space and the length arguments
    are all at their defaults, an equivalent regular expression is compiled
    into C{self.re} for C{parseImpl} to use.
    """
    super(Word,self).__init__()
    self.initCharsOrig = initChars
    self.initChars = _str2dict(initChars)
    if bodyChars :
        self.bodyCharsOrig = bodyChars
        self.bodyChars = _str2dict(bodyChars)
    else:
        self.bodyCharsOrig = initChars
        self.bodyChars = _str2dict(initChars)

    self.maxSpecified = max > 0

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

    self.minLen = min

    if max > 0:
        self.maxLen = max
    else:
        self.maxLen = _MAX_INT

    if exact > 0:
        self.maxLen = exact
        self.minLen = exact

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name

    self.mayIndexError = False
    self.asKeyword = asKeyword

    if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
        if self.bodyCharsOrig == self.initCharsOrig:
            self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
        elif len(self.initCharsOrig) == 1:
            # The escaped literal form is only valid for a single initial
            # character; a multi-character initial set must go through the
            # character-class branch below, or the whole set would be
            # matched as a literal sequence.
            self.reString = "%s[%s]*" % \
                                  (re.escape(self.initCharsOrig),
                                  _escapeRegexRangeChars(self.bodyCharsOrig),)
        else:
            self.reString = "[%s][%s]*" % \
                                  (_escapeRegexRangeChars(self.initCharsOrig),
                                  _escapeRegexRangeChars(self.bodyCharsOrig),)
        if self.asKeyword:
            self.reString = r"\b"+self.reString+r"\b"
        try:
            self.re = re.compile( self.reString )
        except Exception:
            # best effort: if the pattern cannot be compiled, leave no regex
            # so parseImpl takes its non-regex path (KeyboardInterrupt and
            # SystemExit are no longer swallowed)
            self.re = None
1735
def parseImpl( self, instring, loc, doActions=True ):
    """Match a word at C{loc} and return C{(end, text)}.

    Uses the compiled regex when C{self.re} is set; otherwise scans one
    initial character followed by body characters, then applies the
    minimum-length, maximum-length and keyword checks.  Any failure raises
    this element's exception with C{loc} and C{pstr} filled in.
    """
    def reject( where ):
        # fill in and raise the element's exception
        err = self.myException
        err.loc = where
        err.pstr = instring
        raise err

    if self.re:
        found = self.re.match(instring,loc)
        if not found:
            reject( loc )
        return found.end(), found.group()

    if instring[ loc ] not in self.initChars:
        reject( loc )

    begin = loc
    total = len(instring)
    body = self.bodyChars
    limit = min( begin + self.maxLen, total )
    loc = begin + 1
    while loc < limit and instring[loc] in body:
        loc += 1

    # too short, or (with an explicit max) more body characters still follow
    failed = loc - begin < self.minLen
    if not failed and self.maxSpecified:
        failed = loc < total and instring[loc] in body
    if not failed and self.asKeyword:
        # as a keyword, the word may not touch body characters on either side
        failed = ( (begin>0 and instring[begin-1] in body) or
                   (loc<total and instring[loc] in body) )

    if failed:
        reject( loc )

    return loc, instring[begin:loc]
1780
1782 try:
1783 return super(Word,self).__str__()
1784 except:
1785 pass
1786
1787
1788 if self.strRepr is None:
1789
1790 def charsAsStr(s):
1791 if len(s)>4:
1792 return s[:4]+"..."
1793 else:
1794 return s
1795
1796 if ( self.initCharsOrig != self.bodyCharsOrig ):
1797 self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
1798 else:
1799 self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
1800
1801 return self.strRepr
1802
1803
1805 """Token for matching strings that match a given regular expression.
1806 Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
1807 """
1808 compiledREtype = type(re.compile("[A-Z]"))
def __init__( self, pattern, flags=0):
    """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags.

    C{pattern} may also be an already compiled regular expression object,
    which is used directly; in that case C{flags} is only recorded on this
    element and is not applied to the compiled object.

    Raises C{ValueError} for any other type of C{pattern}.  An invalid
    pattern string triggers a C{SyntaxWarning} and the compile error is
    re-raised.
    """
    super(Regex,self).__init__()

    if isinstance(pattern, basestring):
        if len(pattern) == 0:
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise

    elif isinstance(pattern, Regex.compiledREtype):
        self.re = pattern
        # Record the regex source text.  str() of a compiled pattern yields
        # the object's repr, not the expression it was compiled from.
        self.pattern = \
        self.reString = pattern.pattern
        self.flags = flags

    else:
        raise ValueError("Regex may only be constructed with a string or a compiled RE object")

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name

    self.mayIndexError = False
    self.mayReturnEmpty = True
1843
def parseImpl( self, instring, loc, doActions=True ):
    """Apply the compiled regex at C{loc}.

    On a match, returns C{(end, results)} where C{results} is a
    C{ParseResults} holding the matched text, with each named group also
    stored under its group name.  Otherwise raises this element's exception
    with C{loc} and C{pstr} filled in.
    """
    found = self.re.match(instring,loc)
    if not found:
        err = self.myException
        err.loc = loc
        err.pstr = instring
        raise err

    tokens = ParseResults(found.group())
    for groupname, value in found.groupdict().items():
        tokens[groupname] = value
    return found.end(), tokens
1859
1861 try:
1862 return super(Regex,self).__str__()
1863 except:
1864 pass
1865
1866 if self.strRepr is None:
1867 self.strRepr = "Re:(%s)" % repr(self.pattern)
1868
1869 return self.strRepr
1870
1871
1873 """Token for matching strings that are delimited by quoting characters.
1874 """
def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
    """
       Defined with the following parameters:
        - quoteChar - string of one or more characters defining the quote delimiting string
        - escChar - character to escape quotes, typically backslash (default=None)
        - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
        - multiline - boolean indicating whether quotes can span multiple lines (default=False)
        - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
        - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)

       Raises C{SyntaxError} (after a C{SyntaxWarning}) if C{quoteChar} or
       C{endQuoteChar} is empty once surrounding whitespace is stripped.
       The constructor assembles a regular expression for the whole quoted
       string and compiles it into C{self.re}.
    """
    super(QuotedString,self).__init__()

    # surrounding whitespace is stripped from the delimiters before use
    quoteChar = quoteChar.strip()
    if len(quoteChar) == 0:
        warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
        raise SyntaxError()

    if endQuoteChar is None:
        endQuoteChar = quoteChar
    else:
        endQuoteChar = endQuoteChar.strip()
        if len(endQuoteChar) == 0:
            warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

    self.quoteChar = quoteChar
    self.quoteCharLen = len(quoteChar)
    self.firstQuoteChar = quoteChar[0]
    self.endQuoteChar = endQuoteChar
    self.endQuoteCharLen = len(endQuoteChar)
    self.escChar = escChar
    self.escQuote = escQuote
    self.unquoteResults = unquoteResults

    # Pattern start: the opening quote, then a repeated group whose first
    # alternative is "any character except the first end-quote character and
    # the escape character" (and, when not multiline, except \n and \r).
    if multiline:
        self.flags = re.MULTILINE | re.DOTALL
        self.pattern = r'%s(?:[^%s%s]' % \
            ( re.escape(self.quoteChar),
              _escapeRegexRangeChars(self.endQuoteChar[0]),
              (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
    else:
        self.flags = 0
        self.pattern = r'%s(?:[^%s\n\r%s]' % \
            ( re.escape(self.quoteChar),
              _escapeRegexRangeChars(self.endQuoteChar[0]),
              (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
    # With a multi-character end quote, also allow each proper prefix of the
    # end quote followed by a character that does not continue it.
    if len(self.endQuoteChar) > 1:
        self.pattern += (
            '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                           _escapeRegexRangeChars(self.endQuoteChar[i]))
                                for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')'
            )
    # further alternatives: the escaped-quote sequence, and the escape
    # character followed by any one character
    if escQuote:
        self.pattern += (r'|(?:%s)' % re.escape(escQuote))
    if escChar:
        self.pattern += (r'|(?:%s.)' % re.escape(escChar))
        # used by parseImpl to drop the escape character when unquoting
        self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
    # close the repeated group and require the full end quote
    self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

    try:
        self.re = re.compile(self.pattern, self.flags)
        self.reString = self.pattern
    except sre_constants.error:
        # warn, then let the compile error propagate to the caller
        warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
            SyntaxWarning, stacklevel=2)
        raise

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name

    self.mayIndexError = False
    self.mayReturnEmpty = True
1948
def parseImpl( self, instring, loc, doActions=True ):
    """Match a quoted string at C{loc} and return C{(end, text)}.

    When C{self.unquoteResults} is set, the opening and closing quote
    strings are stripped, escape characters are removed (keeping the
    character they precede), and each escaped-quote sequence is replaced by
    the end-quote string.  On failure raises this element's exception with
    C{loc} and C{pstr} filled in.
    """
    # cheap first-character test before running the regex
    result = None
    if instring[loc] == self.firstQuoteChar:
        result = self.re.match(instring,loc)
    if not result:
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    loc = result.end()
    ret = result.group()

    if self.unquoteResults:

        # strip off quotes
        ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

        if isinstance(ret,basestring):
            # replace escaped characters; the template is a raw string so
            # that \g is not read as an (invalid) string escape sequence
            if self.escChar:
                ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

            # replace escaped quotes
            if self.escQuote:
                ret = ret.replace(self.escQuote, self.endQuoteChar)

    return loc, ret
1975
1977 try:
1978 return super(QuotedString,self).__str__()
1979 except:
1980 pass
1981
1982 if self.strRepr is None:
1983 self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
1984
1985 return self.strRepr
1986
1987
1989 """Token for matching words composed of characters *not* in a given set.
1990 Defined with string containing all disallowed characters, and an optional
1991 minimum, maximum, and/or exact length. The default value for C{min} is 1 (a
1992 minimum value < 1 is not valid); the default values for C{max} and C{exact}
1993 are 0, meaning no maximum or exact length restriction.
1994 """
def __init__( self, notChars, min=1, max=0, exact=0 ):
    """Define a token matching runs of characters that are not in C{notChars}.

    C{min} must be at least 1 (C{ValueError} otherwise).  A C{max} of 0
    means no upper limit; a positive C{exact} sets both limits.  Leading
    whitespace skipping is switched off for this element.
    """
    super(CharsNotIn,self).__init__()
    self.skipWhitespace = False
    self.notChars = notChars

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

    if exact > 0:
        # an exact length fixes both bounds
        self.minLen = exact
        self.maxLen = exact
    else:
        self.minLen = min
        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayReturnEmpty = ( self.minLen == 0 )
    self.mayIndexError = False
2019
def parseImpl( self, instring, loc, doActions=True ):
    """Consume characters not in C{self.notChars}, starting at C{loc}.

    Returns C{(end, text)}.  Raises this element's exception (with C{loc}
    and C{pstr} filled in) if the first character is excluded or fewer than
    C{self.minLen} characters were consumed.
    """
    excluded = self.notChars
    failed_at = None

    if instring[loc] in excluded:
        failed_at = loc
    else:
        begin = loc
        stop = min( begin+self.maxLen, len(instring) )
        loc = begin + 1
        while loc < stop and instring[loc] not in excluded:
            loc += 1
        if loc - begin < self.minLen:
            failed_at = loc

    if failed_at is not None:
        err = self.myException
        err.loc = failed_at
        err.pstr = instring
        raise err

    return loc, instring[begin:loc]
2044
2046 try:
2047 return super(CharsNotIn, self).__str__()
2048 except:
2049 pass
2050
2051 if self.strRepr is None:
2052 if len(self.notChars) > 4:
2053 self.strRepr = "!W:(%s...)" % self.notChars[:4]
2054 else:
2055 self.strRepr = "!W:(%s)" % self.notChars
2056
2057 return self.strRepr
2058
2060 """Special matching class for matching whitespace. Normally, whitespace is ignored
2061 by pyparsing grammars. This class is included when some whitespace structures
2062 are significant. Define with a string containing the whitespace characters to be
2063 matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments,
2064 as defined for the C{Word} class."""
2065 whiteStrs = {
2066 " " : "<SPC>",
2067 "\t": "<TAB>",
2068 "\n": "<LF>",
2069 "\r": "<CR>",
2070 "\f": "<FF>",
2071 }
def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
    """Define a token that matches the whitespace characters in C{ws}.

    The characters being matched are removed from this element's own
    skippable whitespace.  A C{max} of 0 means no upper limit; a positive
    C{exact} sets both the minimum and maximum length.
    """
    super(White,self).__init__()
    self.matchWhite = ws
    # do not skip over the very characters this element has to match
    skippable = [c for c in self.whiteChars if c not in self.matchWhite]
    self.setWhitespaceChars( "".join(skippable) )

    labels = [White.whiteStrs[c] for c in self.matchWhite]
    self.name = ("".join(labels))
    self.mayReturnEmpty = True
    self.errmsg = "Expected " + self.name

    if exact > 0:
        self.minLen = exact
        self.maxLen = exact
    else:
        self.minLen = min
        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT
2092
def parseImpl( self, instring, loc, doActions=True ):
    """Consume a run of the configured whitespace characters starting at C{loc}.

    Returns C{(end, text)}.  Raises this element's exception (with C{loc}
    and C{pstr} filled in) if the first character is not one of
    C{self.matchWhite} or the run is shorter than C{self.minLen}.
    """
    failed_at = None

    if instring[ loc ] not in self.matchWhite:
        failed_at = loc
    else:
        begin = loc
        stop = min( begin + self.maxLen, len(instring) )
        loc = begin + 1
        while loc < stop and instring[loc] in self.matchWhite:
            loc += 1
        if loc - begin < self.minLen:
            failed_at = loc

    if failed_at is not None:
        err = self.myException
        err.loc = failed_at
        err.pstr = instring
        raise err

    return loc, instring[begin:loc]
2115
2116
2119 super(_PositionToken,self).__init__()
2120 self.name=self.__class__.__name__
2121 self.mayReturnEmpty = True
2122 self.mayIndexError = False
2123
2125 """Token to advance to a specific column of input text; useful for tabular report scraping."""
2129
2131 if col(loc,instring) != self.col:
2132 instrlen = len(instring)
2133 if self.ignoreExprs:
2134 loc = self._skipIgnorables( instring, loc )
2135 while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
2136 loc += 1
2137 return loc
2138
def parseImpl( self, instring, loc, doActions=True ):
    """Advance from C{loc} to the target column and return the text skipped.

    Raises C{ParseException} if the current column is already past
    C{self.col}.
    """
    current = col( loc, instring )
    if current > self.col:
        raise ParseException( instring, loc, "Text not in expected column", self )
    target = loc + self.col - current
    return target, instring[ loc:target ]
2146
2148 """Matches if current position is at the beginning of a line within the parse string"""
2153
2154
2156 preloc = super(LineStart,self).preParse(instring,loc)
2157 if instring[preloc] == "\n":
2158 loc += 1
2159 return loc
2160
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed with no tokens when C{loc} is at the start of a line.

    That is: C{loc} is 0, or equals the position C{preParse} reaches from
    the start of the string, or the preceding character is a newline.
    Otherwise raises this element's exception with C{loc} and C{pstr}
    filled in.
    """
    at_line_start = ( loc == 0 )
    if not at_line_start:
        at_line_start = ( loc == self.preParse( instring, 0 ) )
    if not at_line_start:
        at_line_start = ( instring[loc-1] == "\n" )

    if at_line_start:
        return loc, []

    err = self.myException
    err.loc = loc
    err.pstr = instring
    raise err
2171
2173 """Matches if current position is at the end of a line within the parse string"""
2178
2179
def parseImpl( self, instring, loc, doActions=True ):
    """Match an end of line at C{loc}.

    At a newline character, returns C{(loc + 1, "\\n")}.  Exactly at the end
    of the string, returns C{(loc + 1, [])}.  Anywhere else, raises this
    element's exception with C{loc} and C{pstr} filled in.
    """
    size = len(instring)
    if loc == size:
        return loc+1, []
    if loc < size and instring[loc] == "\n":
        return loc+1, "\n"

    err = self.myException
    err.loc = loc
    err.pstr = instring
    raise err
2197
2199 """Matches if current position is at the beginning of the parse string"""
2203
2204
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed with no tokens when C{loc} is at the start of the parse string.

    C{loc} qualifies if it is 0 or equals the position C{preParse} reaches
    from index 0.  Otherwise raises this element's exception with C{loc}
    and C{pstr} filled in.
    """
    if loc == 0 or loc == self.preParse( instring, 0 ):
        return loc, []

    err = self.myException
    err.loc = loc
    err.pstr = instring
    raise err
2215
2217 """Matches if current position is at the end of the parse string"""
2221
2222
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed with no tokens only at or beyond the end of the parse string.

    Exactly at the end, returns C{(loc + 1, [])}; past the end, returns
    C{(loc, [])}.  Before the end, raises this element's exception with
    C{loc} and C{pstr} filled in.
    """
    size = len(instring)
    if loc < size:
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
    if loc == size:
        return loc+1, []
    # loc > size; the former trailing "else: raise" branch could never run
    return loc, []
2239
2241 """Matches if the current position is at the beginning of a Word, and
2242 is not preceded by any character in a given set of wordChars
2243 (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
2244 use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
2245 the string being parsed, or at the beginning of a line.
2246 """
2248 super(WordStart,self).__init__()
2249 self.wordChars = _str2dict(wordChars)
2250 self.errmsg = "Not at the start of a word"
2251
def parseImpl(self, instring, loc, doActions=True):
    """Succeed, consuming nothing, when C{loc} begins a word.

    Offset 0 always qualifies.  Elsewhere the character before C{loc} must
    not be in C{self.wordChars} and the character at C{loc} must be.
    Returns C{(loc, [])}; otherwise raises C{self.myException} stamped with
    C{loc} and C{instring}.
    """
    if loc == 0:
        return loc, []

    if (instring[loc - 1] in self.wordChars or
            instring[loc] not in self.wordChars):
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
    return loc, []
2261
2263 """Matches if the current position is at the end of a Word, and
2264 is not followed by any character in a given set of wordChars
2265 (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
2266 use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
2267 the string being parsed, or at the end of a line.
2268 """
2270 super(WordEnd,self).__init__()
2271 self.wordChars = _str2dict(wordChars)
2272 self.skipWhitespace = False
2273 self.errmsg = "Not at the end of a word"
2274
def parseImpl(self, instring, loc, doActions=True):
    """Succeed, consuming nothing, when C{loc} is just past the end of a word.

    An empty input, or any C{loc} at or beyond C{len(instring)}, always
    qualifies.  Elsewhere the character at C{loc} must not be in
    C{self.wordChars} and the character before it must be.  Returns
    C{(loc, [])}; otherwise raises C{self.myException} stamped with C{loc}
    and C{instring}.
    """
    total = len(instring)
    if total == 0 or loc >= total:
        return loc, []

    if (instring[loc] in self.wordChars or
            instring[loc - 1] not in self.wordChars):
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
    return loc, []
2286
2287
2289 """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
def __init__(self, exprs, savelist=False):
    """Store the contained expressions as the list C{self.exprs}.

    C{exprs} may be a list (stored as-is, not copied), a string (wrapped in
    a single C{Literal}), any other iterable (materialized with C{list}),
    or a single non-iterable object (wrapped in a one-element list).
    C{savelist} is forwarded to the base-class constructor.
    """
    super(ParseExpression, self).__init__(savelist)

    if isinstance(exprs, list):
        members = exprs
    elif isinstance(exprs, basestring):
        members = [Literal(exprs)]
    else:
        try:
            members = list(exprs)
        except TypeError:
            # not iterable: treat the argument as a single expression
            members = [exprs]

    self.exprs = members
    self.callPreparse = False
2302
2304 return self.exprs[i]
2305
2307 self.exprs.append( other )
2308 self.strRepr = None
2309 return self
2310
2312 """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
2313 all contained expressions."""
2314 self.skipWhitespace = False
2315 self.exprs = [ e.copy() for e in self.exprs ]
2316 for e in self.exprs:
2317 e.leaveWhitespace()
2318 return self
2319
2321 if isinstance( other, Suppress ):
2322 if other not in self.ignoreExprs:
2323 super( ParseExpression, self).ignore( other )
2324 for e in self.exprs:
2325 e.ignore( self.ignoreExprs[-1] )
2326 else:
2327 super( ParseExpression, self).ignore( other )
2328 for e in self.exprs:
2329 e.ignore( self.ignoreExprs[-1] )
2330 return self
2331
2333 try:
2334 return super(ParseExpression,self).__str__()
2335 except:
2336 pass
2337
2338 if self.strRepr is None:
2339 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
2340 return self.strRepr
2341
2343 super(ParseExpression,self).streamline()
2344
2345 for e in self.exprs:
2346 e.streamline()
2347
2348
2349
2350
2351 if ( len(self.exprs) == 2 ):
2352 other = self.exprs[0]
2353 if ( isinstance( other, self.__class__ ) and
2354 not(other.parseAction) and
2355 other.resultsName is None and
2356 not other.debug ):
2357 self.exprs = other.exprs[:] + [ self.exprs[1] ]
2358 self.strRepr = None
2359 self.mayReturnEmpty |= other.mayReturnEmpty
2360 self.mayIndexError |= other.mayIndexError
2361
2362 other = self.exprs[-1]
2363 if ( isinstance( other, self.__class__ ) and
2364 not(other.parseAction) and
2365 other.resultsName is None and
2366 not other.debug ):
2367 self.exprs = self.exprs[:-1] + other.exprs[:]
2368 self.strRepr = None
2369 self.mayReturnEmpty |= other.mayReturnEmpty
2370 self.mayIndexError |= other.mayIndexError
2371
2372 return self
2373
2377
def validate(self, validateTrace=[]):
    """Validate every contained expression, then check this one for recursion.

    Each member of C{self.exprs} has its own C{validate} called with a new
    trace list consisting of C{validateTrace} followed by C{self}; the
    caller's list is not modified.  Afterwards C{self.checkRecursion([])}
    is invoked.
    """
    trace = validateTrace[:] + [self]
    for member in self.exprs:
        member.validate(trace)
    self.checkRecursion([])
2383
2384 -class And(ParseExpression):
2385 """Requires all given C{ParseExpressions} to be found in the given order.
2386 Expressions may be separated by whitespace.
2387 May be constructed using the '+' operator.
2388 """
2389
2394
def __init__(self, exprs, savelist=True):
    """Build a sequence expression from C{exprs}.

    C{exprs} and C{savelist} are forwarded to the base-class constructor,
    which normalizes C{exprs} into the list C{self.exprs}.  The sequence
    may return empty only if every member may.  Whitespace handling
    (C{whiteChars} and C{skipWhitespace}) is copied from the first member.

    The first member is read from C{self.exprs} rather than from the raw
    C{exprs} argument, so that the argument forms the base constructor
    accepts (a string, a non-list iterable, or a single expression) work
    here too instead of failing on C{exprs[0]}.
    """
    super(And, self).__init__(exprs, savelist)

    self.mayReturnEmpty = True
    for e in self.exprs:
        if not e.mayReturnEmpty:
            self.mayReturnEmpty = False
            break

    first = self.exprs[0]
    self.setWhitespaceChars(first.whiteChars)
    # assigned after setWhitespaceChars, in the same order as before
    self.skipWhitespace = first.skipWhitespace
    self.callPreparse = True
2405
def parseImpl(self, instring, loc, doActions=True):
    """Parse every member expression in order, accumulating their tokens.

    The first member is parsed with C{callPreParse=False}; the rest use the
    default.  Members that are instances of C{And._ErrorStop} are not
    parsed: once one has been seen, a C{ParseBaseException} from any later
    member is re-raised wrapped in C{ParseSyntaxException} (an existing
    C{ParseSyntaxException} passes through unchanged), and an C{IndexError}
    becomes a C{ParseSyntaxException} located at the end of the input.

    Returns C{(loc, tokens)} where C{loc} is the position after the last
    member and C{tokens} is the first member's result extended in place.
    """
    loc, collected = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False)
    pastErrorStop = False
    for member in self.exprs[1:]:
        if isinstance(member, And._ErrorStop):
            pastErrorStop = True
            continue

        if not pastErrorStop:
            loc, found = member._parse(instring, loc, doActions)
        else:
            try:
                loc, found = member._parse(instring, loc, doActions)
            except ParseSyntaxException:
                raise
            except ParseBaseException:
                cause = sys.exc_info()[1]
                raise ParseSyntaxException(cause)
            except IndexError:
                raise ParseSyntaxException(
                    ParseException(instring, len(instring), self.errmsg, self))

        # skip results that carry neither tokens nor named entries
        if found or found.keys():
            collected += found
    return loc, collected
2430
2432 if isinstance( other, basestring ):
2433 other = Literal( other )
2434 return self.append( other )
2435
2437 subRecCheckList = parseElementList[:] + [ self ]
2438 for e in self.exprs:
2439 e.checkRecursion( subRecCheckList )
2440 if not e.mayReturnEmpty:
2441 break
2442
2444 if hasattr(self,"name"):
2445 return self.name
2446
2447 if self.strRepr is None:
2448 self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
2449
2450 return self.strRepr
2451
2452
2453 -class Or(ParseExpression):
2454 """Requires that at least one C{ParseExpression} is found.
2455 If two expressions match, the expression that matches the longest string will be used.
2456 May be constructed using the '^' operator.
2457 """
def __init__(self, exprs, savelist=False):
    """Build a longest-match alternation from C{exprs}.

    C{exprs} and C{savelist} are forwarded to the base-class constructor.
    C{self.mayReturnEmpty} is set to True if any alternative may return
    empty, and to False otherwise.
    """
    super(Or, self).__init__(exprs, savelist)
    emptiable = False
    for alternative in self.exprs:
        if alternative.mayReturnEmpty:
            emptiable = True
            break
    self.mayReturnEmpty = emptiable
2465
def parseImpl(self, instring, loc, doActions=True):
    """Try every alternative and parse with the one that reaches furthest.

    Each alternative is first probed with C{tryParse}.  The alternative
    whose probe returns the largest end location wins (the earliest one on
    ties) and is then parsed for real with C{_parse}.

    If no alternative matches, the C{ParseException} with the greatest
    C{loc} is raised; an C{IndexError} from a probe counts as a failure at
    C{len(instring)}.  With nothing to report at all, a C{ParseException}
    saying "no defined alternatives to match" is raised.
    """
    furthestFailLoc = -1
    furthestFailure = None
    longestMatchLoc = -1

    for alternative in self.exprs:
        try:
            endLoc = alternative.tryParse(instring, loc)
        except ParseException:
            failure = sys.exc_info()[1]
            if failure.loc > furthestFailLoc:
                furthestFailure = failure
                furthestFailLoc = failure.loc
        except IndexError:
            inputLen = len(instring)
            if inputLen > furthestFailLoc:
                furthestFailure = ParseException(instring, inputLen, alternative.errmsg, self)
                furthestFailLoc = inputLen
        else:
            if endLoc > longestMatchLoc:
                longestMatchLoc = endLoc
                winner = alternative

    if longestMatchLoc >= 0:
        return winner._parse(instring, loc, doActions)
    if furthestFailure is None:
        raise ParseException(instring, loc, "no defined alternatives to match", self)
    raise furthestFailure
2494
2496 if isinstance( other, basestring ):
2497 other = Literal( other )
2498 return self.append( other )
2499
2501 if hasattr(self,"name"):
2502 return self.name
2503
2504 if self.strRepr is None:
2505 self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
2506
2507 return self.strRepr
2508
2510 subRecCheckList = parseElementList[:] + [ self ]
2511 for e in self.exprs:
2512 e.checkRecursion( subRecCheckList )
2513
2514
2516 """Requires that at least one C{ParseExpression} is found.
2517 If two expressions match, the first one listed is the one that will match.
2518 May be constructed using the '|' operator.
2519 """
def __init__(self, exprs, savelist=False):
    """Build a first-match alternation from C{exprs}.

    C{exprs} and C{savelist} are forwarded to the base-class constructor.
    When the C{exprs} argument is falsy, C{self.mayReturnEmpty} is True;
    otherwise it is True only if some alternative may return empty.
    """
    super(MatchFirst, self).__init__(exprs, savelist)
    if not exprs:
        self.mayReturnEmpty = True
        return

    emptiable = False
    for alternative in self.exprs:
        if alternative.mayReturnEmpty:
            emptiable = True
            break
    self.mayReturnEmpty = emptiable
2530
def parseImpl(self, instring, loc, doActions=True):
    """Return the result of the first alternative that parses at C{loc}.

    Alternatives are tried in order with C{_parse}; the first success is
    returned immediately as its C{(loc, tokens)} pair.

    If every alternative fails, the C{ParseException} with the greatest
    C{loc} is raised; an C{IndexError} from an alternative counts as a
    failure at C{len(instring)}.  With nothing to report at all, a
    C{ParseException} saying "no defined alternatives to match" is raised.
    """
    maxExcLoc = -1
    maxException = None
    for e in self.exprs:
        try:
            return e._parse(instring, loc, doActions)
        except ParseException:
            # fetch the active exception via sys.exc_info(), which is valid
            # syntax on both Python 2 and 3 (unlike "except X, err")
            err = sys.exc_info()[1]
            if err.loc > maxExcLoc:
                maxException = err
                maxExcLoc = err.loc
        except IndexError:
            if len(instring) > maxExcLoc:
                maxException = ParseException(instring, len(instring), e.errmsg, self)
                maxExcLoc = len(instring)

    # only reached when no alternative matched: report the failure that
    # made it the furthest into the input
    if maxException is not None:
        raise maxException
    raise ParseException(instring, loc, "no defined alternatives to match", self)
2553
2555 if isinstance( other, basestring ):
2556 other = Literal( other )
2557 return self.append( other )
2558
2560 if hasattr(self,"name"):
2561 return self.name
2562
2563 if self.strRepr is None:
2564 self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
2565
2566 return self.strRepr
2567
2569 subRecCheckList = parseElementList[:] + [ self ]
2570 for e in self.exprs:
2571 e.checkRecursion( subRecCheckList )
2572
2573
2574 -class Each(ParseExpression):
2575 """Requires all given C{ParseExpressions} to be found, but in any order.
2576 Expressions may be separated by whitespace.
2577 May be constructed using the '&' operator.
2578 """
def __init__(self, exprs, savelist=True):
    """Build an any-order conjunction from C{exprs}.

    C{exprs} and C{savelist} are forwarded to the base-class constructor.
    C{self.mayReturnEmpty} is True only if every member may return empty.
    C{self.initExprGroups} is set so that the member groupings are computed
    on the first call to C{parseImpl}.
    """
    super(Each, self).__init__(exprs, savelist)
    emptiable = True
    for member in self.exprs:
        if not member.mayReturnEmpty:
            emptiable = False
            break
    self.mayReturnEmpty = emptiable
    self.skipWhitespace = True
    self.initExprGroups = True
2588
def parseImpl(self, instring, loc, doActions=True):
    """Match all member expressions, in whatever order they appear.

    On the first call the members are sorted into groups (C{optionals},
    C{multioptionals}, C{multirequired}, C{required}) stored on C{self}.
    Parsing then proceeds in two phases:

      1. Probe with C{tryParse}, repeatedly sweeping over the still-pending
         expressions until a full sweep matches nothing, recording the order
         in which expressions matched.
      2. Re-parse with C{_parse} in that recorded order (followed by any
         unmatched C{Optional} members) and merge the results; a results
         name produced by more than one match has its values accumulated.

    Raises C{ParseException} if any required expression never matched.
    Returns C{(loc, results)}.
    """
    if self.initExprGroups:
        members = self.exprs
        unwrappedOptionals = [e.expr for e in members if isinstance(e, Optional)]
        emptyCapable = [e for e in members
                        if e.mayReturnEmpty and e not in unwrappedOptionals]
        self.optionals = unwrappedOptionals + emptyCapable
        self.multioptionals = [e.expr for e in members if isinstance(e, ZeroOrMore)]
        self.multirequired = [e.expr for e in members if isinstance(e, OneOrMore)]
        self.required = [e for e in members
                         if not isinstance(e, (Optional, ZeroOrMore, OneOrMore))]
        self.required += self.multirequired
        self.initExprGroups = False

    probeLoc = loc
    pendingRequired = self.required[:]
    pendingOptional = self.optionals[:]
    matchOrder = []

    # phase 1: sweep until one whole pass produces no match
    while True:
        candidates = (pendingRequired + pendingOptional
                      + self.multioptionals + self.multirequired)
        failures = 0
        for candidate in candidates:
            try:
                probeLoc = candidate.tryParse(instring, probeLoc)
            except ParseException:
                failures += 1
                continue
            matchOrder.append(candidate)
            if candidate in pendingRequired:
                pendingRequired.remove(candidate)
            elif candidate in pendingOptional:
                pendingOptional.remove(candidate)
        if failures == len(candidates):
            break

    if pendingRequired:
        missing = ", ".join([_ustr(e) for e in pendingRequired])
        raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing)

    # add any unmatched Optionals, in case they have default values defined
    matchOrder += [e for e in self.exprs
                   if isinstance(e, Optional) and e.expr in pendingOptional]

    # phase 2: parse for real, in the order discovered above
    pieces = []
    for matcher in matchOrder:
        loc, parsed = matcher._parse(instring, loc, doActions)
        pieces.append(parsed)

    merged = ParseResults([])
    for piece in pieces:
        repeated = {}
        for key in piece.keys():
            if key in merged.keys():
                combined = ParseResults(merged[key])
                combined += ParseResults(piece[key])
                repeated[key] = combined
        merged += ParseResults(piece)
        # restore the accumulated values for names seen more than once
        for key, value in repeated.items():
            merged[key] = value
    return loc, merged
2646
2648 if hasattr(self,"name"):
2649 return self.name
2650
2651 if self.strRepr is None:
2652 self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
2653
2654 return self.strRepr
2655
2657 subRecCheckList = parseElementList[:] + [ self ]
2658 for e in self.exprs:
2659 e.checkRecursion( subRecCheckList )
2660
2661
2663 """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens."""
def __init__(self, expr, savelist=False):
    """Wrap a single expression.

    A string C{expr} is converted to a C{Literal}.  The (possibly
    converted) expression is stored as C{self.expr}.  When it is not None,
    its index-error / empty-match flags, whitespace settings, C{saveAsList},
    C{callPreparse} and ignore expressions are copied onto this wrapper.
    C{savelist} is forwarded to the base-class constructor.
    """
    super(ParseElementEnhance, self).__init__(savelist)
    if isinstance(expr, basestring):
        expr = Literal(expr)
    self.expr = expr
    self.strRepr = None
    if expr is None:
        return

    self.mayIndexError = expr.mayIndexError
    self.mayReturnEmpty = expr.mayReturnEmpty
    self.setWhitespaceChars(expr.whiteChars)
    # assigned after setWhitespaceChars, in the same order as before
    self.skipWhitespace = expr.skipWhitespace
    self.saveAsList = expr.saveAsList
    self.callPreparse = expr.callPreparse
    self.ignoreExprs.extend(expr.ignoreExprs)
2678
def parseImpl(self, instring, loc, doActions=True):
    """Delegate parsing to the wrapped expression.

    Returns whatever C{self.expr._parse} returns, called with
    C{callPreParse=False}.  Raises C{ParseException} (with an empty input
    string and C{self.errmsg}) when no expression has been set.
    """
    if self.expr is None:
        raise ParseException("", loc, self.errmsg, self)
    return self.expr._parse(instring, loc, doActions, callPreParse=False)
2684
2686 self.skipWhitespace = False
2687 self.expr = self.expr.copy()
2688 if self.expr is not None:
2689 self.expr.leaveWhitespace()
2690 return self
2691
2693 if isinstance( other, Suppress ):
2694 if other not in self.ignoreExprs:
2695 super( ParseElementEnhance, self).ignore( other )
2696 if self.expr is not None:
2697 self.expr.ignore( self.ignoreExprs[-1] )
2698 else:
2699 super( ParseElementEnhance, self).ignore( other )
2700 if self.expr is not None:
2701 self.expr.ignore( self.ignoreExprs[-1] )
2702 return self
2703
2709
2711 if self in parseElementList:
2712 raise RecursiveGrammarException( parseElementList+[self] )
2713 subRecCheckList = parseElementList[:] + [ self ]
2714 if self.expr is not None:
2715 self.expr.checkRecursion( subRecCheckList )
2716
def validate(self, validateTrace=[]):
    """Validate the wrapped expression, then check this one for recursion.

    The wrapped expression (if any) has its C{validate} called with a new
    trace list consisting of C{validateTrace} followed by C{self}; the
    caller's list is not modified.  Afterwards C{self.checkRecursion([])}
    is invoked.
    """
    trace = validateTrace[:] + [self]
    inner = self.expr
    if inner is not None:
        inner.validate(trace)
    self.checkRecursion([])
2722
2724 try:
2725 return super(ParseElementEnhance,self).__str__()
2726 except:
2727 pass
2728
2729 if self.strRepr is None and self.expr is not None:
2730 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
2731 return self.strRepr
2732
2733
2735 """Lookahead matching of the given parse expression. C{FollowedBy}
2736 does *not* advance the parsing position within the input string, it only
2737 verifies that the specified parse expression matches at the current
2738 position. C{FollowedBy} always returns a null token list."""
2742
def parseImpl(self, instring, loc, doActions=True):
    """Verify that the wrapped expression matches at C{loc}.

    The expression is probed with C{tryParse}; any exception it raises
    propagates to the caller.  On success the position is left unchanged
    and an empty token list is returned: C{(loc, [])}.
    """
    lookahead = self.expr
    lookahead.tryParse(instring, loc)
    return loc, []
2746
2747
2748 -class NotAny(ParseElementEnhance):
2749 """Lookahead to disallow matching with the given parse expression. C{NotAny}
2750 does *not* advance the parsing position within the input string, it only
2751 verifies that the specified parse expression does *not* match at the current
2752 position. Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny}
2753 always returns a null token list. May be constructed using the '~' operator."""
2755 super(NotAny,self).__init__(expr)
2756
2757 self.skipWhitespace = False
2758 self.mayReturnEmpty = True
2759 self.errmsg = "Found unwanted token, "+_ustr(self.expr)
2760
2761
def parseImpl(self, instring, loc, doActions=True):
    """Verify that the wrapped expression does *not* match at C{loc}.

    The expression is probed with C{tryParse}.  If the probe fails with
    C{ParseException} or C{IndexError}, C{(loc, [])} is returned.  If the
    probe succeeds, C{self.myException} is stamped with C{loc} and
    C{instring} and raised.
    """
    try:
        self.expr.tryParse(instring, loc)
    except (ParseException, IndexError):
        return loc, []

    # the unwanted expression matched
    failure = self.myException
    failure.loc = loc
    failure.pstr = instring
    raise failure
2774
2776 if hasattr(self,"name"):
2777 return self.name
2778
2779 if self.strRepr is None:
2780 self.strRepr = "~{" + _ustr(self.expr) + "}"
2781
2782 return self.strRepr
2783
2784
2786 """Optional repetition of zero or more of the given expression."""
2790
def parseImpl(self, instring, loc, doActions=True):
    """Match the wrapped expression as many times as possible, possibly zero.

    The first attempt uses C{callPreParse=False}; later attempts use the
    default.  When ignore expressions are registered, they are skipped via
    C{self._skipIgnorables} before each repeat attempt.  Repetition ends at
    the first C{ParseException} or C{IndexError}, which is swallowed.

    Returns C{(loc, tokens)}: the position after the last successful match
    and the accumulated tokens (an empty list if nothing matched).
    """
    tokens = []
    try:
        loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        skipping = len(self.ignoreExprs) > 0
        while True:
            start = loc
            if skipping:
                start = self._skipIgnorables(instring, loc)
            loc, more = self.expr._parse(instring, start, doActions)
            # skip results that carry neither tokens nor named entries
            if more or more.keys():
                tokens += more
    except (ParseException, IndexError):
        pass

    return loc, tokens
2808
2810 if hasattr(self,"name"):
2811 return self.name
2812
2813 if self.strRepr is None:
2814 self.strRepr = "[" + _ustr(self.expr) + "]..."
2815
2816 return self.strRepr
2817
2822
2823
2825 """Repetition of one or more of the given expression."""
def parseImpl(self, instring, loc, doActions=True):
    """Match the wrapped expression one or more times.

    The first match is mandatory: it is attempted with
    C{callPreParse=False} and any exception it raises propagates.  Further
    matches are attempted until one fails with C{ParseException} or
    C{IndexError}, which is swallowed.  When ignore expressions are
    registered, they are skipped via C{self._skipIgnorables} before each
    repeat attempt.

    Returns C{(loc, tokens)}: the position after the last successful match
    and the accumulated tokens.
    """
    # must be at least one; a failure here is the caller's to handle
    loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
    try:
        skipping = len(self.ignoreExprs) > 0
        while True:
            start = loc
            if skipping:
                start = self._skipIgnorables(instring, loc)
            loc, more = self.expr._parse(instring, start, doActions)
            # skip results that carry neither tokens nor named entries
            if more or more.keys():
                tokens += more
    except (ParseException, IndexError):
        pass

    return loc, tokens
2843
2845 if hasattr(self,"name"):
2846 return self.name
2847
2848 if self.strRepr is None:
2849 self.strRepr = "{" + _ustr(self.expr) + "}..."
2850
2851 return self.strRepr
2852
2857
2864
2865 _optionalNotMatched = _NullToken()
2867 """Optional matching of the given expression.
2868 A default return string can also be specified, if the optional expression
2869 is not found.
2870 """
2872 super(Optional,self).__init__( exprs, savelist=False )
2873 self.defaultValue = default
2874 self.mayReturnEmpty = True
2875
def parseImpl(self, instring, loc, doActions=True):
    """Match the wrapped expression if possible; otherwise supply the default.

    On success the wrapped expression's C{(loc, tokens)} is returned.  On
    C{ParseException} or C{IndexError} the position is left unchanged and
    the tokens are:

      - C{[]} when no default was configured (C{self.defaultValue} is the
        C{_optionalNotMatched} sentinel);
      - a C{ParseResults} holding the default, also stored under the wrapped
        expression's results name, when that expression has one;
      - C{[defaultValue]} otherwise.
    """
    try:
        loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
    except (ParseException, IndexError):
        fallback = self.defaultValue
        if fallback is _optionalNotMatched:
            tokens = []
        elif self.expr.resultsName:
            tokens = ParseResults([fallback])
            tokens[self.expr.resultsName] = fallback
        else:
            tokens = [fallback]
    return loc, tokens
2889
2891 if hasattr(self,"name"):
2892 return self.name
2893
2894 if self.strRepr is None:
2895 self.strRepr = "[" + _ustr(self.expr) + "]"
2896
2897 return self.strRepr
2898
2899
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If C{include} is set to true, the matched expression is also parsed (the skipped text
       and matched expression are returned as a 2-element list).  The C{ignore}
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.  If C{failOn} is given and matches at a position
       reached while scanning, the skip is abandoned with a C{ParseException}.
    """
    def __init__(self, other, include=False, ignore=None, failOn=None):
        """Configure the target expression C{other} and the scanning options.

        A string C{failOn} is converted to a C{Literal}; any other value
        (including None) is stored unchanged.
        """
        super(SkipTo, self).__init__(other)
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        if isinstance(failOn, basestring):
            failOn = Literal(failOn)
        self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Advance one character at a time until the target expression matches.

        At each candidate position: the C{failOn} expression (if set) is
        probed and, if it matches, a C{ParseException} is raised; any run of
        C{ignoreExpr} matches is stepped over; then the target is test-parsed
        with parse actions disabled.

        Returns C{(loc, [skippedText])}, or, when C{includeMatch} is set and
        the target yields tokens, C{(locAfterTarget, [results])} where
        C{results} holds the skipped text followed by the target's tokens.
        Raises C{self.myException} if the end of the input is passed without
        a match.
        """
        origin = loc
        limit = len(instring)
        target = self.expr
        abandon = False
        while loc <= limit:
            try:
                if self.failOn:
                    try:
                        self.failOn.tryParse(instring, loc)
                    except ParseBaseException:
                        pass
                    else:
                        # flag the failure so the handler below re-raises it
                        abandon = True
                        raise ParseException(instring, loc, "Found expression " + str(self.failOn))
                    abandon = False

                if self.ignoreExpr is not None:
                    while True:
                        try:
                            loc = self.ignoreExpr.tryParse(instring, loc)
                        except ParseBaseException:
                            break

                # probe only: parse actions are suppressed here
                target._parse(instring, loc, doActions=False, callPreParse=False)
                skipped = instring[origin:loc]
                if not self.includeMatch:
                    return loc, [skipped]

                loc, matched = target._parse(instring, loc, doActions, callPreParse=False)
                if not matched:
                    return loc, [skipped]
                combined = ParseResults(skipped)
                combined += matched
                return loc, [combined]
            except (ParseException, IndexError):
                if abandon:
                    raise
                loc += 1

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
2965
2966 -class Forward(ParseElementEnhance):
2967 """Forward declaration of an expression to be defined later -
2968 used for recursive grammars, such as algebraic infix notation.
2969 When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.
2970
2971 Note: take care when assigning to C{Forward} not to overlook precedence of operators.
2972 Specifically, '|' has a lower precedence than '<<', so that::
2973 fwdExpr << a | b | c
2974 will actually be evaluated as::
2975 (fwdExpr << a) | b | c
2976 thereby leaving b and c out as parseable alternatives. It is recommended that you
2977 explicitly group the values inserted into the C{Forward}::
2978 fwdExpr << (a | b | c)
2979 """
2982
2984 if isinstance( other, basestring ):
2985 other = Literal(other)
2986 self.expr = other
2987 self.mayReturnEmpty = other.mayReturnEmpty
2988 self.strRepr = None
2989 self.mayIndexError = self.expr.mayIndexError
2990 self.mayReturnEmpty = self.expr.mayReturnEmpty
2991 self.setWhitespaceChars( self.expr.whiteChars )
2992 self.skipWhitespace = self.expr.skipWhitespace
2993 self.saveAsList = self.expr.saveAsList
2994 self.ignoreExprs.extend(self.expr.ignoreExprs)
2995 return None
2996
2998 self.skipWhitespace = False
2999 return self
3000
3002 if not self.streamlined:
3003 self.streamlined = True
3004 if self.expr is not None:
3005 self.expr.streamline()
3006 return self
3007
def validate(self, validateTrace=[]):
    """Validate the assigned expression once per trace, then check recursion.

    If C{self} already appears in C{validateTrace} the assigned expression
    is not validated again.  Otherwise, when an expression has been
    assigned, its C{validate} is called with a new trace list consisting of
    C{validateTrace} followed by C{self}.  In every case
    C{self.checkRecursion([])} is invoked last.
    """
    alreadySeen = self in validateTrace
    if not alreadySeen:
        trace = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(trace)
    self.checkRecursion([])
3014
3016 if hasattr(self,"name"):
3017 return self.name
3018
3019 self._revertClass = self.__class__
3020 self.__class__ = _ForwardNoRecurse
3021 try:
3022 if self.expr is not None:
3023 retString = _ustr(self.expr)
3024 else:
3025 retString = "None"
3026 finally:
3027 self.__class__ = self._revertClass
3028 return self.__class__.__name__ + ": " + retString
3029
3031 if self.expr is not None:
3032 return super(Forward,self).copy()
3033 else:
3034 ret = Forward()
3035 ret << self
3036 return ret
3037
3041
3043 """Abstract subclass of ParseExpression, for converting parsed results."""
3044 - def __init__( self, expr, savelist=False ):
3047
3048 -class Upcase(TokenConverter):
3049 """Converter to upper case all matching tokens."""
3051 super(Upcase,self).__init__(*args)
3052 warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
3053 DeprecationWarning,stacklevel=2)
3054
def postParse(self, instring, loc, tokenlist):
    """Return a new list holding each token of C{tokenlist} in upper case.

    Each token's own C{upper()} method is called rather than going through
    C{string.upper}, which is not available on Python 3.
    """
    return [token.upper() for token in tokenlist]
3057
3058
3060 """Converter to concatenate all matching tokens to a single string.
3061 By default, the matching patterns must also be contiguous in the input string;
3062 this can be disabled by specifying C{'adjacent=False'} in the constructor.
3063 """
def __init__(self, expr, joinString="", adjacent=True):
    """Wrap C{expr} so that its tokens are joined into a single string.

    C{joinString} is stored for use when joining.  When C{adjacent} is true,
    C{leaveWhitespace()} is called so that contained expressions do not skip
    whitespace; whitespace skipping is then re-enabled on the C{Combine}
    itself.
    """
    super(Combine, self).__init__(expr)
    self.adjacent = adjacent
    self.joinString = joinString
    # suppress whitespace-stripping in contained parse expressions, but
    # re-enable it on the Combine itself
    if adjacent:
        self.leaveWhitespace()
    self.skipWhitespace = True
    self.callPreparse = True
3073
3080
def postParse(self, instring, loc, tokenlist):
    """Replace the matched tokens with their concatenation.

    A copy of C{tokenlist} is emptied of its tokens and given one token: the
    strings from C{tokenlist._asStringList(self.joinString)} joined together.
    The copy is returned wrapped in a list when this expression has a
    results name and the copy carries named results; otherwise the copy
    itself is returned.
    """
    combined = tokenlist.copy()
    del combined[:]
    joined = "".join(tokenlist._asStringList(self.joinString))
    combined += ParseResults([joined], modal=self.modalResults)

    if self.resultsName and len(combined.keys()) > 0:
        return [combined]
    return combined
3090
3091 -class Group(TokenConverter):
3092 """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions."""
3094 super(Group,self).__init__( expr )
3095 self.saveAsList = True
3096
def postParse(self, instring, loc, tokenlist):
    """Nest the matched tokens: return a one-element list holding C{tokenlist}."""
    grouped = [tokenlist]
    return grouped
3099
3100 -class Dict(TokenConverter):
3101 """Converter to return a repetitive expression as a list, but also as a dictionary.
3102 Each element can also be referenced using the first token in the expression as its key.
3103 Useful for tabular report scraping when the first column can be used as a item key.
3104 """
3106 super(Dict,self).__init__( exprs )
3107 self.saveAsList = True
3108
def postParse(self, instring, loc, tokenlist):
    """Register each sub-list of C{tokenlist} under a key taken from its first token.

    For every non-empty entry the key is its first token (an C{int} key is
    converted with C{_ustr} and stripped).  The value stored under that key,
    wrapped in C{_ParseResultsWithOffset} together with the entry's index, is:

      - C{""} for a one-token entry;
      - the second token for a two-token entry whose second token is not a
        C{ParseResults};
      - otherwise a copy of the entry without its first token, or that copy's
        single element when exactly one remains and the copy has no named
        results.

    Returns C{[tokenlist]} when this expression has a results name, else
    C{tokenlist}.
    """
    for index, entry in enumerate(tokenlist):
        size = len(entry)
        if size == 0:
            continue

        key = entry[0]
        if isinstance(key, int):
            key = _ustr(entry[0]).strip()

        if size == 1:
            value = ""
        elif size == 2 and not isinstance(entry[1], ParseResults):
            value = entry[1]
        else:
            remainder = entry.copy()
            del remainder[0]
            if len(remainder) != 1 or (isinstance(remainder, ParseResults) and remainder.keys()):
                value = remainder
            else:
                value = remainder[0]
        tokenlist[key] = _ParseResultsWithOffset(value, index)

    if self.resultsName:
        return [tokenlist]
    return tokenlist
3132
3133
3135 """Converter for ignoring the results of a parsed expression."""
3136 - def postParse( self, instring, loc, tokenlist ):
3138
3141
3142
3144 """Wrapper for parse actions, to ensure they are only called once."""
3146 self.callable = ParserElement._normalizeParseActionArgs(methodCall)
3147 self.called = False
3149 if not self.called:
3150 results = self.callable(s,l,t)
3151 self.called = True
3152 return results
3153 raise ParseException(s,l,"")
3156
3158 """Decorator for debugging parse actions."""
3159 f = ParserElement._normalizeParseActionArgs(f)
3160 def z(*paArgs):
3161 thisFunc = f.func_name
3162 s,l,t = paArgs[-3:]
3163 if len(paArgs)>3:
3164 thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
3165 sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
3166 try:
3167 ret = f(*paArgs)
3168 except Exception:
3169 exc = sys.exc_info()[1]
3170 sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
3171 raise
3172 sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
3173 return ret
3174 try:
3175 z.__name__ = f.__name__
3176 except AttributeError:
3177 pass
3178 return z
3179
3180
3181
3182
3184 """Helper to define a delimited list of expressions - the delimiter defaults to ','.
3185 By default, the list elements and delimiters can have intervening whitespace, and
3186 comments, but this can be overridden by passing C{combine=True} in the constructor.
3187 If C{combine} is set to True, the matching tokens are returned as a single token
3188 string, with the delimiters included; otherwise, the matching tokens are returned
3189 as a list of tokens, with the delimiters suppressed.
3190 """
3191 dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
3192 if combine:
3193 return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
3194 else:
3195 return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
3196
3198 """Helper to define a counted list of expressions.
3199 This helper defines a pattern of the form::
3200 integer expr expr expr...
3201 where the leading integer tells how many expr expressions follow.
3202 The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
3203 """
3204 arrayExpr = Forward()
3205 def countFieldParseAction(s,l,t):
3206 n = int(t[0])
3207 arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
3208 return []
3209 return ( Word(nums).setName("arrayLen").setParseAction(countFieldParseAction, callDuringTry=True) + arrayExpr )
3210
3212 if type(L) is not list: return [L]
3213 if L == []: return L
3214 return _flatten(L[0]) + _flatten(L[1:])
3215
3217 """Helper to define an expression that is indirectly defined from
3218 the tokens matched in a previous expression, that is, it looks
3219 for a 'repeat' of a previous expression. For example::
3220 first = Word(nums)
3221 second = matchPreviousLiteral(first)
3222 matchExpr = first + ":" + second
3223 will match C{"1:1"}, but not C{"1:2"}. Because this matches a
3224 previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
3225 If this is not desired, use C{matchPreviousExpr}.
3226 Do *not* use with packrat parsing enabled.
3227 """
3228 rep = Forward()
3229 def copyTokenToRepeater(s,l,t):
3230 if t:
3231 if len(t) == 1:
3232 rep << t[0]
3233 else:
3234
3235 tflat = _flatten(t.asList())
3236 rep << And( [ Literal(tt) for tt in tflat ] )
3237 else:
3238 rep << Empty()
3239 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
3240 return rep
3241
3243 """Helper to define an expression that is indirectly defined from
3244 the tokens matched in a previous expression, that is, it looks
3245 for a 'repeat' of a previous expression. For example::
3246 first = Word(nums)
3247 second = matchPreviousExpr(first)
3248 matchExpr = first + ":" + second
3249 will match C{"1:1"}, but not C{"1:2"}. Because this matches by
3250 expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
3251 the expressions are evaluated first, and then compared, so
3252 C{"1"} is compared with C{"10"}.
3253 Do *not* use with packrat parsing enabled.
3254 """
3255 rep = Forward()
3256 e2 = expr.copy()
3257 rep << e2
3258 def copyTokenToRepeater(s,l,t):
3259 matchTokens = _flatten(t.asList())
3260 def mustMatchTheseTokens(s,l,t):
3261 theseTokens = _flatten(t.asList())
3262 if theseTokens != matchTokens:
3263 raise ParseException("",0,"")
3264 rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
3265 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
3266 return rep
3267
3269
3270 for c in r"\^-]":
3271 s = s.replace(c,_bslash+c)
3272 s = s.replace("\n",r"\n")
3273 s = s.replace("\t",r"\t")
3274 return _ustr(s)
3275
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a C{MatchFirst} for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

       If C{strs} is neither a string nor a list/tuple, a C{SyntaxWarning} is issued
       and the argument is treated as an empty set of literals.  An empty set of
       literals yields an empty C{MatchFirst}.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    # bound up front so an invalid argument falls through with no literals
    # instead of hitting an unbound name after the warning
    symbols = []
    if isinstance(strs,(list,tuple)):
        symbols = list(strs[:])
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)

    # Remove duplicates, and move any literal that is "masked" by an earlier,
    # shorter prefix (e.g. "<=" after "<") in front of that prefix.  The
    # position i only advances once nothing after it duplicates or extends it.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            i += 1

    if not symbols:
        # "[]" is not a valid regular expression, so there is nothing to
        # gain from attempting the Regex optimization for an empty set
        return MatchFirst( [] )

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # every literal is a single character: use a character class
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
3334
3336 """Helper to easily and clearly define a dictionary by specifying the respective patterns
3337 for the key and value. Takes care of defining the C{Dict}, C{ZeroOrMore}, and C{Group} tokens
3338 in the proper order. The key pattern can include delimiting markers or punctuation,
3339 as long as they are suppressed, thereby leaving the significant key text. The value
3340 pattern can include named results, so that the C{Dict} results can include named token
3341 fields.
3342 """
3343 return Dict( ZeroOrMore( Group ( key + value ) ) )
3344
def originalTextFor(expr, asString=True):
    """Helper that wraps C{expr} so a match yields the original, untokenized
       slice of the input string instead of the individual parsed tokens.  Handy
       for turning the parsed fields of an HTML start tag back into the raw tag
       text, or for rejoining tokens that were separated by whitespace.  Simpler
       than the parse action C{keepOriginalText}, and it does not need the
       inspect module to walk the call stack.

       Parameters:
        - expr - the expression whose matched text should be returned
        - asString - (default=True) - when True, the result is the matched text
          as a plain string.  When False, the result is the C{ParseResults} of
          the match with its token list replaced by one token holding the
          matched text; results names defined inside C{expr} are kept, so pass
          False if those names must be preserved.
    """
    # parse action that simply reports the location at which it was invoked
    def reportLocation(s, loc, t):
        return loc

    startMarker = Empty().setParseAction(reportLocation)
    endMarker = startMarker.copy()
    # the end marker must report the location exactly where expr stopped
    endMarker.callPreparse = False
    wrapped = startMarker("_original_start") + expr + endMarker("_original_end")

    if asString:
        def extractText(s, l, t):
            return s[t._original_start:t._original_end]
    else:
        def extractText(s, l, t):
            # drop every token, then put back a single token with the raw text
            del t[:]
            t.insert(0, s[t._original_start:t._original_end])
            # remove the bookkeeping names so only caller-defined names remain
            del t["_original_start"]
            del t["_original_end"]

    wrapped.setParseAction(extractText)
    return wrapped
3373
3374
# ready-made, named instances of the position-matching expression classes
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
lineEnd = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd = StringEnd().setName("stringEnd")

# --- internal grammar for "[...]" character-set strings ---
# two-character token: a lead character from _bslash (presumably the backslash;
# defined elsewhere) followed by one of the listed punctuation characters; the
# parse action keeps only the second character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# printable characters excluding both the backslash and ']'
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
# _bslash + "0x" + hex digits, converted to the character with that code
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
# _bslash + a word starting with "0" made of octal digits, converted to the character with that code
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
# one character of the set: an escape form, or a single plain printable character
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
# "x-y" range; grouped so the two end characters stay together as a pair
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# full bracket expression: "[", optional "^" (named "negate"), the grouped items (named "body"), "]"
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

# expands a grouped range (a ParseResults pair) into every character from p[0]
# through p[1] inclusive; any other item is returned unchanged.
# NOTE(review): because of the and/or idiom, a range that expands to an empty
# string (p[0] > p[1]) falls through and returns p itself
_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)
3390
3392 r"""Helper to easily define string ranges for use in Word construction. Borrows
3393 syntax from regexp '[]' string range definitions::
3394 srange("[0-9]") -> "0123456789"
3395 srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"
3396 srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
3397 The input string must be enclosed in []'s, and the returned string is the expanded
3398 character set joined into a single string.
3399 The values enclosed in the []'s may be::
3400 a single character
3401 an escaped character with a leading backslash (such as \- or \])
3402 an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
3403 an escaped octal character with a leading '\0' (\041, which is a '!' character)
3404 a range of any of the above, separated by a dash ('a-z', etc.)
3405 any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
3406 """
3407 try:
3408 return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
3409 except:
3410 return ""
3411
3413 """Helper method for defining parse actions that require matching at a specific
3414 column in the input text.
3415 """
3416 def verifyCol(strg,locn,toks):
3417 if col(locn,strg) != n:
3418 raise ParseException(strg,locn,"matched token not at column %d" % n)
3419 return verifyCol
3420
3422 """Helper method for common parse actions that simply return a literal value. Especially
3423 useful when used with C{transformString()}.
3424 """
3425 def _replFunc(*args):
3426 return [replStr]
3427 return _replFunc
3428
3430 """Helper parse action for removing quotation marks from parsed quoted strings.
3431 To use, add this parse action to quoted string using::
3432 quotedString.setParseAction( removeQuotes )
3433 """
3434 return t[0][1:-1]
3435
3437 """Helper parse action to convert tokens to upper case."""
3438 return [ tt.upper() for tt in map(_ustr,t) ]
3439
3441 """Helper parse action to convert tokens to lower case."""
3442 return [ tt.lower() for tt in map(_ustr,t) ]
3443
def keepOriginalText(s,startLoc,t):
    """DEPRECATED - use the newer helper method C{originalTextFor} instead.
       Helper parse action that keeps the original parsed text, overriding the
       results of any nested parse actions: the tokens in C{t} are discarded
       and replaced by the slice of C{s} running from C{startLoc} to the end
       location reported by C{getTokensEndLoc()}.  Returns C{t}."""
    try:
        stopLoc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    # empty the token list in place, then refill it with the raw matched text
    del t[:]
    originalText = s[startLoc:stopLoc]
    t += ParseResults(originalText)
    return t
3455
3457 """Method to be called from within a parse action to determine the end
3458 location of the parsed tokens."""
3459 import inspect
3460 fstack = inspect.stack()
3461 try:
3462
3463 for f in fstack[2:]:
3464 if f[3] == "_parseNoCache":
3465 endloc = f[0].f_locals["loc"]
3466 return endloc
3467 else:
3468 raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
3469 finally:
3470 del fstack
3471
3500
3504
3508
3510 """Helper to create a validating parse action to be used with start tags created
3511 with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag
3512 with a required attribute value, to avoid false matches on common tags such as
3513 <TD> or <DIV>.
3514
3515 Call withAttribute with a series of attribute names and values. Specify the list
3516 of filter attributes names and values as:
3517 - keyword arguments, as in (class="Customer",align="right"), or
3518 - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
3519 For attribute names with a namespace prefix, you must use the second form. Attribute
3520 names are matched insensitive to upper/lower case.
3521
3522 To verify that the attribute exists, but without specifying a value, pass
3523 withAttribute.ANY_VALUE as the value.
3524 """
3525 if args:
3526 attrs = args[:]
3527 else:
3528 attrs = attrDict.items()
3529 attrs = [(k,v) for k,v in attrs]
3530 def pa(s,l,tokens):
3531 for attrName,attrValue in attrs:
3532 if attrName not in tokens:
3533 raise ParseException(s,l,"no matching attribute " + attrName)
3534 if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
3535 raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
3536 (attrName, tokens[attrName], attrValue))
3537 return pa
3538 withAttribute.ANY_VALUE = object()
3539
# namespace object holding two unique sentinel objects; operator definitions
# are compared against opAssoc.LEFT / opAssoc.RIGHT to select associativity
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
3543
3545 """Helper method for constructing grammars of expressions made up of
3546 operators working in a precedence hierarchy. Operators may be unary or
3547 binary, left- or right-associative. Parse actions can also be attached
3548 to operator expressions.
3549
3550 Parameters:
3551 - baseExpr - expression representing the most basic element for the nested
3552 - opList - list of tuples, one for each operator precedence level in the
3553 expression grammar; each tuple is of the form
3554 (opExpr, numTerms, rightLeftAssoc, parseAction), where:
3555 - opExpr is the pyparsing expression for the operator;
3556 may also be a string, which will be converted to a Literal;
3557 if numTerms is 3, opExpr is a tuple of two expressions, for the
3558 two operators separating the 3 terms
3559 - numTerms is the number of terms for this operator (must
3560 be 1, 2, or 3)
3561 - rightLeftAssoc is the indicator whether the operator is
3562 right or left associative, using the pyparsing-defined
3563 constants opAssoc.RIGHT and opAssoc.LEFT.
3564 - parseAction is the parse action to be associated with
3565 expressions matching this operator expression (the
3566 parse action tuple member may be omitted)
3567 """
3568 ret = Forward()
3569 lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
3570 for i,operDef in enumerate(opList):
3571 opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
3572 if arity == 3:
3573 if opExpr is None or len(opExpr) != 2:
3574 raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
3575 opExpr1, opExpr2 = opExpr
3576 thisExpr = Forward()
3577 if rightLeftAssoc == opAssoc.LEFT:
3578 if arity == 1:
3579 matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
3580 elif arity == 2:
3581 if opExpr is not None:
3582 matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
3583 else:
3584 matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
3585 elif arity == 3:
3586 matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
3587 Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
3588 else:
3589 raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
3590 elif rightLeftAssoc == opAssoc.RIGHT:
3591 if arity == 1:
3592
3593 if not isinstance(opExpr, Optional):
3594 opExpr = Optional(opExpr)
3595 matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
3596 elif arity == 2:
3597 if opExpr is not None:
3598 matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
3599 else:
3600 matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
3601 elif arity == 3:
3602 matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
3603 Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
3604 else:
3605 raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
3606 else:
3607 raise ValueError("operator must indicate right or left associativity")
3608 if pa:
3609 matchExpr.setParseAction( pa )
3610 thisExpr << ( matchExpr | lastExpr )
3611 lastExpr = thisExpr
3612 ret << lastExpr
3613 return ret
3614
# Quoted-string expressions.  Inside the quotes each pattern accepts: any
# character other than the quote, newline, carriage return or backslash; a
# doubled quote character; a \x hex escape; or a backslash followed by any
# single character.
dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
# either of the two forms above, combined into one alternation
quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
# a quoted string with a leading 'u' prefix, returned as one combined token;
# a copy of quotedString is used so the shared expression is not modified
unicodeString = Combine(_L('u') + quotedString.copy())
3619
3621 """Helper method for defining nested lists enclosed in opening and closing
3622 delimiters ("(" and ")" are the default).
3623
3624 Parameters:
3625 - opener - opening character for a nested list (default="("); can also be a pyparsing expression
3626 - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
3627 - content - expression for items within the nested lists (default=None)
3628 - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)
3629
3630 If an expression is not provided for the content argument, the nested
3631 expression will capture all whitespace-delimited content between delimiters
3632 as a list of separate values.
3633
3634 Use the ignoreExpr argument to define expressions that may contain
3635 opening or closing characters that should not be treated as opening
3636 or closing characters for nesting, such as quotedString or a comment
3637 expression. Specify multiple expressions using an Or or MatchFirst.
3638 The default is quotedString, but if no expressions are to be ignored,
3639 then pass None for this argument.
3640 """
3641 if opener == closer:
3642 raise ValueError("opening and closing strings cannot be the same")
3643 if content is None:
3644 if isinstance(opener,basestring) and isinstance(closer,basestring):
3645 if len(opener) == 1 and len(closer)==1:
3646 if ignoreExpr is not None:
3647 content = (Combine(OneOrMore(~ignoreExpr +
3648 CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
3649 ).setParseAction(lambda t:t[0].strip()))
3650 else:
3651 content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
3652 ).setParseAction(lambda t:t[0].strip()))
3653 else:
3654 if ignoreExpr is not None:
3655 content = (Combine(OneOrMore(~ignoreExpr +
3656 ~Literal(opener) + ~Literal(closer) +
3657 CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
3658 ).setParseAction(lambda t:t[0].strip()))
3659 else:
3660 content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
3661 CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
3662 ).setParseAction(lambda t:t[0].strip()))
3663 else:
3664 raise ValueError("opening and closing arguments must be strings if no content expression is given")
3665 ret = Forward()
3666 if ignoreExpr is not None:
3667 ret << Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
3668 else:
3669 ret << Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
3670 return ret
3671
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining the syntax of the statement
            that is repeated within the indented block
        - indentStack - list created by the caller to manage the indentation
            stack (multiple statementWithIndentedBlock expressions within a
            single grammar should share a common indentStack)
        - indent - boolean indicating whether the block must be indented beyond
            the current level; set to False for a block of left-most statements
            (default=True)

       A valid block must contain at least one blockStatement.

       Note that an ignore expression is added to C{blockStatementExpr} itself,
       so the expression passed in is modified.
    """
    def checkPeerIndent(s,l,t):
        # nothing left to compare once the end of the input is reached
        if l >= len(s):
            return
        thisCol = col(l,s)
        if thisCol == indentStack[-1]:
            return
        # deeper than the current level is fatal; shallower is a plain mismatch
        if thisCol > indentStack[-1]:
            raise ParseFatalException(s,l,"illegal nesting")
        raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        thisCol = col(l,s)
        if thisCol <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        # entering a deeper level: remember its column
        indentStack.append( thisCol )

    def checkUnindent(s,l,t):
        if l >= len(s):
            return
        thisCol = col(l,s)
        if not indentStack or thisCol >= indentStack[-1] or thisCol > indentStack[-2]:
            raise ParseException(s,l,"not an unindent")
        # leaving the current level
        indentStack.pop()

    # one or more line ends, skipping only tabs and spaces before each
    lineBreaks = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    indentMarker = Empty() + Empty().setParseAction(checkSubIndent)
    peerMarker = Empty().setParseAction(checkPeerIndent)
    undentMarker = Empty().setParseAction(checkUnindent)

    # each statement must sit at the current indentation level
    statements = OneOrMore( peerMarker + Group(blockStatementExpr) + Optional(lineBreaks) )
    if indent:
        blockBody = Optional(lineBreaks) + indentMarker + statements + undentMarker
    else:
        blockBody = Optional(lineBreaks) + statements
    smExpr = Group( blockBody )

    # NOTE(review): _bslash is defined elsewhere; presumably this makes a
    # backslash at the end of a line act as a line continuation
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr
3723
# 8-bit character sets built with srange: alphas8bit covers 0xc0-0xff except
# 0xd7 and 0xf7; punc8bit covers 0xa1-0xbf plus those two excluded characters
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# opening/closing tag expressions for any tag whose name is a word starting
# with a letter and continuing with letters, digits, "_" or ":"
# (makeHTMLTags is defined elsewhere in this file)
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
# "&" + one of the five listed entity names (results name "entity") + ";", as a single token
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline()
# entity name -> replacement character, paired up in order by zip
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
# parse action: returns the mapped character for a known entity, otherwise None
replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None


# "/* ... */" comment; the non-greedy repeat stops at the first closing "*/"
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

# "<!-- ... -->" comment, which may span lines ([\s\S] also matches newlines)
htmlComment = Regex(r"<!--[\s\S]*?-->")
# everything up to the end of the current line, without skipping leading whitespace
restOfLine = Regex(r".*").leaveWhitespace()
# "//" comment; a backslash immediately before the newline continues it onto the next line
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
# either a "/* ... */" comment or a "//" comment
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

# same expression object as cppStyleComment, under a second name
javaStyleComment = cppStyleComment
# "#" through the end of the line
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
# every printable character except the comma
_noncomma = "".join( [ c for c in printables if c != "," ] )
# one list item: words of non-comma characters, keeping interior spaces/tabs
# unless they are directly followed by a comma or the end of the line
_commasepitem = Combine(OneOrMore(Word(_noncomma) +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# comma-delimited list whose items are quoted strings or plain items; an empty
# item yields "" as its token
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
3747
3748
if __name__ == "__main__":

    def test( teststring ):
        """Parse C{teststring} with the C{simpleSQL} grammar defined below and
           print the results; on a parse error, print the offending line with a
           caret under the failing column followed by the error itself."""
        try:
            results = simpleSQL.parseString( teststring )
            resultList = results.asList()
            print (teststring + "->" + str(resultList))
            print ("tokens = " + str(results))
            print ("tokens.columns = " + str(results.columns))
            print ("tokens.tables = " + str(results.tables))
            print (results.asXML("SQL",True))
        except ParseBaseException:
            # the exception is fetched through sys.exc_info() instead of being
            # bound in the except clause
            parseErr = sys.exc_info()[1]
            print (teststring + "->")
            print (parseErr.line)
            print (" "*(parseErr.column-1) + "^")
            print (parseErr)
        print()

    # keywords are matched without regard to case
    selectToken = CaselessLiteral( "select" )
    fromToken = CaselessLiteral( "from" )

    # dotted identifiers are combined into one token and converted to upper case
    ident = Word( alphas, alphanums + "_$" )
    columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )
    tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList = Group( delimitedList( tableName ) )
    simpleSQL = ( selectToken +
                  ( '*' | columnNameList ).setResultsName( "columns" ) +
                  fromToken +
                  tableNameList.setResultsName( "tables" ) )

    # sample statements, including several that are expected to fail
    for sample in (
        "SELECT * from XYZZY, ABC",
        "select * from SYS.XYZZY",
        "Select A from Sys.dual",
        "Select AA,BB,CC from Sys.dual",
        "Select A, B, C from Sys.dual",
        "Select A, B, C from Sys.dual",
        "Xelect A, B, C from Sys.dual",
        "Select A, B, C frox Sys.dual",
        "Select",
        "Select ^^^ frox Sys.dual",
        "Select A, B, C from Sys.dual, Table2 ",
        ):
        test( sample )
3792