Package creoleparser :: Module elements
[hide private]
[frames] | no frames]

Source Code for Module creoleparser.elements

   1  # elements.py 
   2  # 
   3  # Copyright (c) 2007 Stephen Day 
   4  # 
   5  # This module is part of Creoleparser and is released under 
   6  # the MIT License: http://www.opensource.org/licenses/mit-license.php 
   7  # 
   8   
   9  import re 
  10  import urlparse 
  11   
  12  import genshi.builder as bldr 
  13  from genshi.core import Stream 
  14  from genshi.filters import HTMLSanitizer 
  15   
  16  from core import escape_char, esc_neg_look, fragmentize  
  17   
  18  sanitizer = HTMLSanitizer() 
  19   
  20  __docformat__ = 'restructuredtext en' 
  21   
class WikiElement(object):

    """Common base class of every wiki markup element.

    Subclasses provide ``re_string`` and normally compile it into
    ``self.regexp``; ``_process`` and ``_build`` then turn a match
    object into genshi fragments.
    """

    append_newline = False
    """Determines if newlines are appended to Element(s) during processing.
    Should only affect readability of source xml.
    """

    def __init__(self, tag, token, child_tags):
        """Store the three attributes shared by all wiki elements.

        Subclasses may accept additional keyword arguments.

        :parameters:
          tag
            The xhtml tag associated with the element.
          token
            The character string (or strings) that identifies the element
            in wiki markup.
          child_tags
            A list of wiki_elements that will be searched for in the body
            of the element.  Order matters: if an element is found before
            the element that encloses it, the enclosing element will never
            be found.  Where that is too restrictive (e.g. ``strong`` and
            ``em`` should be able to nest each other), put the conflicting
            elements in a sublist and the parser will find which one
            comes first.
        """
        self.tag, self.token, self.child_tags = tag, token, child_tags

    def _build(self, mo, element_store):
        """Return a genshi Element whose outermost tag is ``self.tag``.

        Called from ``_process``.

        :parameters:
          mo
            match object, usually the one returned by
            ``self.regexp.search(s)``; ``mo.group(1)`` becomes the body.
          element_store
            passed straight through to ``fragmentize``.
        """
        # Look up the element factory first, then build the children,
        # so failures surface in the same order as before.
        make_element = bldr.tag.__getattr__(self.tag)
        children = fragmentize(mo.group(1), self.child_tags, element_store)
        return make_element(children)

    def re_string(self):
        """The regular expression pattern that is compiled into ``self.regexp``.

        The pattern must consume the entire wiki element, tokens included.
        For block elements the newline on the last line must be consumed
        as well.  ``group(1)`` should normally be the entire string inside
        the tokens; otherwise a custom ``_build`` method is needed.

        The base implementation returns ``None``.
        """
        return None

    def _process(self, mo, text, wiki_elements, element_store):
        """Return a list of genshi fragments (Elements and text).

        This is mainly for block level markup; see ``InlineElement``
        for the other approach.
        """
        first, last = mo.start(), mo.end()
        pieces = []
        if first:
            # Text before the match is searched with the remaining
            # elements only (``wiki_elements[1:]``).
            pieces.extend(fragmentize(text[:first], wiki_elements[1:],
                                      element_store))
        pieces.append(self._build(mo, element_store))
        if self.append_newline:
            # purely cosmetic: makes the generated source easier to read
            pieces.append('\n')
        if last < len(text):
            # Text after the match is searched with the full element list.
            pieces.extend(fragmentize(text[last:], wiki_elements,
                                      element_store))
        return pieces

    def __repr__(self):
        return ''.join(["<WikiElement ", str(self.tag), ">"])
104
class InlineElement(WikiElement):

    r"""For finding generic inline elements like ``strong`` and ``em``.

    >>> em = InlineElement('em','//',[])
    >>> mo1 = em.regexp.search('a //word// in a line')
    >>> mo2 = em.regexp.search('a //word in a line\n or two\n')
    >>> mo1.group(0),mo1.group(1)
    ('//word//', 'word')
    >>> mo2.group(0),mo2.group(1)
    ('//word in a line\n or two', 'word in a line\n or two')

    Use a list for the ``token`` argument to have different start
    and end strings. These must be closed.

    >>> foo = InlineElement('foo',['<<','>>'],[])
    >>> mo = foo.regexp.search('blaa <<here it is >>\n')
    >>> mo.group(1)
    'here it is '

    """

    def __init__(self, tag, token, child_tags=None):
        """Create the element and compile its pattern with ``re.DOTALL``.

        ``child_tags`` defaults to a fresh empty list for each instance
        (a shared ``[]`` default would be aliased by every element
        created without the argument).
        """
        if child_tags is None:
            child_tags = []
        super(InlineElement, self).__init__(tag, token, child_tags)
        self.regexp = re.compile(self.re_string(), re.DOTALL)

    def re_string(self):
        """Return the pattern for this element.

        A ``str`` token is used as both start and end marker, and the
        end marker may be replaced by the end of the string.  Any other
        token is indexed: ``token[0]`` opens and ``token[1]`` must
        close the element.
        """
        content = '(.+?)'
        if isinstance(self.token, str):
            end = '(' + esc_neg_look + re.escape(self.token) + r'|$)'
            return esc_neg_look + re.escape(self.token) + content + end
        return (esc_neg_look + re.escape(self.token[0]) + content +
                esc_neg_look + re.escape(self.token[1]))

    def _process(self, mo, text, wiki_elements, element_store):
        """Returns genshi Fragments (Elements and text)

        The built element is parked in ``element_store`` under
        ``str(id(element))`` and the matched text is replaced by a
        ``<<<id>>>`` place holder; the whole text is then fragmentized
        again with the same ``wiki_elements``.
        """
        processed = self._build(mo, element_store)
        store_id = str(id(processed))
        element_store[store_id] = processed
        text = ''.join([text[:mo.start()], '<<<', store_id, '>>>',
                        text[mo.end():]])
        return fragmentize(text, wiki_elements, element_store)
# Regex fragment shared by all macro patterns below.  It contributes two
# capture groups: the complete macro name and the last hyphen/period
# separated segment matched by the repeated inner group.
macro_name = r'([a-zA-Z]+([-.]?[a-zA-Z0-9]+)*)'
"""allows any number of non-repeating hyphens or periods.
Underscore is not included because hyphen is"""
class Macro(WikiElement):
    r"""Finds and processes inline macro elements.

    ``func`` is called as ``func(name, arg_string, None, False)`` and
    may return ``None`` (the macro text is emitted unchanged), a string
    (substituted into the text and parsed again) or a genshi
    ``Element``/``Stream``.
    """

    def __init__(self, tag, token, child_tags, func):
        super(Macro, self).__init__(tag, token, child_tags)
        self.func = func
        self.regexp = re.compile(self.re_string())

    def _process(self, mo, text, wiki_elements, element_store):
        """Returns genshi Fragments (Elements and text)"""
        processed = self._build(mo, element_store)
        if isinstance(processed, basestring):
            # A string result replaces the macro in the source text so
            # that it is parsed like any other wiki markup.
            text = ''.join([text[:mo.start()], processed,
                            text[mo.end():]])
        else:
            # Anything else is parked in the store behind a place holder.
            store_id = str(id(processed))
            element_store[store_id] = processed
            text = ''.join([text[:mo.start()], '<<<', store_id, '>>>',
                            text[mo.end():]])
        return fragmentize(text, wiki_elements, element_store)

    def re_string(self):
        """Pattern groups: 1 = name plus arguments, 2 = name, 4 = arguments."""
        content = '(.*?)'
        return esc_neg_look + re.escape(self.token[0]) + r'(' + macro_name + \
               content + ')' + esc_neg_look + re.escape(self.token[1])

    def _build(self, mo, element_store):
        """Run the macro function and normalise its result.

        :raises TypeError: if the macro function returns anything other
            than ``None``, a string, or a genshi ``Element``/``Stream``.
        """
        if self.func:
            value = self.func(mo.group(2), mo.group(4), None, False)
        else:
            value = None
        if value is None:
            # No handler (or handler declined): echo the macro as text.
            return bldr.tag(self.token[0] + mo.group(1) + self.token[1])
        elif isinstance(value, basestring):
            return value
        elif isinstance(value, (bldr.Element, Stream)):
            return [value]
        else:
            # String exceptions are not valid; raise a real exception
            # that carries the intended message.
            raise TypeError(
                "Macros can only return strings and Genshi Streams")
197 198
class BodiedMacro(Macro):
    """Finds and processes macros with bodies.

    Does not span across top level block markup
    (see BodiedBlockMacro's for that).

    ``func`` is called as ``func(name, arg_string, body, False)``.
    """

    def __init__(self, tag, token, child_tags, func):
        # Macro.__init__ stores ``func`` and compiles a first pattern;
        # it is recompiled here because the body may span lines.
        super(BodiedMacro, self).__init__(tag, token, child_tags, func)
        self.regexp = re.compile(self.re_string(), re.DOTALL)

    def re_string(self):
        """Pattern groups: 1 = name plus arguments, 2 = name,
        4 = arguments, 5 = body.  The closing tag must repeat the name
        (``/\\2``)."""
        content = r'([ \S]*?)'
        body = '(.+?)'
        return esc_neg_look + re.escape(self.token[0]) + r'(' + macro_name + \
               content + ')' + esc_neg_look + re.escape(self.token[1]) + \
               body + esc_neg_look + re.escape(self.token[0]) + \
               r'/\2' + re.escape(self.token[1])

    def _build(self, mo, element_store):
        """Run the macro function and normalise its result.

        :raises TypeError: if the macro function returns anything other
            than ``None``, a string, or a genshi ``Element``/``Stream``.
        """
        if self.func:
            value = self.func(mo.group(2), mo.group(4), mo.group(5), False)
        else:
            value = None
        if value is None:
            # Echo the macro as text.  The closing tag only ever contains
            # the macro name (the pattern matches ``/\2``), so use
            # group(2) there rather than repeating the arguments.
            return bldr.tag(self.token[0] + mo.group(1) + self.token[1]
                            + mo.group(5) + self.token[0] + '/'
                            + mo.group(2) + self.token[1])
        elif isinstance(value, basestring):
            return value
        elif isinstance(value, (bldr.Element, Stream)):
            return [value]
        else:
            # String exceptions are not valid; raise a real exception.
            raise TypeError(
                "macros can only return strings and genshi Streams")
234
class BlockMacro(WikiElement):
    """Finds block macros.

    Macro must be on a line alone without leading spaces. Resulting
    output will not be enclosed in paragraph marks or consumed by
    other markup (except pre blocks and BodiedBlockMacro's)

    ``func`` is called as ``func(name, arg_string, None, True)``.
    """

    def __init__(self, tag, token, child_tags, func):
        super(BlockMacro, self).__init__(tag, token, child_tags)
        self.func = func
        self.regexp = re.compile(self.re_string(), re.MULTILINE)

    def _process(self, mo, text, wiki_elements, element_store):
        """Returns genshi Fragments (Elements and text)

        A string result is substituted into the text, which is then
        parsed again from scratch.  Any other result is emitted between
        the fragments of the leading and trailing text, exactly as
        ``WikiElement._process`` does.
        """
        processed = self._build(mo, element_store)
        if isinstance(processed, basestring):
            text = ''.join([text[:mo.start()], processed,
                            text[mo.end():]])
            return fragmentize(text, wiki_elements, element_store)

        frags = []
        # call again for leading text and extend the result list
        if mo.start():
            frags.extend(fragmentize(text[:mo.start()], wiki_elements[1:],
                                     element_store))
        # append the found wiki element to the result list
        frags.append(processed)
        # make the source output easier to read
        if self.append_newline:
            frags.append('\n')
        # call again for trailing text and extend the result list
        if mo.end() < len(text):
            frags.extend(fragmentize(text[mo.end():], wiki_elements,
                                     element_store))
        return frags

    def re_string(self):
        """Pattern groups: 1 = preceding blank line (or start of text),
        2 = name plus arguments, 3 = name, 5 = arguments."""
        arg_string = '((?!.*>>.*>>).*?)'
        start = r'(^\s*?\n|\A)' + re.escape(self.token[0])
        end = re.escape(self.token[1]) + r'\s*?\n(\s*?\n|$)'
        return start + '(' + macro_name + arg_string + ')' + end

    def _build(self, mo, element_store):
        """Run the macro function and normalise its result.

        A string result is right-stripped and terminated with exactly
        one newline.

        :raises TypeError: if the macro function returns anything other
            than ``None``, a string, or a genshi ``Element``/``Stream``.
        """
        if self.func:
            value = self.func(mo.group(3), mo.group(5), None, True)
        else:
            value = None
        if value is None:
            # No handler (or handler declined): echo the macro as text.
            return bldr.tag(self.token[0] + mo.group(2) + self.token[1])
        elif isinstance(value, basestring):
            return ''.join([value.rstrip(), '\n'])
        elif isinstance(value, (bldr.Element, Stream)):
            return [value]
        else:
            # String exceptions are not valid; raise a real exception
            # that carries the intended message.
            raise TypeError(
                "Macros can only return strings and Genshi Streams")
307 308
class BodiedBlockMacro(BlockMacro):
    """Finds and processes block macros with bodies.

    The opening and closing tokens must each be on a line alone without
    leading spaces. These macros can enclose other block level markup
    including pre blocks and other BodiedBlockMacro's.

    ``func`` is called as ``func(name, arg_string, body, True)``.
    """

    def __init__(self, tag, token, child_tags, func):
        # BlockMacro.__init__ stores ``func`` and compiles a first
        # pattern; it is recompiled here because the body spans lines.
        super(BodiedBlockMacro, self).__init__(tag, token, child_tags, func)
        self.regexp = re.compile(self.re_string(), re.DOTALL + re.MULTILINE)

    def re_string(self):
        """Pattern groups: 1 = name plus arguments, 2 = name,
        4 = arguments, 5 = body (ending in a newline).  The closing tag
        must repeat the name (``/\\2``)."""
        arg_string = r'((?![^\n]*>>[^\n]*>>)[ \S]*?)'
        start = '^' + re.escape(self.token[0])
        body = r'(.*?\n)'
        end = re.escape(self.token[0]) + \
              r'/\2' + re.escape(self.token[1]) + r'\s*?\n'
        return start + '(' + macro_name + arg_string + ')' + \
               re.escape(self.token[1]) + r'\s*?\n' + body + end

    def _build(self, mo, element_store):
        """Run the macro function and normalise its result.

        :raises TypeError: if the macro function returns anything other
            than ``None``, a string, or a genshi ``Element``/``Stream``.
        """
        if self.func:
            value = self.func(mo.group(2), mo.group(4), mo.group(5), True)
        else:
            value = None
        if value is None:
            # Echo the macro as text.  The closing tag only ever contains
            # the macro name (the pattern matches ``/\2``), so use
            # group(2) there rather than repeating the arguments.
            return bldr.tag(self.token[0] + mo.group(1) + self.token[1]
                            + mo.group(5) + self.token[0] + '/'
                            + mo.group(2) + self.token[1])
        elif isinstance(value, basestring):
            return value
        elif isinstance(value, (bldr.Element, Stream)):
            return [value]
        else:
            # String exceptions are not valid; raise a real exception.
            raise TypeError(
                "macros can only return strings and genshi Streams")
348 349 393 394 439 440 441 511 512 513 572 573 574 575
class BlockElement(WikiElement):

    """Base class for block level wiki elements.

    It only switches ``append_newline`` on, so it is also the right
    base for any other wiki element that wants
    ``append_newline = True``.

    """

    append_newline = True
586 587
class List(BlockElement):

    """Locates list wiki elements (ordered, unordered and definition).

    group(1) of the match object holds every line of the list,
    newline characters included.

    """

    def __init__(self, tag, token, child_tags, stop_tokens):
        super(List, self).__init__(tag, token, child_tags)
        self.stop_tokens = stop_tokens
        flags = re.DOTALL + re.MULTILINE
        self.regexp = re.compile(self.re_string(), flags)

    def re_string(self):
        """Pattern for generic block elements (ordered, unordered and
        definition lists) that start with a single token.
        """
        own_token = re.escape(self.token)
        stoppers = re.escape(self.stop_tokens)
        pieces = [
            r'^([ \t]*',                 # leading whitespace, opens group 1
            own_token, '[^', own_token, ']',   # exactly one start token
            r'.*?\n)',                   # rest of the list, closes group 1
            '(?=([ \t]*',                # look ahead: optional indent ...
            '([', stoppers, r'])(?!\3)',  # ... and a single stop token
            '|$))',                      # ... or the end of the text
        ]
        return ''.join(pieces)
616 617
class ListItem(WikiElement):
    r"""Matches the current list item.

    Everything up to the next same-level list item is matched.

    >>> list_item = ListItem('li',[],'#*')
    >>> mo = list_item.regexp.search("*one\n**one.1\n**one.2\n*two\n")
    >>> mo.group(2)
    'one\n**one.1\n**one.2'
    >>> mo.group(0)
    '*one\n**one.1\n**one.2'

    """

    append_newline = False

    def __init__(self, tag, child_tags, list_tokens):
        """Constructor for list items.

        :parameters:
          list_tokens
            A string that includes the tokens used for lists
        """
        super(ListItem, self).__init__(tag, token=None,
                                       child_tags=child_tags)
        self.list_tokens = list_tokens
        self.regexp = re.compile(self.re_string(), re.DOTALL)

    def re_string(self):
        # group 1 = the item's list markers, group 2 = the item text
        indent = r'[ \t]*'
        markers = '([*#]+)'
        item_text = r'(.*?)\n?'
        # stop before a following item whose markers equal group 1
        next_sibling = r'(?=(\n' + indent + r'\1(?![*#])' + '|$))'
        return ''.join([indent, markers, indent, item_text, next_sibling])

    def _build(self, mo, element_store):
        # Same as the base class, except that the body is group(2).
        make_element = bldr.tag.__getattr__(self.tag)
        children = fragmentize(mo.group(2), self.child_tags, element_store)
        return make_element(children)
659 660
class NestedList(WikiElement):

    r"""Finds a list in the current list item.

    >>> nested_ul = NestedList('ul','*',[])
    >>> mo = nested_ul.regexp.search('one\n**one.1\n**one.2\n')
    >>> mo.group(1)
    '**one.1\n**one.2\n'
    >>> mo.group(0) == mo.group(1)
    True

    """

    def __init__(self, tag, token, child_tags):
        super(NestedList, self).__init__(tag, token, child_tags)
        flags = re.DOTALL + re.MULTILINE
        self.regexp = re.compile(self.re_string(), flags)

    def re_string(self):
        pieces = [
            r'(?<=\n)',              # never match a list on the first line
            '^',
            r'(\s*',                 # group 1 opens with leading whitespace
            re.escape(self.token),
            '.*$)',                  # ... and runs to the end of the text
        ]
        return ''.join(pieces)
684 685
class DefinitionTerm(BlockElement):

    r"""Processes definition terms.

    >>> term = DefinitionTerm('dt',';',[],stop_token=':')
    >>> mo1,mo2 = term.regexp.finditer(";term1\n:def1\n;term2:def2\n")
    >>> mo1.group(1), mo2.group(1)
    ('term1', 'term2')
    >>> mo1.group(0), mo2.group(0)
    (';term1\n', ';term2')

    group(1) of the match object is the term line or up to the first ':'

    """

    def __init__(self, tag, token, child_tags, stop_token):
        super(DefinitionTerm, self).__init__(tag, token, child_tags)
        self.stop_token = stop_token
        self.regexp = re.compile(self.re_string(), re.DOTALL + re.MULTILINE)

    def re_string(self):
        """Return the pattern; group(1) is the term text.

        The term ends at a newline or just before an unescaped
        ``stop_token`` (or at the end of the text).
        """
        # Raw literals throughout: the regex engine reads ``\n`` the same
        # way as a literal newline, and ``\s`` is no longer an invalid
        # string escape.
        return r'^[ \t]*' + re.escape(self.token) + r'[ \t]*(.*?' + \
               re.escape(self.stop_token) + r'?)\s*(\n|(?=(' + \
               esc_neg_look + re.escape(self.stop_token) + r'|$)))'
715 716
class DefinitionDef(BlockElement):

    r"""Processes definitions.

    >>> definition = DefinitionDef('dd',':',[])
    >>> mo1,mo2 = definition.regexp.finditer(":def1a\ndef1b\n:def2\n")
    >>> mo1.group(1), mo2.group(1)
    ('def1a\ndef1b', 'def2')
    >>> mo1.group(0), mo2.group(0)
    (':def1a\ndef1b\n', ':def2\n')

    group(1) of the match object includes all lines from the definition
    up to the next definition.

    """

    def __init__(self, tag, token, child_tags):
        super(DefinitionDef, self).__init__(tag, token, child_tags)
        self.regexp = re.compile(self.re_string(), re.DOTALL + re.MULTILINE)

    def re_string(self):
        """Return the pattern; group(1) is the definition text.

        The leading token is optional in the pattern, and the match
        stops before the next line that starts with the token (or at
        the end of the text).
        """
        token = re.escape(self.token)
        return r'^[ \t]*' + token + r'?[ \t]*(.+?)\s*\n(?=([ \t]*' + \
               token + r')|$)'
743 744
class Paragraph(BlockElement):

    """"This should be the last outer level wiki element to be "searched".

    Anything that is left over will be placed in paragraphs.

    """

    def __init__(self, tag, child_tags):
        # Paragraphs have no token of their own.
        super(Paragraph, self).__init__(tag, token=None,
                                        child_tags=child_tags)
        flags = re.DOTALL + re.MULTILINE
        self.regexp = re.compile(self.re_string(), flags)

    def re_string(self):
        # group(1) is everything before the final newline matched
        paragraph_pattern = r'^(.*)\n'
        return paragraph_pattern
759 760
class Heading(BlockElement):

    r"""Finds heading wiki elements.

    >>> h1 = Heading('h1','=',[])
    >>> mo = h1.regexp.search('before\n = An important thing = \n after')
    >>> mo.group(1)
    'An important thing'
    >>> mo.group(0)
    ' = An important thing = \n'

    """

    def __init__(self, tag, token, child_tags):
        super(Heading, self).__init__(tag, token, child_tags)
        self.regexp = re.compile(self.re_string(), re.MULTILINE)

    def re_string(self):
        blanks = r'[ \t]*'
        marker = re.escape(self.token[0])
        pieces = [
            '^', blanks,
            re.escape(self.token),
            '(?!', marker, ')',               # no further marker may follow
            blanks,
            '(.*?)',                          # group 1: the heading text
            blanks,
            '(', marker, r'+[ \t]*)?\n',      # optional trailing markers
        ]
        return ''.join(pieces)
785 786
class Table(BlockElement):

    r"""Find tables.

    >>> table = Table('table','|',[])
    >>> mo = table.regexp.search("before\n | one | two |\n|one|two \n hi")
    >>> mo.group(1)
    ' | one | two |\n|one|two \n'
    >>> mo.group(0) == mo.group(1)
    True

    """

    def __init__(self, tag, token, child_tags=None):
        """``child_tags`` defaults to a fresh empty list per instance
        rather than one list shared by every default-constructed table."""
        if child_tags is None:
            child_tags = []
        super(Table, self).__init__(tag, token, child_tags)
        self.regexp = re.compile(self.re_string(), re.MULTILINE)

    def re_string(self):
        """One or more consecutive lines that start (after optional
        blanks) with the token; group(1) is the whole run."""
        whitespace = r'[ \t]*'
        rest_of_line = r'.*?\n'
        return '^((' + whitespace + re.escape(self.token) + \
               rest_of_line + ')+)'
809 810
class TableRow(BlockElement):

    r"""Finds rows in a table.

    >>> row = TableRow('tr','|',[])
    >>> mo = row.regexp.search(' | one | two |\n|one|two \n')
    >>> mo.group(1)
    '| one | two '
    >>> mo.group(0)
    ' | one | two |\n'

    """

    def __init__(self, tag, token, child_tags=None):
        """``child_tags`` defaults to a fresh empty list per instance
        rather than one list shared by every default-constructed row."""
        if child_tags is None:
            child_tags = []
        super(TableRow, self).__init__(tag, token, child_tags)
        self.regexp = re.compile(self.re_string(), re.MULTILINE)

    def re_string(self):
        """group(1) is the row from its first token up to, but not
        including, an optional trailing token."""
        whitespace = r'[ \t]*'
        content = '(' + re.escape(self.token) + '.*?)'
        trailing_token = re.escape(self.token) + '?'
        return '^' + whitespace + content + trailing_token + \
               whitespace + r'\n'
834 835
class TableCell(WikiElement):

    r"""Finds cells in a table row.

    >>> cell = TableCell('td','|',[])
    >>> mo = cell.regexp.search('| one | two ')
    >>> mo.group(1)
    'one'
    >>> mo.group(0)
    '| one '

    """

    def __init__(self, tag, token, child_tags=None):
        """``child_tags`` defaults to a fresh empty list per instance
        rather than one list shared by every default-constructed cell."""
        if child_tags is None:
            child_tags = []
        super(TableCell, self).__init__(tag, token, child_tags)
        self.regexp = re.compile(self.re_string())

    def re_string(self):
        """group(1) is the cell text with surrounding blanks removed;
        the cell ends before the next unescaped ``token[0]`` or at the
        end of the string."""
        whitespace = r'[ \t]*'
        content = '(.*?)'
        look_ahead = '((?=' + esc_neg_look + re.escape(self.token[0]) + ')|$)'
        return esc_neg_look + re.escape(self.token) + whitespace + \
               content + whitespace + look_ahead
859 860 861 878
class Image(InlineElement):

    """Processes image elements.

    >>> img = Image('img',('{{','}}'),[], delimiter='|')
    >>> mo = img.regexp.search('{{ picture.jpg | An image of a house }}')
    >>> img._build(mo,{}).generate().render()
    '<img src="picture.jpg" alt="An image of a house"/>'

    """

    def __init__(self, tag, token, child_tags, delimiter):
        super(Image, self).__init__(tag, token, child_tags)
        # Replace the pattern compiled by the parent with one compiled
        # without any flags.
        self.regexp = re.compile(self.re_string())
        self.delimiter = delimiter
        self.src_regexp = re.compile(r'^\s*(\S+)\s*$')

    def _build(self, mo, element_store):
        """Return the image element, or a ``span`` carrying an error
        text when the source part is not a single run of non-blanks."""
        parts = mo.group(1).split(self.delimiter, 1)
        src_match = self.src_regexp.search(parts[0])
        if not src_match:
            return bldr.tag.span('Bad Image src')

        source = src_match.group(1)
        # URIs the sanitizer rejects are replaced by a fixed marker.
        link = source if sanitizer.is_safe_uri(source) \
            else "unsafe_uri_detected"
        # Without an explicit alternative text the link doubles as one.
        alias = link if len(parts) == 1 else parts[1].strip()
        return bldr.tag.__getattr__(self.tag)(src=link, alt=alias)
910 911
class NoWikiElement(InlineElement):

    """Inline no-wiki.

    When two or more end tokens are found together, only last marks
    the end of the element.

    NOTE(review): earlier documentation said this element must be on a
    single line, but the pattern is compiled with ``re.DOTALL``;
    confirm which behaviour is intended.

    """

    def __init__(self, tag, token, child_tags=None):
        """``child_tags`` defaults to a fresh empty list per instance
        rather than one list shared by all default-constructed elements."""
        if child_tags is None:
            child_tags = []
        super(NoWikiElement, self).__init__(tag, token, child_tags)
        self.regexp = re.compile(self.re_string(), re.DOTALL)

    def _build(self, mo, element_store):
        """Build the element with escapes left in place
        (``remove_escapes=False``).  A false ``self.tag`` yields a bare
        fragment without an enclosing tag."""
        if self.tag:
            wrap = bldr.tag.__getattr__(self.tag)
        else:
            wrap = bldr.tag
        return wrap(fragmentize(mo.group(1), self.child_tags,
                                element_store,
                                remove_escapes=False))

    def re_string(self):
        """A ``str`` token opens and closes the element; otherwise
        ``token[0]`` opens and ``token[1]`` closes it.  The content
        group greedily swallows repeats of the closing token's last
        character so that only the last end token terminates the match."""
        if isinstance(self.token, str):
            opening = closing = self.token
        else:
            opening, closing = self.token[0], self.token[1]
        content = '(.+?' + re.escape(closing[-1]) + '*)'
        return esc_neg_look + re.escape(opening) + \
               content + re.escape(closing)
947 948
class PreBlock(BlockElement):

    """A preformatted block.

    If a closing token is found on a line with a space as the first
    character, the space will be removed from the output.

    """

    def __init__(self, tag, token, child_tags=None):
        """``child_tags`` defaults to a fresh empty list per instance
        rather than one list shared by all default-constructed blocks."""
        if child_tags is None:
            child_tags = []
        super(PreBlock, self).__init__(tag, token, child_tags)
        self.regexp = re.compile(self.re_string(), re.DOTALL + re.MULTILINE)
        self.regexp2 = re.compile(self.re_string2(), re.MULTILINE)

    def re_string(self):
        """Opening token alone on a line, the content (group 1, ending
        in a newline), then the closing token alone on a line.  With a
        ``str`` token the content may be a single empty line; with a
        token pair it needs at least one character before its newline."""
        if isinstance(self.token, str):
            return '^' + re.escape(self.token) + r'\s*?\n(.*?\n)' + \
                   re.escape(self.token) + r'\s*?\n'
        else:
            start = '^' + re.escape(self.token[0]) + r'\s*?\n'
            content = r'(.+?\n)'
            end = re.escape(self.token[1]) + r'\s*?\n'
            return start + content + end

    def re_string2(self):
        """Finds a closing token with a space at the start of the line."""
        if isinstance(self.token, str):
            closing = self.token
        else:
            closing = self.token[1]
        return r'^ (\s*?' + re.escape(closing) + r'\s*?\n)'

    def _build(self, mo, element_store):
        # Drop the single leading space in front of closing tokens found
        # inside the block, then build with escapes left untouched.
        match = self.regexp2.sub(r'\1', mo.group(1))
        return bldr.tag.__getattr__(self.tag)(
            fragmentize(match, self.child_tags,
                        element_store, remove_escapes=False))
986 987
class LoneElement(BlockElement):

    """A content-free element that sits alone on its line (e.g., <hr/>)"""

    def __init__(self, tag, token, child_tags):
        super(LoneElement, self).__init__(tag, token, child_tags)
        flags = re.DOTALL + re.MULTILINE
        self.regexp = re.compile(self.re_string(), flags)

    def re_string(self):
        # the token, optionally surrounded by whitespace, up to a newline
        return ''.join([r'^(\s*?', re.escape(self.token), r'\s*?\n)'])

    def _build(self, mo, element_store):
        # The match itself contributes nothing: the element is empty.
        make_element = bldr.tag.__getattr__(self.tag)
        return make_element()
1001
class LonePlaceHolder(BlockElement):

    """A place holder on a line by itself or with other place holders.
    Matching these separately keeps them from being enclosed in a
    paragraph.

    """

    append_newline = False

    def __init__(self, tag, token, child_tags):
        super(LonePlaceHolder, self).__init__(tag, token, child_tags)
        self.regexp = re.compile(self.re_string(), re.MULTILINE)

    def re_string(self):
        opening, closing = self.token[0], self.token[1]
        place_holder = ''.join([re.escape(opening), r'\S*?',
                                re.escape(closing)])
        return ''.join([r'^\s*?(', place_holder, r'\s*$)+\s*?\n'])

    def _build(self, mo, element_store):
        # The complete match is handed to fragmentize with no child
        # elements, then wrapped in a bare fragment.
        return bldr.tag(fragmentize(mo.group(0), [], element_store))
1019
class BlankLine(WikiElement):

    """Blank lines separate elements without producing output."""

    def __init__(self):
        # No tag, an empty token and no children.
        super(BlankLine, self).__init__(tag=None, token='', child_tags=[])
        pattern = self.re_string()
        self.regexp = re.compile(pattern, re.MULTILINE)

    def re_string(self):
        # one or more consecutive whitespace-only lines
        blank_lines = r'^(\s*\n)+'
        return blank_lines

    def _build(self, mo, element_store):
        # Nothing is built for a blank line.
        return None
1033 1034
class LineBreak(InlineElement):

    """An inline line break."""

    def __init__(self, tag, token, child_tags=None):
        """``child_tags`` defaults to a fresh empty list per instance
        rather than one list shared by all default-constructed elements."""
        if child_tags is None:
            child_tags = []
        super(LineBreak, self).__init__(tag, token, child_tags)
        self.regexp = re.compile(self.re_string(), re.DOTALL)

    def re_string(self):
        """The pattern is just the unescaped token; it has no groups."""
        return esc_neg_look + re.escape(self.token)

    def _build(self, mo, element_store):
        """Return an empty ``self.tag`` element; the match is not used."""
        return bldr.tag.__getattr__(self.tag)()
1049 1050 1051
def _test():
    """Run the doctests via ``doctest.testmod()``."""
    from doctest import testmod
    testmod()


if __name__ == "__main__":
    _test()