1
2
3
4
5
6
7
8
9 import re
10 import urlparse
11
12 import genshi.builder as bldr
13 from genshi.core import Stream
14 from genshi.filters import HTMLSanitizer
15
16 from core import escape_char, esc_neg_look, fragmentize
17
18 sanitizer = HTMLSanitizer()
19
20 __docformat__ = 'restructuredtext en'
21
23
24 """Baseclass for all wiki elements."""
25
26 append_newline = False
27 """Determines if newlines are appended to Element(s) during processing.
28 Should only affect readability of source xml.
29 """
30
31 - def __init__(self, tag, token, child_tags):
32 """Constructor for WikiElement objects.
33
34 Subclasses may have other keyword arguments.
35
36 :parameters:
37 tag
38 The xhtml tag associated with the element.
39 token
40 The character string (or strings) that identifies the element
41 in wiki markup.
42 child_tags
43 A list of wiki_elements that will be searched for in the body of the
44 element. The order of these elements matters, because if an element is
45 found before the element that encloses it, the enclosing element will
46 never be found. In cases where this imposes limits (e.g, ``strong`` and
47 ``em`` should be allowed to nest each other), place the conflicting
48 elements in a sublist. The parser will then find which comes first.
49 """
50 self.tag = tag
51 self.token = token
52 self.child_tags = child_tags
53
def _build(self, mo, element_store):
    """Return a genshi Element that has ``self.tag`` as the outermost tag.

    This method is called exclusively by ``_process``.

    :parameters:
      mo
        match object, usually the one returned by
        self.regexp.search(s); ``mo.group(1)`` becomes the element body.
      element_store
        handed unchanged to ``fragmentize``.
    """
    make_element = bldr.tag.__getattr__(self.tag)
    body = fragmentize(mo.group(1), self.child_tags, element_store)
    return make_element(body)
68
70 """The regular expression pattern that is compiled into ``self.regexp``.
71
72 The regular expression must consume the entire wiki element,
73 including the tokens. For block elements, the newline on the last
74 line must be consumed also. group(1) should normally be the
75 entire string inside the tokens. If not, a custom ``_build``
76 method will be needed.
77 """
78 pass
79
def _process(self, mo, text, wiki_elements, element_store):
    """Return a list of genshi fragments (Elements and text) for ``text``.

    The text before the match is fragmentized with ``wiki_elements[1:]``,
    the match itself is built with ``self._build`` and the text after
    the match is fragmentized with the full ``wiki_elements`` list.

    This is mainly for block level markup.  See InlineElement for the
    other method.
    """
    match_start, match_end = mo.start(), mo.end()
    frags = []

    # Leading text: searched with the remaining elements only.
    if match_start:
        frags += fragmentize(text[:match_start], wiki_elements[1:],
                             element_store)

    frags.append(self._build(mo, element_store))
    if self.append_newline:
        frags.append('\n')

    # Trailing text: searched again with the same element list.
    if match_end < len(text):
        frags += fragmentize(text[match_end:], wiki_elements,
                             element_store)
    return frags
101
103 return "<WikiElement "+str(self.tag)+">"
104
106
107 r"""For finding generic inline elements like ``strong`` and ``em``.
108
109 >>> em = InlineElement('em','//',[])
110 >>> mo1 = em.regexp.search('a //word// in a line')
111 >>> mo2 = em.regexp.search('a //word in a line\n or two\n')
112 >>> mo1.group(0),mo1.group(1)
113 ('//word//', 'word')
114 >>> mo2.group(0),mo2.group(1)
115 ('//word in a line\n or two', 'word in a line\n or two')
116
117 Use a list for the ``token`` argument to have different start
118 and end strings. These must be closed.
119
120 >>> foo = InlineElement('foo',['<<','>>'],[])
121 >>> mo = foo.regexp.search('blaa <<here it is >>\n')
122 >>> mo.group(1)
123 'here it is '
124
125 """
126
127 - def __init__(self, tag, token, child_tags=[]):
130
132 if isinstance(self.token,str):
133 content = '(.+?)'
134 end = '(' + esc_neg_look + re.escape(self.token) + r'|$)'
135 return esc_neg_look + re.escape(self.token) + content + end
136 else:
137 content = '(.+?)'
138 return esc_neg_look + re.escape(self.token[0]) + content + esc_neg_look + re.escape(self.token[1])
139
def _process(self, mo, text, wiki_elements, element_store):
    """Return genshi Fragments (Elements and text).

    The built element is saved in ``element_store`` under ``str(id(...))``
    and the matched text is swapped for a ``<<<id>>>`` place holder
    before the whole text is fragmentized again.
    """
    element = self._build(mo, element_store)
    key = str(id(element))
    element_store[key] = element
    pieces = [text[:mo.start()], '<<<', key, '>>>', text[mo.end():]]
    return fragmentize(''.join(pieces), wiki_elements, element_store)
149
150
macro_name = r'([a-zA-Z]+([-.]?[a-zA-Z0-9]+)*)'
"""Pattern for a macro name: starts with letters, followed by any number
of alphanumeric runs, each optionally preceded by a single hyphen or
period (so hyphens and periods never repeat).  Underscore is not
included because hyphen is."""
154
155 -class Macro(WikiElement):
156 r"""Finds and processes inline macro elements."""
157
def __init__(self, tag, token, child_tags, func):
    """Set up the macro element.

    ``func`` is the callable later invoked by ``_build``; it may be
    false, in which case the macro markup is output unchanged.
    """
    super(Macro, self).__init__(tag, token, child_tags)
    self.func = func
    pattern = self.re_string()
    self.regexp = re.compile(pattern)
162
163
def _process(self, mo, text, wiki_elements, element_store):
    """Return genshi Fragments (Elements and text).

    A string result from ``_build`` replaces the macro markup directly;
    any other result is parked in ``element_store`` and replaced by a
    ``<<<id>>>`` place holder.  The resulting text is then fragmentized.
    """
    built = self._build(mo, element_store)
    head, tail = text[:mo.start()], text[mo.end():]
    if isinstance(built, basestring):
        pieces = [head, built, tail]
    else:
        key = str(id(built))
        element_store[key] = built
        pieces = [head, '<<<', key, '>>>', tail]
    return fragmentize(''.join(pieces), wiki_elements, element_store)
177
178
183
def _build(self, mo, element_store):
    """Run the macro function and normalise what it returns.

    ``self.func`` (when set) is called with ``mo.group(2)``,
    ``mo.group(4)``, ``None`` and ``False``.
    NOTE(review): presumably these are the macro name, the argument
    string, the (absent) body and an is-block flag -- confirm against
    ``re_string`` and the macro function contract.

    :returns:
      * the original markup wrapped in a genshi fragment when there is
        no function or it returns ``None``;
      * the string itself when the function returns a string;
      * a one-item list when it returns a genshi Element or Stream.
    :raises TypeError: if the function returns anything else.
    """
    value = None
    if self.func:
        value = self.func(mo.group(2), mo.group(4), None, False)

    if value is None:
        return bldr.tag(self.token[0] + mo.group(1) + self.token[1])
    if isinstance(value, basestring):
        return value
    if isinstance(value, (bldr.Element, Stream)):
        return [value]
    # String exceptions are not valid; raise a real exception class.
    raise TypeError("Macros can only return strings and Genshi Streams")
197
198
200 """Finds and processes macros with bodies.
201
202 Does not span across top level block markup
203 (see BodiedBlockMacro's for that)."""
204
def __init__(self, tag, token, child_tags, func):
    """Set up the bodied macro; its pattern is compiled with DOTALL."""
    super(BodiedMacro, self).__init__(tag, token, child_tags, func)
    self.func = func
    pattern = self.re_string()
    self.regexp = re.compile(pattern, re.DOTALL)
209
211 content = r'([ \S]*?)'
212
213 body = '(.+?)'
214 return esc_neg_look + re.escape(self.token[0]) + r'(' + macro_name + \
215 content + ')'+ esc_neg_look + re.escape(self.token[1]) + \
216 body + esc_neg_look + re.escape(self.token[0]) + \
217 r'/\2' + re.escape(self.token[1])
218
def _build(self, mo, element_store):
    """Run the macro function on a macro that has a body.

    ``self.func`` (when set) is called with the macro name
    (``mo.group(2)``), the argument string (``mo.group(4)``), the body
    (``mo.group(5)``) and ``False``.

    :returns:
      * the original markup (opening tag, body, closing tag) wrapped in
        a genshi fragment when there is no function or it returns
        ``None``;
      * the string itself when the function returns a string;
      * a one-item list when it returns a genshi Element or Stream.
    :raises TypeError: if the function returns anything else.
    """
    value = None
    if self.func:
        value = self.func(mo.group(2), mo.group(4), mo.group(5), False)

    if value is None:
        return bldr.tag(self.token[0] + mo.group(1) + self.token[1]
                        + mo.group(5) + self.token[0] + '/'
                        + mo.group(1) + self.token[1])
    if isinstance(value, basestring):
        return value
    if isinstance(value, (bldr.Element, Stream)):
        return [value]
    # String exceptions are not valid; raise a real exception class.
    raise TypeError("macros can only return strings and genshi Streams")
234
236 """Finds block macros.
237
238 Macro must be on a line alone without leading spaces. Resulting
239 output will not be enclosed in paragraph marks or consumed by
240 other markup (except pre blocks and BodiedBlockMacro's)
241 """
242
def __init__(self, tag, token, child_tags, func):
    """Set up the block macro; its pattern is compiled with MULTILINE."""
    super(BlockMacro, self).__init__(tag, token, child_tags)
    self.func = func
    pattern = self.re_string()
    self.regexp = re.compile(pattern, re.MULTILINE)
247
def _process(self, mo, text, wiki_elements, element_store):
    """Return genshi Fragments (Elements and text).

    When ``_build`` returns a string, that string replaces the macro
    markup and the whole text is fragmentized again with the same
    ``wiki_elements``.  Otherwise the result is emitted as a fragment
    of its own: text before the match is fragmentized with
    ``wiki_elements[1:]`` and text after it with ``wiki_elements``.
    """
    built = self._build(mo, element_store)
    before, after = text[:mo.start()], text[mo.end():]

    if isinstance(built, basestring):
        # Macro expanded to wiki text: splice it in and parse again.
        return fragmentize(''.join([before, built, after]),
                           wiki_elements, element_store)

    frags = []
    if mo.start():
        frags += fragmentize(before, wiki_elements[1:], element_store)

    frags.append(built)
    if self.append_newline:
        frags.append('\n')

    if mo.end() < len(text):
        frags += fragmentize(after, wiki_elements, element_store)
    return frags
278
279
281 arg_string = '((?!.*>>.*>>).*?)'
282
283
284
285
286
287 start = r'(^\s*?\n|\A)' + re.escape(self.token[0])
288 end = re.escape(self.token[1]) + r'\s*?\n(\s*?\n|$)'
289
290 return start + '(' + macro_name + arg_string + ')' + end
291
292
def _build(self, mo, element_store):
    """Run the macro function for a block macro.

    ``self.func`` (when set) is called with the macro name
    (``mo.group(3)``), the argument string (``mo.group(5)``), ``None``
    for the body and ``True``.

    :returns:
      * the original markup wrapped in a genshi fragment when there is
        no function or it returns ``None``;
      * the string, right-stripped and terminated by a single newline,
        when the function returns a string;
      * a one-item list when it returns a genshi Element or Stream.
    :raises TypeError: if the function returns anything else.
    """
    value = None
    if self.func:
        value = self.func(mo.group(3), mo.group(5), None, True)

    if value is None:
        return bldr.tag(self.token[0] + mo.group(2) + self.token[1])
    if isinstance(value, basestring):
        return ''.join([value.rstrip(), '\n'])
    if isinstance(value, (bldr.Element, Stream)):
        return [value]
    # String exceptions are not valid; raise a real exception class.
    raise TypeError("Macros can only return strings and Genshi Streams")
307
308
310 """Finds and processes block macros with bodies.
311
312 The opening and closing tokens must each be on a line alone without
313 leading spaces. These macros can enclose other block level markup
314 including pre blocks and other BodiedBlockMacro's."""
315
316 - def __init__(self, tag, token, child_tags,func):
320
322 arg_string = r'((?![^\n]*>>[^\n]*>>)[ \S]*?)'
323 start = '^' + re.escape(self.token[0])
324
325 body = r'(.*?\n)'
326 end = re.escape(self.token[0]) + \
327 r'/\2' + re.escape(self.token[1]) + r'\s*?\n'
328
329 return start + '(' + macro_name + arg_string + ')' + re.escape(self.token[1]) + \
330 r'\s*?\n' + body + end
331
def _build(self, mo, element_store):
    """Run the macro function for a block macro that has a body.

    ``self.func`` (when set) is called with the macro name
    (``mo.group(2)``), the argument string (``mo.group(4)``), the body
    (``mo.group(5)``) and ``True``.

    :returns:
      * the original markup (opening tag, body, closing tag) wrapped in
        a genshi fragment when there is no function or it returns
        ``None``;
      * the string itself when the function returns a string;
      * a one-item list when it returns a genshi Element or Stream.
    :raises TypeError: if the function returns anything else.
    """
    value = None
    if self.func:
        value = self.func(mo.group(2), mo.group(4), mo.group(5), True)

    if value is None:
        return bldr.tag(self.token[0] + mo.group(1) + self.token[1]
                        + mo.group(5) + self.token[0] + '/'
                        + mo.group(1) + self.token[1])
    if isinstance(value, basestring):
        return value
    if isinstance(value, (bldr.Element, Stream)):
        return [value]
    # String exceptions are not valid; raise a real exception class.
    raise TypeError("macros can only return strings and genshi Streams")
348
349
351
352 """Used to find raw urls in wiki text and build xml from them.
353
354 >>> raw_link = RawLink(tag='a')
355 >>> mo = raw_link.regexp.search(" a http://www.google.com url ")
356 >>> raw_link.href(mo)
357 'http://www.google.com'
358 >>> raw_link._build(mo,{}).generate().render()
359 '<a href="http://www.google.com">http://www.google.com</a>'
360
361 """
362 linking_protocols = ['http','https']
363
367
369 escape = '(' + re.escape(escape_char) + ')?'
370 protocol = '((https?|ftp)://'
371 rest_of_url = r'\S+?)'
372
373 look_ahead = r'(?=([,.?!:;"\']|\*\*|//)?(\s|$))'
374 return escape + protocol + rest_of_url + look_ahead
375
def _build(self, mo, element_store):
    """Return a link Element, or the bare url text when not linking.

    A link is built only when the url is not preceded by the escape
    character (``mo.group(1)``) and its protocol (``mo.group(3)``) is
    listed in ``self.linking_protocols``.
    """
    escaped = mo.group(1)
    if escaped or mo.group(3) not in self.linking_protocols:
        return self.href(mo)
    make_element = bldr.tag.__getattr__(self.tag)
    return make_element(self.alias(mo, element_store), href=self.href(mo))
382
384 """Returns the string for the href attribute of the Element."""
385 if sanitizer.is_safe_uri(mo.group(2)):
386 return mo.group(2)
387 else:
388 return "unsafe_uri_detected"
389
def alias(self, mo, element_store):
    """Return the content of the Element: the same string as the href."""
    link_text = self.href(mo)
    return link_text
393
394
396
397 """Used to find url type links inside a link.
398
399 The scope of these is within link markup only (i.e., [[url]]
400
401 >>> url_link = URLLink('a','',[],'|')
402 >>> mo = url_link.regexp.search(" http://www.google.com| here ")
403 >>> url_link.href(mo)
404 'http://www.google.com'
405 >>> url_link._build(mo,{}).generate().render()
406 '<a href="http://www.google.com">here</a>'
407
408 """
409
def __init__(self, tag, token, child_tags, delimiter):
    """Set up the url link element.

    ``delimiter`` separates the url from its optional alias; it must be
    stored before the pattern is compiled because ``re_string`` uses it.
    """
    super(URLLink, self).__init__(tag, token, child_tags)
    self.delimiter = delimiter
    pattern = self.re_string()
    self.regexp = re.compile(pattern)
414
416 protocol = r'^\s*((\w+?://|/)'
417 rest_of_url = r'\S*?)\s*'
418 alias = r'(' + re.escape(self.delimiter) + r' *(.*?))? *$'
419 return protocol + rest_of_url + alias
420
def _build(self, mo, element_store):
    """Return an Element for the link: alias as content, url as href."""
    make_element = bldr.tag.__getattr__(self.tag)
    content = self.alias(mo, element_store)
    return make_element(content, href=self.href(mo))
424
426 """Returns the string for the href attribute of the Element."""
427 if sanitizer.is_safe_uri(mo.group(1)):
428 return mo.group(1)
429 else:
430 return "unsafe_uri_detected"
431
432
def alias(self, mo, element_store):
    """Return the content of the Element.

    The alias text (``mo.group(4)``) is fragmentized when present;
    without one the href string is used.
    """
    alias_text = mo.group(4)
    if alias_text:
        return fragmentize(alias_text, self.child_tags, element_store)
    return self.href(mo)
439
440
441
443
444 """Used to match interwiki links inside a link.
445
446 The search scope for these is only inside links.
447
448 >>> interwiki_link = InterWikiLink('a','',[],
449 ... delimiter1=':', delimiter2 = '|',
450 ... base_urls=dict(somewiki='http://somewiki.org/',
451 ... bigwiki='http://bigwiki.net/'),
452 ... links_funcs={},default_space_char='_',
453 ... space_chars={})
454 >>> mo = interwiki_link.regexp.search(" somewiki:Home Page|steve ")
455 >>> interwiki_link.href(mo)
456 'http://somewiki.org/Home_Page'
457 >>> interwiki_link.alias(mo,{})
458 ['steve']
459
460 """
461
def __init__(self, tag, token, child_tags, delimiter1,
             delimiter2, base_urls, links_funcs, default_space_char,
             space_chars):
    """Set up the interwiki link element.

    ``delimiter1`` separates the wiki id from the page name and
    ``delimiter2`` separates the page name from the alias.
    ``base_urls`` and ``links_funcs`` are looked up by wiki id when the
    href is built; ``space_chars`` (also keyed by wiki id) and
    ``default_space_char`` choose the replacement for spaces in page
    names.
    """
    super(InterWikiLink, self).__init__(tag, token, child_tags)
    # Both delimiters are needed by re_string(), so set them first.
    self.delimiter1 = delimiter1
    self.delimiter2 = delimiter2
    pattern = self.re_string()
    self.regexp = re.compile(pattern)
    self.base_urls = base_urls
    self.links_funcs = links_funcs
    self.default_space_char = default_space_char
    self.space_chars = space_chars
472
474 wiki_id = r'(\w+)'
475 optional_spaces = ' *'
476 page_name = r'(\S+?( \S+?)*)'
477 alias = r'(' + re.escape(self.delimiter2) + r' *(.*?))? *$'
478 return wiki_id + optional_spaces + re.escape(self.delimiter1) + \
479 optional_spaces + page_name + optional_spaces + \
480 alias
481
def page_name(self, mo):
    """Return the page name with spaces replaced for the matched wiki.

    The replacement is looked up in ``self.space_chars`` by wiki id
    (``mo.group(1)``), falling back to ``self.default_space_char``.
    """
    wiki_id = mo.group(1)
    replacement = self.space_chars.get(wiki_id, self.default_space_char)
    raw_name = mo.group(2)
    return raw_name.replace(' ', replacement)
485
487 linktype = mo.group(1)
488 base_url = self.base_urls.get(linktype)
489 link_func = self.links_funcs.get(linktype)
490 if not (link_func or base_url):
491 return None
492 else:
493 href = self.page_name(mo)
494 if link_func:
495 href = link_func(href)
496 if base_url:
497 href = urlparse.urljoin(base_url, href)
498 return href
499
def _build(self, mo, element_store):
    """Return a link Element, or the original markup for unknown wikis.

    When ``self.href`` yields nothing for the match, the matched text
    is returned wrapped in ``[[`` and ``]]``.
    """
    # Compute the href once: href() may invoke a user-supplied link
    # function, which should not run twice for a single link.
    href = self.href(mo)
    if not href:
        return '[[' + mo.group(0) + ']]'
    make_element = bldr.tag.__getattr__(self.tag)
    return make_element(self.alias(mo, element_store), href=href)
def alias(self, mo, element_store):
    """Return the content of the Element.

    The alias text (``mo.group(5)``) is fragmentized when present;
    otherwise the wiki id and page name joined by ``self.delimiter1``
    are used.
    """
    alias_text = mo.group(5)
    if alias_text:
        return fragmentize(alias_text, self.child_tags, element_store)
    return ''.join([mo.group(1), self.delimiter1, mo.group(2)])
511
512
513
515
516 """Used to match wiki links inside a link.
517
518 The search scope for these is only inside links.
519
520 >>> wiki_link = WikiLink('a','',[],'|',base_url='http://somewiki.org/',
521 ... space_char='_',class_func=None, path_func=None)
522 >>> mo = wiki_link.regexp.search(" Home Page |Home")
523 >>> wiki_link.href(mo)
524 'http://somewiki.org/Home_Page'
525 >>> wiki_link.alias(mo,{})
526 ['Home']
527
528 """
529
def __init__(self, tag, token, child_tags, delimiter,
             base_url, space_char, class_func, path_func):
    """Set up the wiki link element.

    ``delimiter`` separates the page name from the alias, ``base_url``
    is joined with the page path to form the href and ``space_char``
    replaces spaces in page names.  ``class_func`` and ``path_func``
    are optional callables applied to the page name to obtain the
    element's class and path.
    """
    super(WikiLink, self).__init__(tag, token, child_tags)
    self.delimiter = delimiter
    self.base_url = base_url
    self.space_char = space_char
    self.class_func = class_func
    self.path_func = path_func
    pattern = self.re_string()
    self.regexp = re.compile(pattern)
539
541 optional_spaces = ' *'
542 page_name = r'(\S+?( +\S+?)*?)'
543 alias = r'(' + re.escape(self.delimiter) + r' *(.*?))? *$'
544 return optional_spaces + page_name + optional_spaces + \
545 alias
546
def page_name(self, mo):
    """Return the matched page name with spaces replaced by ``self.space_char``."""
    raw_name = mo.group(1)
    return raw_name.replace(' ', self.space_char)
549
551 if self.path_func:
552 the_path = self.path_func(self.page_name(mo))
553 else:
554 the_path = self.page_name(mo)
555 return urlparse.urljoin(self.base_url, the_path)
556
def _build(self, mo, element_store):
    """Return the link Element, with a class when ``class_func`` is set.

    ``self.class_func`` (if any) receives the page name and its result
    is passed as the element's ``class_``; otherwise ``None`` is passed.
    """
    the_class = None
    if self.class_func:
        the_class = self.class_func(self.page_name(mo))
    make_element = bldr.tag.__getattr__(self.tag)
    return make_element(self.alias(mo, element_store),
                        href=self.href(mo),
                        class_=the_class)
565
def alias(self, mo, element_store):
    """Return the content of the Element.

    When the alias part (``mo.group(3)``) matched, its text
    (``mo.group(4)``) is fragmentized; otherwise the page name as
    written (``mo.group(1)``) is returned.
    """
    if mo.group(3):
        return fragmentize(mo.group(4), self.child_tags, element_store)
    return mo.group(1)
572
573
574
575
577
578 """Block elements inherit from this class
579
580 Wiki elements wanting ``append_newline = True`` should use this
581 as the base also.
582
583 """
584
585 append_newline = True
586
587
588 -class List(BlockElement):
589
590 """Finds list (ordered, unordered, and definition) wiki elements.
591
592 group(1) of the match object includes all lines from the list
593 including newline characters.
594
595 """
596
def __init__(self, tag, token, child_tags, stop_tokens):
    """Set up the list element.

    ``stop_tokens`` must be stored before the pattern is compiled
    because ``re_string`` uses it.  The pattern is compiled with
    DOTALL and MULTILINE.
    """
    super(List, self).__init__(tag, token, child_tags)
    self.stop_tokens = stop_tokens
    flags = re.DOTALL | re.MULTILINE
    self.regexp = re.compile(self.re_string(), flags)
601
603 """This re_string is for finding generic block elements like
604 lists (ordered, unordered, and definition) that start with a
605 single token.
606 """
607 leading_whitespace = r'^([ \t]*'
608 only_one_token = re.escape(self.token)+'[^'+ re.escape(self.token) + ']'
609 rest_of_list = r'.*?\n)'
610
611
612 only_one_stop_token = '([' + re.escape(self.stop_tokens) + r'])(?!\3)'
613 look_ahead = '(?=([ \t]*' + only_one_stop_token + '|$))'
614 return leading_whitespace + only_one_token + rest_of_list + \
615 look_ahead
616
617
619 r"""Matches the current list item.
620
621 Everything up to the next same-level list item is matched.
622
623 >>> list_item = ListItem('li',[],'#*')
624 >>> mo = list_item.regexp.search("*one\n**one.1\n**one.2\n*two\n")
625 >>> mo.group(2)
626 'one\n**one.1\n**one.2'
627 >>> mo.group(0)
628 '*one\n**one.1\n**one.2'
629
630 """
631
632 append_newline = False
633
def __init__(self, tag, child_tags, list_tokens):
    """Constructor for list items.

    List items have no token of their own, so ``None`` is passed to the
    base class.  The pattern is compiled with DOTALL.

    :parameters:
      list_tokens
        A string that includes the tokens used for lists
    """
    super(ListItem, self).__init__(tag, token=None,
                                   child_tags=child_tags)
    self.list_tokens = list_tokens
    pattern = self.re_string()
    self.regexp = re.compile(pattern, re.DOTALL)
645
647 whitespace = r'[ \t]*'
648 item_start = '([*#]+)'
649 rest_of_item = r'(.*?)\n?'
650 start_of_same_level_item = r'\1(?![*#])'
651 look_ahead = r'(?=(\n' + whitespace + start_of_same_level_item + '|$))'
652 return whitespace + item_start + whitespace + \
653 rest_of_item + look_ahead
654
def _build(self, mo, element_store):
    """Return the item Element; its body is ``mo.group(2)``, fragmentized."""
    make_element = bldr.tag.__getattr__(self.tag)
    body = fragmentize(mo.group(2), self.child_tags, element_store)
    return make_element(body)
659
660
662
663 r"""Finds a list in the current list item.
664
665 >>> nested_ul = NestedList('ul','*',[])
666 >>> mo = nested_ul.regexp.search('one\n**one.1\n**one.2\n')
667 >>> mo.group(1)
668 '**one.1\n**one.2\n'
669 >>> mo.group(0) == mo.group(1)
670 True
671
672 """
673
674 - def __init__(self, tag, token,child_tags):
677
679 look_behind = r'(?<=\n)'
680 whitespace = r'(\s*'
681 rest_of_list = '.*$)'
682 return look_behind + '^' + whitespace + re.escape(self.token) + \
683 rest_of_list
684
685
687
688 r"""Processes definition terms.
689
690 >>> term = DefinitionTerm('dt',';',[],stop_token=':')
691 >>> mo1,mo2 = term.regexp.finditer(";term1\n:def1\n;term2:def2\n")
692 >>> mo1.group(1), mo2.group(1)
693 ('term1', 'term2')
694 >>> mo1.group(0), mo2.group(0)
695 (';term1\n', ';term2')
696
697 group(1) of the match object is the term line or up to the first ':'
698
699 """
700
def __init__(self, tag, token, child_tags, stop_token):
    """Set up the definition term element.

    ``stop_token`` must be stored before the pattern is compiled
    because ``re_string`` uses it.  The pattern is compiled with
    DOTALL and MULTILINE.
    """
    super(DefinitionTerm, self).__init__(tag, token, child_tags)
    self.stop_token = stop_token
    flags = re.DOTALL | re.MULTILINE
    self.regexp = re.compile(self.re_string(), flags)
705
707 leading_whitespace = r'^([ \t]*'
708
709 rest_of_list = r'.*?\n)'
710
711
712 return r'^[ \t]*' + re.escape(self.token) + r'[ \t]*(.*?' + \
713 re.escape(self.stop_token) + '?)\s*(\n|(?=(' + \
714 esc_neg_look + re.escape(self.stop_token) + r'|$)))'
715
716
718
719 r"""Processes definitions.
720
721 >>> definition = DefinitionDef('dd',':',[])
722 >>> mo1,mo2 = definition.regexp.finditer(":def1a\ndef1b\n:def2\n")
723 >>> mo1.group(1), mo2.group(1)
724 ('def1a\ndef1b', 'def2')
725 >>> mo1.group(0), mo2.group(0)
726 (':def1a\ndef1b\n', ':def2\n')
727
728 group(1) of the match object includes all lines from the defintion
729 up to the next definition.
730
731 """
732
733 - def __init__(self, tag, token,child_tags):
736
738 leading_whitespace = r'^([ \t]*'
739 rest_of_list = r'.*?\n)'
740 look_ahead = r'(?=([ \t]*' + re.escape(self.token) + r')|$)'
741 return r'^[ \t]*' + re.escape(self.token) + r'?[ \t]*(.+?)\s*\n(?=([ \t]*' + \
742 re.escape(self.token) + r')|$)'
743
744
746
747 """This should be the last outer level wiki element to be "searched".
748
749 Anything that is left over will be placed in paragraphs.
750
751 """
752
754 super(Paragraph,self).__init__(tag,token=None, child_tags=child_tags)
755 self.regexp = re.compile(self.re_string(),re.DOTALL+re.MULTILINE)
756
759
760
762
763 r"""Finds heading wiki elements.
764
765 >>> h1 = Heading('h1','=',[])
766 >>> mo = h1.regexp.search('before\n = An important thing = \n after')
767 >>> mo.group(1)
768 'An important thing'
769 >>> mo.group(0)
770 ' = An important thing = \n'
771
772 """
773
774 - def __init__(self, tag, token, child_tags):
777
779 whitespace = r'[ \t]*'
780 neg_look_ahead = '(?!' + re.escape(self.token[0]) + ')'
781 content = '(.*?)'
782 trailing_markup = '(' + re.escape(self.token[0]) + r'+[ \t]*)?\n'
783 return '^' + whitespace + re.escape(self.token) + neg_look_ahead + \
784 whitespace + content + whitespace + trailing_markup
785
786
787 -class Table(BlockElement):
788
789 r"""Find tables.
790
791 >>> table = Table('table','|',[])
792 >>> mo = table.regexp.search("before\n | one | two |\n|one|two \n hi")
793 >>> mo.group(1)
794 ' | one | two |\n|one|two \n'
795 >>> mo.group(0) == mo.group(1)
796 True
797
798 """
799
def __init__(self, tag, token, child_tags=None):
    """Set up the table element; its pattern is compiled with MULTILINE.

    ``child_tags`` defaults to a new empty list for each instance.  (A
    literal ``[]`` default would be one list shared by every instance
    created without the argument.)
    """
    if child_tags is None:
        child_tags = []
    super(Table, self).__init__(tag, token, child_tags)
    self.regexp = re.compile(self.re_string(), re.MULTILINE)
803
805 whitespace = r'[ \t]*'
806 rest_of_line = r'.*?\n'
807 return '^((' + whitespace + re.escape(self.token) + \
808 rest_of_line + ')+)'
809
810
812
813 r"""Finds rows in a table.
814
815 >>> row = TableRow('tr','|',[])
816 >>> mo = row.regexp.search(' | one | two |\n|one|two \n')
817 >>> mo.group(1)
818 '| one | two '
819 >>> mo.group(0)
820 ' | one | two |\n'
821
822 """
823
824 - def __init__(self, tag, token, child_tags=[]):
827
829 whitespace = r'[ \t]*'
830 content = '(' + re.escape(self.token) + '.*?)'
831 trailing_token = re.escape(self.token) + '?'
832 return '^' + whitespace + content + trailing_token + \
833 whitespace + r'\n'
834
835
837
838 r"""Finds cells in a table row.
839
840 >>> cell = TableCell('td','|',[])
841 >>> mo = cell.regexp.search('| one | two ')
842 >>> mo.group(1)
843 'one'
844 >>> mo.group(0)
845 '| one '
846
847 """
848
849 - def __init__(self, tag, token, child_tags=[]):
852
854 whitespace = r'[ \t]*'
855 content = '(.*?)'
856 look_ahead = '((?=' + esc_neg_look + re.escape(self.token[0]) + ')|$)'
857 return esc_neg_look + re.escape(self.token) + whitespace + \
858 content + whitespace + look_ahead
859
860
861
862 -class Link(InlineElement):
863
864 """Finds and builds links."""
865
866 - def __init__(self, tag, token, child_tags):
869
def _build(self, mo, element_store):
    """Return the built link, or the markup as text if nothing was built.

    The link body (``mo.group(1)``) is fragmentized with
    ``self.child_tags``.  A non-empty result is wrapped in a genshi
    fragment; otherwise the matched text is returned between the
    opening and closing tokens.
    """
    link = fragmentize(mo.group(1), self.child_tags, element_store)

    if link:
        return bldr.tag(link)
    # The tokens live on the instance; a bare ``token`` is undefined
    # here and raised NameError.
    # NOTE(review): if the pattern is the inherited InlineElement one,
    # mo.group(0) already contains both tokens -- confirm whether
    # wrapping it again is intended.
    return self.token[0] + mo.group(0) + self.token[-1]
878
class Image(InlineElement):

    """Processes image elements.

    The text between the tokens is split once on ``delimiter``: the
    first part is the image source, the optional second part is the
    alternate text.

    >>> img = Image('img',('{{','}}'),[], delimiter='|')
    >>> mo = img.regexp.search('{{ picture.jpg | An image of a house }}')
    >>> img._build(mo,{}).generate().render()
    '<img src="picture.jpg" alt="An image of a house"/>'

    """

    def __init__(self, tag, token, child_tags, delimiter):
        """Set up the image element.

        ``src_regexp`` accepts a source that is a single run of
        non-whitespace characters, optionally padded by whitespace.
        """
        super(Image, self).__init__(tag, token, child_tags)
        self.regexp = re.compile(self.re_string())
        self.delimiter = delimiter
        self.src_regexp = re.compile(r'^\s*(\S+)\s*$')

    def _build(self, mo, element_store):
        """Return the image Element, or an error ``span`` for a bad source.

        An unsafe source uri is replaced by ``"unsafe_uri_detected"``.
        Without alternate text, the (possibly replaced) source is used
        as the ``alt`` value.
        """
        parts = mo.group(1).split(self.delimiter, 1)
        src_match = self.src_regexp.search(parts[0])
        if not src_match:
            return bldr.tag.span('Bad Image src')

        src = src_match.group(1)
        if not sanitizer.is_safe_uri(src):
            src = "unsafe_uri_detected"

        if len(parts) == 1:
            alt = src
        else:
            alt = parts[1].strip()
        return bldr.tag.__getattr__(self.tag)(src=src, alt=alt)
910
911
913
914 """Inline no-wiki.
915
916 When two or more end tokens are found together, only last marks
917 the end of the element.
918
919 This element must be on a single line.
920
921 """
922
923 - def __init__(self, tag, token, child_tags=[]):
926
def _build(self, mo, element_store):
    """Return the no-wiki content, leaving escape characters in place.

    The content is wrapped in ``self.tag`` when a tag is set and in a
    plain genshi fragment otherwise.
    """
    if self.tag:
        wrap = bldr.tag.__getattr__(self.tag)
    else:
        wrap = bldr.tag
    content = fragmentize(mo.group(1), self.child_tags,
                          element_store,
                          remove_escapes=False)
    return wrap(content)
937
939 if isinstance(self.token,str):
940 content = '(.+?' + re.escape(self.token[-1]) + '*)'
941 return esc_neg_look + re.escape(self.token) + \
942 content + re.escape(self.token)
943 else:
944 content = '(.+?' + re.escape(self.token[1][-1]) + '*)'
945 return esc_neg_look + re.escape(self.token[0]) + \
946 content + re.escape(self.token[1])
947
948
950
951 """A preformatted block.
952
953 If a closing token is found on a line with a space as the first
954 character, the space will be removed from the output.
955
956 """
957
def __init__(self, tag, token, child_tags=None):
    """Set up the preformatted block element.

    ``regexp`` finds the whole block (DOTALL and MULTILINE);
    ``regexp2`` finds closing tokens preceded by a space (MULTILINE).

    ``child_tags`` defaults to a new empty list for each instance.  (A
    literal ``[]`` default would be one list shared by every instance
    created without the argument.)
    """
    if child_tags is None:
        child_tags = []
    super(PreBlock, self).__init__(tag, token, child_tags)
    self.regexp = re.compile(self.re_string(), re.DOTALL + re.MULTILINE)
    self.regexp2 = re.compile(self.re_string2(), re.MULTILINE)
962
964 if isinstance(self.token,str):
965 return '^' + re.escape(self.token) + r'\s*?\n(.*?\n)' + \
966 re.escape(self.token) + r'\s*?\n'
967 else:
968 start = '^' + re.escape(self.token[0]) + r'\s*?\n'
969 content = r'(.+?\n)'
970 end = re.escape(self.token[1]) + r'\s*?\n'
971 return start + content + end
972
974 """Finds a closing token with a space at the start of the line."""
975 if isinstance(self.token,str):
976 return r'^ (\s*?' + re.escape(self.token) + r'\s*?\n)'
977 else:
978 return r'^ (\s*?' + re.escape(self.token[1]) + r'\s*?\n)'
979
def _build(self, mo, element_store):
    """Return the preformatted Element, leaving escape characters in place.

    Before the body is fragmentized, ``regexp2`` drops the leading
    space from lines it matches.
    """
    body = self.regexp2.sub(r'\1', mo.group(1))
    make_element = bldr.tag.__getattr__(self.tag)
    content = fragmentize(body, self.child_tags,
                          element_store, remove_escapes=False)
    return make_element(content)
986
987
989
990 """Element on a line by itself with no content (e.g., <hr/>)"""
991
992 - def __init__(self, tag, token, child_tags):
995
997 return r'^(\s*?' + re.escape(self.token) + r'\s*?\n)'
998
def _build(self, mo, element_store):
    """Return an empty Element named ``self.tag``; the match is not used."""
    make_element = bldr.tag.__getattr__(self.tag)
    return make_element()
1001
1003
1004 """A place holder on a line by itself or with other place holders.
1005 This is used to avoid these being enclosed in a paragraph.
1006
1007 """
1008 append_newline = False
1009 - def __init__(self, tag, token, child_tags):
1012
1014 place_holder = re.escape(self.token[0]) + r'\S*?' + re.escape(self.token[1])
1015 return r'^\s*?(' + place_holder + r'\s*$)+\s*?\n'
1016
def _build(self, mo, element_store):
    """Return the whole match, fragmentized with no child elements,
    wrapped in a plain genshi fragment."""
    content = fragmentize(mo.group(0), [], element_store)
    return bldr.tag(content)
1019
1021
1022 """Blank lines divide elements but don't add any output."""
1023
1025 super(BlankLine,self).__init__(tag=None,token='' , child_tags=[])
1026 self.regexp = re.compile(self.re_string(),re.MULTILINE)
1027
1030
1031 - def _build(self,mo,element_store):
1033
1034
1036
1037 """An inline line break."""
1038
1039
1040 - def __init__(self,tag, token, child_tags=[]):
1043
1046
def _build(self, mo, element_store):
    """Return an empty Element named ``self.tag``; the match is not used."""
    make_element = bldr.tag.__getattr__(self.tag)
    return make_element()
1049
1050
1051
1053 import doctest
1054 doctest.testmod()
1055
1056 if __name__ == "__main__":
1057 _test()
1058