1
2
3
4
5
6
7
8
import re

import genshi.builder as bldr
12
__docformat__ = 'restructuredtext en'

# Character used to escape wiki markup.
escape_char = '~'

# Regex fragment: negative look-behind for the escape character.
esc_neg_look = '(?<!' + re.escape(escape_char) + ')'

# Matches an escape character that is not itself preceded by an escape
# character and is not followed by a space, a newline or the end of the
# text.  fragmentize() substitutes these matches with the empty string.
esc_to_remove = re.compile(
    ''.join([r'(?<!', re.escape(escape_char), ')',
             re.escape(escape_char), r'(?!([ \n]|$))']))

# Matches ``<<<id>>>`` place holders; group 1 is the (possibly negative)
# integer id, used as the key into the element store.
place_holder_re = re.compile(r'<<<(-?\d+?)>>>')

# preprocess() splits a document into chunks once it contains more blank
# lines than this.
max_blank_lines = 250
20
def fill_from_store(text, element_store):
    """Split ``text`` on ``<<<id>>>`` place holders and substitute them.

    :parameters:
      text
        the string to be scanned for place holders
      element_store
        dictionary mapping place holder ids (the string between ``<<<``
        and ``>>>``) to the objects that should replace them

    Returns a list of fragments: the plain-text pieces of ``text``
    interleaved with the stored objects.  A place holder whose id is not
    in ``element_store`` is kept as its literal ``<<<id>>>`` text.  When
    ``text`` contains no place holder at all the result is ``[text]``
    (so an empty string yields ``['']``).

    """
    frags = []
    pos = 0
    # One left-to-right pass; avoids re-slicing the remaining text after
    # every place holder.
    for mo in place_holder_re.finditer(text):
        if mo.start() > pos:
            frags.append(text[pos:mo.start()])
        key = mo.group(1)
        frags.append(element_store.get(key, key.join(['<<<', '>>>'])))
        pos = mo.end()
    # Trailing text is appended only when non-empty, except that a text
    # without any place holder is always returned as a single fragment.
    if pos < len(text) or not frags:
        frags.append(text[pos:])
    return frags
37
38
def fragmentize(text, wiki_elements, element_store, remove_escapes=True):
    """Takes a string of wiki markup and outputs a list of genshi
    Fragments (Elements and strings).

    This recursive function, with help from the WikiElement objects,
    does almost all the parsing.

    When no WikiElement objects are supplied (or none of them matches),
    escapes are removed from ``text`` (unless ``remove_escapes`` is
    False), place holders are filled in from ``element_store`` and the
    result is returned. This is the only way for recursion to stop.

    :parameters:
      text
        the text to be parsed
      wiki_elements
        list of WikiElement objects to be searched for, in order. An
        item may itself be a list or tuple of WikiElement objects; in
        that case the member whose match starts earliest in ``text`` is
        used (ties go to the member listed first).
      element_store
        dictionary handed on to the matching element's ``_process()``
        and used to fill in place holders
      remove_escapes
        If False, escapes will not be removed

    """
    while wiki_elements:
        head = wiki_elements[0]
        if isinstance(head, (list, tuple)):
            # Group of alternatives: keep the earliest match.
            wiki_element, mo = None, None
            for element in head:
                m = element.regexp.search(text)
                if m and (mo is None or m.start() < mo.start()):
                    wiki_element, mo = element, m
        else:
            wiki_element = head
            mo = wiki_element.regexp.search(text)

        if mo:
            # The matching element takes over; it receives the remaining
            # element list (starting with itself or its group).
            return wiki_element._process(mo, text, wiki_elements,
                                         element_store)
        wiki_elements = wiki_elements[1:]

    if remove_escapes:
        text = esc_to_remove.sub('', text)
    return fill_from_store(text, element_store)
91
92
class Parser(object):
    """Instantiates a parser with specified behaviour"""

    def __init__(self, dialect, method='xhtml', strip_whitespace=False,
                 encoding='utf-8'):
        """Constructor for Parser objects.

        :parameters:
          dialect
            A Creole instance
          method
            This value is passed to Genshi's Stream.render(). Possible
            values include ``xhtml``, ``html``, and ``xml``.
          strip_whitespace
            This value is passed to Genshi's Stream.render().
          encoding
            This value is passed to Genshi's Stream.render().
        """
        self.dialect = dialect
        self.method = method
        self.strip_whitespace = strip_whitespace
        self.encoding = encoding

    def generate(self, text, element_store=None, context='block'):
        """Returns a Genshi Stream.

        :parameters:
          text
            The text to be parsed.
          context
            This is useful for macro development where (for example)
            suppression of paragraph tags is desired. Can be 'inline',
            'block', or a list of WikiElement objects (use with caution).
            Only the 'block' context runs ``preprocess()`` on the text.
          element_store
            Internal dictionary that's passed around a lot ;)

        Raises ValueError if ``context`` is neither a list nor one of
        the two recognised strings.

        """
        if element_store is None:
            element_store = {}

        if isinstance(context, list):
            top_level_elements = context
            do_preprocess = False
        elif context == 'block':
            top_level_elements = self.dialect.block_elements
            do_preprocess = True
        elif context == 'inline':
            top_level_elements = self.dialect.inline_elements
            do_preprocess = False
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError; fail early with a clear message instead.
            raise ValueError(
                "context must be 'block', 'inline' or a list of "
                "WikiElement objects, not %r" % (context,))

        if do_preprocess:
            chunks = preprocess(text, self.dialect)
        else:
            chunks = [text]

        fragments = [fragmentize(piece, top_level_elements, element_store)
                     for piece in chunks]
        return bldr.tag(*fragments).generate()

    def render(self, text, element_store=None, context='block', **kwargs):
        """Returns final output string (e.g., xhtml)

        The stream produced by generate() is rendered with the
        ``method``, ``strip_whitespace`` and ``encoding`` given to the
        constructor; any extra keyword arguments are forwarded to the
        stream's render() as well.

        See generate() (above) and Genshi documentation for keyword
        arguments.
        """
        if element_store is None:
            element_store = {}
        stream = self.generate(text, element_store, context)
        return stream.render(method=self.method,
                             strip_whitespace=self.strip_whitespace,
                             encoding=self.encoding, **kwargs)

    def __call__(self, text, element_store=None, context='block'):
        """Wrapper for the render method. Returns final output string.

        See generate() (above) for the meaning of the arguments.
        """
        if element_store is None:
            element_store = {}
        return self.render(text, element_store, context)
171
def preprocess(text, dialect):
    """This should generally be called before fragmentize().

    Line endings are normalised to ``\\n``, trailing whitespace is
    stripped and a single newline is appended. If the result holds more
    than ``max_blank_lines`` blank lines (as matched by
    ``dialect.blank_line.regexp``) it is split by chunk(), which is told
    not to break inside ``dialect.pre`` or
    ``dialect.bodied_block_macro`` matches.

    :parameters:
      text
        text to be processed.
      dialect
        a ``Creole`` object.

    Returns a list of strings (a single-item list for small documents).
    """
    # "\r\n" must be handled before the lone "\r" so that it does not
    # turn into two newlines.
    text = text.replace("\r\n", "\n")
    text = text.replace("\r", "\n")
    text = ''.join([text.rstrip(), '\n'])
    blank_lines = list(dialect.blank_line.regexp.finditer(text))
    if len(blank_lines) > max_blank_lines:
        return chunk(text, blank_lines,
                     [dialect.pre, dialect.bodied_block_macro],
                     max_blank_lines)

    return [text]
190
191
def chunk(text, blank_lines, hard_elements, limit):
    """Safely breaks large Creole documents into a list of smaller
    ones (strings)

    :parameters:
      text
        the document to be split
      blank_lines
        list of match objects, one per blank line found in ``text``
      hard_elements
        objects with a ``regexp`` attribute; the text is never split at
        a blank line that starts inside one of their matches
      limit
        approximate number of blank lines per chunk

    For every full group of ``limit`` blank lines, candidates are taken
    from the window that starts half a ``limit`` into the group (every
    10th blank line of that window); the first candidate not inside a
    hard element becomes the split point. The blank line itself is
    dropped from the output. If no candidate qualifies, that group is
    simply not split.
    """
    # Every character offset covered by a hard element.
    hard_chars = set()
    for element in hard_elements:
        for mo in element.regexp.finditer(text):
            hard_chars.update(range(*mo.span()))

    chunks = []
    start = 0
    # Floor division keeps the bounds integral on Python 3 as well
    # (plain "/" yields floats there, which range() and slices reject).
    for i in range(len(blank_lines) // limit):
        offset = i * limit
        candidates = blank_lines[limit // 2 + offset:
                                 limit * 3 // 2 + offset:10]
        for mo in candidates:
            if mo.start() not in hard_chars:
                chunks.append(text[start:mo.start()])
                start = mo.end()
                break
    chunks.append(text[start:])

    return chunks
217
218
219
def _test():
    """Run this module's doctests via ``doctest.testmod()``."""
    import doctest
    doctest.testmod()


if __name__ == "__main__":
    _test()
226