##// END OF EJS Templates
Fixed: Wiki section edit escapes pre tags inside pre blocks (#9673)....
Jean-Philippe Lang -
r7856:d66c8faa9bc5
parent child
Show More
@@ -1,1195 +1,1195
1 1 # vim:ts=4:sw=4:
2 2 # = RedCloth - Textile and Markdown Hybrid for Ruby
3 3 #
4 4 # Homepage:: http://whytheluckystiff.net/ruby/redcloth/
5 5 # Author:: why the lucky stiff (http://whytheluckystiff.net/)
6 6 # Copyright:: (cc) 2004 why the lucky stiff (and his puppet organizations.)
7 7 # License:: BSD
8 8 #
9 9 # (see http://hobix.com/textile/ for a Textile Reference.)
10 10 #
11 11 # Based on (and also inspired by) both:
12 12 #
13 13 # PyTextile: http://diveintomark.org/projects/textile/textile.py.txt
14 14 # Textism for PHP: http://www.textism.com/tools/textile/
15 15 #
16 16 #
17 17
18 18 # = RedCloth
19 19 #
20 20 # RedCloth is a Ruby library for converting Textile and/or Markdown
21 21 # into HTML. You can use either format, intermingled or separately.
22 22 # You can also extend RedCloth to honor your own custom text stylings.
23 23 #
24 24 # RedCloth users are encouraged to use Textile if they are generating
25 25 # HTML and to use Markdown if others will be viewing the plain text.
26 26 #
27 27 # == What is Textile?
28 28 #
29 29 # Textile is a simple formatting style for text
30 30 # documents, loosely based on some HTML conventions.
31 31 #
32 32 # == Sample Textile Text
33 33 #
34 34 # h2. This is a title
35 35 #
36 36 # h3. This is a subhead
37 37 #
38 38 # This is a bit of paragraph.
39 39 #
40 40 # bq. This is a blockquote.
41 41 #
42 42 # = Writing Textile
43 43 #
44 44 # A Textile document consists of paragraphs. Paragraphs
45 45 # can be specially formatted by adding a small instruction
46 46 # to the beginning of the paragraph.
47 47 #
48 48 # h[n]. Header of size [n].
49 49 # bq. Blockquote.
50 50 # # Numeric list.
51 51 # * Bulleted list.
52 52 #
53 53 # == Quick Phrase Modifiers
54 54 #
55 55 # Quick phrase modifiers are also included, to allow formatting
56 56 # of small portions of text within a paragraph.
57 57 #
58 58 # \_emphasis\_
59 59 # \_\_italicized\_\_
60 60 # \*strong\*
61 61 # \*\*bold\*\*
62 62 # ??citation??
63 63 # -deleted text-
64 64 # +inserted text+
65 65 # ^superscript^
66 66 # ~subscript~
67 67 # @code@
68 68 # %(classname)span%
69 69 #
70 70 # ==notextile== (leave text alone)
71 71 #
72 72 # == Links
73 73 #
74 74 # To make a hypertext link, put the link text in "quotation
75 75 # marks" followed immediately by a colon and the URL of the link.
76 76 #
77 77 # Optional: text in (parentheses) following the link text,
78 78 # but before the closing quotation mark, will become a Title
79 79 # attribute for the link, visible as a tool tip when a cursor is above it.
80 80 #
81 81 # Example:
82 82 #
83 83 # "This is a link (This is a title) ":http://www.textism.com
84 84 #
85 85 # Will become:
86 86 #
87 87 # <a href="http://www.textism.com" title="This is a title">This is a link</a>
88 88 #
89 89 # == Images
90 90 #
91 91 # To insert an image, put the URL for the image inside exclamation marks.
92 92 #
93 93 # Optional: text that immediately follows the URL in (parentheses) will
94 94 # be used as the Alt text for the image. Images on the web should always
95 95 # have descriptive Alt text for the benefit of readers using non-graphical
96 96 # browsers.
97 97 #
98 98 # Optional: place a colon followed by a URL immediately after the
99 99 # closing ! to make the image into a link.
100 100 #
101 101 # Example:
102 102 #
103 103 # !http://www.textism.com/common/textist.gif(Textist)!
104 104 #
105 105 # Will become:
106 106 #
107 107 # <img src="http://www.textism.com/common/textist.gif" alt="Textist" />
108 108 #
109 109 # With a link:
110 110 #
111 111 # !/common/textist.gif(Textist)!:http://textism.com
112 112 #
113 113 # Will become:
114 114 #
115 115 # <a href="http://textism.com"><img src="/common/textist.gif" alt="Textist" /></a>
116 116 #
117 117 # == Defining Acronyms
118 118 #
119 119 # HTML allows authors to define acronyms via the <acronym> tag. The definition appears as a
120 120 # tool tip when a cursor hovers over the acronym. A crucial aid to clear writing,
121 121 # this should be used at least once for each acronym in documents where they appear.
122 122 #
123 123 # To quickly define an acronym in Textile, place the full text in (parentheses)
124 124 # immediately following the acronym.
125 125 #
126 126 # Example:
127 127 #
128 128 # ACLU(American Civil Liberties Union)
129 129 #
130 130 # Will become:
131 131 #
132 132 # <acronym title="American Civil Liberties Union">ACLU</acronym>
133 133 #
134 134 # == Adding Tables
135 135 #
136 136 # In Textile, simple tables can be added by separating each column by
137 137 # a pipe.
138 138 #
139 139 # |a|simple|table|row|
140 140 # |And|Another|table|row|
141 141 #
142 142 # Attributes are defined by style definitions in parentheses.
143 143 #
144 144 # table(border:1px solid black).
145 145 # (background:#ddd;color:red). |{}| | | |
146 146 #
147 147 # == Using RedCloth
148 148 #
149 149 # RedCloth is simply an extension of the String class, which can handle
150 150 # Textile formatting. Use it like a String and output HTML with its
151 151 # RedCloth#to_html method.
152 152 #
153 153 # doc = RedCloth.new "
154 154 #
155 155 # h2. Test document
156 156 #
157 157 # Just a simple test."
158 158 #
159 159 # puts doc.to_html
160 160 #
161 161 # By default, RedCloth uses both Textile and Markdown formatting, with
162 162 # Textile formatting taking precedence. If you want to turn off Markdown
163 163 # formatting, to boost speed and limit the processor:
164 164 #
165 165 # class RedCloth::Textile.new( str )
166 166
167 167 class RedCloth3 < String
168 168
169 169 VERSION = '3.0.4'
170 170 DEFAULT_RULES = [:textile, :markdown]
171 171
172 172 #
173 173 # Two accessors for setting security restrictions.
174 174 #
175 175 # This is a nice thing if you're using RedCloth for
176 176 # formatting in public places (e.g. Wikis) where you
177 177 # don't want users to abuse HTML for bad things.
178 178 #
179 179 # If +:filter_html+ is set, HTML which wasn't
180 180 # created by the Textile processor will be escaped.
181 181 #
182 182 # If +:filter_styles+ is set, it will also disable
183 183 # the style markup specifier. ('{color: red}')
184 184 #
185 185 attr_accessor :filter_html, :filter_styles
186 186
187 187 #
188 188 # Accessor for toggling hard breaks.
189 189 #
190 190 # If +:hard_breaks+ is set, single newlines will
191 191 # be converted to HTML break tags. This is the
192 192 # default behavior for traditional RedCloth.
193 193 #
194 194 attr_accessor :hard_breaks
195 195
196 196 # Accessor for toggling lite mode.
197 197 #
198 198 # In lite mode, block-level rules are ignored. This means
199 199 # that tables, paragraphs, lists, and such aren't available.
200 200 # Only the inline markup for bold, italics, entities and so on.
201 201 #
202 202 # r = RedCloth.new( "And then? She *fell*!", [:lite_mode] )
203 203 # r.to_html
204 204 # #=> "And then? She <strong>fell</strong>!"
205 205 #
206 206 attr_accessor :lite_mode
207 207
208 208 #
209 209 # Accessor for toggling span caps.
210 210 #
211 211 # Textile places `span' tags around capitalized
212 212 # words by default, but this wreaks havoc on Wikis.
213 213 # If +:no_span_caps+ is set, this will be
214 214 # suppressed.
215 215 #
216 216 attr_accessor :no_span_caps
217 217
218 218 #
219 219 # Establishes the markup precedence. Available rules include:
220 220 #
221 221 # == Textile Rules
222 222 #
223 223 # The following textile rules can be set individually. Or add the complete
224 224 # set of rules with the single :textile rule, which supplies the rule set in
225 225 # the following precedence:
226 226 #
227 227 # refs_textile:: Textile references (i.e. [hobix]http://hobix.com/)
228 228 # block_textile_table:: Textile table block structures
229 229 # block_textile_lists:: Textile list structures
230 230 # block_textile_prefix:: Textile blocks with prefixes (i.e. bq., h2., etc.)
231 231 # inline_textile_image:: Textile inline images
232 232 # inline_textile_link:: Textile inline links
233 233 # inline_textile_span:: Textile inline spans
234 234 # glyphs_textile:: Textile entities (such as em-dashes and smart quotes)
235 235 #
236 236 # == Markdown
237 237 #
238 238 # refs_markdown:: Markdown references (for example: [hobix]: http://hobix.com/)
239 239 # block_markdown_setext:: Markdown setext headers
240 240 # block_markdown_atx:: Markdown atx headers
241 241 # block_markdown_rule:: Markdown horizontal rules
242 242 # block_markdown_bq:: Markdown blockquotes
243 243 # block_markdown_lists:: Markdown lists
244 244 # inline_markdown_link:: Markdown links
245 245 attr_accessor :rules
246 246
# Returns a new RedCloth object wrapping _string_.
#
# Every name listed in _restrictions_ is switched on by calling the
# writer of the same name with +true+ (so +:filter_html+ invokes
# +filter_html=+).  A name without a matching writer raises NameError.
#
#   r = RedCloth.new( "h1. A <b>bold</b> man", [:filter_html] )
#   r.to_html
#
def initialize( string, restrictions = [] )
    restrictions.each do |flag|
        setter = method( "#{ flag }=" )
        setter.call( true )
    end
    super( string )
end
258 258
#
# Generates HTML from the Textile contents.
#
# With no arguments DEFAULT_RULES is used.  The shorthand rules
# +:textile+ and +:markdown+ are expanded into their full rule lists;
# any other rule is kept as given.  All work is done on a copy of the
# receiver, and that copy is returned.
#
#   r = RedCloth.new( "And then? She *fell*!" )
#   r.to_html
#
def to_html( *rules )
    rules = DEFAULT_RULES if rules.empty?

    # work on a duplicate so the receiver keeps its original markup
    text = self.dup

    @urlrefs = {}
    @shelf = []

    shorthands = {
        :textile => [:block_textile_table, :block_textile_lists,
                     :block_textile_prefix, :inline_textile_image, :inline_textile_link,
                     :inline_textile_code, :inline_textile_span, :glyphs_textile],
        :markdown => [:refs_markdown, :block_markdown_setext, :block_markdown_atx, :block_markdown_rule,
                      :block_markdown_bq, :block_markdown_lists,
                      :inline_markdown_reflink, :inline_markdown_link]
    }
    @rules = rules.collect { |rule| shorthands.fetch( rule, rule ) }.flatten

    # standard clean up
    incoming_entities( text )
    clean_white_space( text )

    # start processor
    @pre_list = []
    rip_offtags( text )
    no_textile( text )
    escape_html_tags( text )
    # quotes have to be handled before #hard_break and #blocks
    block_textile_quotes( text ) unless @lite_mode
    hard_break( text )
    unless @lite_mode
        refs( text )
        blocks( text )
    end
    inline( text )
    smooth_offtags( text )

    retrieve( text )

    # drop the notextile markers and restore the ampersands that
    # incoming_entities replaced with the "x%x%" dummy
    text.gsub!( /<\/?notextile>/, '' )
    text.gsub!( /x%x%/, '&#38;' )
    clean_html( text ) if filter_html
    text.strip!
    text
end
318 318
319 319 #######
320 320 private
321 321 #######
#
# Mapping of 8-bit ASCII codes to HTML numerical entity equivalents.
# (from PyTextile)
#
# Each entry is a pair [character, entity]: the character for the 8-bit
# code and either the numeric entity "&#NNNN;" or "" when the code has
# no mapping (target 0).
#
TEXTILE_TAGS =

    [[128, 8364], [129, 0], [130, 8218], [131, 402], [132, 8222], [133, 8230],
     [134, 8224], [135, 8225], [136, 710], [137, 8240], [138, 352], [139, 8249],
     [140, 338], [141, 0], [142, 0], [143, 0], [144, 0], [145, 8216], [146, 8217],
     [147, 8220], [148, 8221], [149, 8226], [150, 8211], [151, 8212], [152, 732],
     [153, 8482], [154, 353], [155, 8250], [156, 339], [157, 0], [158, 0], [159, 376]].

    collect! do |code, entity|
        # "&##{...}" keeps the literal '#' of the numeric entity; the
        # previous "&#{...}" swallowed it as interpolation syntax and
        # produced "&8364;" instead of "&#8364;".
        [code.chr, ( entity.zero? ? "" : "&##{ entity };" )]
    end
337 337
338 338 #
339 339 # Regular expressions to convert to HTML.
340 340 #
341 341 A_HLGN = /(?:(?:<>|<|>|\=|[()]+)+)/
342 342 A_VLGN = /[\-^~]/
343 343 C_CLAS = '(?:\([^)]+\))'
344 344 C_LNGE = '(?:\[[^\[\]]+\])'
345 345 C_STYL = '(?:\{[^}]+\})'
346 346 S_CSPN = '(?:\\\\\d+)'
347 347 S_RSPN = '(?:/\d+)'
348 348 A = "(?:#{A_HLGN}?#{A_VLGN}?|#{A_VLGN}?#{A_HLGN}?)"
349 349 S = "(?:#{S_CSPN}?#{S_RSPN}|#{S_RSPN}?#{S_CSPN}?)"
350 350 C = "(?:#{C_CLAS}?#{C_STYL}?#{C_LNGE}?|#{C_STYL}?#{C_LNGE}?#{C_CLAS}?|#{C_LNGE}?#{C_STYL}?#{C_CLAS}?)"
351 351 # PUNCT = Regexp::quote( '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~' )
352 352 PUNCT = Regexp::quote( '!"#$%&\'*+,-./:;=?@\\^_`|~' )
353 353 PUNCT_NOQ = Regexp::quote( '!"#$&\',./:;=?@\\`|' )
354 354 PUNCT_Q = Regexp::quote( '*-_+^~%' )
355 355 HYPERLINK = '(\S+?)([^\w\s/;=\?]*?)(?=\s|<|$)'
356 356
# Text markup tags, don't conflict with block tags.
# #blocks passes through untouched any block that starts with a tag
# NOT in this list.  ('br' used to be listed twice; the duplicate is
# removed, membership is unchanged.)
SIMPLE_HTML_TAGS = [
    'tt', 'b', 'i', 'big', 'small', 'em', 'strong', 'dfn', 'code',
    'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'a', 'img', 'br',
    'map', 'q', 'sub', 'sup', 'span', 'bdo'
]
363 363
# Quick phrase modifiers: [textile marker, HTML tag, matching style].
# After the collect! below each entry becomes
# [marker, HTML tag, compiled regexp, matching style].
QTAGS = [
    ['**', 'b', :limit],
    ['*', 'strong', :limit],
    ['??', 'cite', :limit],
    ['-', 'del', :limit],
    ['__', 'i', :limit],
    ['_', 'em', :limit],
    ['%', 'span', :limit],
    ['+', 'ins', :limit],
    ['^', 'sup', :limit],
    ['~', 'sub', :limit]
]

# Alternation of every (regexp-quoted) marker, used to recognise a second
# marker directly before or after the one being matched.
QTAGS_JOIN = QTAGS.map { |mark, _tag, _kind| Regexp::quote( mark ) }.join( '|' )

QTAGS.collect! do |mark, tag, kind|
    quoted = Regexp::quote( mark )
    pattern =
        if kind == :limit
            /(^|[>\s\(])             # leading context
            (?!\-\-)
            (#{QTAGS_JOIN}|)         # optional marker before
            (#{quoted})              # the marker itself
            (\w|[^\s].*?[^\s])       # content
            (?!\-\-)
            #{quoted}
            (#{QTAGS_JOIN}|)         # optional marker after
            (?=[[:punct:]]|<|\s|\)|$)/x
        else
            /(#{quoted})
            (#{C})
            (?::(\S+))?
            (\w|[^\s\-].*?[^\s\-])
            #{quoted}/xm
        end
    [mark, tag, pattern, kind]
end
401 401
402 402 # Elements to handle
403 403 GLYPHS = [
404 404 # [ /([^\s\[{(>])?\'([dmst]\b|ll\b|ve\b|\s|:|$)/, '\1&#8217;\2' ], # single closing
405 405 # [ /([^\s\[{(>#{PUNCT_Q}][#{PUNCT_Q}]*)\'/, '\1&#8217;' ], # single closing
406 406 # [ /\'(?=[#{PUNCT_Q}]*(s\b|[\s#{PUNCT_NOQ}]))/, '&#8217;' ], # single closing
407 407 # [ /\'/, '&#8216;' ], # single opening
408 408 # [ /</, '&lt;' ], # less-than
409 409 # [ />/, '&gt;' ], # greater-than
410 410 # [ /([^\s\[{(])?"(\s|:|$)/, '\1&#8221;\2' ], # double closing
411 411 # [ /([^\s\[{(>#{PUNCT_Q}][#{PUNCT_Q}]*)"/, '\1&#8221;' ], # double closing
412 412 # [ /"(?=[#{PUNCT_Q}]*[\s#{PUNCT_NOQ}])/, '&#8221;' ], # double closing
413 413 # [ /"/, '&#8220;' ], # double opening
414 414 # [ /\b( )?\.{3}/, '\1&#8230;' ], # ellipsis
415 415 # [ /\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])/, '<acronym title="\2">\1</acronym>' ], # 3+ uppercase acronym
416 416 # [ /(^|[^"][>\s])([A-Z][A-Z0-9 ]+[A-Z0-9])([^<A-Za-z0-9]|$)/, '\1<span class="caps">\2</span>\3', :no_span_caps ], # 3+ uppercase caps
417 417 # [ /(\.\s)?\s?--\s?/, '\1&#8212;' ], # em dash
418 418 # [ /\s->\s/, ' &rarr; ' ], # right arrow
419 419 # [ /\s-\s/, ' &#8211; ' ], # en dash
420 420 # [ /(\d+) ?x ?(\d+)/, '\1&#215;\2' ], # dimension sign
421 421 # [ /\b ?[(\[]TM[\])]/i, '&#8482;' ], # trademark
422 422 # [ /\b ?[(\[]R[\])]/i, '&#174;' ], # registered
423 423 # [ /\b ?[(\[]C[\])]/i, '&#169;' ] # copyright
424 424 ]
425 425
# Horizontal alignment markers and the alignment name each one stands for.
H_ALGN_VALS = { '<' => 'left', '=' => 'center', '>' => 'right', '<>' => 'justify' }

# Vertical alignment markers and the alignment name each one stands for.
V_ALGN_VALS = { '^' => 'top', '-' => 'middle', '~' => 'bottom' }
438 438
#
# Flexible HTML escaping.
#
# Escapes +str+ IN PLACE and returns it (a nil/false +str+ is returned
# untouched).  '&', '<' and '>' are always escaped; '"' is escaped
# unless +mode+ is :NoQuotes; "'" is escaped only when +mode+ is :Quotes.
#
def htmlesc( str, mode=:Quotes )
    return str unless str

    # '&' must go first so the entities added afterwards are not re-escaped
    substitutions = [['&', '&amp;']]
    substitutions << ['"', '&quot;'] unless mode == :NoQuotes
    substitutions << ["'", '&#039;'] if mode == :Quotes
    substitutions << ['<', '&lt;'] << ['>', '&gt;']

    substitutions.each { |raw, entity| str.gsub!( raw, entity ) }
    str
end
452 452
# Search and replace for Textile glyphs.  The generic GLYPHS table is not
# applied here; the only substitution performed is the acronym form
# "ABC(meaning)", which becomes an <acronym> element whose title is the
# HTML-escaped meaning.  Works in place; returns the gsub! result
# (nil when nothing matched).
def pgl( text )
    acronym_re = /\b([A-Z][A-Z0-9]{1,})\b(?:[(]([^)]*)[)])/
    text.gsub!( acronym_re ) do
        abbr, meaning = $1, $2
        "<acronym title=\"#{ htmlesc meaning }\">#{ abbr }</acronym>"
    end
end
463 463
# Parses Textile attribute lists and builds an HTML attribute string.
#
# text_in:: the raw attribute markup (e.g. "(cls#id){color:red}[en]");
#           nil yields ''.
# element:: the element being built; 'td' additionally enables
#           colspan (\N), rowspan (/N) and vertical alignment.
#
# Returns a string of zero or more ' name="value"' pairs.  The class,
# lang and id values come straight from user markup, so they are
# HTML-escaped (as the style value already was) to keep a '"' from
# terminating the attribute and injecting further attributes.
def pba( text_in, element = "" )

    return '' unless text_in

    style = []
    text = text_in.dup
    if element == 'td'
        colspan = $1 if text =~ /\\(\d+)/
        rowspan = $1 if text =~ /\/(\d+)/
        style << "vertical-align:#{ v_align( $& ) };" if text =~ A_VLGN
    end

    # {style} -- consumed even when filter_styles suppresses it
    style << "#{ htmlesc $1 };" if text.sub!( /\{([^}]*)\}/, '' ) && !filter_styles

    # [lang]
    lang = $1 if
        text.sub!( /\[([^)]+?)\]/, '' )

    # (class) or (class#id)
    cls = $1 if
        text.sub!( /\(([^()]+?)\)/, '' )

    # remaining runs of parentheses are padding markers
    style << "padding-left:#{ $1.length }em;" if
        text.sub!( /([(]+)/, '' )

    style << "padding-right:#{ $1.length }em;" if text.sub!( /([)]+)/, '' )

    style << "text-align:#{ h_align( $& ) };" if text =~ A_HLGN

    cls, id = $1, $2 if cls =~ /^(.*?)#(.*)$/

    atts = ''
    atts << " style=\"#{ style.join }\"" unless style.empty?
    atts << " class=\"#{ htmlesc cls }\"" unless cls.to_s.empty?
    atts << " lang=\"#{ htmlesc lang }\"" if lang
    atts << " id=\"#{ htmlesc id }\"" if id
    atts << " colspan=\"#{ colspan }\"" if colspan
    atts << " rowspan=\"#{ rowspan }\"" if rowspan

    atts
end
504 504
505 505 TABLE_RE = /^(?:table(_?#{S}#{A}#{C})\. ?\n)?^(#{A}#{C}\.? ?\|.*?\|)(\n\n|\Z)/m
506 506
# Parses a Textile table block, building HTML from the result.
#
# Every match of TABLE_RE in +text+ is replaced in place by a <table>
# element.  Table, row and cell attribute markup is converted with pba
# and swapped for a shelf placeholder via shelve.  A cell whose text
# starts with '_' becomes a <th>, any other cell a <td>.
def block_textile_table( text )
    text.gsub!( TABLE_RE ) do |matches|

        table_atts, body = $~[1..2]
        table_atts = pba( table_atts, 'table' )
        table_atts = shelve( table_atts ) if table_atts
        html_rows = []

        body.each_line do |row|
            # optional row attributes in front of the first pipe
            row_atts, row = pba( $1, 'tr' ), $2 if row =~ /^(#{A}#{C}\. )(.*)/m
            html_cells = []
            # split on pipes (keeping them), then drop whatever precedes
            # the first pipe and follows the last one
            row.split( /(\|)(?![^\[\|]*\]\])/ )[1..-2].each do |cell|
                next if cell == '|'
                kind = ( cell =~ /^_/ ) ? 'h' : 'd'

                cell_atts = ''
                cell_atts, cell = pba( $1, 'td' ), $2 if cell =~ /^(_?#{S}#{A}#{C}\. ?)(.*)/

                cell_atts = shelve( cell_atts ) if cell_atts
                html_cells << "\t\t\t<t#{ kind }#{ cell_atts }>#{ cell }</t#{ kind }>"
            end
            row_atts = shelve( row_atts ) if row_atts
            html_rows << "\t\t<tr#{ row_atts }>\n#{ html_cells.join( "\n" ) }\n\t\t</tr>"
        end
        "\t<table#{ table_atts }>\n#{ html_rows.join( "\n" ) }\n\t</table>\n\n"
    end
end
536 536
537 537 LISTS_RE = /^([#*]+?#{C} .*?)$(?![^#*])/m
538 538 LISTS_CONTENT_RE = /^([#*]+)(#{A}#{C}) (.*)$/m
539 539
# Parses Textile lists and generates HTML.
#
# Each LISTS_RE match in +text+ is rewritten in place.  +depth+ is the
# stack of list markers ("#", "**", ...) that are currently open, the
# innermost last; a marker ending in '#' opens an <ol>, any other a <ul>
# (see lT).
#
# When the block ends, the lists still open are closed innermost first.
# (They used to be closed in opening order, which produced mis-nested
# markup such as "<ol>...<ul>...</ol>...</ul>" for mixed lists.)
def block_textile_lists( text )
    text.gsub!( LISTS_RE ) do |match|
        lines = match.split( /\n/ )
        last_line = -1
        depth = []
        lines.each_with_index do |line, line_id|
            if line =~ LISTS_CONTENT_RE
                tl,atts,content = $~[1..3]
                if depth.last
                    if depth.last.length > tl.length
                        # stepping back out: close deeper lists until the
                        # marker length matches the current item
                        (depth.length - 1).downto(0) do |i|
                            break if depth[i].length == tl.length
                            lines[line_id - 1] << "</li>\n\t</#{ lT( depth[i] ) }l>\n\t"
                            depth.pop
                        end
                    end
                    if depth.last and depth.last.length == tl.length
                        # sibling item: finish the previous one
                        lines[line_id - 1] << '</li>'
                    end
                end
                unless depth.last == tl
                    # a new (possibly nested) list starts here
                    depth << tl
                    atts = pba( atts )
                    atts = shelve( atts ) if atts
                    lines[line_id] = "\t<#{ lT(tl) }l#{ atts }>\n\t<li>#{ content }"
                else
                    lines[line_id] = "\t\t<li>#{ content }"
                end
                last_line = line_id

            else
                last_line = line_id
            end
            if line_id - last_line > 1 or line_id == lines.length - 1
                # close whatever is still open, innermost list first
                while ( open_marker = depth.pop )
                    lines[last_line] << "</li>\n\t</#{ lT( open_marker ) }l>"
                end
            end
        end
        lines.join( "\n" )
    end
end
583 583
# A run of consecutive lines that each start with one or more '>'.
QUOTES_RE = /(^>+([^\n]*?)(\n|$))+/m
# Splits one such line into its marker prefix ('>' and spaces) and its text.
QUOTES_CONTENT_RE = /^([> ]+)(.*)$/m

# Converts e-mail style quoting ("> text", ">> nested") into nested
# <blockquote> elements, in place.  The number of '>' in a line's prefix
# is its nesting level; a level change opens or closes that many
# blockquotes, and all levels still open are closed at the end of the run.
def block_textile_quotes( text )
    text.gsub!( QUOTES_RE ) do |run|
        level = 0
        html = ''
        run.split( /\n/ ).each do |line|
            parts = QUOTES_CONTENT_RE.match( line )
            prefix, body = parts[1], parts[2]
            wanted = prefix.count( '>' )
            if wanted > level
                html << ("\n\n" + '<blockquote>' * (wanted - level) + "\n\n")
                level = wanted
            elsif wanted < level
                html << ("\n\n" + '</blockquote>' * (level - wanted) + "\n\n")
                level = wanted
            end
            html << (body + "\n")
        end
        html << ("\n" + '</blockquote>' * level + "\n\n")
    end
end
606 606
# Inline code: @code@ or @|lang|code@, preceded and followed by a
# non-word character.
CODE_RE = /(\W)
    @
    (?:\|(\w+?)\|)?
    (.+?)
    @
    (?=\W)/x

# Replaces every CODE_RE match with a <code> element (carrying a lang
# attribute when a |lang| prefix was given) and passes that markup
# through rip_offtags with +false+ as second argument.
# NOTE(review): rip_offtags is defined outside this section; presumably
# it protects the code from further formatting -- confirm there.
def inline_textile_code( text )
    text.gsub!( CODE_RE ) do |m|
        before, lang, code = $~.captures
        lang = " lang=\"#{ lang }\"" if lang
        rip_offtags( "#{ before }<code#{ lang }>#{ code }</code>", false )
    end
end
621 621
# Returns the first letter of the list tag for a Textile list marker:
# 'o' (ordered list) when the marker ends in '#', 'u' (unordered list)
# otherwise.
def lT( text )
    return 'o' if text =~ /\#$/
    'u'
end
625 625
# When +hard_breaks+ is set, turns single newlines in +text+ into
# "<br />" (in place).  A newline is left alone when it is the first
# character of the text, when it ends the text, or when the next line
# starts (after optional spaces) with a run of '#', '*' or '=' followed
# by whitespace, or with '{' or '|'.
# Returns nil when hard breaks are off, otherwise the gsub! result.
def hard_break( text )
    return unless hard_breaks
    text.gsub!( /(.)\n(?!\Z| *([#*=]+(\s|$)|[{|]))/, "\\1<br />" )
end
629 629
# Blocks are separated by two or more newlines, unless the text that
# follows starts with a space.
BLOCKS_GROUP_RE = /\n{2,}(?! )/m

# Splits +text+ into blocks, formats each one and replaces +text+ with
# the blocks joined by blank lines.
#
# * A block that starts a line with an HTML tag not listed in
#   SIMPLE_HTML_TAGS is passed through untouched.
# * Indented sub-blocks are flushed left and processed recursively; when
#   the enclosing block does not start with '#', '*', '>' or a space they
#   are pulled out of it and appended after it.
# * Every "block_*" rule in @rules is then applied; if none reports a
#   change the block is wrapped in <p> (or in <pre><code> when
#   +deep_code+ is set).
def blocks( text, deep_code = false )
    formatted = text.split( BLOCKS_GROUP_RE ).collect do |blk|
        plain = blk !~ /\A[#*> ]/

        # skip blocks that are complex HTML
        if (blk =~ /^<\/?(\w+).*>/) && !SIMPLE_HTML_TAGS.include?( $1 )
            next blk
        end

        blk.strip!
        next blk if blk.empty?

        # search for indentation levels
        code_blk = nil
        blk.gsub!( /((?:\n(?:\n^ +[^\n]*)+)+)/m ) do |iblk|
            flush_left iblk
            blocks iblk, plain
            # NOTE: non-destructive gsub -- its result is discarded, so
            # this line does not change iblk
            iblk.gsub( /^(\S)/, "\t\\1" )
            if plain
                code_blk = iblk
                ""
            else
                iblk
            end
        end

        # run every block rule; a rule counts as applied when it returns
        # a truthy value
        applied = @rules.select do |rule_name|
            rule_name.to_s.match( /^block_/ ) and method( rule_name ).call( blk )
        end
        if applied.empty?
            blk = deep_code ? "\t<pre><code>#{ blk }</code></pre>" : "\t<p>#{ blk }</p>"
        end
        # hard_break blk
        blk + "\n#{ code_blk }"
    end
    text.replace( formatted.join( "\n\n" ) )
end
675 675
# Handler for the "bq." prefix: wraps +content+ in a <blockquote>
# containing a single <p>.  +cite+ is resolved through check_refs and,
# when present, emitted as the blockquote's cite attribute; +atts+ is
# shelved and attached to the paragraph.  +tag+ is not used.
def textile_bq( tag, atts, cite, content )
    source, _source_title = check_refs( cite )
    source = " cite=\"#{ source }\"" if source
    atts = shelve( atts ) if atts
    opening = "\t<blockquote#{ source }>\n"
    paragraph = "\t\t<p#{ atts }>#{ content }</p>\n"
    opening + paragraph + "\t</blockquote>"
end
682 682
# Generic prefix handler: wraps +content+ in <tag>...</tag>, with +atts+
# (when given) replaced by its shelf placeholder.  +cite+ is not used.
# The heading prefixes h1. to h6. share this implementation.
def textile_p( tag, atts, cite, content )
    atts = shelve( atts ) if atts
    open_tag  = "\t<#{ tag }#{ atts }>"
    close_tag = "</#{ tag }>"
    "#{ open_tag }#{ content }#{ close_tag }"
end

alias textile_h1 textile_p
alias textile_h2 textile_p
alias textile_h3 textile_p
alias textile_h4 textile_p
alias textile_h5 textile_p
alias textile_h6 textile_p
694 694
# Handler for footnote prefixes ("fn1.", "fn2.", ...): renders +content+
# as a paragraph with id "fn<num>" and class "footnote", prefixed by the
# number in <sup>.  The id/class pair is appended to the +atts+ string
# that was passed in (the caller's string is modified).  +tag+ and
# +cite+ are not used.
def textile_fn_( tag, num, atts, cite, content )
    atts << " id=\"fn#{ num }\" class=\"footnote\""
    numbered = "<sup>#{ num }</sup> #{ content }"
    atts = shelve( atts ) if atts
    "\t<p#{ atts }>#{ numbered }</p>"
end
701 701
702 702 BLOCK_RE = /^(([a-z]+)(\d*))(#{A}#{C})\.(?::(\S+))? (.*)$/m
703 703
# Handles blocks that carry a Textile prefix such as "h2.", "bq." or
# "fn1.".
#
# When +text+ matches BLOCK_RE the prefix is dispatched to a handler
# method: "textile_<tag>" if one exists, otherwise "textile_<letters>_"
# (which additionally receives the numeric suffix).  The matched text is
# then replaced, in place, by the handler's result.  Returns nil when
# there is no match or no handler produced a replacement.
def block_textile_prefix( text )
    return unless text =~ BLOCK_RE

    whole = $&
    tag, tagpre, num, atts, cite, content = $~[1..6]
    atts = pba( atts )

    # pass to prefix handler
    exact_handler  = "textile_#{ tag }"
    family_handler = "textile_#{ tagpre }_"
    replacement =
        if respond_to?( exact_handler, true )
            method( exact_handler ).call( tag, atts, cite, content )
        elsif respond_to?( family_handler, true )
            method( family_handler ).call( tagpre, num, atts, cite, content )
        end
    text.gsub!( whole ) { replacement } if replacement
end
719 719
# Setext header: a leading text followed by an underline made of '=' or '-'.
SETEXT_RE = /\A(.+?)\n([=-])[=-]* *$/m

# Converts a Markdown setext header at the start of +text+ into <h1>
# (underline starting with '=') or <h2> (starting with '-').  The text
# after the header is formatted with #blocks and +text+ is replaced by
# header plus formatted remainder.  Returns nil when there is no header.
def block_markdown_setext( text )
    header = SETEXT_RE.match( text )
    return unless header

    tag = ( header[2] == "=" ) ? "h1" : "h2"
    heading = "<#{ tag }>#{ header[1] }</#{ tag }>"
    rest = header.post_match
    blocks rest
    text.replace( heading + rest )
end
729 729
ATX_RE = /\A(\#{1,6}) # $1 = string of #'s
    [ ]*
    (.+?) # $2 = Header text
    [ ]*
    \#* # optional closing #'s (not counted)
    $/x

# Converts a Markdown atx header ("## Title") at the start of +text+
# into <hN>, N being the number of leading '#'.  The text after the
# header is formatted with #blocks and +text+ is replaced by header plus
# formatted remainder.  Returns nil when there is no header.
def block_markdown_atx( text )
    header = ATX_RE.match( text )
    return unless header

    tag = "h#{ header[1].length }"
    heading = "<#{ tag }>#{ header[2] }</#{ tag }>\n\n"
    rest = header.post_match
    blocks rest
    text.replace( heading + rest )
end
744 744
MARKDOWN_BQ_RE = /\A(^ *> ?.+$(.+\n)*\n*)+/m

# Converts a Markdown blockquote at the start of +text+ (in place).
# The '>' prefixes are stripped, the quoted text is flushed left and
# formatted with #blocks, each line starting with a non-space character
# gets a leading tab, and the result is wrapped in <blockquote>.
def block_markdown_bq( text )
    text.gsub!( MARKDOWN_BQ_RE ) do |quoted|
        quoted.gsub!( /^ *> ?/, '' )
        flush_left quoted
        blocks quoted
        quoted.gsub!( /^(\S)/, "\t\\1" )
        [ "<blockquote>", quoted, "</blockquote>" ].join( "\n" ) + "\n\n"
    end
end
756 756
# A line consisting of three or more '*', '-' or '_' (all the same
# character), each optionally followed by a space.
MARKDOWN_RULE_RE = begin
    alternatives = ['*', '-', '_'].collect do |ch|
        " ?(#{ Regexp::quote( ch ) } ?){3,}"
    end
    /^(#{ alternatives.join( '|' ) })$/
end

# Replaces every Markdown horizontal rule line in +text+ with "<hr />"
# (in place).  Returns nil when no rule was found.
def block_markdown_rule( text )
    text.gsub!( MARKDOWN_RULE_RE ) { "<hr />" }
end
766 766
# XXX TODO XXX
# Placeholder for Markdown list support, which is not implemented:
# +text+ is left untouched and nil is returned, so #blocks never counts
# this rule as applied.
def block_markdown_lists( text )
    nil
end
770 770
# Applies the quick phrase modifiers (*strong*, _em_, ...) to +text+ in
# place.  For each QTAGS entry every match of its regexp is replaced by
# the corresponding HTML element; attribute markup found at the start of
# the content is converted with pba and shelved.
def inline_textile_span( text )
    QTAGS.each do |qtag_rc, ht, qtag_re, rtype|
        text.gsub!( qtag_re ) do |m|

            captures = $~.captures
            lead = before = after = nil
            if rtype == :limit
                lead, before, _marker, content, after = captures
                atts = nil
                # split leading attribute markup off the content
                atts, content = $1, $2 if content =~ /^(#{C})(.+)$/
            else
                _marker, atts, _cite, content = captures
                lead = ''
            end
            atts = pba( atts )
            atts = shelve( atts ) if atts

            "#{ lead }#{ before }<#{ ht }#{ atts }>#{ content }</#{ ht }>#{ after }"

        end
    end
end
794 794
795 795 LINK_RE = /
796 796 (
797 797 ([\s\[{(]|[#{PUNCT}])? # $pre
798 798 " # start
799 799 (#{C}) # $atts
800 800 ([^"\n]+?) # $text
801 801 \s?
802 802 (?:\(([^)]+?)\)(?="))? # $title
803 803 ":
804 804 ( # $url
805 805 (\/|[a-zA-Z]+:\/\/|www\.|mailto:) # $proto
806 806 [\w\/]\S+?
807 807 )
808 808 (\/)? # $slash
809 809 ([^\w\=\/;\(\)]*?) # $post
810 810 )
811 811 (?=<|\s|$)
812 812 /x
813 813 #"
814 814 def inline_textile_link( text )
815 815 text.gsub!( LINK_RE ) do |m|
816 816 all,pre,atts,text,title,url,proto,slash,post = $~[1..9]
817 817 if text.include?('<br />')
818 818 all
819 819 else
820 820 url, url_title = check_refs( url )
821 821 title ||= url_title
822 822
823 823 # Idea below : an URL with unbalanced parethesis and
824 824 # ending by ')' is put into external parenthesis
825 825 if ( url[-1]==?) and ((url.count("(") - url.count(")")) < 0 ) )
826 826 url=url[0..-2] # discard closing parenth from url
827 827 post = ")"+post # add closing parenth to post
828 828 end
829 829 atts = pba( atts )
830 830 atts = " href=\"#{ htmlesc url }#{ slash }\"#{ atts }"
831 831 atts << " title=\"#{ htmlesc title }\"" if title
832 832 atts = shelve( atts ) if atts
833 833
834 834 external = (url =~ /^https?:\/\//) ? ' class="external"' : ''
835 835
836 836 "#{ pre }<a#{ atts }#{ external }>#{ text }</a>#{ post }"
837 837 end
838 838 end
839 839 end
840 840
MARKDOWN_REFLINK_RE = /
    \[([^\[\]]+)\] # $text
    [ ]? # opt. space
    (?:\n[ ]*)? # one optional newline followed by spaces
    \[(.*?)\] # $id
    /x

# Converts Markdown reference links ("[text][id]", or "[text][]" to use
# the text itself as id) in +text+ into <a> elements, in place.
#
# The id is resolved through check_refs; an unknown id is used as the
# URL as-is.  Because that value (and a reference's title) comes from
# user markup, double quotes in it are written as &quot; so they cannot
# end the attribute and inject further attributes.  The strings held by
# the reference table are not modified.
def inline_markdown_reflink( text )
    text.gsub!( MARKDOWN_REFLINK_RE ) do |m|
        label, id = $~[1..2]

        url, title = check_refs( id.empty? ? label : id )

        atts = " href=\"#{ url.to_s.gsub( '"', '&quot;' ) }\""
        atts << " title=\"#{ title.gsub( '"', '&quot;' ) }\"" if title
        atts = shelve( atts )

        "<a#{ atts }>#{ label }</a>"
    end
end
865 865
MARKDOWN_LINK_RE = /
    \[([^\[\]]+)\] # $text
    \( # open paren
    [ \t]* # opt space
    <?(.+?)>? # $href
    [ \t]* # opt space
    (?: # whole title
    (['"]) # $quote
    (.*?) # $title
    \3 # matching quote
    )? # title is optional
    \)
    /x

# Converts inline Markdown links ("[text](url "title")") in +text+ into
# <a> elements, in place.  The attribute string is shelved.
#
# URL and title come straight from user markup, so double quotes in
# them are written as &quot;; otherwise a '"' would end the attribute
# and allow arbitrary attributes to be injected.
def inline_markdown_link( text )
    text.gsub!( MARKDOWN_LINK_RE ) do |m|
        label, url, _quote, title = $~[1..4]

        atts = " href=\"#{ url.gsub( '"', '&quot;' ) }\""
        atts << " title=\"#{ title.gsub( '"', '&quot;' ) }\"" if title
        atts = shelve( atts )

        "<a#{ atts }>#{ label }</a>"
    end
end
891 891
892 892 TEXTILE_REFS_RE = /(^ *)\[([^\[\n]+?)\](#{HYPERLINK})(?=\s|$)/
893 893 MARKDOWN_REFS_RE = /(^ *)\[([^\n]+?)\]:\s+<?(#{HYPERLINK})>?(?:\s+"((?:[^"]|\\")+)")?(?=\s|$)/m
894 894
# Runs every rule in @rules whose name starts with "refs_" on +text+,
# in rule order.
def refs( text )
    @rules.each do |rule_name|
        next unless rule_name.to_s.match( /^refs_/ )
        method( rule_name ).call( text )
    end
end
900 900
# Collects Textile link references ("[name]url") from +text+.  Each one
# is stored in @urlrefs under its lower-cased name as [url, nil] and
# removed from the text.
def refs_textile( text )
    text.gsub!( TEXTILE_REFS_RE ) do |m|
        name, target = $~[2], $~[3]
        @urlrefs[name.downcase] = [target, nil]
        # a nil block result replaces the reference with nothing
        nil
    end
end
908 908
# Collects Markdown link references ('[name]: url "title"') from +text+.
# Each one is stored in @urlrefs under its lower-cased name as
# [url, title] and removed from the text.
def refs_markdown( text )
    text.gsub!( MARKDOWN_REFS_RE ) do |m|
        found = $~
        name, target = found[2], found[3]
        # groups 4 and 5 belong to the embedded HYPERLINK pattern;
        # the optional title is group 6
        @urlrefs[name.downcase] = [target, found[6]]
        nil
    end
end
917 917
# Looks +text+ up (case-insensitively) among the collected link
# references.  Returns the stored [url, title] pair when there is one,
# otherwise [text, nil] -- also when +text+ is nil.
def check_refs( text )
    known = text ? @urlrefs[text.downcase] : nil
    known || [text, nil]
end
922 922
923 923 IMAGE_RE = /
924 924 (>|\s|^) # start of line?
925 925 \! # opening
926 926 (\<|\=|\>)? # optional alignment atts
927 927 (#{C}) # optional style,class atts
928 928 (?:\. )? # optional dot-space
929 929 ([^\s(!]+?) # presume this is the src
930 930 \s? # optional space
931 931 (?:\(((?:[^\(\)]|\([^\)]+\))+?)\))? # optional title
932 932 \! # closing
933 933 (?::#{ HYPERLINK })? # optional href
934 934 /x
935 935
# Converts Textile images (!url(title)!, optionally followed by :href to
# make the image a link) in +text+ into <img> elements, in place.
#
# * The title, when given, is HTML-escaped and used for both the title
#   and the alt attribute (alt is always emitted).
# * The image URL is HTML-escaped for the src attribute.
# * The link target is resolved through check_refs.  Since it comes from
#   user markup, double quotes in it are written as &quot; so that they
#   cannot end the href attribute and inject further attributes.
# * An alignment marker floats the image in a wrapping <div>.
def inline_textile_image( text )
    text.gsub!( IMAGE_RE ) do |m|
        stln,algn,atts,url,title,href,href_a1,href_a2 = $~[1..8]
        htmlesc title
        atts = pba( atts )
        atts = " src=\"#{ htmlesc url.dup }\"#{ atts }"
        atts << " title=\"#{ title }\"" if title
        atts << " alt=\"#{ title }\""
        # size = @getimagesize($url);
        # if($size) $atts.= " $size[3]";

        if href
            href, = check_refs( href )
            # non-destructive, so a shared reference string stays intact
            href = href.gsub( '"', '&quot;' ) if href
        end

        out = ''
        out << "<a#{ shelve( " href=\"#{ href }\"" ) }>" if href
        out << "<img#{ shelve( atts ) } />"
        out << "</a>#{ href_a1 }#{ href_a2 }" if href

        if algn
            algn = h_align( algn )
            if stln == "<p>"
                out = "<p style=\"float:#{ algn }\">#{ out }"
            else
                out = "#{ stln }<div style=\"float:#{ algn }\">#{ out }</div>"
            end
        else
            out = stln + out
        end

        out
    end
end
969 969
# Stores +val+ on the shelf and returns the placeholder that stands for
# it: " :redsh#N:", N being the 1-based position of +val+ on the shelf.
# #retrieve puts the stored values back.
def shelve( val )
    @shelf.push( val )
    " :redsh#" + @shelf.length.to_s + ":"
end
974 974
# Puts every shelved value back into +text+ (in place), replacing its
# " :redsh#N:" placeholder.
#
# The block form of gsub! is used on purpose: a replacement passed as a
# string argument has its backslash sequences interpreted ("\0", "\1",
# "\\"), which mangled shelved values that contain a backslash, such as
# a URL.  A block result is inserted literally.
def retrieve( text )
    @shelf.each_with_index do |r, i|
        text.gsub!( " :redsh##{ i + 1 }:" ) { r }
    end
end
980 980
# Turns every incoming ampersand into the dummy sequence "x%x%" (in
# place), except where it starts something that looks like an HTML
# entity: '#', letters or digits followed by a semicolon.
# Returns nil when nothing was replaced.
def incoming_entities( text )
    bare_ampersand = /&(?![#a-z0-9]+;)/i
    text.gsub!( bare_ampersand, "x%x%" )
end
988 988
# Wraps text written between double equal signs (==like this==) in
# <notextile> tags, in place.
#
# The first pass handles spans that start at a line start or after
# whitespace.  The second pass lets '.' match newlines and so catches
# spans at a line start that the first pass cannot bridge.  Its pattern
# has a single capture group, so the replacement must use \1 only: the
# former template ('\1<notextile>\2</notextile>\3') emitted the content
# in front of an empty <notextile></notextile> pair.
def no_textile( text )
    text.gsub!( /(^|\s)==([^=]+.*?)==(\s|$)?/,
        '\1<notextile>\2</notextile>\3' )
    text.gsub!( /^ *==([^=]+.*?)==/m,
        '<notextile>\1</notextile>' )
end
995 995
# Normalizes whitespace in +text+ in place, then hands the text to
# flush_left so that a uniformly indented document is shifted to the
# left margin.
def clean_white_space( text )
    [
        [/\r\n/,    "\n"],    # normalize line breaks
        [/\r/,      "\n"],
        [/\t/,      ' '],
        [/^ +$/,    ''],
        [/\n{3,}/,  "\n\n"],
        [/"$/,      "\" "]
    ].each do |pattern, replacement|
        text.gsub!( pattern, replacement )
    end

    # if entire document is indented, flush
    # to the left side
    flush_left text
end
1009 1009
# Removes the indentation shared by the document: finds the smallest
# number of leading spaces among lines that consist of spaces followed by
# a non-whitespace character, and strips that many spaces from the start
# of every line. Mutates +text+ in place.
#
# Returns the result of gsub! when something was stripped, nil otherwise.
#
# The minimum is computed with a single scan rather than by probing
# increasing indentation widths, so the method also terminates when no
# line has the shape "spaces + non-whitespace" (e.g. " \f").
def flush_left( text )
    return unless text =~ /^ /

    indt = text.scan( /^( *)\S/ ).map { |(pad)| pad.length }.min
    text.gsub!( /^ {#{indt}}/, '' ) if indt && indt.nonzero?
end
1021 1021
# Converts footnote references such as <tt>word[1]</tt> into superscript
# links to the <tt>#fnN</tt> anchor. The reference must directly follow a
# word boundary; one optional trailing whitespace character is kept.
# Mutates +text+; returns nil when nothing matched.
def footnote_ref( text )
    reference = /\b\[([0-9]+?)\](\s)?/
    text.gsub!( reference,
        '<sup><a href="#fn\1">\1</a></sup>\2' )
end
1026 1026
# Names of the tags whose content is kept away from Textile processing.
1027 1027 OFFTAGS = /(code|pre|kbd|notextile)/
# One closing off-tag (group 1, name in group 2) or one opening off-tag
# (group 3, name in group 4), followed by the text up to the next off-tag
# or the end of the input (group 5).
1028 1028 OFFTAG_MATCH = /(?:(<\/#{ OFFTAGS }>)|(<#{ OFFTAGS }[^>]*>))(.*?)(?=<\/?#{ OFFTAGS }\W|\Z)/mi
# Start of an opening off-tag.
1029 1029 OFFTAG_OPEN = /<#{ OFFTAGS }/
# Start of an opening or closing off-tag.
1030 1030 OFFTAG_CLOSE = /<\/?#{ OFFTAGS }/
# Any opening or closing tag that does not span a line break.
1031 1031 HASTAG_MATCH = /(<\/?\w[^\n]*?>)/m
# Either a tag (captured in group 1) or the run of text leading up to the
# next tag or end of line (no capture).
1032 1032 ALLTAG_MATCH = /(<\/?\w[^\n]*?>)|.*?(?=<\/?\w[^\n]*?>|$)/m
1033 1033
# Applies glyph and footnote conversion to +text+ in place, skipping HTML
# tags themselves and HTML-escaping (instead of converting) any text that
# sits between off-tags such as <code> or <pre>.
#
# +level+ is the recursion depth; it is only passed along to nested calls.
def glyphs_textile( text, level = 0 )
    unless text =~ HASTAG_MATCH
        # No markup at all: convert the whole string directly.
        pgl text
        return footnote_ref( text )
    end

    off_depth = 0
    text.gsub!( ALLTAG_MATCH ) do |chunk|
        ## matches are off if we're between <code>, <pre> etc.
        if $1
            # The chunk is a tag: only track off-tag nesting.
            if chunk =~ OFFTAG_OPEN
                off_depth += 1
            elsif chunk =~ OFFTAG_CLOSE
                off_depth = [off_depth - 1, 0].max
            end
        elsif off_depth.zero?
            glyphs_textile( chunk, level + 1 )
        else
            htmlesc( chunk, :NoQuotes )
        end
        # p [level, off_depth, chunk]

        chunk
    end
end
1060 1060
# Pulls the content of off-tags (see OFFTAGS) out of +text+ and parks it
# in @pre_list, leaving a <tt><redpre#N></tt> placeholder behind so the
# Textile rules never see it. Returns +text+ (modified in place).
#
# escape_aftertag:: when true, the text following an outermost opening
#                   off-tag is HTML-escaped, except after
#                   <tt><code class="..."></tt>, which is left for the
#                   syntax highlighter.
# escape_line::     when true, off-tags nested inside another off-tag of
#                   the same kind are HTML-escaped before being appended
#                   to the enclosing entry. Pass false to get the source
#                   text back unchanged (used for wiki section editing).
def rip_offtags( text, escape_aftertag=true, escape_line=true )
    if text =~ /<.*>/
        ## strip and encode <pre> content
        codepre, used_offtags = 0, {}
        text.gsub!( OFFTAG_MATCH ) do |line|
            if $3
                # Opening off-tag.
                first, offtag, aftertag = $3, $4, $5
                codepre += 1
                used_offtags[offtag] = true
                if codepre - used_offtags.length > 0
                    # Nested inside an off-tag of a kind already open: this
                    # is content of the enclosing entry, not a new entry.
                    htmlesc( line, :NoQuotes ) if escape_line
                    @pre_list.last << line
                    line = ""
                else
                    ### htmlesc is disabled between CODE tags which will be parsed with highlighter
                    ### Regexp in formatter.rb is : /<code\s+class="(\w+)">\s?(.+)/m
                    ### NB: some changes were made not to use $N variables, because we use "match"
                    ### and it breaks following lines
                    htmlesc( aftertag, :NoQuotes ) if aftertag && escape_aftertag && !first.match(/<code\s+class="(\w+)">/)
                    line = "<redpre##{ @pre_list.length }>"
                    first.match(/<#{ OFFTAGS }([^>]*)>/)
                    tag = $1
                    # Of the original attributes only a class attribute is kept.
                    $2.to_s.match(/(class\=("[^"]+"|'[^']+'))/i)
                    tag << " #{$1}" if $1
                    @pre_list << "<#{ tag }>#{ aftertag }"
                end
            elsif $1 and codepre > 0
                # Closing off-tag while at least one off-tag is open.
                if codepre - used_offtags.length > 0
                    htmlesc( line, :NoQuotes ) if escape_line
                    @pre_list.last << line
                    line = ""
                end
                codepre -= 1 unless codepre.zero?
                used_offtags = {} if codepre.zero?
            end
            line
        end
    end
    text
end
1101 1101
# Puts the content parked in @pre_list back into +text+ by replacing each
# <tt><redpre#N></tt> placeholder with entry N. Does nothing (and returns
# nil) when @pre_list is empty.
def smooth_offtags( text )
    return if @pre_list.empty?

    ## replace <pre> content
    text.gsub!( /<redpre#(\d+)>/ ) do
        slot = $1.to_i
        @pre_list[slot]
    end
end
1108 1108
# Runs the inline rules on +text+: first every rule in @rules whose name
# starts with "inline_", then every rule whose name starts with "glyphs_",
# each group in @rules order.
def inline( text )
    [/^inline_/, /^glyphs_/].each do |prefix_re|
        @rules.each do |rule_name|
            next unless rule_name.to_s.match( prefix_re )
            method( rule_name ).call( text )
        end
    end
end
1116 1116
# Looks up the horizontal-alignment marker +text+ in H_ALGN_VALS and
# returns the mapped value.
def h_align( text )
    mapped = H_ALGN_VALS[text]
    mapped
end
1120 1120
# Looks up the vertical-alignment marker +text+ in V_ALGN_VALS and
# returns the mapped value.
def v_align( text )
    mapped = V_ALGN_VALS[text]
    mapped
end
1124 1124
# Builds the HTML for a help link that opens the Textile reference in a
# popup window.
#
# name::    link text.
# windowW:: popup width (String or Integer).
# windowH:: popup height (String or Integer).
# helpvar:: anchor inside the reference page; defaults to +name+. It is
#           an optional trailing argument so existing three-argument
#           calls keep working (they previously raised NameError because
#           +helpvar+ was never defined).
def textile_popup_help( name, windowW, windowH, helpvar = name )
    " <a target=\"_blank\" href=\"http://hobix.com/textile/##{ helpvar }\" onclick=\"window.open(this.href, 'popupwindow', 'width=#{ windowW },height=#{ windowH },scrollbars,resizable'); return false;\">#{ name }</a><br />"
end
1128 1128
1129 1129 # HTML cleansing stuff
# Default whitelist for clean_html: maps each allowed tag name to the list
# of attributes that are kept on it. A nil or empty list means the tag is
# kept but every attribute is dropped.
1130 1130 BASIC_TAGS = {
1131 1131 'a' => ['href', 'title'],
1132 1132 'img' => ['src', 'alt', 'title'],
1133 1133 'br' => [],
1134 1134 'i' => nil,
1135 1135 'u' => nil,
1136 1136 'b' => nil,
1137 1137 'pre' => nil,
1138 1138 'kbd' => nil,
1139 1139 'code' => ['lang'],
1140 1140 'cite' => nil,
1141 1141 'strong' => nil,
1142 1142 'em' => nil,
1143 1143 'ins' => nil,
1144 1144 'sup' => nil,
1145 1145 'sub' => nil,
1146 1146 'del' => nil,
1147 1147 'table' => nil,
1148 1148 'tr' => nil,
1149 1149 'td' => ['colspan', 'rowspan'],
1150 1150 'th' => nil,
1151 1151 'ol' => nil,
1152 1152 'ul' => nil,
1153 1153 'li' => nil,
1154 1154 'p' => nil,
1155 1155 'h1' => nil,
1156 1156 'h2' => nil,
1157 1157 'h3' => nil,
1158 1158 'h4' => nil,
1159 1159 'h5' => nil,
1160 1160 'h6' => nil,
1161 1161 'blockquote' => ['cite']
1162 1162 }
1163 1163
# Sanitizes the HTML in +text+ in place.
#
# CDATA openers are removed. Every tag whose (lower-cased) name is a key
# of +tags+ is rebuilt with only the attributes listed for it; any other
# tag is replaced by a single space.
#
# text:: the HTML string to clean (mutated).
# tags:: Hash of tag name => Array of allowed attribute names (or nil for
#        "no attributes"); defaults to BASIC_TAGS.
#
# Returns the result of the final gsub! (nil when no tag was found).
def clean_html( text, tags = BASIC_TAGS )
    text.gsub!( /<!\[CDATA\[/, '' )
    text.gsub!( /<(\/*)(\w+)([^>]*)>/ ) do
        raw = $~
        tag = raw[2].downcase
        if tags.has_key? tag
            pcs = [tag]
            Array( tags[tag] ).each do |prop|
                # Try double-quoted, single-quoted, then unquoted values.
                ['"', "'", ''].each do |q|
                    q2 = ( q != '' ? q : '\s' )
                    next unless raw[3] =~ /#{prop}\s*=\s*#{q}([^#{q2}]+)#{q}/i
                    # Keep the value in a local: the scheme check below
                    # runs another match and resets $1.
                    attrv = $1
                    # A src with a non-http scheme is dropped outright, not
                    # retried with a looser quoting style.
                    break if prop == 'src' and attrv =~ %r{^(?!http)\w+:}
                    # HTML has no backslash escapes; a raw double quote
                    # would end the attribute early.
                    pcs << "#{prop}=\"#{attrv.gsub('"', '&quot;')}\""
                    break
                end
            end
            "<#{raw[1]}#{pcs.join " "}>"
        else
            " "
        end
    end
end
1188 1188
# Tags that escape_html_tags leaves intact.
ALLOWED_TAGS = %w(redpre pre code notextile)

# HTML-escapes the angle brackets of every tag in +text+ whose name is not
# listed in ALLOWED_TAGS. An unterminated tag (no closing ">") only gets
# its opening bracket escaped. Mutates +text+; returns nil when nothing
# looked like a tag.
def escape_html_tags(text)
    text.gsub!(%r{<(\/?([!\w]+)[^<>\n]*)(>?)}) do |m|
        if ALLOWED_TAGS.include?($2)
            "<#{$1}#{$3}"
        else
            # $3 is always a String ("" or ">"), so plain empty? is enough
            # and keeps this library free of ActiveSupport's blank?.
            "&lt;#{$1}#{'&gt;' unless $3.empty?}"
        end
    end
end
1194 1194 end
1195 1195
@@ -1,182 +1,182
1 1 # Redmine - project management software
2 2 # Copyright (C) 2006-2011 Jean-Philippe Lang
3 3 #
4 4 # This program is free software; you can redistribute it and/or
5 5 # modify it under the terms of the GNU General Public License
6 6 # as published by the Free Software Foundation; either version 2
7 7 # of the License, or (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software
16 16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 17
18 18 require 'redcloth3'
19 19 require 'digest/md5'
20 20
module Redmine
  module WikiFormatting
    module Textile
      # Textile formatter for Redmine wiki text, built on RedCloth3.
      #
      # Besides HTML rendering (to_html) it can extract and replace a single
      # section of the source text, where a section starts at a Textile
      # heading ("hN. ...") and extends to the next heading of the same or a
      # higher level.
      class Formatter < RedCloth3
        include ActionView::Helpers::TagHelper

        # auto_link rule after textile rules so that it doesn't break !image_url! tags
        RULES = [:textile, :block_markdown_rule, :inline_auto_link, :inline_auto_mailto]

        def initialize(*args)
          super
          self.hard_breaks=true
          self.no_span_caps=true
          self.filter_styles=true
        end

        # Renders the text as HTML. The +rules+ argument is ignored: RULES is
        # always applied.
        def to_html(*rules)
          @toc = []
          super(*RULES).to_s
        end

        # Returns the source of section +index+ (1-based count of headings)
        # together with its MD5 hex digest.
        def get_section(index)
          section = extract_sections(index)[1]
          hash = Digest::MD5.hexdigest(section)
          return section, hash
        end

        # Returns the whole text with section +index+ replaced by +update+.
        # When +hash+ is given and differs from the digest of the current
        # section, raises Redmine::WikiFormatting::StaleSectionError. When
        # the section does not exist the text is returned unchanged.
        def update_section(index, update, hash=nil)
          t = extract_sections(index)
          if hash.present? && hash != Digest::MD5.hexdigest(t[1])
            raise Redmine::WikiFormatting::StaleSectionError
          end
          t[1] = update unless t[1].blank?
          t.reject(&:blank?).join "\n\n"
        end

        # Splits the text into [before, section, after] around section
        # +index+. Content of off-tags (pre, code, ...) is ripped out first
        # so headings inside it are not counted, and restored afterwards
        # without escaping or syntax highlighting.
        def extract_sections(index)
          @pre_list = []
          text = self.dup
          # No escaping at all: the result is source text to be edited, not HTML.
          rip_offtags text, false, false
          before = ''
          s = ''
          after = ''
          i = 0
          l = 1
          started = false
          ended = false
          text.scan(/(((?:.*?)(\A|\r?\n\r?\n))(h(\d+)(#{A}#{C})\.(?::(\S+))? (.*?)$)|.*)/m).each do |all, content, lf, heading, level|
            if heading.nil?
              # Trailing text after the last heading.
              if ended
                after << all
              elsif started
                s << all
              else
                before << all
              end
              break
            end
            i += 1
            if ended
              after << all
            elsif i == index
              l = level.to_i
              before << content
              s << heading
              started = true
            elsif i > index
              s << content
              if level.to_i > l
                # Deeper heading: still part of the requested section.
                s << heading
              else
                after << heading
                ended = true
              end
            else
              before << all
            end
          end
          sections = [before.strip, s.strip, after.strip]
          sections.each {|section| smooth_offtags_without_code_highlighting section}
          sections
        end

      private

        # Patch for RedCloth. Fixed in RedCloth r128 but _why hasn't released it yet.
        # http://code.whytheluckystiff.net/redcloth/changeset/128
        def hard_break( text )
          text.gsub!( /(.)\n(?!\n|\Z| *([#*=]+(\s|$)|[{|]))/, "\\1<br />" ) if hard_breaks
        end

        alias :smooth_offtags_without_code_highlighting :smooth_offtags
        # Patch to add code highlighting support to RedCloth
        def smooth_offtags( text )
          unless @pre_list.empty?
            ## replace <pre> content
            text.gsub!(/<redpre#(\d+)>/) do
              content = @pre_list[$1.to_i]
              if content.match(/<code\s+class="(\w+)">\s?(.+)/m)
                content = "<code class=\"#{$1} syntaxhl\">" +
                  Redmine::SyntaxHighlighting.highlight_by_language($2, $1)
              end
              content
            end
          end
        end

        AUTO_LINK_RE = %r{
                        (                          # leading text
                          <\w+.*?>|                # leading HTML tag, or
                          [^=<>!:'"/]|             # leading punctuation, or
                          ^                        # beginning of line
                        )
                        (
                          (?:https?://)|           # protocol spec, or
                          (?:s?ftps?://)|
                          (?:www\.)                # www.*
                        )
                        (
                          (\S+?)                   # url
                          (\/)?                    # slash
                        )
                        ((?:&gt;)?|[^\w\=\/;\(\)]*?)               # post
                        (?=<|\s|$)
                       }x unless const_defined?(:AUTO_LINK_RE)

        # Turns all urls into clickable links (code from Rails).
        def inline_auto_link(text)
          text.gsub!(AUTO_LINK_RE) do
            all, leading, proto, url, post = $&, $1, $2, $3, $6
            if leading =~ /<a\s/i || leading =~ /![<>=]?/
              # don't replace URL's that are already linked
              # and URL's prefixed with ! !> !< != (textile images)
              all
            else
              # Idea below : an URL with unbalanced parenthesis and
              # ending by ')' is put into external parenthesis
              if ( url[-1]==?) and ((url.count("(") - url.count(")")) < 0 ) )
                url=url[0..-2] # discard closing parenth from url
                post = ")"+post # add closing parenth to post
              end
              tag = content_tag('a', proto + url, :href => "#{proto=="www."?"http://www.":proto}#{url}", :class => 'external')
              %(#{leading}#{tag}#{post})
            end
          end
        end

        # Turns all email addresses into clickable links (code from Rails).
        def inline_auto_mailto(text)
          text.gsub!(/([\w\.!#\$%\-+.]+@[A-Za-z0-9\-]+(\.[A-Za-z0-9\-]+)+)/) do
            mail = $1
            # Leave the address alone when it already appears inside a link.
            if text.match(/<a\b[^>]*>(.*)(#{Regexp.escape(mail)})(.*)<\/a>/)
              mail
            else
              content_tag('a', mail, :href => "mailto:#{mail}", :class => "email")
            end
          end
        end
      end
    end
  end
end
@@ -1,338 +1,343
1 1 # Redmine - project management software
2 2 # Copyright (C) 2006-2011 Jean-Philippe Lang
3 3 #
4 4 # This program is free software; you can redistribute it and/or
5 5 # modify it under the terms of the GNU General Public License
6 6 # as published by the Free Software Foundation; either version 2
7 7 # of the License, or (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software
16 16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 17
18 18 require File.expand_path('../../../../../test_helper', __FILE__)
19 19 require 'digest/md5'
20 20
# Unit tests for Redmine::WikiFormatting::Textile::Formatter: HTML output of
# Textile markup, and extraction/replacement of wiki sections.
21 21 class Redmine::WikiFormatting::TextileFormatterTest < ActionView::TestCase
22 22
# @formatter holds the formatter class itself; each test instantiates it
# with the text under test.
23 23 def setup
24 24 @formatter = Redmine::WikiFormatting::Textile::Formatter
25 25 end
26 26
# Phrase modifier character => HTML tag expected in the output.
27 27 MODIFIERS = {
28 28 "*" => 'strong', # bold
29 29 "_" => 'em', # italic
30 30 "+" => 'ins', # underline
31 31 "-" => 'del', # deleted
32 32 "^" => 'sup', # superscript
33 33 "~" => 'sub' # subscript
34 34 }
35 35
36 36 def test_modifiers
37 37 assert_html_output(
38 38 '*bold*' => '<strong>bold</strong>',
39 39 'before *bold*' => 'before <strong>bold</strong>',
40 40 '*bold* after' => '<strong>bold</strong> after',
41 41 '*two words*' => '<strong>two words</strong>',
42 42 '*two*words*' => '<strong>two*words</strong>',
43 43 '*two * words*' => '<strong>two * words</strong>',
44 44 '*two* *words*' => '<strong>two</strong> <strong>words</strong>',
45 45 '*(two)* *(words)*' => '<strong>(two)</strong> <strong>(words)</strong>',
46 46 # with class
47 47 '*(foo)two words*' => '<strong class="foo">two words</strong>'
48 48 )
49 49 end
50 50
# Every ordered pair of distinct modifiers must nest correctly.
51 51 def test_modifiers_combination
52 52 MODIFIERS.each do |m1, tag1|
53 53 MODIFIERS.each do |m2, tag2|
54 54 next if m1 == m2
55 55 text = "#{m2}#{m1}Phrase modifiers#{m1}#{m2}"
56 56 html = "<#{tag2}><#{tag1}>Phrase modifiers</#{tag1}></#{tag2}>"
57 57 assert_html_output text => html
58 58 end
59 59 end
60 60 end
61 61
62 62 def test_inline_code
63 63 assert_html_output(
64 64 'this is @some code@' => 'this is <code>some code</code>',
65 65 '@<Location /redmine>@' => '<code>&lt;Location /redmine&gt;</code>'
66 66 )
67 67 end
68 68
69 69 def test_escaping
70 70 assert_html_output(
71 71 'this is a <script>' => 'this is a &lt;script&gt;'
72 72 )
73 73 end
74 74
75 75 def test_use_of_backslashes_followed_by_numbers_in_headers
76 76 assert_html_output({
77 77 'h1. 2009\02\09' => '<h1>2009\02\09</h1>'
78 78 }, false)
79 79 end
80 80
81 81 def test_double_dashes_should_not_strikethrough
82 82 assert_html_output(
83 83 'double -- dashes -- test' => 'double -- dashes -- test',
84 84 'double -- *dashes* -- test' => 'double -- <strong>dashes</strong> -- test'
85 85 )
86 86 end
87 87
88 88 def test_acronyms
89 89 assert_html_output(
90 90 'this is an acronym: GPL(General Public License)' => 'this is an acronym: <acronym title="General Public License">GPL</acronym>',
91 91 '2 letters JP(Jean-Philippe) acronym' => '2 letters <acronym title="Jean-Philippe">JP</acronym> acronym',
92 92 'GPL(This is a double-quoted "title")' => '<acronym title="This is a double-quoted &quot;title&quot;">GPL</acronym>'
93 93 )
94 94 end
95 95
# Output is compared with all whitespace removed.
96 96 def test_blockquote
97 97 # orig raw text
98 98 raw = <<-RAW
99 99 John said:
100 100 > Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Maecenas sed libero.
101 101 > Nullam commodo metus accumsan nulla. Curabitur lobortis dui id dolor.
102 102 > * Donec odio lorem,
103 103 > * sagittis ac,
104 104 > * malesuada in,
105 105 > * adipiscing eu, dolor.
106 106 >
107 107 > >Nulla varius pulvinar diam. Proin id arcu id lorem scelerisque condimentum. Proin vehicula turpis vitae lacus.
108 108 > Proin a tellus. Nam vel neque.
109 109
110 110 He's right.
111 111 RAW
112 112
113 113 # expected html
114 114 expected = <<-EXPECTED
115 115 <p>John said:</p>
116 116 <blockquote>
117 117 Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Maecenas sed libero.<br />
118 118 Nullam commodo metus accumsan nulla. Curabitur lobortis dui id dolor.
119 119 <ul>
120 120 <li>Donec odio lorem,</li>
121 121 <li>sagittis ac,</li>
122 122 <li>malesuada in,</li>
123 123 <li>adipiscing eu, dolor.</li>
124 124 </ul>
125 125 <blockquote>
126 126 <p>Nulla varius pulvinar diam. Proin id arcu id lorem scelerisque condimentum. Proin vehicula turpis vitae lacus.</p>
127 127 </blockquote>
128 128 <p>Proin a tellus. Nam vel neque.</p>
129 129 </blockquote>
130 130 <p>He's right.</p>
131 131 EXPECTED
132 132
133 133 assert_equal expected.gsub(%r{\s+}, ''), to_html(raw).gsub(%r{\s+}, '')
134 134 end
135 135
136 136 def test_table
137 137 raw = <<-RAW
138 138 This is a table with empty cells:
139 139
140 140 |cell11|cell12||
141 141 |cell21||cell23|
142 142 |cell31|cell32|cell33|
143 143 RAW
144 144
145 145 expected = <<-EXPECTED
146 146 <p>This is a table with empty cells:</p>
147 147
148 148 <table>
149 149 <tr><td>cell11</td><td>cell12</td><td></td></tr>
150 150 <tr><td>cell21</td><td></td><td>cell23</td></tr>
151 151 <tr><td>cell31</td><td>cell32</td><td>cell33</td></tr>
152 152 </table>
153 153 EXPECTED
154 154
155 155 assert_equal expected.gsub(%r{\s+}, ''), to_html(raw).gsub(%r{\s+}, '')
156 156 end
157 157
158 158 def test_table_with_line_breaks
159 159 raw = <<-RAW
160 160 This is a table with line breaks:
161 161
162 162 |cell11
163 163 continued|cell12||
164 164 |-cell21-||cell23
165 165 cell23 line2
166 166 cell23 *line3*|
167 167 |cell31|cell32
168 168 cell32 line2|cell33|
169 169
170 170 RAW
171 171
172 172 expected = <<-EXPECTED
173 173 <p>This is a table with line breaks:</p>
174 174
175 175 <table>
176 176 <tr>
177 177 <td>cell11<br />continued</td>
178 178 <td>cell12</td>
179 179 <td></td>
180 180 </tr>
181 181 <tr>
182 182 <td><del>cell21</del></td>
183 183 <td></td>
184 184 <td>cell23<br/>cell23 line2<br/>cell23 <strong>line3</strong></td>
185 185 </tr>
186 186 <tr>
187 187 <td>cell31</td>
188 188 <td>cell32<br/>cell32 line2</td>
189 189 <td>cell33</td>
190 190 </tr>
191 191 </table>
192 192 EXPECTED
193 193
194 194 assert_equal expected.gsub(%r{\s+}, ''), to_html(raw).gsub(%r{\s+}, '')
195 195 end
196 196
197 197 def test_textile_should_not_mangle_brackets
198 198 assert_equal '<p>[msg1][msg2]</p>', to_html('[msg1][msg2]')
199 199 end
200 200
201 201 def test_textile_should_escape_image_urls
202 202 # this is onclick="alert('XSS');" in encoded form
203 203 raw = '!/images/comment.png"onclick=&#x61;&#x6c;&#x65;&#x72;&#x74;&#x28;&#x27;&#x58;&#x53;&#x53;&#x27;&#x29;;&#x22;!'
204 204 expected = '<p><img src="/images/comment.png&quot;onclick=&amp;#x61;&amp;#x6c;&amp;#x65;&amp;#x72;&amp;#x74;&amp;#x28;&amp;#x27;&amp;#x58;&amp;#x53;&amp;#x53;&amp;#x27;&amp;#x29;;&amp;#x22;" alt="" /></p>'
205 205 assert_equal expected.gsub(%r{\s+}, ''), to_html(raw).gsub(%r{\s+}, '')
206 206 end
207 207
208 208
# Fixture without pre/code blocks: five chunks, each starting with a
# heading; joined with blank lines into TEXT_WITHOUT_PRE. Chunk 2 holds two
# headings, so the text contains six headings in total.
209 209 STR_WITHOUT_PRE = [
210 210 # 0
211 211 "h1. Title
212 212
213 213 Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Maecenas sed libero.",
214 214 # 1
215 215 "h2. Heading 2
216 216
217 217 Maecenas sed elit sit amet mi accumsan vestibulum non nec velit. Proin porta tincidunt lorem, consequat rhoncus dolor fermentum in.
218 218
219 219 Cras ipsum felis, ultrices at porttitor vel, faucibus eu nunc.",
220 220 # 2
221 221 "h2. Heading 2
222 222
223 223 Morbi facilisis accumsan orci non pharetra.
224 224
225 225 h3. Heading 3
226 226
227 227 Nulla nunc nisi, egestas in ornare vel, posuere ac libero.",
228 228 # 3
229 229 "h3. Heading 3
230 230
231 231 Praesent eget turpis nibh, a lacinia nulla.",
232 232 # 4
233 233 "h2. Heading 2
234 234
235 235 Ut rhoncus elementum adipiscing."]
236 236
237 237 TEXT_WITHOUT_PRE = STR_WITHOUT_PRE.join("\n\n").freeze
238 238
239 239 def test_get_section_should_return_the_requested_section_and_its_hash
240 240 assert_section_with_hash STR_WITHOUT_PRE[1], TEXT_WITHOUT_PRE, 2
241 241 assert_section_with_hash STR_WITHOUT_PRE[2..3].join("\n\n"), TEXT_WITHOUT_PRE, 3
242 242 assert_section_with_hash STR_WITHOUT_PRE[3], TEXT_WITHOUT_PRE, 5
243 243 assert_section_with_hash STR_WITHOUT_PRE[4], TEXT_WITHOUT_PRE, 6
244 244
245 245 assert_section_with_hash '', TEXT_WITHOUT_PRE, 0
246 246 assert_section_with_hash '', TEXT_WITHOUT_PRE, 10
247 247 end
248 248
249 249 def test_update_section_should_update_the_requested_section
250 250 replacement = "New text"
251 251
252 252 assert_equal [STR_WITHOUT_PRE[0], replacement, STR_WITHOUT_PRE[2..4]].flatten.join("\n\n"), @formatter.new(TEXT_WITHOUT_PRE).update_section(2, replacement)
253 253 assert_equal [STR_WITHOUT_PRE[0..1], replacement, STR_WITHOUT_PRE[4]].flatten.join("\n\n"), @formatter.new(TEXT_WITHOUT_PRE).update_section(3, replacement)
254 254 assert_equal [STR_WITHOUT_PRE[0..2], replacement, STR_WITHOUT_PRE[4]].flatten.join("\n\n"), @formatter.new(TEXT_WITHOUT_PRE).update_section(5, replacement)
255 255 assert_equal [STR_WITHOUT_PRE[0..3], replacement].flatten.join("\n\n"), @formatter.new(TEXT_WITHOUT_PRE).update_section(6, replacement)
256 256
257 257 assert_equal TEXT_WITHOUT_PRE, @formatter.new(TEXT_WITHOUT_PRE).update_section(0, replacement)
258 258 assert_equal TEXT_WITHOUT_PRE, @formatter.new(TEXT_WITHOUT_PRE).update_section(10, replacement)
259 259 end
260 260
261 261 def test_update_section_with_hash_should_update_the_requested_section
262 262 replacement = "New text"
263 263
264 264 assert_equal [STR_WITHOUT_PRE[0], replacement, STR_WITHOUT_PRE[2..4]].flatten.join("\n\n"),
265 265 @formatter.new(TEXT_WITHOUT_PRE).update_section(2, replacement, Digest::MD5.hexdigest(STR_WITHOUT_PRE[1]))
266 266 end
267 267
268 268 def test_update_section_with_wrong_hash_should_raise_an_error
269 269 assert_raise Redmine::WikiFormatting::StaleSectionError do
270 270 @formatter.new(TEXT_WITHOUT_PRE).update_section(2, "New text", Digest::MD5.hexdigest("Old text"))
271 271 end
272 272 end
273 273
# Fixture whose second chunk contains pre/code blocks, including a pre/code
# pair nested inside another one and a heading inside a pre block.
274 274 STR_WITH_PRE = [
275 275 # 0
276 276 "h1. Title
277 277
278 278 Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Maecenas sed libero.",
279 279 # 1
280 280 "h2. Heading 2
281 281
282 282 <pre><code class=\"ruby\">
283 283 def foo
284 284 end
285 285 </code></pre>
286 286
287 <pre><code><pre><code class=\"ruby\">
288 Place your code here.
289 </code></pre>
290 </code></pre>
291
287 292 Morbi facilisis accumsan orci non pharetra.
288 293
289 294 <pre>
290 295 Pre Content:
291 296
292 297 h2. Inside pre
293 298
294 299 <tag> inside pre block
295 300
296 301 Morbi facilisis accumsan orci non pharetra.
297 302 </pre>",
298 303 # 2
299 304 "h3. Heading 3
300 305
301 306 Nulla nunc nisi, egestas in ornare vel, posuere ac libero."]
302 307
303 308 def test_get_section_should_ignore_pre_content
304 309 text = STR_WITH_PRE.join("\n\n")
305 310
306 311 assert_section_with_hash STR_WITH_PRE[1..2].join("\n\n"), text, 2
307 312 assert_section_with_hash STR_WITH_PRE[2], text, 3
308 313 end
309 314
310 315 def test_update_section_should_not_escape_pre_content_outside_section
311 316 text = STR_WITH_PRE.join("\n\n")
312 317 replacement = "New text"
313 318
314 319 assert_equal [STR_WITH_PRE[0..1], "New text"].flatten.join("\n\n"),
315 320 @formatter.new(text).update_section(3, replacement)
316 321 end
317 322
318 323 private
319 324
# Asserts that each key of +to_test+ renders to its value; the expected
# HTML is wrapped in <p>...</p> unless +expect_paragraph+ is false.
320 325 def assert_html_output(to_test, expect_paragraph = true)
321 326 to_test.each do |text, expected|
322 327 assert_equal(( expect_paragraph ? "<p>#{expected}</p>" : expected ), @formatter.new(text).to_html, "Formatting the following text failed:\n===\n#{text}\n===\n")
323 328 end
324 329 end
325 330
# Renders +text+ with a fresh formatter instance.
326 331 def to_html(text)
327 332 @formatter.new(text).to_html
328 333 end
329 334
# Asserts that get_section(index) on +text+ returns a two-element array:
# the +expected+ section source and its MD5 hex digest.
330 335 def assert_section_with_hash(expected, text, index)
331 336 result = @formatter.new(text).get_section(index)
332 337
333 338 assert_kind_of Array, result
334 339 assert_equal 2, result.size
335 340 assert_equal expected, result.first, "section content did not match"
336 341 assert_equal Digest::MD5.hexdigest(expected), result.last, "section hash did not match"
337 342 end
338 343 end
General Comments 0
You need to be logged in to leave comments. Login now