##// END OF EJS Templates
Fixed that ordered/unordered lists inside table cell are mangled (#14038)....
Jean-Philippe Lang -
r11615:b3d80d50a3a1
parent child
Show More
@@ -1,1208 +1,1209
1 1 # vim:ts=4:sw=4:
2 2 # = RedCloth - Textile and Markdown Hybrid for Ruby
3 3 #
4 4 # Homepage:: http://whytheluckystiff.net/ruby/redcloth/
5 5 # Author:: why the lucky stiff (http://whytheluckystiff.net/)
6 6 # Copyright:: (cc) 2004 why the lucky stiff (and his puppet organizations.)
7 7 # License:: BSD
8 8 #
9 9 # (see http://hobix.com/textile/ for a Textile Reference.)
10 10 #
11 11 # Based on (and also inspired by) both:
12 12 #
13 13 # PyTextile: http://diveintomark.org/projects/textile/textile.py.txt
14 14 # Textism for PHP: http://www.textism.com/tools/textile/
15 15 #
16 16 #
17 17
18 18 # = RedCloth
19 19 #
20 20 # RedCloth is a Ruby library for converting Textile and/or Markdown
21 21 # into HTML. You can use either format, intermingled or separately.
22 22 # You can also extend RedCloth to honor your own custom text stylings.
23 23 #
24 24 # RedCloth users are encouraged to use Textile if they are generating
25 25 # HTML and to use Markdown if others will be viewing the plain text.
26 26 #
27 27 # == What is Textile?
28 28 #
29 29 # Textile is a simple formatting style for text
30 30 # documents, loosely based on some HTML conventions.
31 31 #
32 32 # == Sample Textile Text
33 33 #
34 34 # h2. This is a title
35 35 #
36 36 # h3. This is a subhead
37 37 #
38 38 # This is a bit of paragraph.
39 39 #
40 40 # bq. This is a blockquote.
41 41 #
42 42 # = Writing Textile
43 43 #
44 44 # A Textile document consists of paragraphs. Paragraphs
45 45 # can be specially formatted by adding a small instruction
46 46 # to the beginning of the paragraph.
47 47 #
48 48 # h[n]. Header of size [n].
49 49 # bq. Blockquote.
50 50 # # Numeric list.
51 51 # * Bulleted list.
52 52 #
53 53 # == Quick Phrase Modifiers
54 54 #
55 55 # Quick phrase modifiers are also included, to allow formatting
56 56 # of small portions of text within a paragraph.
57 57 #
58 58 # \_emphasis\_
59 59 # \_\_italicized\_\_
60 60 # \*strong\*
61 61 # \*\*bold\*\*
62 62 # ??citation??
63 63 # -deleted text-
64 64 # +inserted text+
65 65 # ^superscript^
66 66 # ~subscript~
67 67 # @code@
68 68 # %(classname)span%
69 69 #
70 70 # ==notextile== (leave text alone)
71 71 #
72 72 # == Links
73 73 #
74 74 # To make a hypertext link, put the link text in "quotation
75 75 # marks" followed immediately by a colon and the URL of the link.
76 76 #
77 77 # Optional: text in (parentheses) following the link text,
78 78 # but before the closing quotation mark, will become a Title
79 79 # attribute for the link, visible as a tool tip when a cursor is above it.
80 80 #
81 81 # Example:
82 82 #
83 83 # "This is a link (This is a title) ":http://www.textism.com
84 84 #
85 85 # Will become:
86 86 #
87 87 # <a href="http://www.textism.com" title="This is a title">This is a link</a>
88 88 #
89 89 # == Images
90 90 #
91 91 # To insert an image, put the URL for the image inside exclamation marks.
92 92 #
93 93 # Optional: text that immediately follows the URL in (parentheses) will
94 94 # be used as the Alt text for the image. Images on the web should always
95 95 # have descriptive Alt text for the benefit of readers using non-graphical
96 96 # browsers.
97 97 #
98 98 # Optional: place a colon followed by a URL immediately after the
99 99 # closing ! to make the image into a link.
100 100 #
101 101 # Example:
102 102 #
103 103 # !http://www.textism.com/common/textist.gif(Textist)!
104 104 #
105 105 # Will become:
106 106 #
107 107 # <img src="http://www.textism.com/common/textist.gif" alt="Textist" />
108 108 #
109 109 # With a link:
110 110 #
111 111 # !/common/textist.gif(Textist)!:http://textism.com
112 112 #
113 113 # Will become:
114 114 #
115 115 # <a href="http://textism.com"><img src="/common/textist.gif" alt="Textist" /></a>
116 116 #
117 117 # == Defining Acronyms
118 118 #
119 119 # HTML allows authors to define acronyms via the <acronym> tag. The definition appears as a
120 120 # tool tip when a cursor hovers over the acronym. A crucial aid to clear writing,
121 121 # this should be used at least once for each acronym in documents where they appear.
122 122 #
123 123 # To quickly define an acronym in Textile, place the full text in (parentheses)
124 124 # immediately following the acronym.
125 125 #
126 126 # Example:
127 127 #
128 128 # ACLU(American Civil Liberties Union)
129 129 #
130 130 # Will become:
131 131 #
132 132 # <acronym title="American Civil Liberties Union">ACLU</acronym>
133 133 #
134 134 # == Adding Tables
135 135 #
136 136 # In Textile, simple tables can be added by separating each column by
137 137 # a pipe.
138 138 #
139 139 # |a|simple|table|row|
140 140 # |And|Another|table|row|
141 141 #
142 142 # Attributes are defined by style definitions in parentheses.
143 143 #
144 144 # table(border:1px solid black).
145 145 # (background:#ddd;color:red). |{}| | | |
146 146 #
147 147 # == Using RedCloth
148 148 #
149 149 # RedCloth is simply an extension of the String class, which can handle
150 150 # Textile formatting. Use it like a String and output HTML with its
151 151 # RedCloth#to_html method.
152 152 #
153 153 # doc = RedCloth.new "
154 154 #
155 155 # h2. Test document
156 156 #
157 157 # Just a simple test."
158 158 #
159 159 # puts doc.to_html
160 160 #
161 161 # By default, RedCloth uses both Textile and Markdown formatting, with
162 162 # Textile formatting taking precedence. If you want to turn off Markdown
163 163 # formatting, to boost speed and limit the processor:
164 164 #
165 165 # class RedCloth::Textile.new( str )
166 166
167 167 class RedCloth3 < String
168 168
169 169 VERSION = '3.0.4'
170 170 DEFAULT_RULES = [:textile, :markdown]
171 171
172 172 #
173 173 # Two accessors for setting security restrictions.
174 174 #
175 175 # This is a nice thing if you're using RedCloth for
176 176 # formatting in public places (e.g. Wikis) where you
177 177 # don't want users to abuse HTML for bad things.
178 178 #
179 179 # If +:filter_html+ is set, HTML which wasn't
180 180 # created by the Textile processor will be escaped.
181 181 #
182 182 # If +:filter_styles+ is set, it will also disable
183 183 # the style markup specifier. ('{color: red}')
184 184 #
185 185 attr_accessor :filter_html, :filter_styles
186 186
187 187 #
188 188 # Accessor for toggling hard breaks.
189 189 #
190 190 # If +:hard_breaks+ is set, single newlines will
191 191 # be converted to HTML break tags. This is the
192 192 # default behavior for traditional RedCloth.
193 193 #
194 194 attr_accessor :hard_breaks
195 195
196 196 # Accessor for toggling lite mode.
197 197 #
198 198 # In lite mode, block-level rules are ignored. This means
199 199 # that tables, paragraphs, lists, and such aren't available.
200 200 # Only the inline markup for bold, italics, entities and so on.
201 201 #
202 202 # r = RedCloth.new( "And then? She *fell*!", [:lite_mode] )
203 203 # r.to_html
204 204 # #=> "And then? She <strong>fell</strong>!"
205 205 #
206 206 attr_accessor :lite_mode
207 207
208 208 #
209 209 # Accessor for toggling span caps.
210 210 #
211 211 # Textile places `span' tags around capitalized
212 212 # words by default, but this wreaks havoc on Wikis.
213 213 # If +:no_span_caps+ is set, this will be
214 214 # suppressed.
215 215 #
216 216 attr_accessor :no_span_caps
217 217
218 218 #
219 219 # Establishes the markup precedence. Available rules include:
220 220 #
221 221 # == Textile Rules
222 222 #
223 223 # The following textile rules can be set individually. Or add the complete
224 224 # set of rules with the single :textile rule, which supplies the rule set in
225 225 # the following precedence:
226 226 #
227 227 # refs_textile:: Textile references (i.e. [hobix]http://hobix.com/)
228 228 # block_textile_table:: Textile table block structures
229 229 # block_textile_lists:: Textile list structures
230 230 # block_textile_prefix:: Textile blocks with prefixes (i.e. bq., h2., etc.)
231 231 # inline_textile_image:: Textile inline images
232 232 # inline_textile_link:: Textile inline links
233 233 # inline_textile_span:: Textile inline spans
234 234 # glyphs_textile:: Textile entities (such as em-dashes and smart quotes)
235 235 #
236 236 # == Markdown
237 237 #
238 238 # refs_markdown:: Markdown references (for example: [hobix]: http://hobix.com/)
239 239 # block_markdown_setext:: Markdown setext headers
240 240 # block_markdown_atx:: Markdown atx headers
241 241 # block_markdown_rule:: Markdown horizontal rules
242 242 # block_markdown_bq:: Markdown blockquotes
243 243 # block_markdown_lists:: Markdown lists
244 244 # inline_markdown_link:: Markdown links
245 245 attr_accessor :rules
246 246
# Builds a new RedCloth object from _string_, switching on each of the
# named _restrictions_.
#
# Every entry of _restrictions_ names one of this class's accessors
# (e.g. +:filter_html+); its writer is called with +true+ before the
# string content itself is initialised.
#
#   r = RedCloth.new( "h1. A <b>bold</b> man", [:filter_html] )
#   r.to_html
#     #=>"<h1>A &lt;b&gt;bold&lt;/b&gt; man</h1>"
#
def initialize(string, restrictions = [])
  restrictions.each do |restriction|
    writer = method("#{ restriction }=")
    writer.call(true)
  end
  super(string)
end
258 258
#
# Generates HTML from the Textile contents.
#
# _rules_ lists the rule names to apply; the shorthand names +:textile+
# and +:markdown+ expand to their full rule sets. When no rules are
# given, DEFAULT_RULES is used. The expanded list is stored in @rules.
#
# Processing happens on a copy of the receiver, which is returned.
#
#   r = RedCloth.new( "And then? She *fell*!" )
#   r.to_html( true )
#     #=>"And then? She <strong>fell</strong>!"
#
def to_html(*rules)
  rules = DEFAULT_RULES if rules.empty?

  # everything below operates on a private working copy
  text = dup

  @urlrefs = {}
  @shelf = []

  rule_sets = {
    :textile => [:block_textile_table, :block_textile_lists,
                 :block_textile_prefix, :inline_textile_image, :inline_textile_link,
                 :inline_textile_code, :inline_textile_span, :glyphs_textile],
    :markdown => [:refs_markdown, :block_markdown_setext, :block_markdown_atx, :block_markdown_rule,
                  :block_markdown_bq, :block_markdown_lists,
                  :inline_markdown_reflink, :inline_markdown_link]
  }
  @rules = rules.map { |rule| rule_sets.fetch(rule, rule) }.flatten

  # standard clean up
  incoming_entities(text)
  clean_white_space(text)

  # start processor
  @pre_list = []
  rip_offtags(text)
  no_textile(text)
  escape_html_tags(text)
  # quotes have to be handled before #hard_break and #blocks
  block_textile_quotes(text) unless @lite_mode
  hard_break(text)
  unless @lite_mode
    refs(text)
    blocks(text)
  end
  inline(text)
  smooth_offtags(text)

  # put the shelved attribute strings back in place of their markers
  retrieve(text)

  text.gsub!(/<\/?notextile>/, '')
  # restore the ampersands that incoming_entities replaced with "x%x%"
  text.gsub!(/x%x%/, '&#38;')
  clean_html(text) if filter_html
  text.strip!
  text
end
318 318
319 319 #######
320 320 private
321 321 #######
#
# Mapping of 8-bit ASCII codes to HTML numerical entity equivalents.
# (from PyTextile)
#
# Each entry is a pair: the single character for the 8-bit code, and
# either its numeric character reference ("&#NNNN;") or an empty string
# for codes mapped to 0.
#
TEXTILE_TAGS = [
  [128, 8364], [129, 0], [130, 8218], [131, 402], [132, 8222], [133, 8230],
  [134, 8224], [135, 8225], [136, 710], [137, 8240], [138, 352], [139, 8249],
  [140, 338], [141, 0], [142, 0], [143, 0], [144, 0], [145, 8216], [146, 8217],
  [147, 8220], [148, 8221], [149, 8226], [150, 8211], [151, 8212], [152, 732],
  [153, 8482], [154, 353], [155, 8250], [156, 339], [157, 0], [158, 0], [159, 376]
].map do |code, entity|
  # "&##{...}" is a literal "&#" followed by the interpolated number;
  # "&#{...}" alone would drop the "#" and emit an invalid entity
  [code.chr, entity.zero? ? "" : "&##{ entity };"]
end
337 337
#
# Regular expressions to convert to HTML.
#
# These are the building blocks for the attribute prefixes that can
# precede Textile blocks, cells and spans. The String-valued pieces are
# interpolated into larger patterns further down in this file.
#
# Horizontal alignment markers: "<>", "<", ">", "=" and runs of parentheses.
A_HLGN = /(?:(?:<>|<|>|\=|[()]+)+)/
# Vertical alignment markers: "-", "^" or "~".
A_VLGN = /[\-^~]/
# A parenthesised group containing no double quote, e.g. "(name)".
C_CLAS = '(?:\([^")]+\))'
# A bracketed group containing no double quote or bracket, e.g. "[en]".
C_LNGE = '(?:\[[^"\[\]]+\])'
# A braced group containing no double quote, e.g. "{color:red}".
C_STYL = '(?:\{[^"}]+\})'
# A backslash followed by digits (read as colspan by #pba for 'td').
S_CSPN = '(?:\\\\\d+)'
# A slash followed by digits (read as rowspan by #pba for 'td').
S_RSPN = '(?:/\d+)'
# Alignment: horizontal and vertical markers, in either order, both optional.
A = "(?:#{A_HLGN}?#{A_VLGN}?|#{A_VLGN}?#{A_HLGN}?)"
# Span markers: column and row span, in either order.
S = "(?:#{S_CSPN}?#{S_RSPN}|#{S_RSPN}?#{S_CSPN}?)"
# Class / style / language groups in three accepted orders, each part optional.
C = "(?:#{C_CLAS}?#{C_STYL}?#{C_LNGE}?|#{C_STYL}?#{C_LNGE}?#{C_CLAS}?|#{C_LNGE}?#{C_STYL}?#{C_CLAS}?)"
# Regexp-quoted punctuation sets. The commented-out line is a previous,
# wider definition of PUNCT that also contained ()<>[]{}.
# PUNCT = Regexp::quote( '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~' )
PUNCT = Regexp::quote( '!"#$%&\'*+,-./:;=?@\\^_`|~' )
PUNCT_NOQ = Regexp::quote( '!"#$&\',./:;=?@\\`|' )
PUNCT_Q = Regexp::quote( '*-_+^~%' )
# A run of non-space characters followed by optional trailing characters
# (captured separately), ending before whitespace, "<" or end of line.
HYPERLINK = '(\S+?)([^\w\s/;=\?]*?)(?=\s|<|$)'
356 356
# Text markup tags, don't conflict with block tags
#
# #blocks consults this list: a block that starts with a tag whose name
# is NOT listed here is treated as complex HTML and left untouched.
SIMPLE_HTML_TAGS = [
  'tt', 'b', 'i', 'big', 'small', 'em', 'strong', 'dfn', 'code',
  'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'a', 'img', 'br',
  'map', 'q', 'sub', 'sup', 'span', 'bdo'
]
363 363
# Quick phrase modifiers: [textile marker, HTML tag, rule type].
# Two-character markers are listed before their one-character
# counterparts ('**' before '*', '__' before '_').
QTAGS = [
  ['**', 'b', :limit],
  ['*', 'strong', :limit],
  ['??', 'cite', :limit],
  ['-', 'del', :limit],
  ['__', 'i', :limit],
  ['_', 'em', :limit],
  ['%', 'span', :limit],
  ['+', 'ins', :limit],
  ['^', 'sup', :limit],
  ['~', 'sub', :limit]
]
# Alternation of every (quoted) marker; used below to allow one extra
# marker directly before and after a span.
QTAGS_JOIN = QTAGS.map {|rc, ht, rtype| Regexp::quote rc}.join('|')

# Rewrites each QTAGS entry in place to
# [marker, html tag, compiled regexp, rule type].
QTAGS.collect! do |rc, ht, rtype|
  rcq = Regexp::quote rc
  re =
    case rtype
    when :limit
      # captures: sta, oqs, qtag, content, oqa
      /(^|[>\s\(]) # sta
      (?!\-\-)
      (#{QTAGS_JOIN}|) # oqs
      (#{rcq}) # qtag
      (\w|[^\s].*?[^\s]) # content
      (?!\-\-)
      #{rcq}
      (#{QTAGS_JOIN}|) # oqa
      (?=[[:punct:]]|<|\s|\)|$)/x
    else
      # no entry above uses a type other than :limit
      /(#{rcq})
      (#{C})
      (?::(\S+))?
      (\w|[^\s\-].*?[^\s\-])
      #{rcq}/xm
    end
  [rc, ht, re, rtype]
end
401 401
# Elements to handle
#
# Every entry below is commented out, so GLYPHS is an empty array.
# Each disabled entry has the form [regexp, replacement, optional toggle].
GLYPHS = [
  # [ /([^\s\[{(>])?\'([dmst]\b|ll\b|ve\b|\s|:|$)/, '\1&#8217;\2' ], # single closing
  # [ /([^\s\[{(>#{PUNCT_Q}][#{PUNCT_Q}]*)\'/, '\1&#8217;' ], # single closing
  # [ /\'(?=[#{PUNCT_Q}]*(s\b|[\s#{PUNCT_NOQ}]))/, '&#8217;' ], # single closing
  # [ /\'/, '&#8216;' ], # single opening
  # [ /</, '&lt;' ], # less-than
  # [ />/, '&gt;' ], # greater-than
  # [ /([^\s\[{(])?"(\s|:|$)/, '\1&#8221;\2' ], # double closing
  # [ /([^\s\[{(>#{PUNCT_Q}][#{PUNCT_Q}]*)"/, '\1&#8221;' ], # double closing
  # [ /"(?=[#{PUNCT_Q}]*[\s#{PUNCT_NOQ}])/, '&#8221;' ], # double closing
  # [ /"/, '&#8220;' ], # double opening
  # [ /\b( )?\.{3}/, '\1&#8230;' ], # ellipsis
  # [ /\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])/, '<acronym title="\2">\1</acronym>' ], # 3+ uppercase acronym
  # [ /(^|[^"][>\s])([A-Z][A-Z0-9 ]+[A-Z0-9])([^<A-Za-z0-9]|$)/, '\1<span class="caps">\2</span>\3', :no_span_caps ], # 3+ uppercase caps
  # [ /(\.\s)?\s?--\s?/, '\1&#8212;' ], # em dash
  # [ /\s->\s/, ' &rarr; ' ], # right arrow
  # [ /\s-\s/, ' &#8211; ' ], # en dash
  # [ /(\d+) ?x ?(\d+)/, '\1&#215;\2' ], # dimension sign
  # [ /\b ?[(\[]TM[\])]/i, '&#8482;' ], # trademark
  # [ /\b ?[(\[]R[\])]/i, '&#174;' ], # registered
  # [ /\b ?[(\[]C[\])]/i, '&#169;' ] # copyright
]
425 425
# Textile horizontal alignment marker => alignment keyword.
# NOTE(review): presumably looked up by #h_align, which is not in this
# part of the file -- confirm.
H_ALGN_VALS = {
  '<' => 'left',
  '=' => 'center',
  '>' => 'right',
  '<>' => 'justify'
}

# Textile vertical alignment marker => alignment keyword.
# NOTE(review): presumably looked up by #v_align, which is not in this
# part of the file -- confirm.
V_ALGN_VALS = {
  '^' => 'top',
  '-' => 'middle',
  '~' => 'bottom'
}
438 438
#
# Flexible HTML escaping
#
# Escapes _str_ IN PLACE and returns it; a nil (or false) _str_ is
# returned untouched.
#
# mode:: +:Quotes+ (default) escapes both double and single quotes,
#        +:NoQuotes+ escapes neither, any other value escapes double
#        quotes only. "&", "<" and ">" are always escaped.
#
def htmlesc(str, mode = :Quotes)
  return str unless str

  # "&" has to go first so the entities produced below stay intact
  substitutions = [['&', '&amp;']]
  substitutions << ['"', '&quot;'] unless mode == :NoQuotes
  substitutions << ["'", '&#039;'] if mode == :Quotes
  substitutions << ['<', '&lt;'] << ['>', '&gt;']

  substitutions.each { |raw, entity| str.gsub!(raw, entity) }
  str
end
452 452
# Search and replace for Textile glyphs (quotes, dashes, other symbols)
#
# The table-driven GLYPHS pass is disabled; the only substitution left
# turns an upper-case word of two or more characters that is directly
# followed by "(text)" into an <acronym> element, with the parenthesised
# text, HTML-escaped, as its title. _text_ is modified in place.
def pgl(text)
  text.gsub!(/\b([A-Z][A-Z0-9]{1,})\b(?:[(]([^)]*)[)])/) do
    found = Regexp.last_match
    short_form = found[1]
    expansion = htmlesc(found[2])
    "<acronym title=\"#{expansion}\">#{short_form}</acronym>"
  end
end
463 463
# Parses Textile attribute lists and builds an HTML attribute string
#
# text_in:: the raw attribute prefix (alignment, class, style, language
#           and span markers), or nil
# element:: name of the element being decorated; only 'td' enables the
#           colspan / rowspan / vertical-align handling
#
# Returns '' when _text_in_ is nil, otherwise a String made of zero or
# more ' name="value"' pairs.
def pba( text_in, element = "" )

  return '' unless text_in

  style = []
  # work on a copy: the sub! calls below consume pieces of the string
  text = text_in.dup
  if element == 'td'
    colspan = $1 if text =~ /\\(\d+)/
    rowspan = $1 if text =~ /\/(\d+)/
    style << "vertical-align:#{ v_align( $& ) };" if text =~ A_VLGN
  end

  # "{...}" is always removed from the text, but it is only emitted
  # (after sanitizing) when filter_styles is not set
  if text.sub!( /\{([^"}]*)\}/, '' ) && !filter_styles
    sanitized = sanitize_styles($1)
    style << "#{ sanitized };" unless sanitized.blank?
  end

  # "[...]" becomes the lang attribute
  lang = $1 if
    text.sub!( /\[([^)]+?)\]/, '' )

  # "(...)" becomes the class, optionally carrying "#id" (split below)
  cls = $1 if
    text.sub!( /\(([^()]+?)\)/, '' )

  # parentheses that are still left are padding markers, one em each
  style << "padding-left:#{ $1.length }em;" if
    text.sub!( /([(]+)/, '' )

  style << "padding-right:#{ $1.length }em;" if text.sub!( /([)]+)/, '' )

  style << "text-align:#{ h_align( $& ) };" if text =~ A_HLGN

  # "name#ident" => class "name", id "ident"
  cls, id = $1, $2 if cls =~ /^(.*?)#(.*)$/

  atts = ''
  atts << " style=\"#{ style.join }\"" unless style.empty?
  atts << " class=\"#{ cls }\"" unless cls.to_s.empty?
  atts << " lang=\"#{ lang }\"" if lang
  atts << " id=\"#{ id }\"" if id
  atts << " colspan=\"#{ colspan }\"" if colspan
  atts << " rowspan=\"#{ rowspan }\"" if rowspan

  atts
end
507 507
# Whitelist for a single CSS declaration: an allowed property (optionally
# with "-suffix" parts) followed by one or more simple values (numbers,
# percentages, px / em lengths, hex colours or bare words).
#
# The pattern is anchored with \A and \z so that it has to cover the
# WHOLE declaration; with ^ and $ a declaration containing a newline was
# accepted as soon as one of its lines looked harmless.
STYLES_RE = /\A(color|width|height|border|background|padding|margin|font|text|float)(-[a-z]+)*:\s*((\d+%?|\d+px|\d+(\.\d+)?em|#[0-9a-f]+|[a-z]+)\s*)+\z/i

# Filters a ";"-separated list of CSS declarations.
#
# str:: the raw content of a Textile "{...}" style group
#
# Returns the declarations that match STYLES_RE (each stripped of
# surrounding whitespace) joined with ";" -- possibly an empty String.
def sanitize_styles(str)
  declarations = str.split(";").map(&:strip)
  declarations.select { |declaration| declaration =~ STYLES_RE }.join(";")
end
517 517
# Matches a whole Textile table: an optional "table<attributes>." line
# (capture 1 = its attributes), then the rows (capture 2), which start
# with optional row attributes and a pipe and run up to a pipe that is
# followed by a blank line or the end of the text.
TABLE_RE = /^(?:table(_?#{S}#{A}#{C})\. ?\n)?^(#{A}#{C}\.? ?\|.*?\|)(\n\n|\Z)/m
519 519
# Parses a Textile table block, building HTML from the result.
#
# Every match of TABLE_RE in _text_ is replaced, in place, by a <table>
# element. Table, row and cell attribute prefixes are converted with
# #pba and parked on the shelf (see #shelve). A cell whose content
# starts with "_" becomes a <th>, any other cell a <td>.
#
# Returns the result of String#gsub! (nil when no table was found).
def block_textile_table(text)
  text.gsub!(TABLE_RE) do
    tatts, fullrow = $~[1..2]
    tatts = pba(tatts, 'table')
    tatts = shelve(tatts) if tatts
    rows = []
    # a newline that does not directly follow a pipe is a line break
    # inside a cell, not the end of a row
    fullrow.gsub!(/([^|])\n/, "\\1<br />")
    fullrow.each_line do |row|
      ratts, row = pba($1, 'tr'), $2 if row =~ /^(#{A}#{C}\. )(.*)/m
      cells = []
      # split on pipes but keep the separators, so that the leading and
      # trailing pieces can be dropped with [1..-2]; the lookahead skips
      # a pipe that is followed by "]]" with no "[" or "|" in between
      row.split(/(\|)(?![^\[\|]*\]\])/)[1..-2].each do |cell|
        next if cell == '|'
        ctyp = cell =~ /^_/ ? 'h' : 'd'

        catts = ''
        catts, cell = pba($1, 'td'), $2 if cell =~ /^(_?#{S}#{A}#{C}\. ?)(.*)/

        catts = shelve(catts) if catts
        cells << "\t\t\t<t#{ ctyp }#{ catts }>#{ cell }</t#{ ctyp }>"
      end
      ratts = shelve(ratts) if ratts
      rows << "\t\t<tr#{ ratts }>\n#{ cells.join( "\n" ) }\n\t\t</tr>"
    end
    "\t<table#{ tatts }>\n#{ rows.join( "\n" ) }\n\t</table>\n\n"
  end
end
549 550
# Matches a run of consecutive list lines (each starting with "#" or "*").
LISTS_RE = /^([#*]+?#{C} .*?)$(?![^#*])/m
# Splits one list line into markers, attribute prefix and item content.
LISTS_CONTENT_RE = /^([#*]+)(#{A}#{C}) (.*)$/m

# Parses Textile lists and generates HTML
#
# Each run matched by LISTS_RE is rewritten line by line. +depth+ is a
# stack holding the marker string ("#", "**", ...) of every list that is
# currently open; the length of a marker string is its nesting level and
# its last character selects <ol> or <ul> (see #lT). _text_ is modified
# in place.
def block_textile_lists( text )
  text.gsub!( LISTS_RE ) do |match|
    lines = match.split( /\n/ )
    last_line = -1
    depth = []
    lines.each_with_index do |line, line_id|
      if line =~ LISTS_CONTENT_RE
        tl,atts,content = $~[1..3]
        if depth.last
          if depth.last.length > tl.length
            # this item is shallower than the open list: close the
            # deeper lists by appending to the previous line
            (depth.length - 1).downto(0) do |i|
              break if depth[i].length == tl.length
              lines[line_id - 1] << "</li>\n\t</#{ lT( depth[i] ) }l>\n\t"
              depth.pop
            end
          end
          if depth.last and depth.last.length == tl.length
            # same level as the previous item: close that item
            lines[line_id - 1] << '</li>'
          end
        end
        unless depth.last == tl
          # different marker string than the innermost open list: open a new list
          depth << tl
          atts = pba( atts )
          atts = shelve( atts ) if atts
          lines[line_id] = "\t<#{ lT(tl) }l#{ atts }>\n\t<li>#{ content }"
        else
          lines[line_id] = "\t\t<li>#{ content }"
        end
        last_line = line_id

      else
        # not a list item; the line itself is left unchanged
        last_line = line_id
      end
      if line_id - last_line > 1 or line_id == lines.length - 1
        # close every list that is still open
        while v = depth.pop
          lines[last_line] << "</li>\n\t</#{ lT( v ) }l>"
        end
      end
    end
    lines.join( "\n" )
  end
end
596 597
# A run of consecutive lines that each start with one or more ">".
QUOTES_RE = /(^>+([^\n]*?)(\n|$))+/m
# Splits a quoted line into its leading ">" / space prefix and the rest.
QUOTES_CONTENT_RE = /^([> ]+)(.*)$/m

# Converts e-mail style quoting (lines starting with ">") into nested
# <blockquote> elements.
#
# The number of ">" characters in a line's prefix is its nesting level;
# whenever the level changes between two lines the matching number of
# opening or closing tags is emitted, surrounded by blank lines. All
# levels still open after the last line are closed. _text_ is modified
# in place.
def block_textile_quotes(text)
  text.gsub!(QUOTES_RE) do |quoted|
    level = 0
    html = ''
    quoted.split(/\n/).each do |line|
      parts = QUOTES_CONTENT_RE.match(line)
      prefix, body = parts[1], parts[2]
      wanted = prefix.count('>')
      if wanted > level
        html << "\n\n" << '<blockquote>' * (wanted - level) << "\n\n"
      elsif wanted < level
        html << "\n\n" << '</blockquote>' * (level - wanted) << "\n\n"
      end
      level = wanted
      html << body << "\n"
    end
    html << "\n" << '</blockquote>' * level << "\n\n"
  end
end
619 620
# "@code@", optionally "@|lang|code@", preceded and followed by a
# non-word character.
CODE_RE = /(\W)
    @
    (?:\|(\w+?)\|)?
    (.+?)
    @
    (?=\W)/x

# Turns @...@ spans into <code> elements (with a lang attribute when a
# "|lang|" prefix is present) and hands the result to #rip_offtags.
# _text_ is modified in place.
def inline_textile_code(text)
  text.gsub!(CODE_RE) do
    lead, language, source = Regexp.last_match.captures
    lang_attr = language ? " lang=\"#{ language }\"" : nil
    rip_offtags("#{ lead }<code#{ lang_attr }>#{ source }</code>", false)
  end
end
634 635
# Picks the list-tag letter for a run of list markers: 'o' (as in <ol>)
# when the run ends with "#", otherwise 'u' (as in <ul>).
def lT(text)
  return 'o' if text =~ /\#$/
  'u'
end
638 639
# When +hard_breaks+ is set, replaces single newlines in _text_ with
# "<br />" (in place). A newline is kept when it is the last thing in
# the text, or when the next line (after optional spaces) starts with a
# run of "#", "*" or "=" followed by whitespace or end of line, or with
# "{" or "|".
def hard_break(text)
  return unless hard_breaks
  text.gsub!(/(.)\n(?!\Z| *([#*=]+(\s|$)|[{|]))/, "\\1<br />")
end
642 643
# Block separator: two or more newlines not followed by a space.
BLOCKS_GROUP_RE = /\n{2,}(?! )/m

# Splits _text_ into blocks and runs every "block_" rule of @rules on
# each of them, replacing _text_ with the re-joined result.
#
# text::      the text to process (modified in place)
# deep_code:: when true, a block that no rule claimed is wrapped in
#             <pre><code> instead of <p>
def blocks( text, deep_code = false )
  text.replace( text.split( BLOCKS_GROUP_RE ).collect do |blk|
    # "plain" blocks do not start with a list / quote marker or a space
    plain = blk !~ /\A[#*> ]/

    # skip blocks that are complex HTML
    if blk =~ /^<\/?(\w+).*>/ and not SIMPLE_HTML_TAGS.include? $1
      blk
    else
      # search for indentation levels
      blk.strip!
      if blk.empty?
        blk
      else
        code_blk = nil
        # indented sub-blocks are processed recursively; inside a plain
        # block they are cut out and re-appended after the block
        blk.gsub!( /((?:\n(?:\n^ +[^\n]*)+)+)/m ) do |iblk|
          flush_left iblk
          blocks iblk, plain
          # NOTE(review): non-destructive gsub whose result is discarded,
          # so this line has no effect -- confirm whether gsub! was meant
          iblk.gsub( /^(\S)/, "\t\\1" )
          if plain
            code_blk = iblk; ""
          else
            iblk
          end
        end

        # a rule "applies" when its method returns a truthy value
        block_applied = 0
        @rules.each do |rule_name|
          block_applied += 1 if ( rule_name.to_s.match /^block_/ and method( rule_name ).call( blk ) )
        end
        if block_applied.zero?
          if deep_code
            blk = "\t<pre><code>#{ blk }</code></pre>"
          else
            blk = "\t<p>#{ blk }</p>"
          end
        end
        # hard_break blk
        blk + "\n#{ code_blk }"
      end
    end

  end.join( "\n\n" ) )
end
688 689
# Handler for the "bq." prefix: wraps _content_ in a paragraph inside a
# <blockquote>.
#
# tag::     the matched prefix (not used)
# atts::    attribute string for the inner <p>; shelved when present
# cite::    optional citation, resolved through #check_refs and emitted
#           as the cite attribute of the <blockquote>
# content:: the block text
def textile_bq(tag, atts, cite, content)
  cite, = check_refs(cite)
  cite = " cite=\"#{ cite }\"" if cite
  atts = shelve(atts) if atts
  [
    "\t<blockquote#{ cite }>",
    "\t\t<p#{ atts }>#{ content }</p>",
    "\t</blockquote>"
  ].join("\n")
end
695 696
# Handler for the "p." prefix (and, through the aliases below, "h1." to
# "h6."): wraps _content_ in an element named after _tag_.
#
# atts is shelved when present; cite is accepted for signature
# compatibility with the other prefix handlers and ignored.
def textile_p(tag, atts, cite, content)
  atts = shelve(atts) if atts
  opening = "<#{ tag }#{ atts }>"
  closing = "</#{ tag }>"
  "\t#{ opening }#{ content }#{ closing }"
end

alias textile_h1 textile_p
alias textile_h2 textile_p
alias textile_h3 textile_p
alias textile_h4 textile_p
alias textile_h5 textile_p
alias textile_h6 textile_p
707 708
# Handler for the "fnN." prefix: renders footnote number _num_ as a
# paragraph with id "fnN" and class "footnote", its text preceded by the
# number in <sup>.
#
# The id / class pair is appended to _atts_ itself (the caller's string
# is modified) before the attributes are shelved. tag and cite are not
# used.
def textile_fn_(tag, num, atts, cite, content)
  atts << " id=\"fn#{ num }\" class=\"footnote\""
  atts = shelve(atts) if atts
  "\t<p#{ atts }>" + "<sup>#{ num }</sup> #{ content }" + "</p>"
end
714 715
# A prefixed block: lower-case tag with optional digits, attribute
# prefix, ".", optional ":cite", a space and the content.
BLOCK_RE = /^(([a-z]+)(\d*))(#{A}#{C})\.(?::(\S+))? (.*)$/m

# Dispatches a prefixed block ("p.", "bq.", "h2.", "fn1.", ...) to its
# handler method.
#
# The handler is looked up first as "textile_<tag>" (called with
# tag, atts, cite, content) and then as "textile_<tag without digits>_"
# (called with that tag, the digits, atts, cite, content). When a
# handler returns a value, the matched text is replaced with it in
# place. Returns nil when nothing was replaced.
def block_textile_prefix(text)
  found = BLOCK_RE.match(text)
  return unless found

  tag, tagpre, num, atts, cite, content = found[1..6]
  atts = pba(atts)

  # pass to prefix handler
  replacement =
    if respond_to?("textile_#{ tag }", true)
      method("textile_#{ tag }").call(tag, atts, cite, content)
    elsif respond_to?("textile_#{ tagpre }_", true)
      method("textile_#{ tagpre }_").call(tagpre, num, atts, cite, content)
    end
  text.gsub!(found[0]) { replacement } if replacement
end
732 733
# Text at the very start of the block, underlined by a line of "=" / "-".
SETEXT_RE = /\A(.+?)\n([=-])[=-]* *$/m

# Markdown setext headers: text underlined with "=" becomes <h1>, with
# "-" <h2>. Whatever follows the underline is run through #blocks and
# appended. _text_ is replaced in place; returns nil when it does not
# start with a setext header.
def block_markdown_setext(text)
  heading = SETEXT_RE.match(text)
  return unless heading

  tag = heading[2] == "=" ? "h1" : "h2"
  remainder = heading.post_match
  blocks remainder
  text.replace("<#{ tag }>#{ heading[1] }</#{ tag }>" + remainder)
end
742 743
ATX_RE = /\A(\#{1,6}) # $1 = string of #'s
    [ ]*
    (.+?) # $2 = Header text
    [ ]*
    \#* # optional closing #'s (not counted)
    $/x

# Markdown atx headers: one to six leading "#" select <h1> to <h6>.
# Whatever follows the header line is run through #blocks and appended
# after a blank line. _text_ is replaced in place; returns nil when it
# does not start with an atx header.
def block_markdown_atx(text)
  header = ATX_RE.match(text)
  return unless header

  tag = "h#{ header[1].length }"
  remainder = header.post_match
  blocks remainder
  text.replace("<#{ tag }>#{ header[2] }</#{ tag }>\n\n" + remainder)
end
757 758
# A block that starts (after optional spaces) with ">".
MARKDOWN_BQ_RE = /\A(^ *> ?.+$(.+\n)*\n*)+/m

# Markdown blockquotes: strips the leading "> " markers, dedents and
# processes the quoted text as blocks of its own, indents lines that
# start with a non-space character by one tab and wraps the result in
# <blockquote>. _text_ is modified in place.
def block_markdown_bq(text)
  text.gsub!(MARKDOWN_BQ_RE) do |quoted|
    quoted.gsub!(/^ *> ?/, '')
    flush_left(quoted)
    blocks(quoted)
    quoted.gsub!(/^(\S)/, "\t\\1")
    "<blockquote>\n" + quoted + "\n</blockquote>\n\n"
  end
end
769 770
# A line made of three or more "*", "-" or "_" (all the same character),
# each optionally followed by a space, with an optional leading space.
MARKDOWN_RULE_RE = Regexp.new(
  '^(' +
  ['*', '-', '_'].map { |ch| ' ?(' + Regexp.quote(ch) + ' ?){3,}' }.join('|') +
  ')$'
)

# Markdown horizontal rules: replaces every rule line in _text_ with
# "<hr />" (in place).
def block_markdown_rule(text)
  text.gsub!(MARKDOWN_RULE_RE, "<hr />")
end
779 780
# XXX TODO XXX
#
# Placeholder for Markdown list support: the body is empty, so the
# method leaves _text_ untouched and returns nil. Because #blocks counts
# a rule as applied only when it returns a truthy value, this rule never
# counts as applied.
def block_markdown_lists( text )
end
783 784
# Applies every quick phrase modifier in QTAGS to _text_ (in place),
# turning e.g. *strong* or _emphasis_ spans into the matching HTML
# element. An attribute prefix at the start of the span content is
# converted with #pba and shelved. An extra marker captured directly
# before or after the span is re-emitted outside the element.
def inline_textile_span(text)
  QTAGS.each do |_marker, html_tag, pattern, kind|
    text.gsub!(pattern) do
      hit = Regexp.last_match
      if kind == :limit
        lead, before, _qtag, body, after = hit[1..6]
        attrs = nil
        attrs, body = $~[1..2] if body =~ /^(#{C})(.+)$/
      else
        _qtag, attrs, _cite, body = hit[1..4]
        lead = ''
      end
      attrs = pba(attrs)
      attrs = shelve(attrs) if attrs

      "#{ lead }#{ before }<#{ html_tag }#{ attrs }>#{ body }</#{ html_tag }>#{ after }"
    end
  end
end
807 808
# Textile link: "text(title)":url
#
# Captures, in order: the whole link (including $pre), $pre, $atts,
# $text, $title, $url, $proto, $slash and $post. The URL has to start
# with "/", a "scheme://", "www." or "mailto:", and the link has to be
# followed by "<", whitespace or the end of a line.
LINK_RE = /
    (
    ([\s\[{(]|[#{PUNCT}])? # $pre
    " # start
    (#{C}) # $atts
    ([^"\n]+?) # $text
    \s?
    (?:\(([^)]+?)\)(?="))? # $title
    ":
    ( # $url
    (\/|[a-zA-Z]+:\/\/|www\.|mailto:) # $proto
    [[:alnum:]_\/]\S+?
    )
    (\/)? # $slash
    ([^[:alnum:]_\=\/;\(\)]*?) # $post
    )
    (?=<|\s|$)
    /x
#"
# Converts Textile links ("text(title)":url) in _text_ into <a>
# elements, in place.
#
# * A link whose text contains "<br />" is left as it was.
# * The URL is resolved through #check_refs; a title stored with the
#   reference is used when the link itself carries none.
# * URLs starting with http:// or https:// get class="external".
#
# The URL and title are escaped on COPIES: #check_refs can return the
# very strings stored in @urlrefs, and #htmlesc works in place, so
# escaping them directly would re-escape a reference each time it is
# used.
def inline_textile_link(text)
  text.gsub!(LINK_RE) do
    all, pre, atts, label, title, url, proto, slash, post = $~[1..9]
    if label.include?('<br />')
      all
    else
      url, url_title = check_refs(url)
      title ||= url_title

      # A URL with unbalanced parentheses that ends with ')' is taken
      # to sit inside surrounding parentheses: the final ')' belongs to
      # the text after the link, not to the URL.
      if url.end_with?(')') && (url.count("(") - url.count(")")) < 0
        url = url[0..-2]   # discard closing parenthesis from url
        post = ")" + post  # add closing parenthesis to post
      end
      atts = pba(atts)
      atts = " href=\"#{ htmlesc url.dup }#{ slash }\"#{ atts }"
      atts << " title=\"#{ htmlesc title.dup }\"" if title
      atts = shelve(atts) if atts

      external = (url =~ /^https?:\/\//) ? ' class="external"' : ''

      "#{ pre }<a#{ atts }#{ external }>#{ label }</a>#{ post }"
    end
  end
end
853 854
MARKDOWN_REFLINK_RE = /
    \[([^\[\]]+)\] # $text
    [ ]? # opt. space
    (?:\n[ ]*)? # one optional newline followed by spaces
    \[(.*?)\] # $id
    /x

# Converts Markdown reference links ([text][id], or [text][] to use the
# text itself as the id) in _text_ into <a> elements, in place.
#
# The URL and title come from #check_refs; for an unknown id the id
# itself is used as the URL. Both are HTML-escaped before they are
# written into the attributes, because the id (and therefore the URL)
# may contain quotes. Escaping is done on copies so that the strings
# stored in @urlrefs stay untouched.
def inline_markdown_reflink(text)
  text.gsub!(MARKDOWN_REFLINK_RE) do
    label, id = $~[1..2]

    url, title = check_refs(id.empty? ? label : id)

    atts = " href=\"#{ htmlesc url.dup }\""
    atts << " title=\"#{ htmlesc title.dup }\"" if title
    atts = shelve(atts)

    "<a#{ atts }>#{ label }</a>"
  end
end
878 879
MARKDOWN_LINK_RE = /
    \[([^\[\]]+)\] # $text
    \( # open paren
    [ \t]* # opt space
    <?(.+?)>? # $href
    [ \t]* # opt space
    (?: # whole title
    (['"]) # $quote
    (.*?) # $title
    \3 # matching quote
    )? # title is optional
    \)
    /x

# Converts inline Markdown links ([text](url "title")) in _text_ into
# <a> elements, in place.
#
# The URL and the title are taken straight from the document, so both
# are HTML-escaped before they are written into the attributes; without
# that a double quote in the URL would end the href value and let the
# rest of the URL be read as further attributes.
def inline_markdown_link(text)
  text.gsub!(MARKDOWN_LINK_RE) do
    label, url, _quote, title = $~[1..4]

    atts = " href=\"#{ htmlesc url }\""
    atts << " title=\"#{ htmlesc title }\"" if title
    atts = shelve(atts)

    "<a#{ atts }>#{ label }</a>"
  end
end
904 905
# Textile reference definition: "[name]url" at the start of a line
# (after optional spaces). Capture 2 is the name, capture 3 the URL.
TEXTILE_REFS_RE = /(^ *)\[([^\[\n]+?)\](#{HYPERLINK})(?=\s|$)/
# Markdown reference definition: '[name]: url "optional title"'.
# Capture 2 is the name, capture 3 the URL and capture 6 the title
# (captures 4 and 5 are the two groups inside HYPERLINK).
MARKDOWN_REFS_RE = /(^ *)\[([^\n]+?)\]:\s+<?(#{HYPERLINK})>?(?:\s+"((?:[^"]|\\")+)")?(?=\s|$)/m
907 908
# Runs every rule in @rules whose name starts with "refs_" on _text_,
# in the order the rules are listed.
def refs(text)
  @rules.each do |rule_name|
    next unless rule_name.to_s.match(/^refs_/)
    method(rule_name).call(text)
  end
end
913 914
# Collects Textile reference definitions ("[name]url"): each one is
# stored in @urlrefs under its lower-cased name as [url, nil] and removed
# from _text_ (in place).
def refs_textile(text)
  text.gsub!(TEXTILE_REFS_RE) do
    found = Regexp.last_match
    @urlrefs[found[2].downcase] = [found[3], nil]
    nil # the definition itself is dropped from the text
  end
end
921 922
# Collects Markdown reference definitions ('[name]: url "title"'): each
# one is stored in @urlrefs under its lower-cased name as [url, title]
# (title is nil when absent) and removed from _text_ (in place).
def refs_markdown(text)
  text.gsub!(MARKDOWN_REFS_RE) do
    found = Regexp.last_match
    name = found[2]
    @urlrefs[name.downcase] = [found[3], found[6]]
    nil # the definition itself is dropped from the text
  end
end
930 931
# Looks _text_ up (case-insensitively) among the collected references.
#
# Returns the stored [url, title] pair when there is one, otherwise
# [text, nil]. A nil _text_ yields [nil, nil].
def check_refs(text)
  known = text ? @urlrefs[text.downcase] : nil
  known || [text, nil]
end
935 936
# Textile image: !<alignment><attributes>src(title)!:href
#
# Captures, in order: the character before the image (or the start of a
# line), the alignment marker, the attribute prefix, the image source,
# the title, and the two groups of HYPERLINK for the optional link
# target.
IMAGE_RE = /
    (>|\s|^) # start of line?
    \! # opening
    (\<|\=|\>)? # optional alignment atts
    (#{C}) # optional style,class atts
    (?:\. )? # optional dot-space
    ([^\s(!]+?) # presume this is the src
    \s? # optional space
    (?:\(((?:[^\(\)]|\([^\)]+\))+?)\))? # optional title
    \! # closing
    (?::#{ HYPERLINK })? # optional href
    /x
948 949
# Converts Textile images (!src(title)!, optionally followed by :href)
# in _text_ into <img> elements, in place.
#
# * The title, when given, is used for both the title and alt
#   attributes; without a title an empty alt attribute is emitted.
# * With an href the image is wrapped in a link. The href is resolved
#   through #check_refs and HTML-escaped (on a copy), so that a quote in
#   it cannot end the attribute value.
# * With an alignment marker the image is floated: a leading "<p>" gets
#   the float style itself, otherwise the image is wrapped in a <div>.
#
# NOTE(review): the href scheme is not checked here -- confirm whether
# "javascript:" targets are rejected elsewhere.
def inline_textile_image(text)
  text.gsub!(IMAGE_RE) do
    stln, algn, atts, url, title, href, href_a1, href_a2 = $~[1..8]
    htmlesc title # escapes in place; a nil title is left alone
    atts = pba(atts)
    atts = " src=\"#{ htmlesc url.dup }\"#{ atts }"
    atts << " title=\"#{ title }\"" if title
    atts << " alt=\"#{ title }\""

    if href
      href, = check_refs(href)
      href = htmlesc(href.dup)
    end

    out = ''
    out << "<a#{ shelve( " href=\"#{ href }\"" ) }>" if href
    out << "<img#{ shelve( atts ) } />"
    out << "</a>#{ href_a1 }#{ href_a2 }" if href

    if algn
      algn = h_align(algn)
      if stln == "<p>"
        out = "<p style=\"float:#{ algn }\">#{ out }"
      else
        out = "#{ stln }<div style=\"float:#{ algn }\">#{ out }</div>"
      end
    else
      out = stln + out
    end

    out
  end
end
982 983
# Parks _val_ on @shelf and returns the placeholder that stands in for
# it in the text: " :redsh#N:", N being the 1-based shelf position.
# #retrieve swaps the placeholders back.
def shelve(val)
  @shelf.push(val)
  " :redsh#" + @shelf.length.to_s + ":"
end
987 988
# Puts every shelved string back into _text_ (in place), replacing its
# " :redsh#N:" placeholder. Entries are restored in shelf order.
#
# The block form of gsub! is used on purpose: a replacement passed as a
# plain argument has its backslash sequences interpreted ("\\0", "\\1",
# a doubled backslash), which would corrupt shelved values that contain
# backslashes.
def retrieve(text)
  @shelf.each_with_index do |shelved, i|
    text.gsub!(" :redsh##{ i + 1 }:") { shelved }
  end
end
993 994
# Replaces each bare ampersand in +text+ (in place) with the dummy token
# "x%x%". An ampersand that starts something shaped like an HTML entity
# (alphanumerics or '#', then a semicolon) is skipped by the lookahead.
def incoming_entities( text )
    bare_ampersand = /&(?![#a-z0-9]+;)/i
    text.gsub!( bare_ampersand ) { "x%x%" }
end
1001 1002
# Wraps ==...== spans of +text+ (in place) in <notextile> tags.
#
# The first pass handles spans delimited by whitespace or line boundaries.
# The second pass (with /m) handles spans that open at the start of a line
# and may run across lines. Its regex has a single capture group, so the
# replacement must wrap \1; the previous replacement referenced groups
# \2 and \3, which do not exist, and therefore emitted the content
# unprotected followed by an empty <notextile></notextile> pair.
def no_textile( text )
    text.gsub!( /(^|\s)==([^=]+.*?)==(\s|$)?/,
        '\1<notextile>\2</notextile>\3' )
    text.gsub!( /^ *==([^=]+.*?)==/m,
        '<notextile>\1</notextile>' )
end
1008 1009
# Normalizes whitespace in +text+ (in place) and then flushes the document
# to the left margin.
#
# The substitutions run in order: CRLF and bare CR become LF, tabs become
# spaces, lines made only of spaces are emptied, runs of three or more
# newlines collapse to two, and a double quote at the end of a line gets a
# trailing space.
def clean_white_space( text )
    [
        [/\r\n/, "\n"],
        [/\r/, "\n"],
        [/\t/, ' '],
        [/^ +$/, ''],
        [/\n{3,}/, "\n\n"],
        [/"$/, "\" "]
    ].each do |pattern, replacement|
        text.gsub!( pattern, replacement )
    end

    # if the entire document is indented, flush it to the left side
    flush_left text
end
1022 1023
# Removes the indentation shared by the whole document from +text+
# (in place).
#
# The shared indentation is the smallest number of leading spaces found on
# any line that has a non-space character right after its indent. Nothing
# happens unless at least one line starts with a space.
#
# Returns the result of the final gsub!, or nil when nothing was stripped.
def flush_left( text )
    return unless text =~ /^ /

    # Measured in one pass. The previous counting loop rebuilt a regex per
    # step and never terminated when no line had a non-space character
    # after its indent (e.g. a text made only of spaces).
    indent = text.scan( /^( *)\S/ ).map { |( spaces )| spaces.length }.min
    return if indent.nil? || indent.zero?

    text.gsub!( /^ {#{indent}}/, '' )
end
1034 1035
# Turns footnote markers such as "word[1]" in +text+ (in place) into
# superscript links to the "#fnN" anchor. The marker must directly follow
# a word character; one trailing whitespace character is kept.
def footnote_ref( text )
    text.gsub!( /\b\[([0-9]+?)\](\s)?/ ) do
        number = Regexp.last_match( 1 )
        gap = Regexp.last_match( 2 )
        "<sup><a href=\"#fn#{ number }\">#{ number }</a></sup>#{ gap }"
    end
end
1039 1040
# Names of the tags inside which Textile processing is switched off.
OFFTAGS = /(code|pre|kbd|notextile)/
# One closing or opening off-tag plus the text that follows it, up to the
# next off-tag or the end of the string. Captures, as read by rip_offtags:
# 1 closing tag, 2 its name, 3 opening tag, 4 its name, 5 following text.
OFFTAG_MATCH = /(?:(<\/#{ OFFTAGS }>)|(<#{ OFFTAGS }[^>]*>))(.*?)(?=<\/?#{ OFFTAGS }\W|\Z)/mi
# Start of an opening off-tag.
OFFTAG_OPEN = /<#{ OFFTAGS }/
# Start of an off-tag with an optional slash (so it also matches opening tags).
OFFTAG_CLOSE = /<\/?#{ OFFTAGS }/
# Any single-line HTML tag, opening or closing.
HASTAG_MATCH = /(<\/?\w[^\n]*?>)/m
# Either one tag (captured in group 1) or the text up to the next tag / end of line.
ALLTAG_MATCH = /(<\/?\w[^\n]*?>)|.*?(?=<\/?\w[^\n]*?>|$)/m
1046 1047
# Applies glyph conversion to +text+ (in place) while leaving the contents
# of off-tags (<code>, <pre>, ...) alone.
#
# Text without any tag goes straight through pgl and footnote_ref.
# Otherwise the text is walked tag by tag: tags only update the off-tag
# nesting depth, text outside off-tags is processed recursively, and text
# inside off-tags is HTML-escaped instead.
def glyphs_textile( text, level = 0 )
    unless text =~ HASTAG_MATCH
        pgl text
        return footnote_ref( text )
    end

    offtag_depth = 0
    text.gsub!( ALLTAG_MATCH ) do |chunk|
        if Regexp.last_match( 1 )
            # the chunk is a tag: track whether we are inside an off-tag
            if chunk =~ OFFTAG_OPEN
                offtag_depth += 1
            elsif chunk =~ OFFTAG_CLOSE
                # never drop below zero on a stray closing tag
                offtag_depth = [offtag_depth - 1, 0].max
            end
        elsif offtag_depth.zero?
            glyphs_textile( chunk, level + 1 )
        else
            htmlesc( chunk, :NoQuotes )
        end

        chunk
    end
end
1073 1074
# Pulls the contents of off-tags (<code>, <pre>, <kbd>, <notextile>) out of
# +text+ (in place) and parks them in @pre_list, leaving a
# "<redpre#N>" placeholder where each outermost block started.
#
# text            - the String to process; it is modified and returned.
# escape_aftertag - when true, the text following an outermost opening tag
#                   is passed to htmlesc (except after <code class="...">).
# escape_line     - when true, nested off-tag chunks are passed to htmlesc
#                   before being appended to the current @pre_list entry.
def rip_offtags( text, escape_aftertag=true, escape_line=true )
    if text =~ /<.*>/
        ## strip and encode <pre> content
        # codepre counts currently open off-tags; used_offtags records the
        # distinct tag names seen since the counter was last zero
        codepre, used_offtags = 0, {}
        text.gsub!( OFFTAG_MATCH ) do |line|
            if $3
                # opening off-tag: $3 is the tag, $4 its name, $5 the text after it
                first, offtag, aftertag = $3, $4, $5
                codepre += 1
                used_offtags[offtag] = true
                if codepre - used_offtags.length > 0
                    # the same tag name is open more than once: treat the
                    # chunk as content of the current entry
                    htmlesc( line, :NoQuotes ) if escape_line
                    @pre_list.last << line
                    line = ""
                else
                    ### htmlesc is disabled between CODE tags which will be parsed with highlighter
                    ### Regexp in formatter.rb is : /<code\s+class="(\w+)">\s?(.+)/m
                    ### NB: some changes were made not to use $N variables, because we use "match"
                    ### and it breaks following lines
                    htmlesc( aftertag, :NoQuotes ) if aftertag && escape_aftertag && !first.match(/<code\s+class="(\w+)">/)
                    # the placeholder index is the position the new entry gets below
                    line = "<redpre##{ @pre_list.length }>"
                    # rebuild the opening tag keeping only its name and class attribute
                    first.match(/<#{ OFFTAGS }([^>]*)>/)
                    tag = $1
                    $2.to_s.match(/(class\=("[^"]+"|'[^']+'))/i)
                    tag << " #{$1}" if $1
                    @pre_list << "<#{ tag }>#{ aftertag }"
                end
            elsif $1 and codepre > 0
                # closing off-tag while at least one off-tag is open
                if codepre - used_offtags.length > 0
                    htmlesc( line, :NoQuotes ) if escape_line
                    @pre_list.last << line
                    line = ""
                end
                codepre -= 1 unless codepre.zero?
                used_offtags = {} if codepre.zero?
            end
            # whatever is left in line replaces the matched chunk
            line
        end
    end
    text
end
1114 1115
# Swaps each "<redpre#N>" placeholder in +text+ (in place) for entry N of
# @pre_list. Does nothing when @pre_list is empty.
def smooth_offtags( text )
    return if @pre_list.empty?

    ## replace <pre> content
    text.gsub!( /<redpre#(\d+)>/ ) { @pre_list[Regexp.last_match( 1 ).to_i] }
end
1121 1122
# Runs the inline rules over +text+: first every rule in @rules whose name
# starts with "inline_", then every rule whose name starts with "glyphs_".
# Each rule is invoked as a method of the same name with +text+.
def inline( text )
    [/^inline_/, /^glyphs_/].each do |prefix|
        @rules.each do |rule|
            next unless rule.to_s =~ prefix
            method( rule ).call( text )
        end
    end
end
1129 1130
# Looks up +text+ in H_ALGN_VALS and returns the entry.
# inline_textile_image passes the image alignment marker here and uses the
# result as a CSS float value.
# NOTE(review): H_ALGN_VALS is defined outside this section -- confirm its keys.
def h_align( text )
    H_ALGN_VALS[text]
end
1133 1134
# Looks up +text+ in V_ALGN_VALS and returns the entry.
# NOTE(review): V_ALGN_VALS is defined outside this section; presumably the
# vertical counterpart of h_align -- confirm.
def v_align( text )
    V_ALGN_VALS[text]
end
1137 1138
# Builds the HTML for a help link that opens the Textile reference in a
# popup window.
#
# name    - the link text.
# windowW - popup width (any object; interpolated with to_s).
# windowH - popup height (any object; interpolated with to_s).
# helpvar - anchor inside the reference page; defaults to +name+.
#
# The body used to reference an undefined +helpvar+, so every call raised
# NameError; it is now an optional trailing parameter. Interpolation also
# lets Integer sizes be passed, where String#+ raised TypeError.
#
# Returns the link markup as a String.
def textile_popup_help( name, windowW, windowH, helpvar = name )
    onclick = "window.open(this.href, 'popupwindow', 'width=#{ windowW },height=#{ windowH },scrollbars,resizable'); return false;"
    " <a target=\"_blank\" href=\"http://hobix.com/textile/##{ helpvar }\" onclick=\"#{ onclick }\">#{ name }</a><br />"
end
1141 1142
1142 1143 # HTML cleansing stuff
# Default whitelist used by clean_html. Keys are the tag names that are
# kept; each value lists the attributes kept on that tag. A nil or empty
# list means the tag is kept with no attributes at all.
BASIC_TAGS = {
    'a' => ['href', 'title'],
    'img' => ['src', 'alt', 'title'],
    'br' => [],
    'i' => nil,
    'u' => nil,
    'b' => nil,
    'pre' => nil,
    'kbd' => nil,
    'code' => ['lang'],
    'cite' => nil,
    'strong' => nil,
    'em' => nil,
    'ins' => nil,
    'sup' => nil,
    'sub' => nil,
    'del' => nil,
    'table' => nil,
    'tr' => nil,
    'td' => ['colspan', 'rowspan'],
    'th' => nil,
    'ol' => nil,
    'ul' => nil,
    'li' => nil,
    'p' => nil,
    'h1' => nil,
    'h2' => nil,
    'h3' => nil,
    'h4' => nil,
    'h5' => nil,
    'h6' => nil,
    'blockquote' => ['cite']
}
1176 1177
# Strips +text+ (in place) down to a whitelist of HTML tags and attributes.
#
# text - the HTML String to clean.
# tags - Hash of allowed tag name => Array of allowed attribute names
#        (nil for "no attributes"). Defaults to BASIC_TAGS.
#
# Tags that are not whitelisted are replaced by a single space. Allowed
# tags are rebuilt from scratch with only their whitelisted attributes,
# each re-quoted with double quotes. A +src+ value with a scheme other
# than http(s) drops the attribute.
#
# Returns the result of the final gsub! (nil when no tag was found).
def clean_html( text, tags = BASIC_TAGS )
    text.gsub!( /<!\[CDATA\[/, '' )
    text.gsub!( /<(\/*)(\w+)([^>]*)>/ ) do
        raw = Regexp.last_match
        tag = raw[2].downcase
        if tags.has_key? tag
            pcs = [tag]
            ( tags[tag] || [] ).each do |prop|
                # try double-quoted, single-quoted, then unquoted values
                ['"', "'", ''].each do |q|
                    q2 = ( q != '' ? q : '\s' )
                    next unless raw[3] =~ /#{prop}\s*=\s*#{q}([^#{q2}]+)#{q}/i
                    # Keep the value in a local: the scheme check below runs
                    # another match, and $1 used to be read after it had been
                    # reset, raising NoMethodError for every accepted src.
                    attrv = Regexp.last_match( 1 )
                    # Drop a src with a non-http scheme altogether. Moving on
                    # to the next quote style instead would let the unquoted
                    # pattern re-match the same value with its quotes attached.
                    break if prop == 'src' && attrv =~ %r{^(?!http)\w+:}
                    # a backslash does not escape a quote in HTML; use the entity
                    pcs << "#{prop}=\"#{attrv.gsub( '"', '&quot;' )}\""
                    break
                end
            end
            "<#{raw[1]}#{pcs.join " "}>"
        else
            " "
        end
    end
end
1201 1202
# Tag names that escape_html_tags leaves untouched; every other tag is
# HTML-escaped.
ALLOWED_TAGS = %w(redpre pre code notextile)
1203 1204
# Escapes every HTML tag in +text+ (in place) whose name is not listed in
# ALLOWED_TAGS: the opening "<" becomes "&lt;" and the closing ">", when
# present, becomes "&gt;". Allowed tags are left as they are.
#
# Uses String#empty? rather than ActiveSupport's blank?: the closing
# capture is always a String ("" or ">"), so the two are equivalent and
# this method no longer needs ActiveSupport to be loaded.
def escape_html_tags(text)
    text.gsub!(%r{<(\/?([!\w]+)[^<>\n]*)(>?)}) do
        match = Regexp.last_match
        inner, tag_name, closer = match[1], match[2], match[3]
        if ALLOWED_TAGS.include?(tag_name)
            "<#{inner}#{closer}"
        else
            "&lt;#{inner}#{'&gt;' unless closer.empty?}"
        end
    end
end
1207 1208 end
1208 1209
@@ -1,456 +1,492
1 1 # Redmine - project management software
2 2 # Copyright (C) 2006-2013 Jean-Philippe Lang
3 3 #
4 4 # This program is free software; you can redistribute it and/or
5 5 # modify it under the terms of the GNU General Public License
6 6 # as published by the Free Software Foundation; either version 2
7 7 # of the License, or (at your option) any later version.
8 8 #
9 9 # This program is distributed in the hope that it will be useful,
10 10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 12 # GNU General Public License for more details.
13 13 #
14 14 # You should have received a copy of the GNU General Public License
15 15 # along with this program; if not, write to the Free Software
16 16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 17
18 18 require File.expand_path('../../../../../test_helper', __FILE__)
19 19 require 'digest/md5'
20 20
21 21 class Redmine::WikiFormatting::TextileFormatterTest < ActionView::TestCase
22 22
# Stores the formatter class under test; each test instantiates it with
# the text to convert.
def setup
  @formatter = Redmine::WikiFormatting::Textile::Formatter
end
26 26
# Textile phrase modifier characters mapped to the HTML tag each one is
# expected to produce.
MODIFIERS = {
  "*" => 'strong', # bold
  "_" => 'em', # italic
  "+" => 'ins', # underline
  "-" => 'del', # deleted
  "^" => 'sup', # superscript
  "~" => 'sub' # subscript
}
35 35
# The bold modifier in different positions, with embedded asterisks and
# with a CSS class.
def test_modifiers
  expectations = {
    '*bold*' => '<strong>bold</strong>',
    'before *bold*' => 'before <strong>bold</strong>',
    '*bold* after' => '<strong>bold</strong> after',
    '*two words*' => '<strong>two words</strong>',
    '*two*words*' => '<strong>two*words</strong>',
    '*two * words*' => '<strong>two * words</strong>',
    '*two* *words*' => '<strong>two</strong> <strong>words</strong>',
    '*(two)* *(words)*' => '<strong>(two)</strong> <strong>(words)</strong>',
    # with class
    '*(foo)two words*' => '<strong class="foo">two words</strong>'
  }
  assert_html_output(expectations)
end
50 50
# Every ordered pair of distinct modifiers must nest: the outer modifier
# wraps the inner one in both the Textile source and the HTML.
def test_modifiers_combination
  modifiers = MODIFIERS.to_a
  modifiers.product(modifiers).each do |(inner, inner_tag), (outer, outer_tag)|
    next if inner == outer
    source = "#{outer}#{inner}Phrase modifiers#{inner}#{outer}"
    markup = "<#{outer_tag}><#{inner_tag}>Phrase modifiers</#{inner_tag}></#{outer_tag}>"
    assert_html_output source => markup
  end
end
61 61
# Valid inline styles on a paragraph are kept and normalized.
def test_styles
  single_style = {
    'p{color:red}. text' => '<p style="color:red;">text</p>',
    'p{color:red;}. text' => '<p style="color:red;">text</p>',
    'p{color: red}. text' => '<p style="color: red;">text</p>',
    'p{color:#f00}. text' => '<p style="color:#f00;">text</p>',
    'p{color:#ff0000}. text' => '<p style="color:#ff0000;">text</p>',
    'p{border:10px}. text' => '<p style="border:10px;">text</p>',
    'p{border:10}. text' => '<p style="border:10;">text</p>',
    'p{border:10%}. text' => '<p style="border:10%;">text</p>',
    'p{border:10em}. text' => '<p style="border:10em;">text</p>',
    'p{border:1.5em}. text' => '<p style="border:1.5em;">text</p>',
    'p{border-left:1px}. text' => '<p style="border-left:1px;">text</p>',
    'p{border-right:1px}. text' => '<p style="border-right:1px;">text</p>',
    'p{border-top:1px}. text' => '<p style="border-top:1px;">text</p>',
    'p{border-bottom:1px}. text' => '<p style="border-bottom:1px;">text</p>',
  }
  multiple_styles = {
    'p{color:red; border-top:1px}. text' => '<p style="color:red;border-top:1px;">text</p>',
    'p{color:red ; border-top:1px}. text' => '<p style="color:red;border-top:1px;">text</p>',
    'p{color:red;border-top:1px}. text' => '<p style="color:red;border-top:1px;">text</p>',
  }
  multiple_values = {
    'p{border:1px solid red;}. text' => '<p style="border:1px solid red;">text</p>',
    'p{border-top-left-radius: 10px 5px;}. text' => '<p style="border-top-left-radius: 10px 5px;">text</p>',
  }

  # the expected HTML already contains the paragraph, so no extra wrapping
  [single_style, multiple_styles, multiple_values].each do |cases|
    assert_html_output(cases, false)
  end
end
94 94
# Unknown properties and suspicious values are removed from the style
# attribute; valid declarations next to them survive.
def test_invalid_styles_should_be_filtered
  filtered = {
    'p{invalid}. text' => '<p>text</p>',
    'p{invalid:red}. text' => '<p>text</p>',
    'p{color:(red)}. text' => '<p>text</p>',
    'p{color:red;invalid:blue}. text' => '<p style="color:red;">text</p>',
    'p{invalid:blue;color:red}. text' => '<p style="color:red;">text</p>',
    'p{color:"}. text' => '<p>p{color:"}. text</p>',
  }
  assert_html_output(filtered, false)
end
105 105
# @...@ becomes <code>, and markup inside it is escaped.
def test_inline_code
  samples = {
    'this is @some code@' => 'this is <code>some code</code>',
    '@<Location /redmine>@' => '<code>&lt;Location /redmine&gt;</code>'
  }
  assert_html_output(samples)
end
112 112
113 113 def test_nested_lists
114 114 raw = <<-RAW
115 115 # Item 1
116 116 # Item 2
117 117 ** Item 2a
118 118 ** Item 2b
119 119 # Item 3
120 120 ** Item 3a
121 121 RAW
122 122
123 123 expected = <<-EXPECTED
124 124 <ol>
125 125 <li>Item 1</li>
126 126 <li>Item 2
127 127 <ul>
128 128 <li>Item 2a</li>
129 129 <li>Item 2b</li>
130 130 </ul>
131 131 </li>
132 132 <li>Item 3
133 133 <ul>
134 134 <li>Item 3a</li>
135 135 </ul>
136 136 </li>
137 137 </ol>
138 138 EXPECTED
139 139
140 140 assert_equal expected.gsub(%r{\s+}, ''), to_html(raw).gsub(%r{\s+}, '')
141 141 end
142 142
# Raw HTML tags in the source are escaped in the output.
def test_escaping
  source = 'this is a <script>'
  assert_html_output(source => 'this is a &lt;script&gt;')
end
148 148
# A backslash followed by digits in a heading must come through unchanged.
def test_use_of_backslashes_followed_by_numbers_in_headers
  heading = { 'h1. 2009\02\09' => '<h1>2009\02\09</h1>' }
  assert_html_output(heading, false)
end
154 154
# Text between two "--" sequences is not treated as deleted text.
def test_double_dashes_should_not_strikethrough
  samples = {
    'double -- dashes -- test' => 'double -- dashes -- test',
    'double -- *dashes* -- test' => 'double -- <strong>dashes</strong> -- test'
  }
  assert_html_output(samples)
end
161 161
# ABC(expansion) becomes an <acronym> with the expansion as its title;
# double quotes in the expansion are escaped.
def test_acronyms
  samples = {
    'this is an acronym: GPL(General Public License)' => 'this is an acronym: <acronym title="General Public License">GPL</acronym>',
    '2 letters JP(Jean-Philippe) acronym' => '2 letters <acronym title="Jean-Philippe">JP</acronym> acronym',
    'GPL(This is a double-quoted "title")' => '<acronym title="This is a double-quoted &quot;title&quot;">GPL</acronym>'
  }
  assert_html_output(samples)
end
169 169
170 170 def test_blockquote
171 171 # orig raw text
172 172 raw = <<-RAW
173 173 John said:
174 174 > Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Maecenas sed libero.
175 175 > Nullam commodo metus accumsan nulla. Curabitur lobortis dui id dolor.
176 176 > * Donec odio lorem,
177 177 > * sagittis ac,
178 178 > * malesuada in,
179 179 > * adipiscing eu, dolor.
180 180 >
181 181 > >Nulla varius pulvinar diam. Proin id arcu id lorem scelerisque condimentum. Proin vehicula turpis vitae lacus.
182 182 > Proin a tellus. Nam vel neque.
183 183
184 184 He's right.
185 185 RAW
186 186
187 187 # expected html
188 188 expected = <<-EXPECTED
189 189 <p>John said:</p>
190 190 <blockquote>
191 191 Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Maecenas sed libero.<br />
192 192 Nullam commodo metus accumsan nulla. Curabitur lobortis dui id dolor.
193 193 <ul>
194 194 <li>Donec odio lorem,</li>
195 195 <li>sagittis ac,</li>
196 196 <li>malesuada in,</li>
197 197 <li>adipiscing eu, dolor.</li>
198 198 </ul>
199 199 <blockquote>
200 200 <p>Nulla varius pulvinar diam. Proin id arcu id lorem scelerisque condimentum. Proin vehicula turpis vitae lacus.</p>
201 201 </blockquote>
202 202 <p>Proin a tellus. Nam vel neque.</p>
203 203 </blockquote>
204 204 <p>He's right.</p>
205 205 EXPECTED
206 206
207 207 assert_equal expected.gsub(%r{\s+}, ''), to_html(raw).gsub(%r{\s+}, '')
208 208 end
209 209
210 210 def test_table
211 211 raw = <<-RAW
212 212 This is a table with empty cells:
213 213
214 214 |cell11|cell12||
215 215 |cell21||cell23|
216 216 |cell31|cell32|cell33|
217 217 RAW
218 218
219 219 expected = <<-EXPECTED
220 220 <p>This is a table with empty cells:</p>
221 221
222 222 <table>
223 223 <tr><td>cell11</td><td>cell12</td><td></td></tr>
224 224 <tr><td>cell21</td><td></td><td>cell23</td></tr>
225 225 <tr><td>cell31</td><td>cell32</td><td>cell33</td></tr>
226 226 </table>
227 227 EXPECTED
228 228
229 229 assert_equal expected.gsub(%r{\s+}, ''), to_html(raw).gsub(%r{\s+}, '')
230 230 end
231 231
232 232 def test_table_with_line_breaks
233 233 raw = <<-RAW
234 234 This is a table with line breaks:
235 235
236 236 |cell11
237 237 continued|cell12||
238 238 |-cell21-||cell23
239 239 cell23 line2
240 240 cell23 *line3*|
241 241 |cell31|cell32
242 242 cell32 line2|cell33|
243 243
244 244 RAW
245 245
246 246 expected = <<-EXPECTED
247 247 <p>This is a table with line breaks:</p>
248 248
249 249 <table>
250 250 <tr>
251 251 <td>cell11<br />continued</td>
252 252 <td>cell12</td>
253 253 <td></td>
254 254 </tr>
255 255 <tr>
256 256 <td><del>cell21</del></td>
257 257 <td></td>
258 258 <td>cell23<br/>cell23 line2<br/>cell23 <strong>line3</strong></td>
259 259 </tr>
260 260 <tr>
261 261 <td>cell31</td>
262 262 <td>cell32<br/>cell32 line2</td>
263 263 <td>cell33</td>
264 264 </tr>
265 265 </table>
266 266 EXPECTED
267 267
268 268 assert_equal expected.gsub(%r{\s+}, ''), to_html(raw).gsub(%r{\s+}, '')
269 269 end
270 270
271 def test_tables_with_lists
272 raw = <<-RAW
273 This is a table with lists:
274
275 |cell11|cell12|
276 |cell21|ordered list
277 # item
278 # item 2|
279 |cell31|unordered list
280 * item
281 * item 2|
282
283 RAW
284
285 expected = <<-EXPECTED
286 <p>This is a table with lists:</p>
287
288 <table>
289 <tr>
290 <td>cell11</td>
291 <td>cell12</td>
292 </tr>
293 <tr>
294 <td>cell21</td>
295 <td>ordered list<br /># item<br /># item 2</td>
296 </tr>
297 <tr>
298 <td>cell31</td>
299 <td>unordered list<br />* item<br />* item 2</td>
300 </tr>
301 </table>
302 EXPECTED
303
304 assert_equal expected.gsub(%r{\s+}, ''), to_html(raw).gsub(%r{\s+}, '')
305 end
306
# Adjacent bracketed words are plain text and must be left alone.
def test_textile_should_not_mangle_brackets
  source = '[msg1][msg2]'
  assert_equal '<p>[msg1][msg2]</p>', to_html(source)
end
274 310
# An image URL carrying an entity-encoded onclick handler must end up
# fully escaped inside the src attribute.
def test_textile_should_escape_image_urls
  # this is onclick="alert('XSS');" in encoded form
  malicious = '!/images/comment.png"onclick=&#x61;&#x6c;&#x65;&#x72;&#x74;&#x28;&#x27;&#x58;&#x53;&#x53;&#x27;&#x29;;&#x22;!'
  escaped = '<p><img src="/images/comment.png&quot;onclick=&amp;#x61;&amp;#x6c;&amp;#x65;&amp;#x72;&amp;#x74;&amp;#x28;&amp;#x27;&amp;#x58;&amp;#x53;&amp;#x53;&amp;#x27;&amp;#x29;;&amp;#x22;" alt="" /></p>'
  whitespace = %r{\s+}
  assert_equal escaped.gsub(whitespace, ''), to_html(malicious).gsub(whitespace, '')
end
281 317
282 318
283 319 STR_WITHOUT_PRE = [
284 320 # 0
285 321 "h1. Title
286 322
287 323 Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Maecenas sed libero.",
288 324 # 1
289 325 "h2. Heading 2
290 326
291 327 Maecenas sed elit sit amet mi accumsan vestibulum non nec velit. Proin porta tincidunt lorem, consequat rhoncus dolor fermentum in.
292 328
293 329 Cras ipsum felis, ultrices at porttitor vel, faucibus eu nunc.",
294 330 # 2
295 331 "h2. Heading 2
296 332
297 333 Morbi facilisis accumsan orci non pharetra.
298 334
299 335 h3. Heading 3
300 336
301 337 Nulla nunc nisi, egestas in ornare vel, posuere ac libero.",
302 338 # 3
303 339 "h3. Heading 3
304 340
305 341 Praesent eget turpis nibh, a lacinia nulla.",
306 342 # 4
307 343 "h2. Heading 2
308 344
309 345 Ut rhoncus elementum adipiscing."]
310 346
311 347 TEXT_WITHOUT_PRE = STR_WITHOUT_PRE.join("\n\n").freeze
312 348
# get_section returns the text of the section that starts at the given
# heading index; indexes without a section yield an empty string.
def test_get_section_should_return_the_requested_section_and_its_hash
  expected_sections = {
    2 => STR_WITHOUT_PRE[1],
    3 => STR_WITHOUT_PRE[2..3].join("\n\n"),
    5 => STR_WITHOUT_PRE[3],
    6 => STR_WITHOUT_PRE[4],
    0 => '',
    10 => ''
  }
  expected_sections.each do |index, section|
    assert_section_with_hash section, TEXT_WITHOUT_PRE, index
  end
end
322 358
# update_section replaces exactly the requested section and leaves the
# text alone when the index matches no section.
def test_update_section_should_update_the_requested_section
  replacement = "New text"

  expected_parts = {
    2 => [STR_WITHOUT_PRE[0], replacement, STR_WITHOUT_PRE[2..4]],
    3 => [STR_WITHOUT_PRE[0..1], replacement, STR_WITHOUT_PRE[4]],
    5 => [STR_WITHOUT_PRE[0..2], replacement, STR_WITHOUT_PRE[4]],
    6 => [STR_WITHOUT_PRE[0..3], replacement]
  }
  expected_parts.each do |index, parts|
    assert_equal parts.flatten.join("\n\n"), @formatter.new(TEXT_WITHOUT_PRE).update_section(index, replacement)
  end

  [0, 10].each do |index|
    assert_equal TEXT_WITHOUT_PRE, @formatter.new(TEXT_WITHOUT_PRE).update_section(index, replacement)
  end
end
334 370
# Passing the MD5 of the current section content lets the update through.
def test_update_section_with_hash_should_update_the_requested_section
  replacement = "New text"
  current_hash = Digest::MD5.hexdigest(STR_WITHOUT_PRE[1])
  expected = [STR_WITHOUT_PRE[0], replacement, STR_WITHOUT_PRE[2..4]].flatten.join("\n\n")

  assert_equal expected, @formatter.new(TEXT_WITHOUT_PRE).update_section(2, replacement, current_hash)
end
341 377
# A hash that does not match the current section content is rejected.
def test_update_section_with_wrong_hash_should_raise_an_error
  stale_hash = Digest::MD5.hexdigest("Old text")
  assert_raise Redmine::WikiFormatting::StaleSectionError do
    @formatter.new(TEXT_WITHOUT_PRE).update_section(2, "New text", stale_hash)
  end
end
347 383
348 384 STR_WITH_PRE = [
349 385 # 0
350 386 "h1. Title
351 387
352 388 Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Maecenas sed libero.",
353 389 # 1
354 390 "h2. Heading 2
355 391
356 392 <pre><code class=\"ruby\">
357 393 def foo
358 394 end
359 395 </code></pre>
360 396
361 397 <pre><code><pre><code class=\"ruby\">
362 398 Place your code here.
363 399 </code></pre>
364 400 </code></pre>
365 401
366 402 Morbi facilisis accumsan orci non pharetra.
367 403
368 404 <pre>
369 405 Pre Content:
370 406
371 407 h2. Inside pre
372 408
373 409 <tag> inside pre block
374 410
375 411 Morbi facilisis accumsan orci non pharetra.
376 412 </pre>",
377 413 # 2
378 414 "h3. Heading 3
379 415
380 416 Nulla nunc nisi, egestas in ornare vel, posuere ac libero."]
381 417
# Headings inside <pre> blocks must not be counted as sections.
def test_get_section_should_ignore_pre_content
  text = STR_WITH_PRE.join("\n\n")

  {
    2 => STR_WITH_PRE[1..2].join("\n\n"),
    3 => STR_WITH_PRE[2]
  }.each do |index, section|
    assert_section_with_hash section, text, index
  end
end
388 424
# Updating one section must return the <pre> content of the other
# sections exactly as it was written.
def test_update_section_should_not_escape_pre_content_outside_section
  text = STR_WITH_PRE.join("\n\n")
  replacement = "New text"
  expected = [STR_WITH_PRE[0..1], "New text"].flatten.join("\n\n")

  assert_equal expected, @formatter.new(text).update_section(3, replacement)
end
396 432
397 433 def test_get_section_should_support_lines_with_spaces_before_heading
398 434 # the lines after Content 2 and Heading 4 contain a space
399 435 text = <<-STR
400 436 h1. Heading 1
401 437
402 438 Content 1
403 439
404 440 h1. Heading 2
405 441
406 442 Content 2
407 443
408 444 h1. Heading 3
409 445
410 446 Content 3
411 447
412 448 h1. Heading 4
413 449
414 450 Content 4
415 451 STR
416 452
417 453 [1, 2, 3, 4].each do |index|
418 454 assert_match /\Ah1. Heading #{index}.+Content #{index}/m, @formatter.new(text).get_section(index).first
419 455 end
420 456 end
421 457
422 458 def test_get_section_should_support_headings_starting_with_a_tab
423 459 text = <<-STR
424 460 h1.\tHeading 1
425 461
426 462 Content 1
427 463
428 464 h1. Heading 2
429 465
430 466 Content 2
431 467 STR
432 468
433 469 assert_match /\Ah1.\tHeading 1\s+Content 1\z/, @formatter.new(text).get_section(1).first
434 470 end
435 471
436 472 private
437 473
# Asserts that each Textile source in +to_test+ (source => expected HTML)
# renders to the expected markup. When +expect_paragraph+ is true the
# expected HTML is wrapped in <p>...</p> before comparing.
def assert_html_output(to_test, expect_paragraph = true)
  to_test.each do |text, expected|
    wanted = expect_paragraph ? "<p>#{expected}</p>" : expected
    failure = "Formatting the following text failed:\n===\n#{text}\n===\n"
    assert_equal(wanted, @formatter.new(text).to_html, failure)
  end
end
443 479
# Renders +text+ with the formatter under test and returns the HTML.
def to_html(text)
  @formatter.new(text).to_html
end
447 483
# Asserts that get_section(index) on +text+ returns a two-element Array:
# the +expected+ section content followed by its MD5 hex digest.
def assert_section_with_hash(expected, text, index)
  section = @formatter.new(text).get_section(index)
  expected_digest = Digest::MD5.hexdigest(expected)

  assert_kind_of Array, section
  assert_equal 2, section.size
  assert_equal expected, section.first, "section content did not match"
  assert_equal expected_digest, section.last, "section hash did not match"
end
456 492 end
General Comments 0
You need to be logged in to leave comments. Login now