##// END OF EJS Templates
Stricter textile links parsing (#2417)....
Jean-Philippe Lang -
r2210:8b7fb7213f0e
parent child
Show More
@@ -1,1174 +1,1177
1 1 # vim:ts=4:sw=4:
2 2 # = RedCloth - Textile and Markdown Hybrid for Ruby
3 3 #
4 4 # Homepage:: http://whytheluckystiff.net/ruby/redcloth/
5 5 # Author:: why the lucky stiff (http://whytheluckystiff.net/)
6 6 # Copyright:: (cc) 2004 why the lucky stiff (and his puppet organizations.)
7 7 # License:: BSD
8 8 #
9 9 # (see http://hobix.com/textile/ for a Textile Reference.)
10 10 #
11 11 # Based on (and also inspired by) both:
12 12 #
13 13 # PyTextile: http://diveintomark.org/projects/textile/textile.py.txt
14 14 # Textism for PHP: http://www.textism.com/tools/textile/
15 15 #
16 16 #
17 17
18 18 # = RedCloth
19 19 #
20 20 # RedCloth is a Ruby library for converting Textile and/or Markdown
21 21 # into HTML. You can use either format, intermingled or separately.
22 22 # You can also extend RedCloth to honor your own custom text stylings.
23 23 #
24 24 # RedCloth users are encouraged to use Textile if they are generating
25 25 # HTML and to use Markdown if others will be viewing the plain text.
26 26 #
27 27 # == What is Textile?
28 28 #
29 29 # Textile is a simple formatting style for text
30 30 # documents, loosely based on some HTML conventions.
31 31 #
32 32 # == Sample Textile Text
33 33 #
34 34 # h2. This is a title
35 35 #
36 36 # h3. This is a subhead
37 37 #
38 38 # This is a bit of paragraph.
39 39 #
40 40 # bq. This is a blockquote.
41 41 #
42 42 # = Writing Textile
43 43 #
44 44 # A Textile document consists of paragraphs. Paragraphs
45 45 # can be specially formatted by adding a small instruction
46 46 # to the beginning of the paragraph.
47 47 #
48 48 # h[n]. Header of size [n].
49 49 # bq. Blockquote.
50 50 # # Numeric list.
51 51 # * Bulleted list.
52 52 #
53 53 # == Quick Phrase Modifiers
54 54 #
55 55 # Quick phrase modifiers are also included, to allow formatting
56 56 # of small portions of text within a paragraph.
57 57 #
58 58 # \_emphasis\_
59 59 # \_\_italicized\_\_
60 60 # \*strong\*
61 61 # \*\*bold\*\*
62 62 # ??citation??
63 63 # -deleted text-
64 64 # +inserted text+
65 65 # ^superscript^
66 66 # ~subscript~
67 67 # @code@
68 68 # %(classname)span%
69 69 #
70 70 # ==notextile== (leave text alone)
71 71 #
72 72 # == Links
73 73 #
74 74 # To make a hypertext link, put the link text in "quotation
75 75 # marks" followed immediately by a colon and the URL of the link.
76 76 #
77 77 # Optional: text in (parentheses) following the link text,
78 78 # but before the closing quotation mark, will become a Title
79 79 # attribute for the link, visible as a tool tip when a cursor is above it.
80 80 #
81 81 # Example:
82 82 #
83 83 # "This is a link (This is a title) ":http://www.textism.com
84 84 #
85 85 # Will become:
86 86 #
87 87 # <a href="http://www.textism.com" title="This is a title">This is a link</a>
88 88 #
89 89 # == Images
90 90 #
91 91 # To insert an image, put the URL for the image inside exclamation marks.
92 92 #
93 93 # Optional: text that immediately follows the URL in (parentheses) will
94 94 # be used as the Alt text for the image. Images on the web should always
95 95 # have descriptive Alt text for the benefit of readers using non-graphical
96 96 # browsers.
97 97 #
98 98 # Optional: place a colon followed by a URL immediately after the
99 99 # closing ! to make the image into a link.
100 100 #
101 101 # Example:
102 102 #
103 103 # !http://www.textism.com/common/textist.gif(Textist)!
104 104 #
105 105 # Will become:
106 106 #
107 107 # <img src="http://www.textism.com/common/textist.gif" alt="Textist" />
108 108 #
109 109 # With a link:
110 110 #
111 111 # !/common/textist.gif(Textist)!:http://textism.com
112 112 #
113 113 # Will become:
114 114 #
115 115 # <a href="http://textism.com"><img src="/common/textist.gif" alt="Textist" /></a>
116 116 #
117 117 # == Defining Acronyms
118 118 #
119 119 # HTML allows authors to define acronyms via the <acronym> tag. The definition appears as a
120 120 # tool tip when a cursor hovers over the acronym. A crucial aid to clear writing,
121 121 # this should be used at least once for each acronym in documents where they appear.
122 122 #
123 123 # To quickly define an acronym in Textile, place the full text in (parentheses)
124 124 # immediately following the acronym.
125 125 #
126 126 # Example:
127 127 #
128 128 # ACLU(American Civil Liberties Union)
129 129 #
130 130 # Will become:
131 131 #
132 132 # <acronym title="American Civil Liberties Union">ACLU</acronym>
133 133 #
134 134 # == Adding Tables
135 135 #
136 136 # In Textile, simple tables can be added by separating each column by
137 137 # a pipe.
138 138 #
139 139 # |a|simple|table|row|
140 140 # |And|Another|table|row|
141 141 #
142 142 # Attributes are defined by style definitions in parentheses.
143 143 #
144 144 # table(border:1px solid black).
145 145 # (background:#ddd;color:red). |{}| | | |
146 146 #
147 147 # == Using RedCloth
148 148 #
149 149 # RedCloth is simply an extension of the String class, which can handle
150 150 # Textile formatting. Use it like a String and output HTML with its
151 151 # RedCloth#to_html method.
152 152 #
153 153 # doc = RedCloth.new "
154 154 #
155 155 # h2. Test document
156 156 #
157 157 # Just a simple test."
158 158 #
159 159 # puts doc.to_html
160 160 #
161 161 # By default, RedCloth uses both Textile and Markdown formatting, with
162 162 # Textile formatting taking precedence. If you want to turn off Markdown
163 163 # formatting, to boost speed and limit the processor:
164 164 #
165 165 # class RedCloth::Textile.new( str )
166 166
167 167 class RedCloth3 < String
168 168
169 169 VERSION = '3.0.4'
170 170 DEFAULT_RULES = [:textile, :markdown]
171 171
172 172 #
173 173 # Two accessors for setting security restrictions.
174 174 #
175 175 # This is a nice thing if you're using RedCloth for
176 176 # formatting in public places (e.g. Wikis) where you
177 177 # don't want users to abuse HTML for bad things.
178 178 #
179 179 # If +:filter_html+ is set, HTML which wasn't
180 180 # created by the Textile processor will be escaped.
181 181 #
182 182 # If +:filter_styles+ is set, it will also disable
183 183 # the style markup specifier. ('{color: red}')
184 184 #
185 185 attr_accessor :filter_html, :filter_styles
186 186
187 187 #
188 188 # Accessor for toggling hard breaks.
189 189 #
190 190 # If +:hard_breaks+ is set, single newlines will
191 191 # be converted to HTML break tags. This is the
192 192 # default behavior for traditional RedCloth.
193 193 #
194 194 attr_accessor :hard_breaks
195 195
196 196 # Accessor for toggling lite mode.
197 197 #
198 198 # In lite mode, block-level rules are ignored. This means
199 199 # that tables, paragraphs, lists, and such aren't available.
200 200 # Only the inline markup for bold, italics, entities and so on.
201 201 #
202 202 # r = RedCloth.new( "And then? She *fell*!", [:lite_mode] )
203 203 # r.to_html
204 204 # #=> "And then? She <strong>fell</strong>!"
205 205 #
206 206 attr_accessor :lite_mode
207 207
208 208 #
209 209 # Accessor for toggling span caps.
210 210 #
211 211 # Textile places `span' tags around capitalized
212 212 # words by default, but this wreaks havoc on Wikis.
213 213 # If +:no_span_caps+ is set, this will be
214 214 # suppressed.
215 215 #
216 216 attr_accessor :no_span_caps
217 217
218 218 #
219 219 # Establishes the markup precedence. Available rules include:
220 220 #
221 221 # == Textile Rules
222 222 #
223 223 # The following textile rules can be set individually. Or add the complete
224 224 # set of rules with the single :textile rule, which supplies the rule set in
225 225 # the following precedence:
226 226 #
227 227 # refs_textile:: Textile references (i.e. [hobix]http://hobix.com/)
228 228 # block_textile_table:: Textile table block structures
229 229 # block_textile_lists:: Textile list structures
230 230 # block_textile_prefix:: Textile blocks with prefixes (i.e. bq., h2., etc.)
231 231 # inline_textile_image:: Textile inline images
232 232 # inline_textile_link:: Textile inline links
233 233 # inline_textile_span:: Textile inline spans
234 234 # glyphs_textile:: Textile entities (such as em-dashes and smart quotes)
235 235 #
236 236 # == Markdown
237 237 #
238 238 # refs_markdown:: Markdown references (for example: [hobix]: http://hobix.com/)
239 239 # block_markdown_setext:: Markdown setext headers
240 240 # block_markdown_atx:: Markdown atx headers
241 241 # block_markdown_rule:: Markdown horizontal rules
242 242 # block_markdown_bq:: Markdown blockquotes
243 243 # block_markdown_lists:: Markdown lists
244 244 # inline_markdown_link:: Markdown links
245 245 attr_accessor :rules
246 246
# Builds a new RedCloth object from _string_ and switches on every
# accessor named in _restrictions_ (each name is sent as "<name>=" with
# +true+).
#
#   r = RedCloth.new( "h1. A <b>bold</b> man", [:filter_html] )
#   r.to_html
#     #=>"<h1>A &lt;b&gt;bold&lt;/b&gt; man</h1>"
#
def initialize( string, restrictions = [] )
    restrictions.each do |flag|
        # e.g. :filter_html is applied through the #filter_html= writer
        writer = method( "#{ flag }=" )
        writer.call( true )
    end
    super( string )
end
258 258
#
# Generates HTML from the Textile contents.
#
# With no arguments DEFAULT_RULES is used.  Otherwise each argument is a
# rule name; :textile and :markdown expand to their full rule sets and
# any other value is kept as given.
#
#   r = RedCloth.new( "And then? She *fell*!" )
#   r.to_html
#
def to_html( *rules )
    rules = DEFAULT_RULES if rules.empty?

    # every step below edits this copy in place; the receiver is untouched
    text = self.dup

    @urlrefs = {}
    @shelf = []

    rule_sets = {
        :textile  => [:refs_textile, :block_textile_table, :block_textile_lists,
                      :block_textile_prefix, :inline_textile_image, :inline_textile_link,
                      :inline_textile_code, :inline_textile_span, :glyphs_textile],
        :markdown => [:refs_markdown, :block_markdown_setext, :block_markdown_atx,
                      :block_markdown_rule, :block_markdown_bq, :block_markdown_lists,
                      :inline_markdown_reflink, :inline_markdown_link]
    }
    @rules = rules.collect { |rule| rule_sets.fetch( rule, rule ) }.flatten

    # standard clean up
    incoming_entities text
    clean_white_space text

    # start processor
    @pre_list = []
    rip_offtags text
    no_textile text
    escape_html_tags text
    hard_break text
    unless @lite_mode
        refs text
        # quotes must be handled before #blocks splits the text
        block_textile_quotes text
        blocks text
    end
    inline text
    smooth_offtags text

    # swap the shelved attribute strings back in
    retrieve text

    text.gsub!( /<\/?notextile>/, '' )
    # restore the ampersands parked by #incoming_entities
    text.gsub!( /x%x%/, '&#38;' )
    clean_html text if filter_html
    text.strip!
    text
end
318 318
319 319 #######
320 320 private
321 321 #######
#
# Mapping of 8-bit ASCII codes to HTML numerical entity equivalents.
# (from PyTextile)
#
# Each row becomes [character, entity]; a code point of 0 means "no
# equivalent" and maps to the empty string.
#
TEXTILE_TAGS = [
    [128, 8364], [129, 0], [130, 8218], [131, 402], [132, 8222], [133, 8230],
    [134, 8224], [135, 8225], [136, 710], [137, 8240], [138, 352], [139, 8249],
    [140, 338], [141, 0], [142, 0], [143, 0], [144, 0], [145, 8216], [146, 8217],
    [147, 8220], [148, 8221], [149, 8226], [150, 8211], [151, 8212], [152, 732],
    [153, 8482], [154, 353], [155, 8250], [156, 339], [157, 0], [158, 0], [159, 376]
].collect do |byte, codepoint|
    # "&##{...}" is deliberate: the first '#' is literal, the second opens
    # the interpolation.  "&#{...}" would drop the '#' and yield "&8364;".
    [byte.chr, codepoint.zero? ? "" : "&##{ codepoint };"]
end
337 337
338 338 #
339 339 # Regular expressions to convert to HTML.
340 340 #
# These are fragments that get interpolated into the larger patterns
# below (TABLE_RE, LISTS_RE, BLOCK_RE, LINK_RE, IMAGE_RE, QTAGS).
#
# A_HLGN / A_VLGN: horizontal and vertical alignment markers (see
# H_ALGN_VALS and V_ALGN_VALS for the characters involved).
341 341 A_HLGN = /(?:(?:<>|<|>|\=|[()]+)+)/
342 342 A_VLGN = /[\-^~]/
# C_CLAS: "(...)", C_LNGE: "[...]", C_STYL: "{...}" -- #pba reads these
# as class, lang and style respectively.
343 343 C_CLAS = '(?:\([^)]+\))'
344 344 C_LNGE = '(?:\[[^\[\]]+\])'
345 345 C_STYL = '(?:\{[^}]+\})'
# S_CSPN: backslash + digits, S_RSPN: slash + digits -- #pba reads these
# as colspan and rowspan for 'td' elements.
346 346 S_CSPN = '(?:\\\\\d+)'
347 347 S_RSPN = '(?:/\d+)'
# A, S and C combine the fragments above in the orders Textile accepts.
348 348 A = "(?:#{A_HLGN}?#{A_VLGN}?|#{A_VLGN}?#{A_HLGN}?)"
349 349 S = "(?:#{S_CSPN}?#{S_RSPN}|#{S_RSPN}?#{S_CSPN}?)"
350 350 C = "(?:#{C_CLAS}?#{C_STYL}?#{C_LNGE}?|#{C_STYL}?#{C_LNGE}?#{C_CLAS}?|#{C_LNGE}?#{C_STYL}?#{C_CLAS}?)"
# Punctuation sets, regexp-quoted so they can sit inside a character
# class.  The commented-out PUNCT is the wider set that was replaced.
351 351 # PUNCT = Regexp::quote( '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~' )
352 352 PUNCT = Regexp::quote( '!"#$%&\'*+,-./:;=?@\\^_`|~' )
353 353 PUNCT_NOQ = Regexp::quote( '!"#$&\',./:;=?@\\`|' )
354 354 PUNCT_Q = Regexp::quote( '*-_+^~%' )
# HYPERLINK: a run of non-space characters, then optional trailing
# characters captured separately, ending before whitespace, '<' or
# end of line.
355 355 HYPERLINK = '(\S+?)([^\w\s/;=\?]*?)(?=\s|<|$)'
356 356
# Text markup tags, don't conflict with block tags.
# #blocks consults this list: a block opening with a tag that is NOT
# listed here is treated as complex HTML and passed through untouched.
# ('br' used to be listed twice; the duplicate had no effect on include?.)
SIMPLE_HTML_TAGS = [
    'tt', 'b', 'i', 'big', 'small', 'em', 'strong', 'dfn', 'code',
    'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'a', 'img', 'br',
    'map', 'q', 'sub', 'sup', 'span', 'bdo'
]
363 363
# Quick phrase modifiers as [textile marker, HTML tag, rule type].
# The map! below replaces each row with
# [marker, HTML tag, compiled regexp, rule type], which is the shape
# #inline_textile_span iterates over.
QTAGS = [
    ['**', 'b', :limit],
    ['*', 'strong', :limit],
    ['??', 'cite', :limit],
    ['-', 'del', :limit],
    ['__', 'i', :limit],
    ['_', 'em', :limit],
    ['%', 'span', :limit],
    ['+', 'ins', :limit],
    ['^', 'sup', :limit],
    ['~', 'sub', :limit]
]
QTAGS.map! do |marker, html_tag, kind|
    quoted = Regexp::quote marker
    pattern =
        if kind == :limit
            # the opening marker must follow start of line, '>', whitespace
            # or '('; the closing one must precede punctuation, whitespace,
            # ')' or end of line
            /(^|[>\s\(])
            (#{quoted})
            (#{C})
            (?::(\S+?))?
            (\w|[^\s\-].*?[^\s\-])
            #{quoted}
            (?=[[:punct:]]|\s|\)|$)/x
        else
            # unanchored variant; no row above currently selects it
            /(#{quoted})
            (#{C})
            (?::(\S+))?
            (\w|[^\s\-].*?[^\s\-])
            #{quoted}/xm
        end
    [marker, html_tag, pattern, kind]
end
397 397
398 398 # Elements to handle
# Every row is commented out, so GLYPHS is an empty array.  The loop in
# #pgl that would apply these [regexp, replacement, optional toggle]
# rows is commented out as well; only the acronym rule survives there,
# re-implemented inline.
399 399 GLYPHS = [
400 400 # [ /([^\s\[{(>])?\'([dmst]\b|ll\b|ve\b|\s|:|$)/, '\1&#8217;\2' ], # single closing
401 401 # [ /([^\s\[{(>#{PUNCT_Q}][#{PUNCT_Q}]*)\'/, '\1&#8217;' ], # single closing
402 402 # [ /\'(?=[#{PUNCT_Q}]*(s\b|[\s#{PUNCT_NOQ}]))/, '&#8217;' ], # single closing
403 403 # [ /\'/, '&#8216;' ], # single opening
404 404 # [ /</, '&lt;' ], # less-than
405 405 # [ />/, '&gt;' ], # greater-than
406 406 # [ /([^\s\[{(])?"(\s|:|$)/, '\1&#8221;\2' ], # double closing
407 407 # [ /([^\s\[{(>#{PUNCT_Q}][#{PUNCT_Q}]*)"/, '\1&#8221;' ], # double closing
408 408 # [ /"(?=[#{PUNCT_Q}]*[\s#{PUNCT_NOQ}])/, '&#8221;' ], # double closing
409 409 # [ /"/, '&#8220;' ], # double opening
410 410 # [ /\b( )?\.{3}/, '\1&#8230;' ], # ellipsis
411 411 # [ /\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])/, '<acronym title="\2">\1</acronym>' ], # 3+ uppercase acronym
412 412 # [ /(^|[^"][>\s])([A-Z][A-Z0-9 ]+[A-Z0-9])([^<A-Za-z0-9]|$)/, '\1<span class="caps">\2</span>\3', :no_span_caps ], # 3+ uppercase caps
413 413 # [ /(\.\s)?\s?--\s?/, '\1&#8212;' ], # em dash
414 414 # [ /\s->\s/, ' &rarr; ' ], # right arrow
415 415 # [ /\s-\s/, ' &#8211; ' ], # en dash
416 416 # [ /(\d+) ?x ?(\d+)/, '\1&#215;\2' ], # dimension sign
417 417 # [ /\b ?[(\[]TM[\])]/i, '&#8482;' ], # trademark
418 418 # [ /\b ?[(\[]R[\])]/i, '&#174;' ], # registered
419 419 # [ /\b ?[(\[]C[\])]/i, '&#169;' ] # copyright
420 420 ]
421 421
# Textile horizontal alignment marker => CSS text-align / float value.
H_ALGN_VALS = Hash[
    '<',  'left',
    '=',  'center',
    '>',  'right',
    '<>', 'justify'
]

# Textile vertical alignment marker => CSS vertical-align value.
V_ALGN_VALS = Hash[
    '^', 'top',
    '-', 'middle',
    '~', 'bottom'
]
434 434
#
# Flexible HTML escaping.
#
# Escapes _str_ IN PLACE and returns it (a nil/false _str_ is returned
# as is).  '&', '<' and '>' are always escaped; '"' is escaped unless
# _mode_ is :NoQuotes; "'" is escaped only when _mode_ is :Quotes.
#
def htmlesc( str, mode=:Quotes )
    return str unless str

    # '&' must go first so the entities added below are not re-escaped
    table = [['&', '&amp;']]
    table << ['"', '&quot;'] if mode != :NoQuotes
    table << ["'", '&#039;'] if mode == :Quotes
    table << ['<', '&lt;'] << ['>', '&gt;']

    table.each { |raw, entity| str.gsub!( raw, entity ) }
    str
end
448 448
# Search and replace for Textile glyphs (quotes, dashes, other symbols).
#
# The GLYPHS table is not applied; the only rule active here wraps an
# acronym of three or more capitals/digits that is directly followed by
# "(definition)" in an <acronym> tag, with the definition HTML-escaped.
# Edits _text_ in place; returns what gsub! returns (nil if no match).
def pgl( text )
    acronym_with_definition = /\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])/
    text.gsub!( acronym_with_definition ) do
        letters, definition = $1, $2
        "<acronym title=\"#{ htmlesc definition }\">#{ letters }</acronym>"
    end
end
459 459
460 460 # Parses Textile attribute lists and builds an HTML attribute string
#
# text_in:: the raw attribute prefix captured by one of the block or
#           inline patterns; nil yields ''.
# element:: only the value 'td' changes behaviour (span and vertical
#           alignment handling).
#
# Returns a string of zero or more ' name="value"' pairs.  The order of
# the steps matters: each sub! strips what it recognised so the later,
# looser patterns only see what is left.
#
# NOTE(review): lang, class and id are interpolated without escaping.
461 461 def pba( text_in, element = "" )
462 462
463 463 return '' unless text_in
464 464
465 465 style = []
# work on a copy; the caller's string is not modified
466 466 text = text_in.dup
467 467 if element == 'td'
# "\N" gives colspan, "/N" gives rowspan
468 468 colspan = $1 if text =~ /\\(\d+)/
469 469 rowspan = $1 if text =~ /\/(\d+)/
470 470 style << "vertical-align:#{ v_align( $& ) };" if text =~ A_VLGN
471 471 end
472 472
# "{...}" is always removed from text, but only kept as style when
# filter_styles is off
473 473 style << "#{ htmlesc $1 };" if text.sub!( /\{([^}]*)\}/, '' ) && !filter_styles
474 474
# "[...]" is read as the lang attribute
475 475 lang = $1 if
476 476 text.sub!( /\[([^)]+?)\]/, '' )
477 477
# "(...)" is read as the class (possibly "class#id", split further down)
478 478 cls = $1 if
479 479 text.sub!( /\(([^()]+?)\)/, '' )
480 480
# any remaining run of "(" / ")" is padding, one em per character
481 481 style << "padding-left:#{ $1.length }em;" if
482 482 text.sub!( /([(]+)/, '' )
483 483
484 484 style << "padding-right:#{ $1.length }em;" if text.sub!( /([)]+)/, '' )
485 485
486 486 style << "text-align:#{ h_align( $& ) };" if text =~ A_HLGN
487 487
488 488 cls, id = $1, $2 if cls =~ /^(.*?)#(.*)$/
489 489
490 490 atts = ''
491 491 atts << " style=\"#{ style.join }\"" unless style.empty?
492 492 atts << " class=\"#{ cls }\"" unless cls.to_s.empty?
493 493 atts << " lang=\"#{ lang }\"" if lang
494 494 atts << " id=\"#{ id }\"" if id
495 495 atts << " colspan=\"#{ colspan }\"" if colspan
496 496 atts << " rowspan=\"#{ rowspan }\"" if rowspan
497 497
498 498 atts
499 499 end
500 500
# Matches an optional "table<attributes>." line followed by one or more
# rows that begin (after optional attributes) with "|" and end with "|",
# up to a blank line or the end of the text.
# $1 = table attributes, $2 = all the rows.
501 501 TABLE_RE = /^(?:table(_?#{S}#{A}#{C})\. ?\n)?^(#{A}#{C}\.? ?\|.*?\|)(\n\n|\Z)/m
502 502
503 503 # Parses a Textile table block, building HTML from the result.
# Edits _text_ in place.  Attribute strings go through #shelve, so the
# generated markup carries placeholders until #retrieve runs.
504 504 def block_textile_table( text )
505 505 text.gsub!( TABLE_RE ) do |matches|
506 506
507 507 tatts, fullrow = $~[1..2]
508 508 tatts = pba( tatts, 'table' )
509 509 tatts = shelve( tatts ) if tatts
510 510 rows = []
511 511
512 512 fullrow.each_line do |row|
# a leading "<attributes>. " applies to the whole row
513 513 ratts, row = pba( $1, 'tr' ), $2 if row =~ /^(#{A}#{C}\. )(.*)/m
514 514 cells = []
# split on "|" (the separators are kept by the capture group and skipped
# below); the lookahead leaves a "|" alone when it is followed by "]]"
# with no "[" or "|" in between
515 515 row.split( /(\|)(?![^\[\|]*\]\])/ )[1..-2].each do |cell|
516 516 next if cell == '|'
# a cell starting with "_" is a header cell (<th>)
517 517 ctyp = 'd'
518 518 ctyp = 'h' if cell =~ /^_/
519 519
520 520 catts = ''
521 521 catts, cell = pba( $1, 'td' ), $2 if cell =~ /^(_?#{S}#{A}#{C}\. ?)(.*)/
522 522
523 523 catts = shelve( catts ) if catts
524 524 cells << "\t\t\t<t#{ ctyp }#{ catts }>#{ cell }</t#{ ctyp }>"
525 525 end
526 526 ratts = shelve( ratts ) if ratts
527 527 rows << "\t\t<tr#{ ratts }>\n#{ cells.join( "\n" ) }\n\t\t</tr>"
528 528 end
529 529 "\t<table#{ tatts }>\n#{ rows.join( "\n" ) }\n\t</table>\n\n"
530 530 end
531 531 end
532 532
# LISTS_RE matches a run of text starting with list markers ("#" or "*").
# LISTS_CONTENT_RE splits one list line into $1 = marker run,
# $2 = attributes, $3 = item text.
533 533 LISTS_RE = /^([#*]+?#{C} .*?)$(?![^#*])/m
534 534 LISTS_CONTENT_RE = /^([#*]+)(#{A}#{C}) (.*)$/m
535 535
536 536 # Parses Textile lists and generates HTML
# Edits _text_ in place.  +depth+ is a stack of the marker runs of the
# lists currently open; the last character of a run selects <ol> or
# <ul> (see #lT).
537 537 def block_textile_lists( text )
538 538 text.gsub!( LISTS_RE ) do |match|
539 539 lines = match.split( /\n/ )
540 540 last_line = -1
541 541 depth = []
542 542 lines.each_with_index do |line, line_id|
543 543 if line =~ LISTS_CONTENT_RE
544 544 tl,atts,content = $~[1..3]
545 545 if depth.last
# this item is shallower than the open list: close the deeper lists by
# appending the closing tags to the previous line
546 546 if depth.last.length > tl.length
547 547 (depth.length - 1).downto(0) do |i|
548 548 break if depth[i].length == tl.length
549 549 lines[line_id - 1] << "</li>\n\t</#{ lT( depth[i] ) }l>\n\t"
550 550 depth.pop
551 551 end
552 552 end
# same level as the open list: just close the previous item
553 553 if depth.last and depth.last.length == tl.length
554 554 lines[line_id - 1] << '</li>'
555 555 end
556 556 end
# a marker run different from the innermost open one starts a new list
557 557 unless depth.last == tl
558 558 depth << tl
559 559 atts = pba( atts )
560 560 atts = shelve( atts ) if atts
561 561 lines[line_id] = "\t<#{ lT(tl) }l#{ atts }>\n\t<li>#{ content }"
562 562 else
563 563 lines[line_id] = "\t\t<li>#{ content }"
564 564 end
565 565 last_line = line_id
566 566
567 567 else
568 568 last_line = line_id
569 569 end
# at the final line, close everything that is still open.  The
# delete_if block appends a closing tag per entry and, because << returns
# a truthy string, also empties +depth+.
570 570 if line_id - last_line > 1 or line_id == lines.length - 1
571 571 depth.delete_if do |v|
572 572 lines[last_line] << "</li>\n\t</#{ lT( v ) }l>"
573 573 end
574 574 end
575 575 end
576 576 lines.join( "\n" )
577 577 end
578 578 end
579 579
# A run of consecutive lines that each start with one or more ">".
# Every line must be terminated by a newline or the end of the line:
# with an optional "\n?" after the lazy "[^\n]*?" the match could stop
# right after the ">" characters and never take in the quoted text.
QUOTES_RE = /(^>+([^\n]*?)(\n|$))+/m
# $1 = the leading ">"/space run of one line, $2 = the rest of the line.
QUOTES_CONTENT_RE = /^([> ]+)(.*)$/m

# Turns email-style quotes ("> text", ">> nested") into nested
# <blockquote> elements.
#
# The number of ">" characters at the start of a line is its nesting
# level; whenever the level changes, the matching number of opening or
# closing tags is emitted, surrounded by blank lines so that #blocks
# later treats them as separate blocks.  Edits _text_ in place and
# returns what gsub! returns (nil when nothing was quoted).
def block_textile_quotes( text )
    text.gsub!( QUOTES_RE ) do |match|
        quotes = ''
        indent = 0
        match.split( /\n/ ).each do |line|
            line =~ QUOTES_CONTENT_RE
            bq, content = $1, $2
            level = bq.count( '>' )
            if level != indent
                tag = level > indent ? '<blockquote>' : '</blockquote>'
                quotes << "\n\n" << tag * ( level - indent ).abs << "\n\n"
                indent = level
            end
            quotes << content << "\n"
        end
        # close whatever is still open at the end of the run
        quotes << "\n" << '</blockquote>' * indent << "\n\n"
        quotes
    end
end
602 602
# "@code@", optionally "@|lang|code@".  The opening "@" must follow a
# non-word character ($1) and the closing one must precede a non-word
# character.  $2 = language, $3 = the code itself.
CODE_RE = /(\W)
    @
    (?:\|(\w+?)\|)?
    (.+?)
    @
    (?=\W)/x

# Wraps @...@ spans in <code> (with a lang attribute when a language was
# given) and hands the generated markup to #rip_offtags, whose return
# value becomes the replacement.  Edits _text_ in place.
def inline_textile_code( text )
    text.gsub!( CODE_RE ) do
        lead, lang, body = $1, $2, $3
        lang_attr = lang ? " lang=\"#{ lang }\"" : nil
        # CODE_RE has three groups only; nothing follows </code> here
        rip_offtags( "#{ lead }<code#{ lang_attr }>#{ body }</code>" )
    end
end
617 617
# List type letter for a run of list markers: 'o' (ordered) when a line
# of _text_ ends with "#", otherwise 'u' (unordered).  Callers append
# "l" to build the <ol>/<ul> tag name.
def lT( text )
    case text
    when /\#$/ then 'o'
    else 'u'
    end
end
621 621
# When +hard_breaks+ is set, replaces a single newline that follows a
# character with "<br />", except when the newline ends the text or the
# next line starts a list/heading marker, "{" or "|".  Edits _text_ in
# place; returns nil when hard breaks are off or nothing was replaced.
def hard_break( text )
    return unless hard_breaks
    text.gsub!( /(.)\n(?!\Z| *([#*=]+(\s|$)|[{|]))/, "\\1<br />" )
end
625 625
# Blocks are separated by two or more newlines, unless the next line
# starts with a space.
626 626 BLOCKS_GROUP_RE = /\n{2,}(?! )/m
627 627
# Splits _text_ into blocks, runs every "block_*" rule from @rules over
# each block and wraps blocks that no rule claimed in <p> (or in
# <pre><code> when _deep_code_ is true).  _text_ is replaced in place
# with the processed blocks joined by blank lines.
#
# deep_code:: set by the recursive call for indented sub-blocks found
#             inside a plain (non list/quote) block.
628 628 def blocks( text, deep_code = false )
629 629 text.replace( text.split( BLOCKS_GROUP_RE ).collect do |blk|
# "plain" = the block does not start with a list marker, ">" or a space
630 630 plain = blk !~ /\A[#*> ]/
631 631
632 632 # skip blocks that are complex HTML
633 633 if blk =~ /^<\/?(\w+).*>/ and not SIMPLE_HTML_TAGS.include? $1
634 634 blk
635 635 else
636 636 # search for indentation levels
637 637 blk.strip!
638 638 if blk.empty?
639 639 blk
640 640 else
641 641 code_blk = nil
# indented runs inside the block are processed recursively; for a plain
# block they are cut out and appended afterwards as code_blk
642 642 blk.gsub!( /((?:\n(?:\n^ +[^\n]*)+)+)/m ) do |iblk|
643 643 flush_left iblk
644 644 blocks iblk, plain
# NOTE(review): non-destructive gsub whose result is discarded, so this
# line has no effect as written -- confirm whether gsub! was intended
645 645 iblk.gsub( /^(\S)/, "\t\\1" )
646 646 if plain
647 647 code_blk = iblk; ""
648 648 else
649 649 iblk
650 650 end
651 651 end
652 652
# count the block rules that returned a truthy value for this block
653 653 block_applied = 0
654 654 @rules.each do |rule_name|
655 655 block_applied += 1 if ( rule_name.to_s.match /^block_/ and method( rule_name ).call( blk ) )
656 656 end
657 657 if block_applied.zero?
658 658 if deep_code
659 659 blk = "\t<pre><code>#{ blk }</code></pre>"
660 660 else
661 661 blk = "\t<p>#{ blk }</p>"
662 662 end
663 663 end
664 664 # hard_break blk
665 665 blk + "\n#{ code_blk }"
666 666 end
667 667 end
668 668
669 669 end.join( "\n\n" ) )
670 670 end
671 671
# Handler for "bq." blocks: a <blockquote> holding one paragraph.
#
# cite::    URL or reference name, resolved through #check_refs and
#           emitted as the cite attribute when present.
# atts::    attribute string for the inner <p>, shelved when present.
# The _tag_ argument is not used.
def textile_bq( tag, atts, cite, content )
    cite_url = check_refs( cite ).first
    cite_attr = cite_url ? " cite=\"#{ cite_url }\"" : cite_url
    para_atts = atts ? shelve( atts ) : atts
    "\t<blockquote#{ cite_attr }>\n\t\t<p#{ para_atts }>#{ content }</p>\n\t</blockquote>"
end
678 678
# Generic prefix handler: wraps _content_ in <tag>, with the attribute
# string shelved when one was given.  _cite_ is accepted for signature
# compatibility with the other handlers and is not used.
def textile_p( tag, atts, cite, content )
    atts &&= shelve( atts )
    opening = "<#{ tag }#{ atts }>"
    closing = "</#{ tag }>"
    "\t#{ opening }#{ content }#{ closing }"
end

# h1. to h6. are rendered exactly like p., only the tag name differs
alias textile_h1 textile_p
alias textile_h2 textile_p
alias textile_h3 textile_p
alias textile_h4 textile_p
alias textile_h5 textile_p
alias textile_h6 textile_p
690 690
# Handler for "fnN." footnote blocks: a paragraph with id "fnN" and
# class "footnote" whose text is preceded by the number in <sup>.
# The id/class pair is appended to the _atts_ string that was passed in.
# _tag_ and _cite_ are not used.
def textile_fn_( tag, num, atts, cite, content )
    atts << " id=\"fn#{ num }\" class=\"footnote\""
    numbered = "<sup>#{ num }</sup> #{ content }"
    atts &&= shelve( atts )
    "\t<p#{ atts }>#{ numbered }</p>"
end
697 697
# A prefixed block such as "p. text", "h2(class). text" or "fn1. text".
# $1 = full tag, $2 = its letters, $3 = its digits, $4 = attributes,
# $5 = optional ":cite" target, $6 = the content.
698 698 BLOCK_RE = /^(([a-z]+)(\d*))(#{A}#{C})\.(?::(\S+))? (.*)$/m
699 699
# Handles blocks that start with a Textile prefix (p., bq., h1.-h6.,
# fnN., ...) by dispatching to a "textile_<tag>" handler or, failing
# that, to a "textile_<letters>_" handler that also receives the number.
#
# Edits _text_ in place.  Returns a truthy value when a handler ran and
# nil otherwise (no prefix, or no handler for it); #blocks relies on that
# to decide whether the block still needs a <p> wrapper.
def block_textile_prefix( text )
    return unless text =~ BLOCK_RE

    matched = $&
    tag, tagpre, num, atts, cite, content = $~[1..6]
    atts = pba( atts )

    # pass to prefix handler
    replacement =
        if respond_to? "textile_#{ tag }", true
            method( "textile_#{ tag }" ).call( tag, atts, cite, content )
        elsif respond_to? "textile_#{ tagpre }_", true
            method( "textile_#{ tagpre }_" ).call( tagpre, num, atts, cite, content )
        end

    # Block form on purpose: a replacement *string* would have its
    # backslash sequences (\0, \1, \\) expanded, corrupting any content
    # that happens to contain them.
    text.gsub!( matched ) { replacement } if replacement
end
713 713
# Text at the very start of the block, then a line that begins with "="
# or "-" and continues with only those characters and trailing spaces.
SETEXT_RE = /\A(.+?)\n([=-])[=-]* *$/m

# Markdown setext headers: an "=" underline gives <h1>, anything else
# <h2>.  Whatever follows the underline is run through #blocks and put
# back after the header.  Replaces _text_ in place; returns nil when the
# block is not a setext header.
def block_markdown_setext( text )
    return unless text =~ SETEXT_RE

    title, underline, remainder = $1, $2, $'
    tag = underline == "=" ? "h1" : "h2"
    blocks remainder
    text.replace( "<#{ tag }>#{ title }</#{ tag }>" + remainder )
end
723 723
ATX_RE = /\A(\#{1,6}) # $1 = string of #'s
    [ ]*
    (.+?) # $2 = Header text
    [ ]*
    \#* # optional closing #'s (not counted)
    $/x

# Markdown atx headers: one to six leading "#" select <h1>..<h6>.
# Whatever follows the header line is run through #blocks and appended
# after the header.  Replaces _text_ in place; returns nil when the block
# does not start with an atx header.
def block_markdown_atx( text )
    return unless text =~ ATX_RE

    hashes, title, remainder = $1, $2, $'
    tag = "h#{ hashes.length }"
    blocks remainder
    text.replace( "<#{ tag }>#{ title }</#{ tag }>\n\n" + remainder )
end
738 738
# A block whose first line starts (after optional spaces) with ">".
MARKDOWN_BQ_RE = /\A(^ *> ?.+$(.+\n)*\n*)+/m

# Markdown blockquotes: strips the leading "> " from every line, removes
# common indentation, processes the quoted text as blocks of its own and
# wraps the tab-indented result in <blockquote>.  Edits _text_ in place;
# returns what gsub! returns.
def block_markdown_bq( text )
    text.gsub!( MARKDOWN_BQ_RE ) do |quoted|
        quoted.gsub!( /^ *> ?/, '' )
        flush_left quoted
        blocks quoted
        # indent every line that starts with visible text by one tab
        quoted.gsub!( /^(\S)/, "\t\\1" )
        "<blockquote>\n" + quoted + "\n</blockquote>\n\n"
    end
end
750 750
# A line made of three or more "*", "-" or "_" (all the same character),
# each optionally followed by a space, with one optional leading space.
MARKDOWN_RULE_RE = /^(#{
    ['*', '-', '_'].map { |mark| ' ?(' + Regexp::quote( mark ) + ' ?){3,}' }.join( '|' )
})$/

# Markdown horizontal rules: every matching line becomes "<hr />".
# Edits _text_ in place; returns nil when there was no rule line.
def block_markdown_rule( text )
    text.gsub!( MARKDOWN_RULE_RE, "<hr />" )
end
760 760
# XXX TODO XXX
# Placeholder for Markdown list support.  It is part of the :markdown
# rule set but does nothing: _text_ is left alone and nil is returned,
# so #blocks never counts it as an applied rule.
def block_markdown_lists( text )
    nil
end
764 764
# Applies every quick phrase modifier in QTAGS (*strong*, _em_, ...) to
# _text_ in place.  Each match becomes <tag attributes>content</tag>,
# with the attribute string (including an optional cite) shelved.
def inline_textile_span( text )
    QTAGS.each do |_marker, html_tag, pattern, kind|
        text.gsub!( pattern ) do
            groups = $~.captures
            # :limit patterns capture the character before the marker
            # first; it has to be put back in front of the tag
            lead = kind == :limit ? groups.shift : ''
            _qtag, atts, cite, content = groups

            atts = pba( atts )
            atts << " cite=\"#{ cite }\"" if cite
            atts = shelve( atts ) if atts

            "#{ lead }<#{ html_tag }#{ atts }>#{ content }</#{ html_tag }>"
        end
    end
end
785 785
# "link text (optional title)":url
#
# The URL must start with "/", "http://", "https://", "ftp://",
# "sftp://", "ftps://", "sftps://" or "www." -- the stricter form of the
# pattern.  Groups: $1 pre, $2 atts, $3 text, $4 title, $5 url,
# $6 proto (the part of the URL that matched the list above),
# $7 trailing slash, $8 post.
LINK_RE = /
        ([\s\[{(]|[#{PUNCT}])?     # $pre
        "                          # start
        (#{C})                     # $atts
        ([^"\n]+?)                 # $text
        \s?
        (?:\(([^)]+?)\)(?="))?     # $title
        ":
        (                          # $url
        (\/|https?:\/\/|s?ftps?:\/\/|www\.)   # $proto
        [\w\/]\S+?
        )
        (\/)?                      # $slash
        ([^\w\=\/;\(\)]*?)         # $post
        (?=<|\s|$)
    /x
#"

# Converts Textile links in _text_ (in place) to <a> elements.
#
# The URL is first resolved through #check_refs, so a reference name
# works as well; a title coming from the reference is used when the link
# itself has none.  Links whose URL starts with http:// or https:// get
# class="external".  The attribute string is shelved.
def inline_textile_link( text )
    text.gsub!( LINK_RE ) do |m|
        # "label", not "text": assigning to "text" inside this block would
        # overwrite the method's own parameter
        pre, atts, label, title, url, _proto, slash, post = $~[1..8]

        url, url_title = check_refs( url )
        title ||= url_title

        # Idea below : an URL with unbalanced parenthesis and
        # ending by ')' is put into external parenthesis
        if url[-1, 1] == ')' and url.count( "(" ) < url.count( ")" )
            url = url[0..-2]    # discard closing parenth from url
            post = ")" + post   # add closing parenth to post
        end

        atts = pba( atts )
        # the URL is user input: escape it like the title so a quote cannot
        # end the attribute (htmlesc works in place, hence the dup)
        atts = " href=\"#{ htmlesc url.dup }#{ slash }\"#{ atts }"
        atts << " title=\"#{ htmlesc title }\"" if title
        atts = shelve( atts ) if atts

        external = ( url =~ /^https?:\/\// ) ? ' class="external"' : ''

        "#{ pre }<a#{ atts }#{ external }>#{ label }</a>#{ post }"
    end
end
823 826
MARKDOWN_REFLINK_RE = /
        \[([^\[\]]+)\]      # $text
        [ ]?                # opt. space
        (?:\n[ ]*)?         # one optional newline followed by spaces
        \[(.*?)\]           # $id
    /x

# Markdown reference links, "[text][id]" or "[text][]".  The id (or the
# link text when the id is empty) is looked up through #check_refs to get
# the URL and optional title.  Edits _text_ in place; the attribute
# string is shelved.
def inline_markdown_reflink( text )
    text.gsub!( MARKDOWN_REFLINK_RE ) do
        label, ref_id = $1, $2
        lookup_key = ref_id.empty? ? label : ref_id
        url, title = check_refs( lookup_key )

        atts = " href=\"#{ url }\""
        atts << " title=\"#{ title }\"" if title

        "<a#{ shelve( atts ) }>#{ label }</a>"
    end
end
848 851
MARKDOWN_LINK_RE = /
        \[([^\[\]]+)\]      # $text
        \(                  # open paren
        [ \t]*              # opt space
        <?(.+?)>?           # $href
        [ \t]*              # opt space
        (?:                 # whole title
        (['"])              # $quote
        (.*?)               # $title
        \3                  # matching quote
        )?                  # title is optional
        \)
    /x

# Markdown inline links, '[text](href "title")'.  Edits _text_ in place;
# the attribute string is shelved.
def inline_markdown_link( text )
    text.gsub!( MARKDOWN_LINK_RE ) do
        # $3 is only the quote character around the title
        label, href, title = $1, $2, $4

        atts = " href=\"#{ href }\""
        atts << " title=\"#{ title }\"" if title

        "<a#{ shelve( atts ) }>#{ label }</a>"
    end
end
874 877
# Reference definitions.  In both patterns $2 is the reference name and
# $3 the URL (the whole HYPERLINK match; $4 and $5 are its inner groups).
# Textile form: "[name]url".  Markdown form: '[name]: url "title"', with
# the optional title in $6.
875 878 TEXTILE_REFS_RE = /(^ *)\[([^\[\n]+?)\](#{HYPERLINK})(?=\s|$)/
876 879 MARKDOWN_REFS_RE = /(^ *)\[([^\n]+?)\]:\s+<?(#{HYPERLINK})>?(?:\s+"((?:[^"]|\\")+)")?(?=\s|$)/m
877 880
# Runs every active rule whose name starts with "refs_" over _text_, in
# the order the rules appear in @rules.
def refs( text )
    @rules.each do |rule_name|
        next unless rule_name.to_s =~ /^refs_/
        method( rule_name ).call( text )
    end
end
883 886
# Collects Textile reference definitions ("[name]url") into @urlrefs,
# keyed by the lower-cased name with [url, nil] as value, and removes
# the definitions from _text_ (in place).
def refs_textile( text )
    text.gsub!( TEXTILE_REFS_RE ) do
        name, target = $2, $3
        @urlrefs[name.downcase] = [target, nil]
        nil     # the definition itself leaves no output
    end
end
891 894
# Collects Markdown reference definitions ('[name]: url "title"') into
# @urlrefs, keyed by the lower-cased name with [url, title] as value
# (title is nil when absent), and removes the definitions from _text_
# (in place).
def refs_markdown( text )
    text.gsub!( MARKDOWN_REFS_RE ) do
        found = $~
        name, target, title = found[2], found[3], found[6]
        @urlrefs[name.downcase] = [target, title]
        nil     # the definition itself leaves no output
    end
end
900 903
# Resolves a reference name.  Returns the [url, title] pair stored in
# @urlrefs for the lower-cased _text_, or [text, nil] when _text_ is nil
# or not a known reference.
def check_refs( text )
    known = text ? @urlrefs[text.downcase] : nil
    known || [text, nil]
end
905 908
# !src(title)! with optional alignment/attributes after the opening "!"
# and an optional ":href" after the closing one.
# Groups: $1 what precedes the image, $2 alignment, $3 attributes,
# $4 src, $5 title, $6 href (the whole HYPERLINK match), $7 and $8 the
# two inner HYPERLINK groups.
906 909 IMAGE_RE = /
907 910 (<p>|.|^) # start of line?
908 911 \! # opening
909 912 (\<|\=|\>)? # optional alignment atts
910 913 (#{C}) # optional style,class atts
911 914 (?:\. )? # optional dot-space
912 915 ([^\s(!]+?) # presume this is the src
913 916 \s? # optional space
914 917 (?:\(((?:[^\(\)]|\([^\)]+\))+?)\))? # optional title
915 918 \! # closing
916 919 (?::#{ HYPERLINK })? # optional href
917 920 /x
918 921
# Converts Textile images in _text_ (in place) to <img>, wrapped in <a>
# when an href was given and floated when an alignment was given.
# Attribute strings are shelved.
#
# NOTE(review): src and href are interpolated without escaping.
919 922 def inline_textile_image( text )
920 923 text.gsub!( IMAGE_RE ) do |m|
921 924 stln,algn,atts,url,title,href,href_a1,href_a2 = $~[1..8]
# htmlesc escapes title in place (and tolerates nil)
922 925 htmlesc title
923 926 atts = pba( atts )
924 927 atts = " src=\"#{ url }\"#{ atts }"
925 928 atts << " title=\"#{ title }\"" if title
# alt is always emitted; it is empty when there is no title
926 929 atts << " alt=\"#{ title }\""
927 930 # size = @getimagesize($url);
928 931 # if($size) $atts.= " $size[3]";
929 932
930 933 href, alt_title = check_refs( href ) if href
# NOTE(review): atts was already built from the unresolved url above, so
# the result of this lookup appears unused within this method
931 934 url, url_title = check_refs( url )
932 935
933 936 out = ''
934 937 out << "<a#{ shelve( " href=\"#{ href }\"" ) }>" if href
935 938 out << "<img#{ shelve( atts ) } />"
936 939 out << "</a>#{ href_a1 }#{ href_a2 }" if href
937 940
938 941 if algn
939 942 algn = h_align( algn )
# an image that opens a paragraph floats the paragraph itself; anywhere
# else it is wrapped in a floated <div>
940 943 if stln == "<p>"
941 944 out = "<p style=\"float:#{ algn }\">#{ out }"
942 945 else
943 946 out = "#{ stln }<div style=\"float:#{ algn }\">#{ out }</div>"
944 947 end
945 948 else
946 949 out = stln + out
947 950 end
948 951
949 952 out
950 953 end
951 954 end
952 955
# Stores _val_ on @shelf and returns the placeholder " :redsh#N:" (N is
# the 1-based shelf position) to put in the text instead.  #retrieve
# swaps the placeholders back, so shelved strings are not touched by the
# rules that run in between.
def shelve( val )
    @shelf.push( val )
    position = @shelf.length
    " :redsh##{ position }:"
end
957 960
# Replaces every " :redsh#N:" placeholder in _text_ (in place) with the
# N-th string stored by #shelve.
def retrieve( text )
    @shelf.each_with_index do |stored, idx|
        placeholder = " :redsh##{ idx + 1 }:"
        text.gsub!( placeholder, stored )
    end
end
963 966
# Parks every incoming ampersand that does not start an HTML entity by
# replacing it with the dummy sequence "x%x%".  An ampersand followed by
# letters, digits or "#" and then a semicolon is taken to be an entity
# and left alone.  Edits _text_ in place.
def incoming_entities( text )
    bare_ampersand = /&(?![#a-z0-9]+;)/i
    text.gsub!( bare_ampersand, "x%x%" )
end
971 974
# Wraps "==text==" spans in <notextile> tags (in place).
#
# The first pass handles spans that open after start-of-line or
# whitespace; its "." does not cross newlines.  The second pass is
# multi-line and picks up spans at the start of a line that the first
# pass could not match.
def no_textile( text )
    text.gsub!( /(^|\s)==([^=]+.*?)==(\s|$)?/,
        '\1<notextile>\2</notextile>\3' )
    # This pattern has a single group, so the content is \1.  Re-using
    # the first pass's \1/\2/\3 template here would put the content
    # before an empty <notextile></notextile> pair.
    text.gsub!( /^ *==([^=]+.*?)==/m,
        '<notextile>\1</notextile>' )
end
978 981
# Normalises whitespace in _text_ (in place), then removes indentation
# shared by the whole document via #flush_left.
def clean_white_space( text )
    [
        [/\r\n/,    "\n"],      # normalize line breaks
        [/\r/,      "\n"],
        [/\t/,      ' '],       # tabs become spaces
        [/^ +$/,    ''],        # blank out lines holding only spaces
        [/\n{3,}/,  "\n\n"],    # at most one empty line in a row
        [/"$/,      "\" "]      # pad a quote that ends a line
    ].each { |pattern, replacement| text.gsub!( pattern, replacement ) }

    # if entire document is indented, flush
    # to the left side
    flush_left text
end
992 995
# Removes the indentation shared by the document from +text+ (in place).
#
# The amount removed is the smallest number of leading spaces found on
# any line that contains a non-whitespace character. Lines with fewer
# leading spaces than that (blank or whitespace-only lines) are left
# untouched. Nothing happens when some line already starts at column 0
# or when no line carries visible content.
#
# Returns the result of gsub! when indentation was stripped, nil otherwise.
def flush_left( text )
    # One scan instead of probing indent widths one by one: the probing
    # loop never terminated for input such as " " that starts with a space
    # but has no visible character to stop at.
    indt = text.scan( /^( *)\S/ ).map { |lead| lead.first.length }.min
    return if indt.nil? || indt.zero?

    text.gsub!( /^ {#{indt}}/, '' )
end
1004 1007
# Turns footnote markers such as "word[1]" in +text+ (in place) into
# superscript links pointing at "#fn1". The marker must directly follow a
# word character; one trailing whitespace character, if present, is kept.
# Returns nil when no marker was found.
def footnote_ref( text )
    marker = /\b\[([0-9]+?)\](\s)?/
    link   = '<sup><a href="#fn\1">\1</a></sup>\2'
    text.gsub!( marker, link )
end
1009 1012
# Names of the tags treated as "off" regions by glyphs_textile and
# rip_offtags. The pattern is itself a capture group, so every regexp
# below that interpolates it gains one extra numbered group per use.
OFFTAGS = /(code|pre|kbd|notextile)/
# One off-tag (closing or opening) plus the text that follows it, up to
# the next off-tag or the end of the string. Capture groups:
#   1 = whole closing tag    2 = its tag name
#   3 = whole opening tag    4 = its tag name
#   5 = text after the tag   (6 = tag name inside the lookahead)
# NOTE(review): an interpolated Regexp carries its own option set, so the
# /i flag here presumably does not make the tag names case-insensitive.
OFFTAG_MATCH = /(?:(<\/#{ OFFTAGS }>)|(<#{ OFFTAGS }[^>]*>))(.*?)(?=<\/?#{ OFFTAGS }|\Z)/mi
# Start of an opening off-tag ("<pre", "<code", ...).
OFFTAG_OPEN = /<#{ OFFTAGS }/
# Start of an off-tag with an optional slash; glyphs_textile tests it only
# after OFFTAG_OPEN has failed, i.e. for closing tags.
OFFTAG_CLOSE = /<\/?#{ OFFTAGS }/
# Any single-line tag-like "<...>" sequence (group 1).
HASTAG_MATCH = /(<\/?\w[^\n]*?>)/m
# Tokenizer used by glyphs_textile: either a tag (group 1 is set) or a run
# of text up to the next tag or end of line (group 1 is nil).
ALLTAG_MATCH = /(<\/?\w[^\n]*?>)|.*?(?=<\/?\w[^\n]*?>|$)/m
1016 1019
# Applies glyph processing to +text+ in place while leaving markup alone.
#
# Text without any tag goes straight through pgl and footnote_ref.
# Otherwise the text is split by ALLTAG_MATCH into tags and text runs:
# tags are kept verbatim and only adjust the off-tag nesting depth, text
# runs outside off-tags are processed recursively, and text runs inside
# off-tags are passed to htmlesc with :NoQuotes instead.
#
# +level+ is the recursion depth; it is only passed along.
def glyphs_textile( text, level = 0 )
    unless text =~ HASTAG_MATCH
        pgl text
        return footnote_ref( text )
    end

    depth = 0
    text.gsub!( ALLTAG_MATCH ) do |piece|
        # Read the capture before any further match overwrites it.
        tag = $1
        if tag
            ## matches are off if we're between <code>, <pre> etc.
            case piece
            when OFFTAG_OPEN
                depth += 1
            when OFFTAG_CLOSE
                # never let stray closing tags drive the depth below zero
                depth = [depth - 1, 0].max
            end
        elsif depth.zero?
            glyphs_textile( piece, level + 1 )
        else
            htmlesc( piece, :NoQuotes )
        end

        piece
    end
end
1043 1046
# Pulls the contents of off-tags (see OFFTAGS) out of +text+, replacing
# each outermost region with a "<redpre#N>" placeholder and storing the
# removed material in @pre_list[N]. smooth_offtags puts it back later.
# Works on +text+ in place and returns it.
#
# htmlesc is defined elsewhere; its return value is ignored here, so it is
# presumably escaping its argument in place -- TODO confirm.
def rip_offtags( text )
    # Nothing to do unless the text contains something tag-like.
    if text =~ /<.*>/
        ## strip and encode <pre> content
        # codepre counts the opening off-tags seen so far; used_offtags
        # records which tag names were opened.
        codepre, used_offtags = 0, {}
        text.gsub!( OFFTAG_MATCH ) do |line|
            # $3 is set when the match starts with an opening off-tag.
            if $3
                # $4 = tag name, $5 = text following the tag.
                offtag, aftertag = $4, $5
                codepre += 1
                used_offtags[offtag] = true
                # More opens than distinct tag names: append the escaped
                # match to the current @pre_list entry and emit nothing.
                if codepre - used_offtags.length > 0
                    htmlesc( line, :NoQuotes )
                    @pre_list.last << line
                    line = ""
                else
                    htmlesc( aftertag, :NoQuotes ) if aftertag
                    # The placeholder number is the index of the entry that
                    # is appended to @pre_list a few lines below.
                    line = "<redpre##{ @pre_list.length }>"
                    # Re-match the opening tag; from here on $1 is the tag
                    # name and $2 its attribute string.
                    $3.match(/<#{ OFFTAGS }([^>]*)>/)
                    tag = $1
                    # Keep only a class attribute, if one is present; this
                    # match overwrites $1 again.
                    $2.to_s.match(/(class\=\S+)/i)
                    tag << " #{$1}" if $1
                    @pre_list << "<#{ tag }>#{ aftertag }"
                end
            # $1 is set for a closing off-tag; it only counts while inside
            # an off-tag region.
            elsif $1 and codepre > 0
                if codepre - used_offtags.length > 0
                    htmlesc( line, :NoQuotes )
                    @pre_list.last << line
                    line = ""
                end
                codepre -= 1 unless codepre.zero?
                # Back at the top level: forget which tags were opened.
                used_offtags = {} if codepre.zero?
            end
            line
        end
    end
    text
end
1080 1083
# Puts the material stored in @pre_list back into +text+ (in place) by
# replacing each "<redpre#N>" placeholder with @pre_list[N].
# Returns nil when @pre_list is empty or nothing was replaced.
def smooth_offtags( text )
    return if @pre_list.empty?

    ## replace <pre> content
    text.gsub!( /<redpre#(\d+)>/ ) do
        slot = $1.to_i
        @pre_list[slot]
    end
end
1087 1090
# Runs the inline rules over +text+ in two passes: first every entry of
# @rules whose name starts with "inline_", then every entry whose name
# starts with "glyphs_". Each rule is invoked as a method of this object
# with +text+ as its only argument.
def inline( text )
    passes = [/^inline_/, /^glyphs_/]
    passes.each do |prefix|
        @rules.each do |rule|
            next unless rule.to_s.match( prefix )
            method( rule ).call( text )
        end
    end
end
1095 1098
# Looks +text+ up in H_ALGN_VALS (defined elsewhere in this file) and
# returns the stored value, or nil when there is no such key.
# Presumably maps a Textile horizontal-alignment marker to its CSS
# value -- verify against the H_ALGN_VALS definition.
def h_align(text)
    H_ALGN_VALS[text]
end
1099 1102
# Looks +text+ up in V_ALGN_VALS (defined elsewhere in this file) and
# returns the stored value, or nil when there is no such key.
# Presumably maps a Textile vertical-alignment marker to its CSS
# value -- verify against the V_ALGN_VALS definition.
def v_align(text)
    V_ALGN_VALS[text]
end
1103 1106
# Builds the HTML for a help link that opens the Textile reference in a
# popup window.
#
# name::    link text
# windowW:: popup width (String or Integer)
# windowH:: popup height (String or Integer)
# helpvar:: anchor on the reference page; defaults to +name+. The previous
#           body read +helpvar+ without ever defining it, so every call
#           raised NameError.
#
# Returns the markup as a String.
def textile_popup_help( name, windowW, windowH, helpvar = name )
    # Interpolation rather than +, so Integer sizes do not raise TypeError.
    onclick = "window.open(this.href, 'popupwindow', 'width=#{ windowW },height=#{ windowH },scrollbars,resizable'); return false;"
    " <a target=\"_blank\" href=\"http://hobix.com/textile/##{ helpvar }\" onclick=\"#{ onclick }\">#{ name }</a><br />"
end
1107 1110
# HTML cleansing stuff
#
# Default whitelist for clean_html: tag name => attribute names to keep.
# A tag that is not listed is replaced by a single space. A value of nil
# or [] means the tag is kept but all of its attributes are dropped.
BASIC_TAGS = {
    'a' => ['href', 'title'],
    'img' => ['src', 'alt', 'title'],
    'br' => [],
    'i' => nil,
    'u' => nil,
    'b' => nil,
    'pre' => nil,
    'kbd' => nil,
    'code' => ['lang'],
    'cite' => nil,
    'strong' => nil,
    'em' => nil,
    'ins' => nil,
    'sup' => nil,
    'sub' => nil,
    'del' => nil,
    'table' => nil,
    'tr' => nil,
    'td' => ['colspan', 'rowspan'],
    'th' => nil,
    'ol' => nil,
    'ul' => nil,
    'li' => nil,
    'p' => nil,
    'h1' => nil,
    'h2' => nil,
    'h3' => nil,
    'h4' => nil,
    'h5' => nil,
    'h6' => nil,
    'blockquote' => ['cite']
}
1142 1145
# Strips +text+ (in place) down to a whitelist of HTML tags.
#
# text:: the HTML to clean
# tags:: Hash of lower-case tag name => Array of attribute names to keep
#        (nil or [] keeps the tag without attributes)
#
# "<![CDATA[" openers are removed. Every tag found in +tags+ is rebuilt
# from its name and whitelisted attributes only; any other tag is replaced
# by a single space. A +src+ value with a URL scheme that does not start
# with "http" is dropped.
#
# Returns the result of the tag gsub! (nil when no tag was found).
#
# NOTE(review): +href+ values are not scheme-checked here, so e.g.
# "javascript:" links pass through -- confirm whether callers rely on that.
def clean_html( text, tags = BASIC_TAGS )
    text.gsub!( /<!\[CDATA\[/, '' )
    text.gsub!( /<(\/*)(\w+)([^>]*)>/ ) do
        raw = $~
        tag = raw[2].downcase
        if tags.has_key? tag
            pcs = [tag]
            ( tags[tag] || [] ).each do |prop|
                # Try double-quoted, single-quoted, then unquoted values.
                ['"', "'", ''].each do |q|
                    q2 = ( q != '' ? q : '\s' )
                    next unless raw[3] =~ /#{prop}\s*=\s*#{q}([^#{q2}]+)#{q}/i
                    attrv = $1
                    # A rejected src ends the search for this attribute;
                    # moving on to the unquoted pattern would re-match the
                    # same value with its quotes and let it through.
                    break if prop == 'src' && attrv =~ %r{^(?!http)\w+:}
                    # Use attrv, not $1: the scheme test above has already
                    # replaced the match data, leaving $1 nil.
                    pcs << "#{prop}=\"#{attrv.gsub('"', '\\"')}\""
                    break
                end
            end
            "<#{raw[1]}#{pcs.join " "}>"
        else
            " "
        end
    end
end
1167 1170
# Tag names that escape_html_tags lets through unescaped.
ALLOWED_TAGS = %w(redpre pre code notextile)

# Escapes every tag-like sequence in +text+ (in place) whose tag name is
# not in ALLOWED_TAGS: the opening "<" becomes "&lt;" and the closing ">",
# when there is one, becomes "&gt;". Allowed tags are left exactly as
# written. Returns nil when nothing tag-like was found.
def escape_html_tags(text)
    text.gsub!(%r{<(\/?([!\w]+)[^<>\n]*)(>?)}) do
        inner, tag_name, closer = $1, $2, $3
        if ALLOWED_TAGS.include?(tag_name)
            "<#{inner}#{closer}"
        else
            # closer is always a String ("" or ">"), so plain empty? is
            # enough and avoids depending on ActiveSupport's blank?.
            "&lt;#{inner}#{'&gt;' unless closer.empty?}"
        end
    end
end
1173 1176 end
1174 1177
General Comments 0
You need to be logged in to leave comments. Login now