##// END OF EJS Templates
Textile formatting:...
Jean-Philippe Lang -
r1202:a92cce385105
parent child
Show More
@@ -1,1132 +1,1139
1 1 # vim:ts=4:sw=4:
2 2 # = RedCloth - Textile and Markdown Hybrid for Ruby
3 3 #
4 4 # Homepage:: http://whytheluckystiff.net/ruby/redcloth/
5 5 # Author:: why the lucky stiff (http://whytheluckystiff.net/)
6 6 # Copyright:: (cc) 2004 why the lucky stiff (and his puppet organizations.)
7 7 # License:: BSD
8 8 #
9 9 # (see http://hobix.com/textile/ for a Textile Reference.)
10 10 #
11 11 # Based on (and also inspired by) both:
12 12 #
13 13 # PyTextile: http://diveintomark.org/projects/textile/textile.py.txt
14 14 # Textism for PHP: http://www.textism.com/tools/textile/
15 15 #
16 16 #
17 17
18 18 # = RedCloth
19 19 #
20 20 # RedCloth is a Ruby library for converting Textile and/or Markdown
21 21 # into HTML. You can use either format, intermingled or separately.
22 22 # You can also extend RedCloth to honor your own custom text stylings.
23 23 #
24 24 # RedCloth users are encouraged to use Textile if they are generating
25 25 # HTML and to use Markdown if others will be viewing the plain text.
26 26 #
27 27 # == What is Textile?
28 28 #
29 29 # Textile is a simple formatting style for text
30 30 # documents, loosely based on some HTML conventions.
31 31 #
32 32 # == Sample Textile Text
33 33 #
34 34 # h2. This is a title
35 35 #
36 36 # h3. This is a subhead
37 37 #
38 38 # This is a bit of paragraph.
39 39 #
40 40 # bq. This is a blockquote.
41 41 #
42 42 # = Writing Textile
43 43 #
44 44 # A Textile document consists of paragraphs. Paragraphs
45 45 # can be specially formatted by adding a small instruction
46 46 # to the beginning of the paragraph.
47 47 #
48 48 # h[n]. Header of size [n].
49 49 # bq. Blockquote.
50 50 # # Numeric list.
51 51 # * Bulleted list.
52 52 #
53 53 # == Quick Phrase Modifiers
54 54 #
55 55 # Quick phrase modifiers are also included, to allow formatting
56 56 # of small portions of text within a paragraph.
57 57 #
58 58 # _emphasis_
59 59 # __italicized__
60 60 # *strong*
61 61 # **bold**
62 62 # ??citation??
63 63 # -deleted text-
64 64 # +inserted text+
65 65 # ^superscript^
66 66 # ~subscript~
67 67 # @code@
68 68 # %(classname)span%
69 69 #
70 70 # ==notextile== (leave text alone)
71 71 #
72 72 # == Links
73 73 #
74 74 # To make a hypertext link, put the link text in "quotation
75 75 # marks" followed immediately by a colon and the URL of the link.
76 76 #
77 77 # Optional: text in (parentheses) following the link text,
78 78 # but before the closing quotation mark, will become a Title
79 79 # attribute for the link, visible as a tool tip when a cursor is above it.
80 80 #
81 81 # Example:
82 82 #
83 83 # "This is a link (This is a title) ":http://www.textism.com
84 84 #
85 85 # Will become:
86 86 #
87 87 # <a href="http://www.textism.com" title="This is a title">This is a link</a>
88 88 #
89 89 # == Images
90 90 #
91 91 # To insert an image, put the URL for the image inside exclamation marks.
92 92 #
93 93 # Optional: text that immediately follows the URL in (parentheses) will
94 94 # be used as the Alt text for the image. Images on the web should always
95 95 # have descriptive Alt text for the benefit of readers using non-graphical
96 96 # browsers.
97 97 #
98 98 # Optional: place a colon followed by a URL immediately after the
99 99 # closing ! to make the image into a link.
100 100 #
101 101 # Example:
102 102 #
103 103 # !http://www.textism.com/common/textist.gif(Textist)!
104 104 #
105 105 # Will become:
106 106 #
107 107 # <img src="http://www.textism.com/common/textist.gif" alt="Textist" />
108 108 #
109 109 # With a link:
110 110 #
111 111 # !/common/textist.gif(Textist)!:http://textism.com
112 112 #
113 113 # Will become:
114 114 #
115 115 # <a href="http://textism.com"><img src="/common/textist.gif" alt="Textist" /></a>
116 116 #
117 117 # == Defining Acronyms
118 118 #
119 119 # HTML allows authors to define acronyms via the <acronym> tag. The definition appears as a
120 120 # tool tip when a cursor hovers over the acronym. A crucial aid to clear writing,
121 121 # this should be used at least once for each acronym in documents where they appear.
122 122 #
123 123 # To quickly define an acronym in Textile, place the full text in (parentheses)
124 124 # immediately following the acronym.
125 125 #
126 126 # Example:
127 127 #
128 128 # ACLU(American Civil Liberties Union)
129 129 #
130 130 # Will become:
131 131 #
132 132 # <acronym title="American Civil Liberties Union">ACLU</acronym>
133 133 #
134 134 # == Adding Tables
135 135 #
136 136 # In Textile, simple tables can be added by separating each column by
137 137 # a pipe.
138 138 #
139 139 # |a|simple|table|row|
140 140 # |And|Another|table|row|
141 141 #
142 142 # Attributes are defined by style definitions in parentheses.
143 143 #
144 144 # table(border:1px solid black).
145 145 # (background:#ddd;color:red). |{}| | | |
146 146 #
147 147 # == Using RedCloth
148 148 #
149 149 # RedCloth is simply an extension of the String class, which can handle
150 150 # Textile formatting. Use it like a String and output HTML with its
151 151 # RedCloth#to_html method.
152 152 #
153 153 # doc = RedCloth.new "
154 154 #
155 155 # h2. Test document
156 156 #
157 157 # Just a simple test."
158 158 #
159 159 # puts doc.to_html
160 160 #
161 161 # By default, RedCloth uses both Textile and Markdown formatting, with
162 162 # Textile formatting taking precedence. If you want to turn off Markdown
163 163 # formatting, to boost speed and limit the processor:
164 164 #
165 165 # class RedCloth::Textile.new( str )
166 166
167 167 class RedCloth < String
168 168
169 169 VERSION = '3.0.4'
170 170 DEFAULT_RULES = [:textile, :markdown]
171 171
172 172 #
173 173 # Two accessors for setting security restrictions.
174 174 #
175 175 # This is a nice thing if you're using RedCloth for
176 176 # formatting in public places (e.g. Wikis) where you
177 177 # don't want users to abuse HTML for bad things.
178 178 #
179 179 # If +:filter_html+ is set, HTML which wasn't
180 180 # created by the Textile processor will be escaped.
181 181 #
182 182 # If +:filter_styles+ is set, it will also disable
183 183 # the style markup specifier. ('{color: red}')
184 184 #
185 185 attr_accessor :filter_html, :filter_styles
186 186
187 187 #
188 188 # Accessor for toggling hard breaks.
189 189 #
190 190 # If +:hard_breaks+ is set, single newlines will
191 191 # be converted to HTML break tags. This is the
192 192 # default behavior for traditional RedCloth.
193 193 #
194 194 attr_accessor :hard_breaks
195 195
196 196 # Accessor for toggling lite mode.
197 197 #
198 198 # In lite mode, block-level rules are ignored. This means
199 199 # that tables, paragraphs, lists, and such aren't available.
200 200 # Only the inline markup for bold, italics, entities and so on.
201 201 #
202 202 # r = RedCloth.new( "And then? She *fell*!", [:lite_mode] )
203 203 # r.to_html
204 204 # #=> "And then? She <strong>fell</strong>!"
205 205 #
206 206 attr_accessor :lite_mode
207 207
208 208 #
209 209 # Accessor for toggling span caps.
210 210 #
211 211 # Textile places `span' tags around capitalized
212 212 # words by default, but this wreaks havoc on Wikis.
213 213 # If +:no_span_caps+ is set, this will be
214 214 # suppressed.
215 215 #
216 216 attr_accessor :no_span_caps
217 217
218 218 #
219 219 # Establishes the markup precedence. Available rules include:
220 220 #
221 221 # == Textile Rules
222 222 #
223 223 # The following textile rules can be set individually. Or add the complete
224 224 # set of rules with the single :textile rule, which supplies the rule set in
225 225 # the following precedence:
226 226 #
227 227 # refs_textile:: Textile references (i.e. [hobix]http://hobix.com/)
228 228 # block_textile_table:: Textile table block structures
229 229 # block_textile_lists:: Textile list structures
230 230 # block_textile_prefix:: Textile blocks with prefixes (i.e. bq., h2., etc.)
231 231 # inline_textile_image:: Textile inline images
232 232 # inline_textile_link:: Textile inline links
233 233 # inline_textile_span:: Textile inline spans
234 234 # glyphs_textile:: Textile entities (such as em-dashes and smart quotes)
235 235 #
236 236 # == Markdown
237 237 #
238 238 # refs_markdown:: Markdown references (for example: [hobix]: http://hobix.com/)
239 239 # block_markdown_setext:: Markdown setext headers
240 240 # block_markdown_atx:: Markdown atx headers
241 241 # block_markdown_rule:: Markdown horizontal rules
242 242 # block_markdown_bq:: Markdown blockquotes
243 243 # block_markdown_lists:: Markdown lists
244 244 # inline_markdown_link:: Markdown links
245 245 attr_accessor :rules
246 246
# Returns a new RedCloth object, based on _string_ and
# enforcing all the included _restrictions_.
#
# Each entry of _restrictions_ names one of the boolean accessors
# (for example +:filter_html+ or +:hard_breaks+); the matching writer
# is called with +true+ before the string contents are set.
#
#   r = RedCloth.new( "h1. A <b>bold</b> man", [:filter_html] )
#   r.to_html
#     #=>"<h1>A &lt;b&gt;bold&lt;/b&gt; man</h1>"
#
def initialize( string, restrictions = [] )
    restrictions.each do |restriction|
        method( "#{ restriction }=" ).call( true )
    end
    super( string )
end
258 258
#
# Generates HTML from the Textile contents.
#
# _rules_ lists the rules to apply, in order of precedence.  The
# shorthand +:textile+ and +:markdown+ expand to their full rule sets;
# when no rule is given, DEFAULT_RULES is used.
#
#   r = RedCloth.new( "And then? She *fell*!" )
#   r.to_html
#     #=>"And then? She <strong>fell</strong>!"
#
# Returns a new String; the receiver itself is not modified.
#
def to_html( *rules )
    rules = DEFAULT_RULES if rules.empty?
    # make our working copy
    text = self.dup

    # per-conversion state shared with the helper methods
    @urlrefs = {}
    @shelf = []

    textile_rules = [:refs_textile, :block_textile_table, :block_textile_lists,
                     :block_textile_prefix, :inline_textile_image, :inline_textile_link,
                     :inline_textile_code, :inline_textile_span]
    markdown_rules = [:refs_markdown, :block_markdown_setext, :block_markdown_atx, :block_markdown_rule,
                      :block_markdown_bq, :block_markdown_lists,
                      :inline_markdown_reflink, :inline_markdown_link]
    @rules = rules.collect do |rule|
        case rule
        when :markdown then markdown_rules
        when :textile  then textile_rules
        else rule
        end
    end.flatten

    # standard clean up
    incoming_entities text
    clean_white_space text

    # start processor
    @pre_list = []
    rip_offtags text
    no_textile text
    # escape_html_tags is defined elsewhere in this class (outside this excerpt)
    escape_html_tags text
    hard_break text
    unless @lite_mode
        refs text
        blocks text
    end
    inline text
    smooth_offtags text

    retrieve text

    text.gsub!( /<\/?notextile>/, '' )
    # turn the ampersand placeholder written by incoming_entities into an entity
    text.gsub!( /x%x%/, '&#38;' )
    # clean_html is defined elsewhere in this class (outside this excerpt)
    clean_html text if filter_html
    text.strip!
    text
end
315 316
316 317 #######
317 318 private
318 319 #######
#
# Mapping of 8-bit ASCII codes to HTML numerical entity equivalents.
# (from PyTextile)
#
# Each entry is a [character, entity] pair.  Codes listed with 0 have
# no equivalent and map to the empty string.
#
TEXTILE_TAGS =
    [[128, 8364], [129, 0], [130, 8218], [131, 402], [132, 8222], [133, 8230],
     [134, 8224], [135, 8225], [136, 710], [137, 8240], [138, 352], [139, 8249],
     [140, 338], [141, 0], [142, 0], [143, 0], [144, 0], [145, 8216], [146, 8217],
     [147, 8220], [148, 8221], [149, 8226], [150, 8211], [151, 8212], [152, 732],
     [153, 8482], [154, 353], [155, 8250], [156, 339], [157, 0], [158, 0], [159, 376]].
    collect! do |code, entity|
        # "&##{...}": the first '#' is literal, the second one opens the
        # interpolation, so the result is a numeric entity such as "&#8364;".
        [code.chr, ( entity.zero? ? "" : "&##{ entity };" )]
    end
334 335
#
# Regular expressions to convert to HTML.
#
# These are fragments that get interpolated into the larger patterns
# used by the block and inline rules:
#
# A_HLGN / A_VLGN:: horizontal / vertical alignment markers
# C_CLAS, C_LNGE, C_STYL:: (class), [lang] and {style} attribute groups
# S_CSPN / S_RSPN:: column span (backslash + digits) / row span (slash + digits)
# A, S, C:: the combined alignment, span and attribute fragments
#
A_HLGN = /(?:(?:<>|<|>|\=|[()]+)+)/
A_VLGN = /[\-^~]/
C_CLAS = '(?:\([^)]+\))'
C_LNGE = '(?:\[[^\]]+\])'
C_STYL = '(?:\{[^}]+\})'
S_CSPN = '(?:\\\\\d+)'
S_RSPN = '(?:/\d+)'
A = "(?:#{A_HLGN}?#{A_VLGN}?|#{A_VLGN}?#{A_HLGN}?)"
S = "(?:#{S_CSPN}?#{S_RSPN}|#{S_RSPN}?#{S_CSPN}?)"
C = "(?:#{C_CLAS}?#{C_STYL}?#{C_LNGE}?|#{C_STYL}?#{C_LNGE}?#{C_CLAS}?|#{C_LNGE}?#{C_STYL}?#{C_CLAS}?)"
# PUNCT = Regexp::quote( '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~' )
PUNCT = Regexp::quote( '!"#$%&\'*+,-./:;=?@\\^_`|~' )
PUNCT_NOQ = Regexp::quote( '!"#$&\',./:;=?@\\`|' )
PUNCT_Q = Regexp::quote( '*-_+^~%' )
# URL followed by optional trailing punctuation, up to whitespace, "<" or end of line
HYPERLINK = '(\S+?)([^\w\s/;=\?]*?)(?=\s|<|$)'
353 354
# Text markup tags, don't conflict with block tags.
# A block that starts with one of these tags is still processed as
# Textile; blocks starting with any other tag are passed through untouched.
SIMPLE_HTML_TAGS = [
    'tt', 'b', 'i', 'big', 'small', 'em', 'strong', 'dfn', 'code',
    'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'a', 'img', 'br',
    'map', 'q', 'sub', 'sup', 'span', 'bdo'
]
360 361
# Quick phrase modifiers: [textile marker, HTML tag, rule type].
# The collect! below appends the compiled pattern, turning every entry
# into [marker, tag, regexp, rule type].
QTAGS = [
    ['**', 'b', :limit],
    ['*', 'strong', :limit],
    ['??', 'cite', :limit],
    ['-', 'del', :limit],
    ['__', 'i', :limit],
    ['_', 'em', :limit],
    ['%', 'span', :limit],
    ['+', 'ins', :limit],
    ['^', 'sup', :limit],
    ['~', 'sub', :limit]
]
QTAGS.collect! do |rc, ht, rtype|
    rcq = Regexp::quote rc
    re =
        case rtype
        when :limit
            # groups: 1 = leading context, 2 = marker, 3 = attributes,
            # 4 = cite, 5 = content
            /(^|[>\s])
            (#{rcq})
            (#{C})
            (?::(\S+?))?
            ([^\s\-].*?[^\s\-]|\w)
            #{rcq}
            (?=[[:punct:]]|\s|$)/x
        else
            # groups: 1 = marker, 2 = attributes, 3 = cite, 4 = content
            /(#{rcq})
            (#{C})
            (?::(\S+))?
            ([^\s\-].*?[^\s\-]|\w)
            #{rcq}/xm
        end
    [rc, ht, re, rtype]
end
394 395
# Elements to handle.
# Each entry is [pattern, replacement] with an optional third element
# naming an accessor; pgl skips the entry when that accessor is set.
GLYPHS = [
#   [ /([^\s\[{(>])?\'([dmst]\b|ll\b|ve\b|\s|:|$)/, '\1&#8217;\2' ], # single closing
#   [ /([^\s\[{(>#{PUNCT_Q}][#{PUNCT_Q}]*)\'/, '\1&#8217;' ], # single closing
#   [ /\'(?=[#{PUNCT_Q}]*(s\b|[\s#{PUNCT_NOQ}]))/, '&#8217;' ], # single closing
#   [ /\'/, '&#8216;' ], # single opening
    [ /</, '&lt;' ], # less-than
    [ />/, '&gt;' ], # greater-than
#   [ /([^\s\[{(])?"(\s|:|$)/, '\1&#8221;\2' ], # double closing
#   [ /([^\s\[{(>#{PUNCT_Q}][#{PUNCT_Q}]*)"/, '\1&#8221;' ], # double closing
#   [ /"(?=[#{PUNCT_Q}]*[\s#{PUNCT_NOQ}])/, '&#8221;' ], # double closing
#   [ /"/, '&#8220;' ], # double opening
    [ /\b( )?\.{3}/, '\1&#8230;' ], # ellipsis
    [ /\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])/, '<acronym title="\2">\1</acronym>' ], # 3+ uppercase acronym
    [ /(^|[^"][>\s])([A-Z][A-Z0-9 ]+[A-Z0-9])([^<A-Za-z0-9]|$)/, '\1<span class="caps">\2</span>\3', :no_span_caps ], # 3+ uppercase caps
    [ /(\.\s)?\s?--\s?/, '\1&#8212;' ], # em dash
    [ /\s->\s/, ' &rarr; ' ], # right arrow
    [ /\s-\s/, ' &#8211; ' ], # en dash
    [ /(\d+) ?x ?(\d+)/, '\1&#215;\2' ], # dimension sign
    [ /\b ?[(\[]TM[\])]/i, '&#8482;' ], # trademark
    [ /\b ?[(\[]R[\])]/i, '&#174;' ], # registered
    [ /\b ?[(\[]C[\])]/i, '&#169;' ] # copyright
]
418 419
# Textile horizontal alignment markers and the CSS value each stands for.
H_ALGN_VALS = {
    '<' => 'left',
    '=' => 'center',
    '>' => 'right',
    '<>' => 'justify'
}

# Textile vertical alignment markers and the CSS value each stands for.
V_ALGN_VALS = {
    '^' => 'top',
    '-' => 'middle',
    '~' => 'bottom'
}
431 432
#
# Flexible HTML escaping
#
# Escapes _str_ in place.  "&", "<" and ">" are always escaped; double
# quotes are escaped unless _mode_ is :NoQuotes, and single quotes only
# when _mode_ is :Quotes.
#
# Returns _str_ (the same object, modified).
#
def htmlesc( str, mode )
    # "&" must go first so the entities written below are not escaped again
    str.gsub!( '&', '&amp;' )
    str.gsub!( '"', '&quot;' ) unless mode == :NoQuotes
    str.gsub!( "'", '&#039;' ) if mode == :Quotes
    str.gsub!( '<', '&lt;' )
    str.gsub!( '>', '&gt;' )
    str
end
442 443
# Search and replace for Textile glyphs (quotes, dashes, other symbols).
#
# Applies every GLYPHS entry to _text_ in place.  An entry carrying a
# third element is skipped when the accessor of that name returns a
# true value (e.g. +no_span_caps+).
def pgl( text )
    GLYPHS.each do |pattern, replacement, toggle|
        next if toggle and method( toggle ).call
        text.gsub!( pattern, replacement )
    end
end
450 451
# Parses Textile attribute lists and builds an HTML attribute string.
#
# _text_in_ is the raw attribute portion of a Textile construct, e.g.
# "(class#id){color:red}[en]<", and _element_ the kind of element it
# belongs to; column/row spans and vertical alignment are only read
# when _element_ is 'td'.
#
# Returns a String starting with a space for every attribute found
# (' class="x" lang="en"'), or an empty String.  Double quotes inside
# attribute values are written as &quot; so a value cannot terminate
# its own attribute.
def pba( text_in, element = "" )
    return ''.dup unless text_in

    style = []
    text = text_in.dup
    if element == 'td'
        colspan = $1 if text =~ /\\(\d+)/
        rowspan = $1 if text =~ /\/(\d+)/
        style << "vertical-align:#{ v_align( $& ) };" if text =~ A_VLGN
    end

    # {style} -- only consumed when styles are not filtered
    style << "#{ $1 };" if not filter_styles and text.sub!( /\{([^}]*)\}/, '' )

    # [lang] -- everything up to the closing bracket
    lang = $1 if text.sub!( /\[([^\]]+?)\]/, '' )

    # (class) or (class#id)
    cls = $1 if text.sub!( /\(([^()]+?)\)/, '' )

    # leftover unbalanced parentheses are padding markers
    style << "padding-left:#{ $1.length }em;" if text.sub!( /([(]+)/, '' )
    style << "padding-right:#{ $1.length }em;" if text.sub!( /([)]+)/, '' )

    style << "text-align:#{ h_align( $& ) };" if text =~ A_HLGN

    cls, id = $1, $2 if cls =~ /^(.*?)#(.*)$/

    quote = lambda { |value| value.to_s.gsub( '"', '&quot;' ) }

    atts = []
    atts << " style=\"#{ quote.call( style.join ) }\"" unless style.empty?
    atts << " class=\"#{ quote.call( cls ) }\"" unless cls.to_s.empty?
    atts << " lang=\"#{ quote.call( lang ) }\"" if lang
    atts << " id=\"#{ quote.call( id ) }\"" if id
    atts << " colspan=\"#{ colspan }\"" if colspan
    atts << " rowspan=\"#{ rowspan }\"" if rowspan
    atts.join
end
492 493
# Optional "table(attrs)." line followed by pipe-delimited rows, ended
# by a blank line or the end of the text.
TABLE_RE = /^(?:table(_?#{S}#{A}#{C})\. ?\n)?^(#{A}#{C}\.? ?\|.*?\|)(\n\n|\Z)/m

# Parses a Textile table block, building HTML from the result.
#
# Replaces every table found in _text_ (in place) with <table> markup.
# Attribute strings are stored via shelve so later rules leave them alone.
# Cells starting with "_" become <th>; cells that are blank after
# stripping are dropped.
def block_textile_table( text )
    # built once per call instead of once per row / cell
    row_re = /^(#{A}#{C}\. )(.*)/m
    cell_re = /^(_?#{S}#{A}#{C}\. ?)(.*)/

    text.gsub!( TABLE_RE ) do |matches|

        tatts, fullrow = $~[1..2]
        tatts = pba( tatts, 'table' )
        tatts = shelve( tatts ) if tatts
        rows = []

        fullrow.
        split( /\|$/m ).
        delete_if { |x| x.empty? }.
        each do |row|

            if row =~ row_re
                ratts, row = pba( $1, 'tr' ), $2
            end

            cells = []
            row.split( '|' ).each do |cell|
                ctyp = ( cell =~ /^_/ ) ? 'h' : 'd'

                catts = ''
                if cell =~ cell_re
                    catts, cell = pba( $1, 'td' ), $2
                end

                unless cell.strip.empty?
                    catts = shelve( catts ) if catts
                    cells << "\t\t\t<t#{ ctyp }#{ catts }>#{ cell }</t#{ ctyp }>"
                end
            end
            ratts = shelve( ratts ) if ratts
            rows << "\t\t<tr#{ ratts }>\n#{ cells.join( "\n" ) }\n\t\t</tr>"
        end
        "\t<table#{ tatts }>\n#{ rows.join( "\n" ) }\n\t</table>\n\n"
    end
end
530 531
# A run of lines that starts with "#" or "*" list markers.
LISTS_RE = /^([#*]+?#{C} .*?)$(?![^#*])/m
# One list line: 1 = markers (nesting depth), 2 = attributes, 3 = content.
LISTS_CONTENT_RE = /^([#*]+)(#{A}#{C}) (.*)$/m

# Parses Textile lists and generates HTML.
#
# Every list found in _text_ is replaced in place.  +depth+ is the
# stack of marker strings for the lists currently open; closing tags
# are appended to the previously emitted line.
def block_textile_lists( text )
    text.gsub!( LISTS_RE ) do |match|
        lines = match.split( /\n/ )
        last_line = -1
        depth = []
        lines.each_with_index do |line, line_id|
            if line =~ LISTS_CONTENT_RE
                tl, atts, content = $~[1..3]
                if depth.last
                    if depth.last.length > tl.length
                        # coming back out of nested lists: close each deeper level
                        (depth.length - 1).downto(0) do |i|
                            break if depth[i].length == tl.length
                            lines[line_id - 1] << "</li>\n\t</#{ lT( depth[i] ) }l>\n\t"
                            depth.pop
                        end
                    end
                    if depth.last and depth.last.length == tl.length
                        lines[line_id - 1] << '</li>'
                    end
                end
                if depth.last == tl
                    lines[line_id] = "\t\t<li>#{ content }"
                else
                    # a new (or differently marked) list opens here
                    depth << tl
                    atts = pba( atts )
                    atts = shelve( atts ) if atts
                    lines[line_id] = "\t<#{ lT(tl) }l#{ atts }>\n\t<li>#{ content }"
                end
            end
            last_line = line_id

            if line_id - last_line > 1 or line_id == lines.length - 1
                # close every list that is still open
                depth.each do |v|
                    lines[last_line] << "</li>\n\t</#{ lT( v ) }l>"
                end
                depth.clear
            end
        end
        lines.join( "\n" )
    end
end
577 578
# @code@ spans, with an optional |lang| prefix:
# 1 = preceding non-word character, 2 = language, 3 = code.
CODE_RE = /(\W)
    @
    (?:\|(\w+?)\|)?
    (.+?)
    @
    (?=\W)/x

# Turns @code@ spans in _text_ into <code> elements (in place).  The
# generated markup is passed through rip_offtags so the code content is
# set aside from later rules.
def inline_textile_code( text )
    text.gsub!( CODE_RE ) do |m|
        before, lang, code = $~[1..3]
        lang = " lang=\"#{ lang }\"" if lang
        rip_offtags( "#{ before }<code#{ lang }>#{ code }</code>" )
    end
end
592 593
# Returns the list tag prefix for a run of list markers: 'o' (ordered)
# when the run ends in "#", otherwise 'u' (unordered).
def lT( text )
    text =~ /\#$/ ? 'o' : 'u'
end
596 597
# When +hard_breaks+ is set, replaces single newlines in _text_ with
# "<br />" (in place).  A newline is left alone when it ends the text or
# when the next line starts a list, rule or table-like construct.
def hard_break( text )
    return unless hard_breaks
    text.gsub!( /(.)\n(?!\Z| *([#*=]+(\s|$)|[{|]))/, "\\1<br />" )
end
600 601
# Blocks are separated by two or more newlines not followed by a space.
BLOCKS_GROUP_RE = /\n{2,}(?! )/m

# Splits _text_ into blocks, runs every "block_" rule from @rules over
# each of them and puts the result back into _text_ (in place).
#
# A block that no rule handled is wrapped in <p>, or in <pre><code>
# when _deep_code_ is true.  Blocks starting with an HTML tag that is
# not in SIMPLE_HTML_TAGS are passed through untouched.
def blocks( text, deep_code = false )
    rendered = text.split( BLOCKS_GROUP_RE ).collect do |blk|
        plain = blk !~ /\A[#*> ]/

        # skip blocks that are complex HTML
        if blk =~ /^<\/?(\w+).*>/ and not SIMPLE_HTML_TAGS.include? $1
            blk
        else
            # search for indentation levels
            blk.strip!
            if blk.empty?
                blk
            else
                code_blk = nil
                blk.gsub!( /((?:\n(?:\n^ +[^\n]*)+)+)/m ) do |iblk|
                    flush_left iblk
                    blocks iblk, plain
                    # NOTE(review): non-destructive gsub whose result is discarded,
                    # so this line has no effect.  Left as-is: switching to gsub!
                    # would change the generated markup.
                    iblk.gsub( /^(\S)/, "\t\\1" )
                    if plain
                        # indented part of a plain block: moved after the block
                        code_blk = iblk; ""
                    else
                        iblk
                    end
                end

                block_applied = 0
                @rules.each do |rule_name|
                    if rule_name.to_s.match( /^block_/ ) and method( rule_name ).call( blk )
                        block_applied += 1
                    end
                end
                if block_applied.zero?
                    if deep_code
                        blk = "\t<pre><code>#{ blk }</code></pre>"
                    else
                        blk = "\t<p>#{ blk }</p>"
                    end
                end
                # hard_break blk
                blk + "\n#{ code_blk }"
            end
        end
    end

    text.replace( rendered.join( "\n\n" ) )
end
646 647
# Handler for "bq." blocks: wraps _content_ in <blockquote><p>.
# _cite_ is resolved through check_refs and, when present, written as
# the blockquote's cite attribute; _atts_ go on the inner paragraph.
def textile_bq( tag, atts, cite, content )
    cite = check_refs( cite ).first
    cite = " cite=\"#{ cite }\"" if cite
    atts = shelve( atts ) if atts
    "\t<blockquote#{ cite }>\n\t\t<p#{ atts }>#{ content }</p>\n\t</blockquote>"
end
653 654
# Handler for simple prefixed blocks ("p.", "h1." ... "h6."): wraps
# _content_ in an element named after _tag_.  _cite_ is accepted for
# signature compatibility with the other handlers and is not used.
def textile_p( tag, atts, cite, content )
    atts = shelve( atts ) if atts
    "\t<#{ tag }#{ atts }>#{ content }</#{ tag }>"
end

# headers share the paragraph handler; the element name comes from _tag_
alias textile_h1 textile_p
alias textile_h2 textile_p
alias textile_h3 textile_p
alias textile_h4 textile_p
alias textile_h5 textile_p
alias textile_h6 textile_p
665 666
# Handler for footnote blocks ("fn1.", "fn2.", ...): renders a paragraph
# with id "fn<num>" whose content starts with the superscripted number.
# _atts_ is not modified; a nil _atts_ is treated as no attributes.
def textile_fn_( tag, num, atts, cite, content )
    atts = shelve( "#{ atts } id=\"fn#{ num }\"" )
    "\t<p#{ atts }><sup>#{ num }</sup> #{ content }</p>"
end
672 673
# Prefixed block: 1 = full tag (e.g. "fn1"), 2 = letters, 3 = digits,
# 4 = attributes, 5 = cite, 6 = content.
BLOCK_RE = /^(([a-z]+)(\d*))(#{A}#{C})\.(?::(\S+))? (.*)$/m
674 675
# Handles blocks that start with a Textile prefix ("p.", "bq.", "h2.",
# "fn1.", ...) by dispatching to the matching textile_<tag> handler,
# or to textile_<letters>_ for numbered prefixes.
#
# Replaces the block inside _text_ (in place).  Returns nil when _text_
# has no prefix or no handler exists for it.
def block_textile_prefix( text )
    return unless text =~ BLOCK_RE

    matched = $&
    tag, tagpre, num, atts, cite, content = $~[1..6]
    atts = pba( atts )

    # pass to prefix handler
    html =
        if respond_to? "textile_#{ tag }", true
            method( "textile_#{ tag }" ).call( tag, atts, cite, content )
        elsif respond_to? "textile_#{ tagpre }_", true
            method( "textile_#{ tagpre }_" ).call( tagpre, num, atts, cite, content )
        end
    return unless html

    # block form: the generated HTML is inserted verbatim, backslashes
    # in it are not treated as back-references
    text.gsub!( matched ) { html }
end
688 689
# Setext header: a line of text underlined with "=" or "-" characters.
SETEXT_RE = /\A(.+?)\n([=-])[=-]* *$/m

# Converts a leading setext header in _text_ to <h1> ("=" underline) or
# <h2> ("-" underline), processes the remainder as blocks and replaces
# _text_ with the result.  Returns nil when _text_ has no such header.
def block_markdown_setext( text )
    return unless text =~ SETEXT_RE

    title, underline, cont = $1, $2, $'
    tag = ( underline == "=" ) ? "h1" : "h2"
    blocks cont
    text.replace( "<#{ tag }>#{ title }</#{ tag }>" + cont )
end
698 699
ATX_RE = /\A(\#{1,6})  # $1 = string of #'s
          [ ]*
          (.+?)       # $2 = Header text
          [ ]*
          \#*         # optional closing #'s (not counted)
          $/x

# Converts a leading atx header ("# Title", "## Title ##", ...) in
# _text_ to <h1>..<h6>, the level being the number of leading "#".
# The remainder is processed as blocks and _text_ is replaced with the
# result.  Returns nil when _text_ does not start with such a header.
def block_markdown_atx( text )
    return unless text =~ ATX_RE

    level, title, cont = $1.length, $2, $'
    tag = "h#{ level }"
    blocks cont
    text.replace( "<#{ tag }>#{ title }</#{ tag }>\n\n" + cont )
end
713 714
# Leading run of lines quoted with ">".
MARKDOWN_BQ_RE = /\A(^ *> ?.+$(.+\n)*\n*)+/m

# Converts a leading Markdown blockquote in _text_ (in place): the ">"
# markers are removed, the quoted text is processed as blocks, indented
# with a tab and wrapped in <blockquote>.
def block_markdown_bq( text )
    text.gsub!( MARKDOWN_BQ_RE ) do |quoted|
        quoted.gsub!( /^ *> ?/, '' )
        flush_left quoted
        blocks quoted
        quoted.gsub!( /^(\S)/, "\t\\1" )
        "<blockquote>\n#{ quoted }\n</blockquote>\n\n"
    end
end
725 726
# A line made of three or more "*", "-" or "_", each optionally
# surrounded by single spaces.
MARKDOWN_RULE_RE = /^(#{
    ['*', '-', '_'].collect { |ch| '( ?' + Regexp::quote( ch ) + ' ?){3,}' }.join( '|' )
})$/

# Replaces Markdown horizontal rule lines in _text_ with "<hr />" (in place).
def block_markdown_rule( text )
    text.gsub!( MARKDOWN_RULE_RE ) { "<hr />" }
end
735 736
# XXX TODO XXX
# Markdown lists are not implemented: this rule leaves _text_ untouched
# and returns nil, so it never counts as having handled a block.
def block_markdown_lists( text )
end
739 740
740 741 def inline_textile_span( text )
741 742 QTAGS.each do |qtag_rc, ht, qtag_re, rtype|
742 743 text.gsub!( qtag_re ) do |m|
743 744
744 745 case rtype
745 746 when :limit
746 747 sta,qtag,atts,cite,content = $~[1..5]
747 748 else
748 749 qtag,atts,cite,content = $~[1..4]
749 750 sta = ''
750 751 end
751 752 atts = pba( atts )
752 753 atts << " cite=\"#{ cite }\"" if cite
753 754 atts = shelve( atts ) if atts
754 755
755 756 "#{ sta }<#{ ht }#{ atts }>#{ content }</#{ ht }>"
756 757
757 758 end
758 759 end
759 760 end
760 761
# Textile link: "text (title)":url
LINK_RE = /
        ([\s\[{(]|[#{PUNCT}])?     # $pre
        "                          # start
        (#{C})                     # $atts
        ([^"]+?)                   # $text
        \s?
        (?:\(([^)]+?)\)(?="))?     # $title
        ":
        (\S+?)                     # $url
        (\/)?                      # $slash
        ([^\w\/;]*?)               # $post
        (?=<|\s|$)
    /x
774 775
# Converts Textile links ("text (title)":url) in _text_ to <a> elements
# (in place).  The url is resolved through check_refs, so it may be a
# reference alias; a title defined with the reference is used when the
# link has none.  Urls starting with "http://" get class="external".
def inline_textile_link( text )
    text.gsub!( LINK_RE ) do |m|
        pre, atts, link_text, title, url, slash, post = $~[1..7]

        url, url_title = check_refs( url )
        title ||= url_title

        atts = pba( atts )
        atts = " href=\"#{ url }#{ slash }\"#{ atts }"
        atts << " title=\"#{ title }\"" if title
        atts = shelve( atts ) if atts

        external = (url =~ /^http:\/\//) ? ' class="external"' : ''

        "#{ pre }<a#{ atts }#{ external }>#{ link_text }</a>#{ post }"
    end
end
792 793
# Markdown reference link: [text][id] (the id may be empty)
MARKDOWN_REFLINK_RE = /
        \[([^\[\]]+)\]      # $text
        [ ]?                # opt. space
        (?:\n[ ]*)?         # one optional newline followed by spaces
        \[(.*?)\]           # $id
    /x

# Converts Markdown reference links in _text_ to <a> elements (in
# place).  The reference is looked up through check_refs by its id, or
# by the link text when the id is empty.
def inline_markdown_reflink( text )
    text.gsub!( MARKDOWN_REFLINK_RE ) do |m|
        link_text, id = $~[1..2]

        url, title = check_refs( id.empty? ? link_text : id )

        atts = " href=\"#{ url }\""
        atts << " title=\"#{ title }\"" if title
        atts = shelve( atts )

        "<a#{ atts }>#{ link_text }</a>"
    end
end
817 818
# Markdown inline link: [text](url "title")
MARKDOWN_LINK_RE = /
        \[([^\[\]]+)\]      # $text
        \(                  # open paren
        [ \t]*              # opt space
        <?(.+?)>?           # $href
        [ \t]*              # opt space
        (?:                 # whole title
        (['"])              # $quote
        (.*?)               # $title
        \3                  # matching quote
        )?                  # title is optional
        \)
    /x

# Converts Markdown inline links in _text_ to <a> elements (in place).
def inline_markdown_link( text )
    text.gsub!( MARKDOWN_LINK_RE ) do |m|
        link_text, url, quote, title = $~[1..4]

        atts = " href=\"#{ url }\""
        atts << " title=\"#{ title }\"" if title
        atts = shelve( atts )

        "<a#{ atts }>#{ link_text }</a>"
    end
end
843 844
# Reference definitions: 2 = alias, 3 = url (Markdown: 6 = optional title).
# Textile form:  [alias]http://example.com/
TEXTILE_REFS_RE = /(^ *)\[([^\[\n]+?)\](#{HYPERLINK})(?=\s|$)/
# Markdown form: [alias]: http://example.com/ "Title"
MARKDOWN_REFS_RE = /(^ *)\[([^\n]+?)\]:\s+<?(#{HYPERLINK})>?(?:\s+"((?:[^"]|\\")+)")?(?=\s|$)/m
846 847
# Runs every rule from @rules whose name starts with "refs_" over
# _text_, in the order the rules are listed.
def refs( text )
    @rules.each do |rule_name|
        next unless rule_name.to_s.match( /^refs_/ )
        method( rule_name ).call( text )
    end
end
852 853
# Collects Textile reference definitions ("[alias]url") from _text_:
# each one is stored in @urlrefs under its downcased alias as
# [url, nil] and removed from _text_ (in place).
def refs_textile( text )
    text.gsub!( TEXTILE_REFS_RE ) do |m|
        flag, url = $~[2..3]
        @urlrefs[flag.downcase] = [url, nil]
        # nil: the definition itself is replaced by nothing
        nil
    end
end
860 861
# Collects Markdown reference definitions ('[alias]: url "title"') from
# _text_: each one is stored in @urlrefs under its downcased alias as
# [url, title] and removed from _text_ (in place).
def refs_markdown( text )
    text.gsub!( MARKDOWN_REFS_RE ) do |m|
        flag, url = $~[2..3]
        # groups 4 and 5 belong to the url pattern; 6 is the quoted title
        title = $~[6]
        @urlrefs[flag.downcase] = [url, title]
        # nil: the definition itself is replaced by nothing
        nil
    end
end
869 870
# Looks up _text_ (case-insensitively) among the collected reference
# definitions.  Returns the stored [url, title] pair, or [text, nil]
# when _text_ is nil or not a known alias.
def check_refs( text )
    found = text ? @urlrefs[text.downcase] : nil
    found || [text, nil]
end
874 875
# Textile image: !url(title)! with optional alignment, attributes and
# a trailing :href
IMAGE_RE = /
        (<p>|.|^)            # start of line?
        \!                   # opening
        (\<|\=|\>)?          # optional alignment atts
        (#{C})               # optional style,class atts
        (?:\. )?             # optional dot-space
        ([^\s(!]+?)          # presume this is the src
        \s?                  # optional space
        (?:\(((?:[^\(\)]|\([^\)]+\))+?)\))?   # optional title
        \!                   # closing
        (?::#{ HYPERLINK })? # optional href
    /x
887 888
# Converts Textile images (!url(title)!, optionally followed by :href)
# in _text_ to <img> elements (in place).
#
# Both the image url and the href are resolved through check_refs, so
# either may be a reference alias.  The title, when given, is used for
# both the title and alt attributes; alt is always written.  An aligned
# image is floated: on the leading "<p>" when there is one, otherwise
# through a wrapping <div>.
def inline_textile_image( text )
    text.gsub!( IMAGE_RE ) do |m|
        stln, algn, atts, url, title, href, href_a1 = $~[1..7]

        # resolve aliases first so the attributes below see the real urls
        href = check_refs( href ).first if href
        url = check_refs( url ).first

        atts = pba( atts )
        atts = " src=\"#{ url }\"#{ atts }"
        atts << " title=\"#{ title }\"" if title
        atts << " alt=\"#{ title }\""
        # size = @getimagesize($url);
        # if($size) $atts.= " $size[3]";

        parts = []
        parts << "<a#{ shelve( " href=\"#{ href }\"" ) }>" if href
        parts << "<img#{ shelve( atts ) } />"
        # href_a1 is the punctuation that trailed the href
        parts << "</a>#{ href_a1 }" if href
        out = parts.join

        if algn
            algn = h_align( algn )
            if stln == "<p>"
                "<p style=\"float:#{ algn }\">#{ out }"
            else
                "#{ stln }<div style=\"float:#{ algn }\">#{ out }</div>"
            end
        else
            stln + out
        end
    end
end
920 921
# Stores _val_ on @shelf and returns the placeholder that stands for it
# (" :redsh#N:", N being its 1-based position).  retrieve swaps the
# placeholders back.
def shelve( val )
    @shelf << val
    " :redsh##{ @shelf.length }:"
end
925 926
# Replaces every " :redsh#N:" placeholder in _text_ with the N-th
# value of @shelf (in place).  Values are inserted verbatim.
def retrieve( text )
    @shelf.each_with_index do |shelved, i|
        # block form: backslashes in the shelved value are not treated
        # as back-references
        text.gsub!( " :redsh##{ i + 1 }:" ) { shelved }
    end
end
931 932
# Turns any incoming ampersand into the dummy sequence "x%x%" for now
# (in place).  Ampersands that start what looks like an HTML entity
# (alphanumerics or "#" followed by a semicolon) are skipped.
def incoming_entities( text )
    text.gsub!( /&(?![#a-z0-9]+;)/i, "x%x%" )
end
939 940
# Wraps ==...== spans of _text_ in <notextile> tags (in place).
#
# The first pass handles spans preceded by whitespace or a line start;
# the second one (multi-line) picks up line-leading spans the first pass
# could not match, e.g. content with an "=" before a newline.
def no_textile( text )
    text.gsub!( /(^|\s)==([^=]+.*?)==(\s|$)?/,
        '\1<notextile>\2</notextile>\3' )
    # this pattern has a single group: the content
    text.gsub!( /^ *==([^=]+.*?)==/m,
        '<notextile>\1</notextile>' )
end
946 947
# Normalizes the whitespace of _text_ (in place): unifies line breaks,
# replaces tabs, empties space-only lines, collapses runs of blank
# lines, pads a double quote at the end of a line with a space and
# finally removes indentation shared by the whole document.
def clean_white_space( text )
    # normalize line breaks
    text.gsub!( /\r\n/, "\n" )
    text.gsub!( /\r/, "\n" )
    # NOTE(review): the replacement is a single space as it appears in this
    # copy of the file; confirm against upstream, the viewer this was
    # extracted from may have collapsed a longer run of spaces.
    text.gsub!( /\t/, ' ' )
    text.gsub!( /^ +$/, '' )
    text.gsub!( /\n{3,}/, "\n\n" )
    text.gsub!( /"$/, "\" " )

    # if entire document is indented, flush
    # to the left side
    flush_left text
end
960 961
# Removes the indentation shared by all lines of _text_ (in place).
#
# The amount removed is the smallest number of leading spaces found on
# a line that has a non-space character.  Nothing happens when no line
# starts with a space, when that minimum is zero, or when _text_ has
# no line with a non-space character at all.
def flush_left( text )
    return unless text =~ /^ /

    indt = text.scan( /^( *)\S/ ).collect { |lead| lead.first.length }.min
    return if indt.nil? or indt.zero?

    text.gsub!( /^ {#{ indt }}/, '' )
end
972 973
# Turns footnote markers such as "word[1]" in _text_ into superscript
# links to "#fn1" (in place).  The marker must directly follow a word
# character.
def footnote_ref( text )
    text.gsub!( /\b\[([0-9]+?)\](\s)?/,
        '<sup><a href="#fn\1">\1</a></sup>\2' )
end
977 978
# Tags whose content is exempt from Textile processing.
OFFTAGS = /(code|pre|kbd|notextile)/
# An opening or closing off-tag plus the text that follows it, up to
# the next off-tag or the end of the text.
OFFTAG_MATCH = /(?:(<\/#{ OFFTAGS }>)|(<#{ OFFTAGS }[^>]*>))(.*?)(?=<\/?#{ OFFTAGS }|\Z)/mi
OFFTAG_OPEN = /<#{ OFFTAGS }/
OFFTAG_CLOSE = /<\/?#{ OFFTAGS }/
# Any single-line HTML tag / a tag or the text up to the next tag.
HASTAG_MATCH = /(<\/?\w[^\n]*?>)/m
ALLTAG_MATCH = /(<\/?\w[^\n]*?>)|.*?(?=<\/?\w[^\n]*?>|$)/m
984 985
# Applies glyph replacement (pgl) and footnote references to the parts
# of _text_ that lie outside HTML tags (in place).
#
# Text without any tag is processed directly.  Otherwise _text_ is
# walked tag by tag: tags are kept as they are, text between them is
# processed recursively, and text inside <code>, <pre>, <kbd> or
# <notextile> is only HTML-escaped.  _level_ is the recursion depth
# (passed along, not otherwise used).
def glyphs_textile( text, level = 0 )
    if text !~ HASTAG_MATCH
        pgl text
        footnote_ref text
    else
        codepre = 0
        text.gsub!( ALLTAG_MATCH ) do |line|
            # group 1 is only set when the match is a tag; read it before
            # any other pattern match replaces the match data
            is_tag = $1
            ## matches are off if we're between <code>, <pre> etc.
            if is_tag
                if line =~ OFFTAG_OPEN
                    codepre += 1
                elsif line =~ OFFTAG_CLOSE
                    codepre -= 1
                    codepre = 0 if codepre < 0
                end
            elsif codepre.zero?
                glyphs_textile( line, level + 1 )
            else
                htmlesc( line, :NoQuotes )
            end
            # p [level, codepre, line]

            line
        end
    end
end
1011 1012
# Moves off-tag sections (see OFFTAG_MATCH) out of +text+ into @pre_list,
# leaving "<redpre#N>" placeholders behind, where N is the index of the stored
# section in @pre_list. smooth_offtags substitutes the stored sections back.
#
# +text+ is modified in place and is also the return value.
1012 1013 def rip_offtags( text )
# Only scan when the text contains something that looks like a tag.
1013 1014 if text =~ /<.*>/
1014 1015 ## strip and encode <pre> content
# codepre counts the off-tags currently open; used_offtags records which
# distinct off-tag names have been opened since the count was last zero.
1015 1016 codepre, used_offtags = 0, {}
1016 1017 text.gsub!( OFFTAG_MATCH ) do |line|
# $3 is set when the match starts with an opening off-tag; $4 is then the tag
# name and $5 the text that follows the tag.
1017 1018 if $3
1018 1019 offtag, aftertag = $4, $5
1019 1020 codepre += 1
1020 1021 used_offtags[offtag] = true
# More open tags than distinct names: this tag was opened inside an off-tag
# section that is already being collected, so the whole match is appended to
# the current @pre_list entry and removed from the text.
1021 1022 if codepre - used_offtags.length > 0
# NOTE(review): htmlesc is defined outside this block; presumably it escapes
# its argument in place -- confirm.
1022 1023 htmlesc( line, :NoQuotes ) unless used_offtags['notextile']
1023 1024 @pre_list.last << line
1024 1025 line = ""
1025 1026 else
# Otherwise start a new stored section: the placeholder carries the index the
# new @pre_list entry is about to receive.
1026 1027 htmlesc( aftertag, :NoQuotes ) if aftertag and not used_offtags['notextile']
1027 1028 line = "<redpre##{ @pre_list.length }>"
1028 1029 @pre_list << "#{ $3 }#{ aftertag }"
1029 1030 end
# $1 is set when the match starts with a closing off-tag; it is only handled
# while at least one off-tag is open.
1030 1031 elsif $1 and codepre > 0
# A closing tag of a nested section is appended to the stored section; in the
# other case the match is left in the text unchanged.
1031 1032 if codepre - used_offtags.length > 0
1032 1033 htmlesc( line, :NoQuotes ) unless used_offtags['notextile']
1033 1034 @pre_list.last << line
1034 1035 line = ""
1035 1036 end
1036 1037 codepre -= 1 unless codepre.zero?
# All off-tags closed: forget the recorded tag names.
1037 1038 used_offtags = {} if codepre.zero?
1038 1039 end
# The block value replaces the matched span in the text.
1039 1040 line
1040 1041 end
1041 1042 end
1042 1043 text
1043 1044 end
1044 1045
# Puts stored off-tag sections back into +text+, in place: every
# "<redpre#N>" placeholder is replaced by entry N of @pre_list.
#
# Returns nil when @pre_list is empty, otherwise the result of gsub!.
def smooth_offtags( text )
    return if @pre_list.empty?

    ## replace <pre> content
    text.gsub!( /<redpre#(\d+)>/ ) do
        index = $~[1].to_i
        @pre_list[index]
    end
end
1051 1052
# Runs the inline-level rules on +text+. Two passes are made over @rules:
# first every rule whose name starts with "inline_", then every rule whose
# name starts with "glyphs_". Each selected rule is looked up as a method
# and called with +text+.
def inline( text )
    [/^inline_/, /^glyphs_/].each do |prefix_re|
        @rules.each do |rule|
            next unless rule.to_s =~ prefix_re
            method( rule ).call( text )
        end
    end
end
1059 1060
# Returns the H_ALGN_VALS entry stored under +text+.
# NOTE(review): H_ALGN_VALS is defined outside this section; presumably it maps
# Textile horizontal-alignment markers to alignment names -- confirm.
1060 1061 def h_align( text )
1061 1062 H_ALGN_VALS[text]
1062 1063 end
1063 1064
# Returns the V_ALGN_VALS entry stored under +text+.
# NOTE(review): V_ALGN_VALS is defined outside this section; presumably it maps
# Textile vertical-alignment markers to alignment names -- confirm.
1064 1065 def v_align( text )
1065 1066 V_ALGN_VALS[text]
1066 1067 end
1067 1068
# Builds the HTML for a help link that opens the Textile reference in a
# popup window.
#
# name::    link text
# windowW:: popup width (String or Integer)
# windowH:: popup height (String or Integer)
# helpvar:: anchor on the reference page; defaults to +name+
#
# The body used to read an undefined local called helpvar, so every call
# raised NameError; it is now an optional trailing parameter, which keeps
# existing three-argument calls valid. Interpolation replaces String#+ so
# that numeric sizes no longer raise TypeError.
#
# Returns the link markup as a String (leading space and trailing <br />
# included).
def textile_popup_help( name, windowW, windowH, helpvar = name )
    onclick = "window.open(this.href, 'popupwindow', " \
              "'width=#{ windowW },height=#{ windowH },scrollbars,resizable'); return false;"
    %Q{ <a target="_blank" href="http://hobix.com/textile/##{ helpvar }" onclick="#{ onclick }">#{ name }</a><br />}
end
1071 1072
1072 1073 # HTML cleansing stuff
# Default whitelist for clean_html: tag name => names of the attributes that
# are kept on that tag. A nil or empty list means the tag is kept without
# any attributes. Tags that are not listed are removed by clean_html.
BASIC_TAGS = {
    'a'          => %w(href title),
    'img'        => %w(src alt title),
    'br'         => [],
    'i'          => nil, 'u'      => nil, 'b'   => nil,
    'pre'        => nil, 'kbd'    => nil,
    'code'       => %w(lang),
    'cite'       => nil, 'strong' => nil, 'em'  => nil,
    'ins'        => nil, 'sup'    => nil, 'sub' => nil, 'del' => nil,
    'table'      => nil, 'tr'     => nil,
    'td'         => %w(colspan rowspan),
    'th'         => nil,
    'ol'         => nil, 'ul'     => nil, 'li'  => nil,
    'p'          => nil,
    'h1'         => nil, 'h2'     => nil, 'h3'  => nil,
    'h4'         => nil, 'h5'     => nil, 'h6'  => nil,
    'blockquote' => %w(cite)
}
1106 1107
# Strips HTML from +text+ (in place) down to a whitelist.
#
# text:: the HTML to clean
# tags:: Hash of tag name => Array of attribute names to keep (nil or an
#        empty Array keeps the tag without attributes); defaults to
#        BASIC_TAGS
#
# "<![CDATA[" openers are removed. Every tag whose lower-cased name is not a
# key of +tags+ is replaced by a single space. Whitelisted tags are rebuilt
# from the tag name plus the whitelisted attributes found on them; attribute
# values may be double-quoted, single-quoted or unquoted.
#
# Returns the result of the tag gsub! (nil when the text held no tags).
#
# NOTE(review): only +src+ values are checked for their URL scheme; +href+
# values such as "javascript:..." pass through unchanged.
def clean_html( text, tags = BASIC_TAGS )
    text.gsub!( /<!\[CDATA\[/, '' )
    text.gsub!( /<(\/*)(\w+)([^>]*)>/ ) do
        slashes, tag, attrs = $1, $2.downcase, $3
        next " " unless tags.has_key? tag

        pcs = [tag]
        ( tags[tag] || [] ).each do |prop|
            ['"', "'", ''].each do |q|
                q2 = ( q != '' ? q : '\s' )
                next unless attrs =~ /#{prop}\s*=\s*#{q}([^#{q2}]+)#{q}/i
                # Keep the value in a local: the scheme test below runs
                # another match and resets $1 (to nil when it fails), which
                # made the old "$1.gsub" raise NoMethodError for every
                # acceptable src value.
                attrv = $1
                # Drop src values with a scheme other than http(s). Leaving
                # the quote loop here (rather than trying the next quote
                # style) stops the unquoted pattern from re-admitting the
                # same value together with its quote characters.
                break if prop == 'src' and attrv =~ %r{^(?!http)\w+:}
                # A backslash does not escape a quote in HTML; use the
                # entity so a value cannot close the attribute early.
                pcs << "#{prop}=\"#{attrv.gsub('"', '&quot;')}\""
                break
            end
        end
        "<#{slashes}#{pcs.join " "}>"
    end
end
1132
# Tag names that escape_html_tags leaves intact.
ALLOWED_TAGS = ['redpre', 'pre']

# Neutralizes HTML tags in +text+, in place: the '<' that opens a tag
# (optionally followed by '/') is replaced by "&lt;" unless the tag name is
# listed in ALLOWED_TAGS.
#
# Returns the result of gsub! (nil when nothing matched).
def escape_html_tags(text)
    text.gsub!(%r{<((\/?)(\w+))}) do
        rest, tag_name = $1, $3
        opener = ALLOWED_TAGS.include?(tag_name) ? '<' : '&lt;'
        "#{opener}#{rest}"
    end
end
1131 1138 end
1132 1139
General Comments 0
You need to be logged in to leave comments. Login now