# c.rb | 203 lines | 5.3 KiB | text/x-ruby | RubyLexer
module CodeRay
module Scanners

  # Scanner for C source code.
  #
  # Registers itself for the language id :c and the file extension 'c'.
  # #scan_tokens appends [text, kind] pairs to the token list it is given;
  # string literals are additionally wrapped in [:open, :string] /
  # [:close, :string] marker pairs.
  class C < Scanner

    include Streamable

    register_for :c
    file_extension 'c'

    RESERVED_WORDS = [
      'asm', 'break', 'case', 'continue', 'default', 'do',
      'else', 'enum', 'for', 'goto', 'if', 'return',
      'sizeof', 'struct', 'switch', 'typedef', 'union', 'while',
      'restrict',  # added in C99
    ]

    PREDEFINED_TYPES = [
      'int', 'long', 'short', 'char',
      'signed', 'unsigned', 'float', 'double',
      'bool', 'complex',  # added in C99
    ]

    PREDEFINED_CONSTANTS = [
      'EOF', 'NULL',
      'true', 'false',  # added in C99
    ]

    DIRECTIVES = [
      'auto', 'extern', 'register', 'static', 'void',
      'const', 'volatile',  # added in C89
      'inline',  # added in C99
    ]

    # Maps an identifier to its token kind; anything not listed is :ident.
    IDENT_KIND = WordList.new(:ident).
      add(RESERVED_WORDS, :reserved).
      add(PREDEFINED_TYPES, :pre_type).
      add(DIRECTIVES, :directive).
      add(PREDEFINED_CONSTANTS, :pre_constant)

    # What may follow a backslash inside a string or character literal:
    # the simple escapes (including \a and \?), a backslash-newline line
    # continuation, a hex escape, or an octal escape.
    ESCAPE = / [abfnrtv\n\\'"?] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x

    # Optional unsigned / long / long long suffix of an integer literal
    # (u, l, ll, ul, ull, lu, llu in either case).
    INTEGER_SUFFIX = / [uU][lL]{0,2} | [lL]{1,2}[uU]? /x

    # Tokenizes the scanner's input and appends the tokens to +tokens+.
    #
    # tokens::  the token list to append to; it only needs to respond to <<.
    # options:: accepted for interface compatibility; not read by this method.
    #
    # Returns +tokens+.
    #
    # Local state:
    # state::           :initial, :string (inside "..."), or
    #                   :include_expected (directly after #include).
    # label_expected::  truthy when an identifier followed by a single ':'
    #                   should be read as a label (start of input, after
    #                   ';', '{' or '}', or after the ':' of case/default).
    # case_expected::   true between 'case' / 'default' and the next operator.
    # in_preproc_line:: true from a '#' directive to the first whitespace
    #                   run containing a newline that is not a
    #                   backslash-newline continuation.
    def scan_tokens tokens, options

      state = :initial
      label_expected = true
      case_expected = false
      label_expected_before_preproc_line = nil
      in_preproc_line = false

      until eos?

        kind = nil
        match = nil

        case state

        when :initial

          if match = scan(/ \s+ | \\\n /x)
            # A real newline (not a continuation) ends the directive line and
            # restores the label state that was saved when it began.
            if in_preproc_line && match != "\\\n" && match.index(?\n)
              in_preproc_line = false
              label_expected = label_expected_before_preproc_line
            end
            tokens << [match, :space]
            next

          elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
            # Line comment (with backslash continuations) or block comment;
            # an unterminated block comment runs to the end of the input.
            kind = :comment

          elsif match = scan(/ \# [ \t]* if [ \t]+ 0 \b /x)
            # "#if 0" disables code: everything up to and including the next
            # #elif / #else / #endif line (or the end of input) becomes one
            # comment. The \b keeps "#if 0x10" or "#if 01" from qualifying,
            # and the terminator may be indented or written as "# endif".
            # Nested conditionals are not tracked: the first such line ends
            # the comment.
            match << scan_until(/ ^[ \t]* \# [ \t]* (?:elif|else|endif) .*? $ | \z /xm) unless eos?
            kind = :comment

          elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
            label_expected = match =~ /[;\{\}]/
            if case_expected
              label_expected = true if match == ':'
              case_expected = false
            end
            kind = :operator

          elsif match = scan(/ (?! L["'] ) [A-Za-z_][A-Za-z_0-9]* /x)
            # The lookahead leaves the L of a wide string or character
            # literal (L"..." / L'x') for the literal branches below.
            kind = IDENT_KIND[match]
            if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/)
              kind = :label
              match << matched
            else
              label_expected = false
              if kind == :reserved
                case match
                when 'case', 'default'
                  case_expected = true
                end
              end
            end

          elsif scan(/\$/)
            kind = :ident

          elsif match = scan(/L?"/)
            tokens << [:open, :string]
            if match[0] == ?L
              tokens << ['L', :modifier]
              match = '"'
            end
            state = :string
            kind = :delimiter

          elsif scan(/#[ \t]*(\w*)/)
            kind = :preprocessor
            in_preproc_line = true
            label_expected_before_preproc_line = label_expected
            state = :include_expected if self[1] == 'include'

          elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
            label_expected = false
            kind = :char

          elsif scan(/0[xX][0-9A-Fa-f]+(?:#{INTEGER_SUFFIX})?/o)
            label_expected = false
            kind = :hex

          elsif scan(/(?>0[0-7]+)(?![89.eEfF])(?:#{INTEGER_SUFFIX})?/o)
            # Atomic group: the digit run must not shrink to satisfy the
            # lookahead, otherwise "012.5" would yield the octal "01".
            label_expected = false
            kind = :oct

          elsif scan(/(?>\d+)(?![.eEfF])(?:#{INTEGER_SUFFIX})?/o)
            # Atomic for the same reason: "12.5" must not yield the integer "1".
            label_expected = false
            kind = :integer

          elsif scan(/\d*\.\d+(?:[eE][+-]?\d+)?[fFlL]?|\d+[eE][+-]?\d+[fFlL]?|\d+[fF]?/)
            # Longest forms first so "1.5" and "1e5" stay one token; the bare
            # digit run is the fallback for anything the integer rule refused.
            label_expected = false
            kind = :float

          else
            getch
            kind = :error

          end

        when :string
          if scan(/[^\\\n"]+/)
            kind = :content
          elsif scan(/"/)
            tokens << ['"', :delimiter]
            tokens << [:close, :string]
            state = :initial
            label_expected = false
            next
          elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            kind = :char
          elsif scan(/ \\ | $ /x)
            # Unknown escape or end of line inside the literal: close the
            # string and flag the offending text as an error.
            tokens << [:close, :string]
            kind = :error
            state = :initial
            label_expected = false
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
          end

        when :include_expected
          if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
            kind = :include
            state = :initial

          elsif match = scan(/\s+/)
            kind = :space
            state = :initial if match.index ?\n

          else
            # Not a file name: rescan from the same position as ordinary code.
            state = :initial
            next

          end

        else
          raise_inspect 'Unknown state', tokens

        end

        # Branches that called scan without keeping the result rely on the
        # scanner's last match.
        match ||= matched
        if $CODERAY_DEBUG && !kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens
        end
        raise_inspect 'Empty token', tokens unless match

        tokens << [match, kind]

      end

      # Input ended inside a string literal: close the open group.
      if state == :string
        tokens << [:close, :string]
      end

      tokens
    end

  end

end
end