A tokenizer for the Ruby language. It recognizes all common syntax (and some less common syntax), but because it is not a true lexer, it will make mistakes in some ambiguous cases.

Methods
Constants
# Identifiers that are tokenized as keywords. The array is frozen so the
# shared list cannot be modified by accident at runtime.
KEYWORDS = %w[if then elsif else end begin do rescue ensure while for class module def yield raise until unless and or not when case super undef break next redo retry in return alias defined?].freeze
  The list of all identifiers recognized as keywords.
Public Instance methods
setup()

Perform Ruby-specific setup.

    # File lib/syntax/lang/ruby.rb, line 18
18:     def setup
19:       @selector = false
20:       @allow_operator = false
21:       @heredocs = []
22:     end
step()

Step through a single iteration of the tokenization process.

     # File lib/syntax/lang/ruby.rb, line 25
 25:     def step
 26:       case
 27:         when bol? && check( /=begin/ )
 28:           start_group( :comment, scan_until( /^=end#{EOL}/ ) )
 29:         when bol? && check( /__END__#{EOL}/ )
 30:           start_group( :comment, scan_until( /\Z/ ) )
 31:       else
 32:         case
 33:           when check( /def\s+/ )
 34:             start_group :keyword, scan( /def\s+/ )
 35:             start_group :method,  scan_until( /(?=[;(\s]|#{EOL})/ )
 36:           when check( /class\s+/ )
 37:             start_group :keyword, scan( /class\s+/ )
 38:             start_group :class,  scan_until( /(?=[;\s<]|#{EOL})/ )
 39:           when check( /module\s+/ )
 40:             start_group :keyword, scan( /module\s+/ )
 41:             start_group :module,  scan_until( /(?=[;\s]|#{EOL})/ )
 42:           when check( /::/ )
 43:             start_group :punct, scan(/::/)
 44:           when check( /:"/ )
 45:             start_group :symbol, scan(/:/)
 46:             scan_delimited_region :symbol, :symbol, "", true
 47:             @allow_operator = true
 48:           when check( /:'/ )
 49:             start_group :symbol, scan(/:/)
 50:             scan_delimited_region :symbol, :symbol, "", false
 51:             @allow_operator = true
 52:           when scan( /:[_a-zA-Z@$][$@\w]*[=!?]?/ )
 53:             start_group :symbol, matched
 54:             @allow_operator = true
 55:           when scan( /\?(\\[^\n\r]|[^\\\n\r\s])/ )
 56:             start_group :char, matched
 57:             @allow_operator = true
 58:           when check( /(__FILE__|__LINE__|true|false|nil|self)[?!]?/ )
 59:             if @selector || matched[-1] == ?? || matched[-1] == ?!
 60:               start_group :ident,
 61:                 scan(/(__FILE__|__LINE__|true|false|nil|self)[?!]?/)
 62:             else
 63:               start_group :constant,
 64:                 scan(/(__FILE__|__LINE__|true|false|nil|self)/)
 65:             end
 66:             @selector = false
 67:             @allow_operator = true
 68:           when scan(/0([bB][01]+|[oO][0-7]+|[dD][0-9]+|[xX][0-9a-fA-F]+)/)
 69:             start_group :number, matched
 70:             @allow_operator = true
 71:           else
 72:             case peek(2)
 73:               when "%r"
 74:                 scan_delimited_region :punct, :regex, scan( /../ ), true
 75:                 @allow_operator = true
 76:               when "%w", "%q"
 77:                 scan_delimited_region :punct, :string, scan( /../ ), false
 78:                 @allow_operator = true
 79:               when "%s"
 80:                 scan_delimited_region :punct, :symbol, scan( /../ ), false
 81:                 @allow_operator = true
 82:               when "%W", "%Q", "%x"
 83:                 scan_delimited_region :punct, :string, scan( /../ ), true
 84:                 @allow_operator = true
 85:               when /%[^\sa-zA-Z0-9]/
 86:                 scan_delimited_region :punct, :string, scan( /./ ), true
 87:                 @allow_operator = true
 88:               when "<<"
 89:                 saw_word = ( chunk[-1,1] =~ /[\w!?]/ )
 90:                 start_group :punct, scan( /<</ )
 91:                 if saw_word
 92:                   @allow_operator = false
 93:                   return
 94:                 end
 95: 
 96:                 float_right = scan( /-/ )
 97:                 append "-" if float_right
 98:                 if ( type = scan( /['"]/ ) )
 99:                   append type
100:                   delim = scan_until( /(?=#{type})/ )
101:                   if delim.nil?
102:                     append scan_until( /\Z/ )
103:                     return
104:                   end
105:                 else
106:                   delim = scan( /\w+/ ) or return
107:                 end
108:                 start_group :constant, delim
109:                 start_group :punct, scan( /#{type}/ ) if type
110:                 @heredocs << [ float_right, type, delim ]
111:                 @allow_operator = true
112:               else
113:                 case peek(1)
114:                   when /[\n\r]/
115:                     unless @heredocs.empty?
116:                       scan_heredoc(*@heredocs.shift)
117:                     else
118:                       start_group :normal, scan( /\s+/ )
119:                     end
120:                     @allow_operator = false
121:                   when /\s/
122:                     start_group :normal, scan( /\s+/ )
123:                   when "#"
124:                     start_group :comment, scan( /#[^\n\r]*/ )
125:                   when /[A-Z]/
126:                     start_group @selector ? :ident : :constant, scan( /\w+/ )
127:                     @allow_operator = true
128:                   when /[a-z_]/
129:                     word = scan( /\w+[?!]?/ )
130:                     if !@selector && KEYWORDS.include?( word )
131:                       start_group :keyword, word
132:                       @allow_operator = false
133:                     elsif
134:                       start_group :ident, word
135:                       @allow_operator = true
136:                     end
137:                     @selector = false
138:                   when /\d/
139:                     start_group :number,
140:                       scan( /[\d_]+(\.[\d_]+)?([eE][\d_]+)?/ )
141:                     @allow_operator = true
142:                   when '"'
143:                     scan_delimited_region :punct, :string, "", true
144:                     @allow_operator = true
145:                   when '/'
146:                     if @allow_operator
147:                       start_group :punct, scan(%r{/})
148:                       @allow_operator = false
149:                     else
150:                       scan_delimited_region :punct, :regex, "", true
151:                       @allow_operator = true
152:                     end
153:                   when "'"
154:                     scan_delimited_region :punct, :string, "", false
155:                     @allow_operator = true
156:                   when "."
157:                     dots = scan( /\.{1,3}/ )
158:                     start_group :punct, dots
159:                     @selector = ( dots.length == 1 )
160:                   when /[@]/
161:                     start_group :attribute, scan( /@{1,2}\w*/ )
162:                     @allow_operator = true
163:                   when /[$]/
164:                     start_group :global, scan(/\$/)
165:                     start_group :global, scan( /\w+|./ ) if check(/./)
166:                     @allow_operator = true
167:                   when /[-!?*\/+=<>(\[\{}:;,&|%]/
168:                     start_group :punct, scan(/./)
169:                     @allow_operator = false
170:                   when /[)\]]/
171:                     start_group :punct, scan(/./)
172:                     @allow_operator = true
173:                   else
174:                     # all else just falls through this, to prevent
175:                     # infinite loops...
176:                     append getch
177:                 end
178:             end
179:         end
180:       end
181:     end