A tokenizer for the Ruby language. It recognizes all common syntax (and some less common syntax) but because it is not a true lexer, it will make mistakes on some ambiguous cases.
Methods
Constants
KEYWORDS = %w{if then elsif else end begin do rescue ensure while for class module def yield raise until unless and or not when case super undef break next redo retry in return alias defined?}
The list of all identifiers recognized as keywords.
Public Instance methods
Perform ruby-specific setup
[ show source ]
# File lib/syntax/lang/ruby.rb, line 18
#
# Perform ruby-specific setup: reset the state that #step consults and
# updates while tokenizing.
def setup
  # True immediately after a single "." has been scanned, so that the next
  # word is reported as an identifier instead of a keyword or constant.
  @selector = false

  # When true, a following "/" is reported as punctuation (an operator);
  # when false it opens a regex literal.
  @allow_operator = false

  # Pending heredocs as [ float_right, type, delim ] triples; #step hands
  # the oldest one to scan_heredoc when it reaches the next line break.
  @heredocs = []
end
Step through a single iteration of the tokenization process.
[ show source ]
# File lib/syntax/lang/ruby.rb, line 25
#
# Step through a single iteration of the tokenization process.
#
# Looks at the text at the current scan position, consumes one lexical
# element (or, for a few constructs, a short run of related elements) and
# reports it through start_group / append / scan_delimited_region /
# scan_heredoc. Every path consumes at least one character when input
# remains, so repeated calls make progress.
#
# State carried between calls:
# * @selector       - true right after a single "."; the next word is then
#                     a method name, never a keyword or constant.
# * @allow_operator - true when the previous token could end an expression,
#                     so a following "/" is division rather than a regex.
# * @heredocs       - heredocs announced on the current line, consumed at
#                     the next line break.
def step
  case
  when bol? && check( /=begin/ )
    # An unterminated =begin block takes the rest of the input; without the
    # fallback scan_until returns nil and nothing would be consumed.
    start_group( :comment, scan_until( /^=end#{EOL}/ ) || scan_until( /\Z/ ) )
  when bol? && check( /__END__#{EOL}/ )
    start_group( :comment, scan_until( /\Z/ ) )
  else
    case
    # def/class/module only introduce a definition when they are not being
    # called as a method (e.g. "obj.class == Foo").
    when !@selector && check( /def\s+/ )
      start_group :keyword, scan( /def\s+/ )
      start_group :method, scan_until( /(?=[;(\s]|#{EOL})/ )
    when !@selector && check( /class\s+/ )
      start_group :keyword, scan( /class\s+/ )
      start_group :class, scan_until( /(?=[;\s<]|#{EOL})/ )
    when !@selector && check( /module\s+/ )
      start_group :keyword, scan( /module\s+/ )
      start_group :module, scan_until( /(?=[;\s]|#{EOL})/ )
    when check( /::/ )
      start_group :punct, scan(/::/)
    when check( /:"/ )
      start_group :symbol, scan(/:/)
      scan_delimited_region :symbol, :symbol, "", true
      @allow_operator = true
    when check( /:'/ )
      start_group :symbol, scan(/:/)
      scan_delimited_region :symbol, :symbol, "", false
      @allow_operator = true
    when scan( /:[_a-zA-Z@$][$@\w]*[=!?]?/ )
      start_group :symbol, matched
      @allow_operator = true
    when scan( /\?(\\[^\n\r]|[^\\\n\r\s])/ )
      start_group :char, matched
      @allow_operator = true
    # (?!\w) keeps words that merely start with one of these names
    # ("selfish", "nilable") out of this branch.
    when check( /(__FILE__|__LINE__|true|false|nil|self)(?!\w)[?!]?/ )
      if @selector || matched[-1] == ?? || matched[-1] == ?!
        # Used as a method name ("x.nil?", "obj.self"): plain identifier.
        start_group :ident,
          scan(/(__FILE__|__LINE__|true|false|nil|self)[?!]?/)
      else
        start_group :constant,
          scan(/(__FILE__|__LINE__|true|false|nil|self)/)
      end
      @selector = false
      @allow_operator = true
    when scan(/0([bB][01]+|[oO][0-7]+|[dD][0-9]+|[xX][0-9a-fA-F]+)/)
      start_group :number, matched
      @allow_operator = true
    else
      case peek(2)
      when "%r"
        scan_delimited_region :punct, :regex, scan( /../ ), true
        @allow_operator = true
      when "%w", "%q"
        scan_delimited_region :punct, :string, scan( /../ ), false
        @allow_operator = true
      when "%s"
        scan_delimited_region :punct, :symbol, scan( /../ ), false
        @allow_operator = true
      when "%W", "%Q", "%x"
        scan_delimited_region :punct, :string, scan( /../ ), true
        @allow_operator = true
      when /%[^\sa-zA-Z0-9]/
        scan_delimited_region :punct, :string, scan( /./ ), true
        @allow_operator = true
      when "<<"
        # "<<" directly after a word character is taken to be the shift /
        # append operator, not the start of a heredoc.
        saw_word = ( chunk[-1,1] =~ /[\w!?]/ )
        start_group :punct, scan( /<</ )
        if saw_word
          @allow_operator = false
          return
        end

        float_right = scan( /-/ )
        append "-" if float_right
        if ( type = scan( /['"]/ ) )
          append type
          delim = scan_until( /(?=#{type})/ )
          if delim.nil?
            # No closing quote: give up and swallow the remaining input.
            append scan_until( /\Z/ )
            return
          end
        else
          delim = scan( /\w+/ ) or return
        end
        start_group :constant, delim
        start_group :punct, scan( /#{type}/ ) if type
        # The heredoc body starts on the next line; remember it until then.
        @heredocs << [ float_right, type, delim ]
        @allow_operator = true
      else
        case peek(1)
        when /[\n\r]/
          unless @heredocs.empty?
            scan_heredoc(*@heredocs.shift)
          else
            start_group :normal, scan( /\s+/ )
          end
          @allow_operator = false
        when /\s/
          start_group :normal, scan( /\s+/ )
        when "#"
          start_group :comment, scan( /#[^\n\r]*/ )
        when /[A-Z]/
          start_group( @selector ? :ident : :constant, scan( /\w+/ ) )
          # The word after the "." has been consumed, so the selector state
          # must not leak onto later tokens.
          @selector = false
          @allow_operator = true
        when /[a-z_]/
          word = scan( /\w+[?!]?/ )
          if !@selector && KEYWORDS.include?( word )
            start_group :keyword, word
            @allow_operator = false
          else
            start_group :ident, word
            @allow_operator = true
          end
          @selector = false
        when /\d/
          # The exponent may carry an explicit sign ("1e-5", "2.5E+3").
          start_group :number,
            scan( /[\d_]+(\.[\d_]+)?([eE][-+]?[\d_]+)?/ )
          @allow_operator = true
        when '"'
          scan_delimited_region :punct, :string, "", true
          @allow_operator = true
        when '/'
          if @allow_operator
            start_group :punct, scan(%r{/})
            @allow_operator = false
          else
            scan_delimited_region :punct, :regex, "", true
            @allow_operator = true
          end
        when "'"
          scan_delimited_region :punct, :string, "", false
          @allow_operator = true
        when "."
          dots = scan( /\.{1,3}/ )
          start_group :punct, dots
          # Only a single dot is a method selector; ".." and "..." are ranges.
          @selector = ( dots.length == 1 )
        when /[@]/
          start_group :attribute, scan( /@{1,2}\w*/ )
          @allow_operator = true
        when /[$]/
          start_group :global, scan(/\$/)
          start_group :global, scan( /\w+|./ ) if check(/./)
          @allow_operator = true
        when /[-!?*\/+=<>(\[\{}:;,&|%]/
          start_group :punct, scan(/./)
          @allow_operator = false
        when /[)\]]/
          start_group :punct, scan(/./)
          @allow_operator = true
        else
          # all else just falls through this, to prevent
          # infinite loops...
          append getch
        end
      end
    end
  end
end