def tokenize(ss)
output = []
until ss.eos?
ss.skip(WHITESPACE_OR_NOTHING)
break if ss.eos?
start_pos = ss.pos
peeked = ss.peek_byte
if (special = SPECIAL_TABLE[peeked])
ss.scan_byte
if special == DOT && ss.peek_byte == DOT_ORD
ss.scan_byte
output << DOTDOT
elsif special == DASH
if (peeked_byte = ss.peek_byte) && NUMBER_TABLE[peeked_byte]
ss.pos -= 1
output << [:number, ss.scan(NUMBER_LITERAL)]
else
output << special
end
else
output << special
end
elsif (sub_table = TWO_CHARS_COMPARISON_JUMP_TABLE[peeked])
ss.scan_byte
if (peeked_byte = ss.peek_byte) && (found = sub_table[peeked_byte])
output << found
ss.scan_byte
else
raise_syntax_error(start_pos, ss)
end
elsif (sub_table = COMPARISON_JUMP_TABLE[peeked])
ss.scan_byte
if (peeked_byte = ss.peek_byte) && (found = sub_table[peeked_byte])
output << found
ss.scan_byte
else
output << SINGLE_COMPARISON_TOKENS[peeked]
end
else
type, pattern = NEXT_MATCHER_JUMP_TABLE[peeked]
if type && (t = ss.scan(pattern))
output << if type == :id && t == "contains" && output.last&.first != :dot
COMPARISON_CONTAINS
else
[type, t]
end
else
raise_syntax_error(start_pos, ss)
end
end
end
output << EOS
rescue ::ArgumentError => e
if e.message == "invalid byte sequence in #{ss.string.encoding}"
raise SyntaxError, "Invalid byte sequence in #{ss.string.encoding}"
else
raise
end
end