File: /usr/lib/ruby/site_ruby/1.8/puppet/pops/parser/lexer2.rb
# The Lexer is responsible for turning source text into tokens.
# This version is a performance-enhanced lexer (in comparison to the 3.x and the earlier "future parser" lexer).
#
# The old lexer returned tokens on the form [:KEY, value, { :locator => ... }].
# This lexer could instead return [[token], locator],
# or Token.new([token], locator) with the same API: x[0] = token_symbol, x[1] = self, x[:key] = (:value, :file, :line, :pos) etc.
require 'strscan'
require 'puppet/pops/parser/lexer_support'
require 'puppet/pops/parser/heredoc_support'
require 'puppet/pops/parser/interpolation_support'
require 'puppet/pops/parser/epp_support'
require 'puppet/pops/parser/slurp_support'
class Puppet::Pops::Parser::Lexer2
include Puppet::Pops::Parser::LexerSupport
include Puppet::Pops::Parser::HeredocSupport
include Puppet::Pops::Parser::InterpolationSupport
include Puppet::Pops::Parser::SlurpSupport
include Puppet::Pops::Parser::EppSupport
# All tokens have three slots: the token name (a Symbol), the token text (String), and the token text length.
# All operator and punctuation tokens reuse singleton arrays. Tokens that require unique values create
# a unique array per token.
#
# PERFORMANCE NOTES:
# This construct reduces the number of objects that need to be created for operators and punctuation.
# The length is pre-calculated for all singleton tokens. The length is used both to signal the length of
# the token, and to advance the scanner position (without having to advance it with a scan(regexp)).
#
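# For example, lexing '[' (in a non-list position) reuses the shared TOKEN_LBRACK singleton
# below; #emit then advances the scanner by the pre-computed length 1 without rescanning,
# so no per-token objects are allocated for operators and punctuation.
#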
TOKEN_LBRACK = [:LBRACK, '['.freeze, 1].freeze
TOKEN_LISTSTART = [:LISTSTART, '['.freeze, 1].freeze
TOKEN_RBRACK = [:RBRACK, ']'.freeze, 1].freeze
TOKEN_LBRACE = [:LBRACE, '{'.freeze, 1].freeze
TOKEN_RBRACE = [:RBRACE, '}'.freeze, 1].freeze
TOKEN_SELBRACE = [:SELBRACE, '{'.freeze, 1].freeze
TOKEN_LPAREN = [:LPAREN, '('.freeze, 1].freeze
TOKEN_RPAREN = [:RPAREN, ')'.freeze, 1].freeze
TOKEN_EQUALS = [:EQUALS, '='.freeze, 1].freeze
TOKEN_APPENDS = [:APPENDS, '+='.freeze, 2].freeze
TOKEN_DELETES = [:DELETES, '-='.freeze, 2].freeze
TOKEN_ISEQUAL = [:ISEQUAL, '=='.freeze, 2].freeze
TOKEN_NOTEQUAL = [:NOTEQUAL, '!='.freeze, 2].freeze
TOKEN_MATCH = [:MATCH, '=~'.freeze, 2].freeze
TOKEN_NOMATCH = [:NOMATCH, '!~'.freeze, 2].freeze
TOKEN_GREATEREQUAL = [:GREATEREQUAL, '>='.freeze, 2].freeze
TOKEN_GREATERTHAN = [:GREATERTHAN, '>'.freeze, 1].freeze
TOKEN_LESSEQUAL = [:LESSEQUAL, '<='.freeze, 2].freeze
TOKEN_LESSTHAN = [:LESSTHAN, '<'.freeze, 1].freeze
TOKEN_FARROW = [:FARROW, '=>'.freeze, 2].freeze
TOKEN_PARROW = [:PARROW, '+>'.freeze, 2].freeze
TOKEN_LSHIFT = [:LSHIFT, '<<'.freeze, 2].freeze
TOKEN_LLCOLLECT = [:LLCOLLECT, '<<|'.freeze, 3].freeze
TOKEN_LCOLLECT = [:LCOLLECT, '<|'.freeze, 2].freeze
TOKEN_RSHIFT = [:RSHIFT, '>>'.freeze, 2].freeze
TOKEN_RRCOLLECT = [:RRCOLLECT, '|>>'.freeze, 3].freeze
TOKEN_RCOLLECT = [:RCOLLECT, '|>'.freeze, 2].freeze
TOKEN_PLUS = [:PLUS, '+'.freeze, 1].freeze
TOKEN_MINUS = [:MINUS, '-'.freeze, 1].freeze
TOKEN_DIV = [:DIV, '/'.freeze, 1].freeze
TOKEN_TIMES = [:TIMES, '*'.freeze, 1].freeze
TOKEN_MODULO = [:MODULO, '%'.freeze, 1].freeze
TOKEN_NOT = [:NOT, '!'.freeze, 1].freeze
TOKEN_DOT = [:DOT, '.'.freeze, 1].freeze
TOKEN_PIPE = [:PIPE, '|'.freeze, 1].freeze
TOKEN_AT = [:AT , '@'.freeze, 1].freeze
TOKEN_ATAT = [:ATAT , '@@'.freeze, 2].freeze
TOKEN_COLON = [:COLON, ':'.freeze, 1].freeze
TOKEN_COMMA = [:COMMA, ','.freeze, 1].freeze
TOKEN_SEMIC = [:SEMIC, ';'.freeze, 1].freeze
TOKEN_QMARK = [:QMARK, '?'.freeze, 1].freeze
TOKEN_TILDE = [:TILDE, '~'.freeze, 1].freeze # lexed but not an operator in Puppet
TOKEN_REGEXP = [:REGEXP, nil, 0].freeze
TOKEN_IN_EDGE = [:IN_EDGE, '->'.freeze, 2].freeze
TOKEN_IN_EDGE_SUB = [:IN_EDGE_SUB, '~>'.freeze, 2].freeze
TOKEN_OUT_EDGE = [:OUT_EDGE, '<-'.freeze, 2].freeze
TOKEN_OUT_EDGE_SUB = [:OUT_EDGE_SUB, '<~'.freeze, 2].freeze
# Tokens that are always unique to what has been lexed
TOKEN_STRING = [:STRING, nil, 0].freeze
TOKEN_WORD = [:WORD, nil, 0].freeze
TOKEN_DQPRE = [:DQPRE, nil, 0].freeze
TOKEN_DQMID = [:DQMID, nil, 0].freeze
TOKEN_DQPOST = [:DQPOST, nil, 0].freeze
TOKEN_NUMBER = [:NUMBER, nil, 0].freeze
TOKEN_VARIABLE = [:VARIABLE, nil, 1].freeze
TOKEN_VARIABLE_EMPTY = [:VARIABLE, ''.freeze, 1].freeze
# HEREDOC has syntax as an argument.
TOKEN_HEREDOC = [:HEREDOC, nil, 0].freeze
# EPP_START is currently a marker token; it may later get syntax.
TOKEN_EPPSTART = [:EPP_START, nil, 0].freeze
TOKEN_EPPEND = [:EPP_END, '%>', 2].freeze
TOKEN_EPPEND_TRIM = [:EPP_END_TRIM, '-%>', 3].freeze
# This is used for unrecognized tokens; it will always be a single character. This particular instance
# is not used, but is kept here for documentation purposes.
TOKEN_OTHER = [:OTHER, nil, 0]
# Keywords are all singleton tokens with pre-calculated lengths.
# Booleans are pre-calculated (rather than evaluating the strings "false" and "true" repeatedly).
#
KEYWORDS = {
"case" => [:CASE, 'case', 4],
"class" => [:CLASS, 'class', 5],
"default" => [:DEFAULT, 'default', 7],
"define" => [:DEFINE, 'define', 6],
"if" => [:IF, 'if', 2],
"elsif" => [:ELSIF, 'elsif', 5],
"else" => [:ELSE, 'else', 4],
"inherits" => [:INHERITS, 'inherits', 8],
"node" => [:NODE, 'node', 4],
"and" => [:AND, 'and', 3],
"or" => [:OR, 'or', 2],
"undef" => [:UNDEF, 'undef', 5],
"false" => [:BOOLEAN, false, 5],
"true" => [:BOOLEAN, true, 4],
"in" => [:IN, 'in', 2],
"unless" => [:UNLESS, 'unless', 6],
"function" => [:FUNCTION, 'function', 8],
"type" => [:TYPE, 'type', 4],
"attr" => [:ATTR, 'attr', 4],
"private" => [:PRIVATE, 'private', 7],
# The following tokens exist in reserved form only. They will later be made
# live, subject to a feature switch.
"application" => [:APPLICATION_R, 'application', 11],
"consumes" => [:CONSUMES_R, 'consumes', 8],
"produces" => [:PRODUCES_R, 'produces', 8],
}
KEYWORDS.each {|k,v| v[1].freeze; v.freeze }
KEYWORDS.freeze
# Reverse lookup of keyword name to string
KEYWORD_NAMES = {}
KEYWORDS.each {|k, v| KEYWORD_NAMES[v[0]] = k }
KEYWORD_NAMES.freeze
PATTERN_WS = %r{[[:blank:]\r]+}
# A single line comment matches to the end of the line (a \r is consumed, but the terminating \n is not).
PATTERN_COMMENT = %r{#.*\r?}
PATTERN_MLCOMMENT = %r{/\*(.*?)\*/}m
PATTERN_REGEX = %r{/[^/\n]*/}
PATTERN_REGEX_END = %r{/}
PATTERN_REGEX_A = %r{\A/} # for replacement to ""
PATTERN_REGEX_Z = %r{/\Z} # for replacement to ""
PATTERN_REGEX_ESC = %r{\\/} # for replacement to "/"
# The 3x patterns:
# PATTERN_CLASSREF = %r{((::){0,1}[A-Z][-\w]*)+}
# PATTERN_NAME = %r{((::)?[a-z0-9][-\w]*)(::[a-z0-9][-\w]*)*}
# The NAME and CLASSREF in 4x are strict. Each segment must start with
# a letter a-z and may not contain dashes (\w includes letters, digits and _).
#
PATTERN_CLASSREF = %r{((::){0,1}[A-Z][\w]*)+}
PATTERN_NAME = %r{^((::)?[a-z][\w]*)(::[a-z][\w]*)*$}
PATTERN_BARE_WORD = %r{((?:::){0,1}(?:[a-z_](?:[\w-]*[\w])?))+}
PATTERN_DOLLAR_VAR = %r{\$(::)?(\w+::)*\w+}
PATTERN_NUMBER = %r{\b(?:0[xX][0-9A-Fa-f]+|0?\d+(?:\.\d+)?(?:[eE]-?\d+)?)\b}
# PERFORMANCE NOTE:
# Comparison against a frozen string is faster (than unfrozen).
#
STRING_BSLASH_BSLASH = '\\'.freeze
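# A rough way to observe this claim (a sketch only; absolute numbers vary by Ruby version):
#
#   require 'benchmark'
#   s = '\\'
#   Benchmark.bm(8) do |b|
#     b.report('frozen')  { 1_000_000.times { s == STRING_BSLASH_BSLASH } }
#     b.report('literal') { 1_000_000.times { s == '\\' } }
#   end
#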
attr_reader :locator
def initialize()
end
# Clears the lexer state. It is not required to call this, as the state will be garbage
# collected and the next lex call (lex_string, lex_file) will reset the internal state.
#
def clear()
# not really needed, but if someone wants to ensure garbage is collected as early as possible
@scanner = nil
@locator = nil
@lexing_context = nil
end
# Convenience method, and for compatibility with the older lexer. Use lex_string instead, which allows
# passing the path to use without first having to call file= (which reads the file if it exists).
# (Bad form to use overloading of assignment operator for something that is not really an assignment. Also,
# overloading of = does not allow passing more than one argument).
#
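# The two forms are thus equivalent (a sketch; 'site.pp' is an arbitrary example path):
#
#   lexer.string = source_text                 # lexes with an empty path
#   lexer.lex_string(source_text, 'site.pp')   # preferred; also records the path
#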
def string=(string)
lex_string(string, '')
end
def lex_string(string, path='')
initvars
@scanner = StringScanner.new(string)
@locator = Puppet::Pops::Parser::Locator.locator(string, path)
end
# Lexes an unquoted string.
# @param string [String] the string to lex
# @param locator [Puppet::Pops::Parser::Locator] the locator to use (a default is used if nil is given)
# @param escapes [Array<String>] array of character strings representing the escape sequences to transform
# @param interpolate [Boolean] whether expressions should be interpolated or not.
#
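# For example (a sketch):
#
#   lexer.lex_unquoted_string('Hello $name', nil, ['$'], true)   # interpolates $name
#   lexer.lex_unquoted_string('plain text', nil, [], false)      # no interpolation
#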
def lex_unquoted_string(string, locator, escapes, interpolate)
initvars
@scanner = StringScanner.new(string)
@locator = locator || Puppet::Pops::Parser::Locator.locator(string, '')
@lexing_context[:escapes] = escapes || UQ_ESCAPES
@lexing_context[:uq_slurp_pattern] = interpolate ? (escapes.include?('$') ? SLURP_UQ_PATTERN : SLURP_UQNE_PATTERN) : SLURP_ALL_PATTERN
end
# Convenience method, and for compatibility with the older lexer. Use lex_file instead.
# (Bad form to use overloading of assignment operator for something that is not really an assignment).
#
def file=(file)
lex_file(file)
end
# TODO: This method should not be used, callers should get the locator since it is most likely required to
# compute line, position etc given offsets.
#
def file
@locator ? @locator.file : nil
end
# Initializes lexing of the content of the given file. An empty string is used if the file does not exist.
#
def lex_file(file)
initvars
contents = Puppet::FileSystem.exist?(file) ? Puppet::FileSystem.read(file) : ""
@scanner = StringScanner.new(contents.freeze)
@locator = Puppet::Pops::Parser::Locator.locator(contents, file)
end
def initvars
@token_queue = []
# NOTE: additional keys are used; :escapes, :uq_slurp_pattern, :newline_jump, :epp_*
@lexing_context = {
:brace_count => 0,
:after => nil,
}
end
# Scans all of the content and returns it in an array
# Note that the terminating [false, false] token is included in the result.
#
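# For example (a minimal sketch):
#
#   lexer = Puppet::Pops::Parser::Lexer2.new
#   lexer.lex_string('$x = 1', 'example.pp')
#   lexer.fullscan.map {|t| t[0] }   # => [:VARIABLE, :EQUALS, :NUMBER, false]
#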
def fullscan
result = []
scan {|token, value| result.push([token, value]) }
result
end
# A block must be passed to scan. It will be called with two arguments, a symbol for the token,
# and an instance of LexerSupport::TokenValue
# PERFORMANCE NOTE: The TokenValue is designed to reduce the amount of garbage / temporary data
# and to only convert the lexer's internal tokens on demand. It is slightly more costly to create an
# instance of a class defined in Ruby than an Array or Hash, but the gain is much bigger since transformation
# logic is avoided for many of its members (most are never used; e.g. line/pos information is in general only
# of value for error messages and for some expressions, which the lexer does not know about).
#
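# For example (a sketch; TokenValue supports [] access as noted in the header comment):
#
#   lexer.scan do |sym, value|
#     puts "#{sym} at line #{value[:line]}" if sym   # sym is false for the end-of-input marker
#   end
#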
def scan
# PERFORMANCE note: it is faster to access local variables than instance variables.
# This makes a small but notable difference since instance member access is avoided for
# every token in the lexed content.
#
scn = @scanner
ctx = @lexing_context
queue = @token_queue
lex_error_without_pos(Puppet::Pops::Issues::NO_INPUT_TO_LEXER) unless scn
scn.skip(PATTERN_WS)
# This is the lexer's main loop
until queue.empty? && scn.eos? do
if token = queue.shift || lex_token
ctx[:after] = token[0]
yield token
end
end
# Signals end of input
yield [false, false]
end
# This lexes one token at the current position of the scanner.
# PERFORMANCE NOTE: Any change to this logic should be performance measured.
#
def lex_token
# Using three char look ahead (it may be faster to do 2 char look ahead since only 2 tokens require a third).
scn = @scanner
ctx = @lexing_context
before = @scanner.pos
# A look ahead of 3 characters is used since the longest operator ambiguity is resolved at that point.
# PERFORMANCE NOTE: It is faster to peek once and use three separate variables for lookahead 0, 1 and 2.
#
la = scn.peek(3)
return nil if la.empty?
# Ruby 1.8.7 requires using offset and length (or integers are returned).
# PERFORMANCE NOTE.
# It is slightly faster to use these local variables than accessing la[0], la[1] etc. in ruby 1.9.3
# But not big enough to warrant two completely different implementations.
#
la0 = la[0,1]
la1 = la[1,1]
la2 = la[2,1]
# PERFORMANCE NOTE:
# A case when, where all the cases are literal values is the fastest way to map from data to code.
# It is much faster than using a hash with lambdas, hash with symbol used to then invoke send etc.
# This case statement is evaluated for most character positions in puppet source, and great care must
# be taken to not introduce performance regressions.
#
case la0
when '.'
emit(TOKEN_DOT, before)
when ','
emit(TOKEN_COMMA, before)
when '['
if (before == 0 || scn.string[locator.char_offset(before)-1,1] =~ /[[:blank:]\r\n]+/)
emit(TOKEN_LISTSTART, before)
else
emit(TOKEN_LBRACK, before)
end
when ']'
emit(TOKEN_RBRACK, before)
when '('
emit(TOKEN_LPAREN, before)
when ')'
emit(TOKEN_RPAREN, before)
when ';'
emit(TOKEN_SEMIC, before)
when '?'
emit(TOKEN_QMARK, before)
when '*'
emit(TOKEN_TIMES, before)
when '%'
if la1 == '>' && ctx[:epp_mode]
scn.pos += 2
if ctx[:epp_mode] == :expr
enqueue_completed(TOKEN_EPPEND, before)
end
ctx[:epp_mode] = :text
interpolate_epp
else
emit(TOKEN_MODULO, before)
end
when '{'
# The lexer needs to help the parser since the technology used cannot deal with
# lookahead of the same token with different precedence. This is solved by making a left brace
# after ? into a separate token.
#
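# For example, in "$x ? { default => 1 }" the '{' is lexed as SELBRACE (it follows
# a QMARK), while in "if $x { }" it is a plain LBRACE.
#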
ctx[:brace_count] += 1
emit(if ctx[:after] == :QMARK
TOKEN_SELBRACE
else
TOKEN_LBRACE
end, before)
when '}'
ctx[:brace_count] -= 1
emit(TOKEN_RBRACE, before)
# TOKENS @, @@, @(
when '@'
case la1
when '@'
emit(TOKEN_ATAT, before) # TODO: Check if this is good for the grammar
when '('
heredoc
else
emit(TOKEN_AT, before)
end
# TOKENS |, |>, |>>
when '|'
emit(case la1
when '>'
la2 == '>' ? TOKEN_RRCOLLECT : TOKEN_RCOLLECT
else
TOKEN_PIPE
end, before)
# TOKENS =, =>, ==, =~
when '='
emit(case la1
when '='
TOKEN_ISEQUAL
when '>'
TOKEN_FARROW
when '~'
TOKEN_MATCH
else
TOKEN_EQUALS
end, before)
# TOKENS '+', '+=', and '+>'
when '+'
emit(case la1
when '='
TOKEN_APPENDS
when '>'
TOKEN_PARROW
else
TOKEN_PLUS
end, before)
# TOKENS '-', '->', and epp '-%>' (end of interpolation with trim)
when '-'
if ctx[:epp_mode] && la1 == '%' && la2 == '>'
scn.pos += 3
if ctx[:epp_mode] == :expr
enqueue_completed(TOKEN_EPPEND_TRIM, before)
end
interpolate_epp(:with_trim)
else
emit(case la1
when '>'
TOKEN_IN_EDGE
when '='
TOKEN_DELETES
else
TOKEN_MINUS
end, before)
end
# TOKENS !, !=, !~
when '!'
emit(case la1
when '='
TOKEN_NOTEQUAL
when '~'
TOKEN_NOMATCH
else
TOKEN_NOT
end, before)
# TOKENS ~>, ~
when '~'
emit(la1 == '>' ? TOKEN_IN_EDGE_SUB : TOKEN_TILDE, before)
when '#'
scn.skip(PATTERN_COMMENT)
nil
# TOKENS '/', '/*' and '/ regexp /'
when '/'
case la1
when '*'
scn.skip(PATTERN_MLCOMMENT)
nil
else
# At a position where a regexp is acceptable this is a regexp, otherwise a div
if regexp_acceptable? && value = scn.scan(PATTERN_REGEX)
# Ensure an escaped / was not matched
while value[-2..-2] == STRING_BSLASH_BSLASH # i.e. \\
value += scn.scan_until(PATTERN_REGEX_END)
end
regex = value.sub(PATTERN_REGEX_A, '').sub(PATTERN_REGEX_Z, '').gsub(PATTERN_REGEX_ESC, '/')
emit_completed([:REGEX, Regexp.new(regex), scn.pos-before], before)
else
emit(TOKEN_DIV, before)
end
end
# TOKENS <, <=, <|, <<|, <<, <-, <~
when '<'
emit(case la1
when '<'
if la2 == '|'
TOKEN_LLCOLLECT
else
TOKEN_LSHIFT
end
when '='
TOKEN_LESSEQUAL
when '|'
TOKEN_LCOLLECT
when '-'
TOKEN_OUT_EDGE
when '~'
TOKEN_OUT_EDGE_SUB
else
TOKEN_LESSTHAN
end, before)
# TOKENS >, >=, >>
when '>'
emit(case la1
when '>'
TOKEN_RSHIFT
when '='
TOKEN_GREATEREQUAL
else
TOKEN_GREATERTHAN
end, before)
# TOKENS :, ::CLASSREF, ::NAME
when ':'
if la1 == ':'
before = scn.pos
# PERFORMANCE NOTE: This could potentially be sped up by using a case/when listing all
# upper case letters. Alternatively, the 'A' and 'Z' comparisons may be faster if they are
# frozen.
#
if la2 >= 'A' && la2 <= 'Z'
# CLASSREF or error
value = scn.scan(PATTERN_CLASSREF)
if value
after = scn.pos
emit_completed([:CLASSREF, value.freeze, after-before], before)
else
# move to faulty position ('::<uc-letter>' was ok)
scn.pos = scn.pos + 3
lex_error(Puppet::Pops::Issues::ILLEGAL_FULLY_QUALIFIED_CLASS_REFERENCE)
end
else
value = scn.scan(PATTERN_BARE_WORD)
if value
if value =~ PATTERN_NAME
emit_completed([:NAME, value.freeze, scn.pos-before], before)
else
emit_completed([:WORD, value.freeze, scn.pos - before], before)
end
else
# move to faulty position ('::' was ok)
scn.pos = scn.pos + 2
lex_error(Puppet::Pops::Issues::ILLEGAL_FULLY_QUALIFIED_NAME)
end
end
else
emit(TOKEN_COLON, before)
end
when '$'
if value = scn.scan(PATTERN_DOLLAR_VAR)
emit_completed([:VARIABLE, value[1..-1].freeze, scn.pos - before], before)
else
# consume the $ and let higher layer complain about the error instead of getting a syntax error
emit(TOKEN_VARIABLE_EMPTY, before)
end
when '"'
# Recursive string interpolation, 'interpolate' either returns a STRING token, or
# a DQPRE with the rest of the string's tokens placed in the @token_queue
interpolate_dq
when "'"
emit_completed([:STRING, slurp_sqstring.freeze, scn.pos - before], before)
when '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'
value = scn.scan(PATTERN_NUMBER)
if value
length = scn.pos - before
assert_numeric(value, length)
emit_completed([:NUMBER, value.freeze, length], before)
else
# move to faulty position ([0-9] was ok)
scn.pos = scn.pos + 1
lex_error(Puppet::Pops::Issues::ILLEGAL_NUMBER)
end
when 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '_'
value = scn.scan(PATTERN_BARE_WORD)
if value && value =~ PATTERN_NAME
emit_completed(KEYWORDS[value] || [:NAME, value.freeze, scn.pos - before], before)
elsif value
emit_completed([:WORD, value.freeze, scn.pos - before], before)
else
# move to faulty position ([a-z_] was ok)
scn.pos = scn.pos + 1
fully_qualified = scn.match?(/::/)
if fully_qualified
lex_error(Puppet::Pops::Issues::ILLEGAL_FULLY_QUALIFIED_NAME)
else
lex_error(Puppet::Pops::Issues::ILLEGAL_NAME_OR_BARE_WORD)
end
end
when 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
value = scn.scan(PATTERN_CLASSREF)
if value
emit_completed([:CLASSREF, value.freeze, scn.pos - before], before)
else
# move to faulty position ([A-Z] was ok)
scn.pos = scn.pos + 1
lex_error(Puppet::Pops::Issues::ILLEGAL_CLASS_REFERENCE)
end
when "\n"
# If a heredoc continuation is in effect (ctx[:newline_jump] is set) there are heredoc text
# lines to skip over; otherwise just skip the newline.
#
if ctx[:newline_jump]
scn.pos = ctx[:newline_jump]
ctx[:newline_jump] = nil
else
scn.pos += 1
end
return nil
when ' ', "\t", "\r"
scn.skip(PATTERN_WS)
return nil
else
# Handles unicode spaces of various kinds that are captured by the regexp, but not by the
# simpler case expression above (those special cases are not worth handling with better performance).
if scn.skip(PATTERN_WS)
nil
else
# "unrecognized char"
emit([:OTHER, la0, 1], before)
end
end
end
# Emits (produces) a token [:tokensymbol, TokenValue] and moves the scanner's position past the token
#
def emit(token, byte_offset)
@scanner.pos = byte_offset + token[2]
[token[0], TokenValue.new(token, byte_offset, @locator)]
end
# Emits the completed token on the form [:tokensymbol, TokenValue]. This method does not alter
# the scanner's position.
#
def emit_completed(token, byte_offset)
[token[0], TokenValue.new(token, byte_offset, @locator)]
end
# Enqueues a completed token at the given offset
def enqueue_completed(token, byte_offset)
@token_queue << emit_completed(token, byte_offset)
end
# Allows subprocessors for heredoc etc to enqueue tokens that are tokenized by a different lexer instance
#
def enqueue(emitted_token)
@token_queue << emitted_token
end
# Answers after which tokens it is acceptable to lex a regular expression.
# PERFORMANCE NOTE:
# It may be beneficial to turn this into a hash with default value of true for missing entries.
# A case expression with literal values will however create a hash internally. Since a reference is
# always needed to the hash, this access is almost as costly as a method call.
#
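# For example, in "$a = /foo/" the '/' follows :EQUALS and starts a regexp, while in
# "$a / 2" it follows :VARIABLE and is lexed as DIV.
#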
def regexp_acceptable?
case @lexing_context[:after]
# Ends of (potential) R-value generating expressions
when :RPAREN, :RBRACK, :RRCOLLECT, :RCOLLECT
false
# End of (potential) R-value - but must be allowed because of case expressions
# Called out here to not be mistaken for a bug.
when :RBRACE
true
# Operands that can be followed by DIV (even if illegal in the grammar)
when :NAME, :CLASSREF, :NUMBER, :STRING, :BOOLEAN, :DQPRE, :DQMID, :DQPOST, :HEREDOC, :REGEX, :VARIABLE, :WORD
false
else
true
end
end
end