#!/usr/bin/ruby
# encoding: utf-8
=begin LICENSE
[The "BSD licence"]
Copyright (c) 2009-2010 Kyle Yetter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
=end
module ANTLR3
=begin rdoc ANTLR3::DFA
DFA is a class that implements a finite state machine that chooses between
alternatives in a rule based upon lookahead symbols from an input stream.
Deterministic Finite Automata (DFA) are finite state machines that are capable
of recognizing regular languages. For more background on the subject,
check out http://en.wikipedia.org/wiki/Deterministic_finite-state_machine or
check out general ANTLR documentation at http://www.antlr.org
ANTLR implements most of its decision logic directly using code branching
structures in methods. However, for certain types of decisions, ANTLR will
generate a special DFA class definition to implement a decision.
Conceptually, these state machines are defined by a number of states, each state
represented by an integer indexed upward from zero. State number +0+ is the
start state of the machine; every prediction will begin in state +0+. At
each step, the machine examines the next symbol on the input stream, checks the
value against the transition parameters associated with the current state, and
either moves to a new state number to repeat the process or decides that the
machine cannot transition any further. If the machine cannot transition any
further and the current state is defined as an accept state, an
alternative has been chosen successfully and the prediction procedure ends. If
the current state is not an accept state, the prediction has failed and
there is no viable alternative.
In generated code, ANTLR defines DFA states using seven parameters, each defined
as a member of seven seperate array constants -- +MIN+, +MAX+, +EOT+, +EOF+,
+SPECIAL+, +ACCEPT+, and +TRANSITION+. The parameters that characterize state
+s+ are defined by the value of these lists at index +s+.
MIN[s]::
The smallest value of the next input symbol that has
a transition for state +s+
MAX[s]::
The largest value of the next input symbol that has
a transition for state +s+
TRANSITION[s]::
A list that defines the next state number based upon
the current input symbol.
EOT[s]::
If positive, it specifies a state transition in
situations where a non-matching input symbol does
not indicate failure.
SPECIAL[s]::
If positive, it indicates that the prediction
algorithm must defer to a special code block
to determine the next state. The value is used
by the special state code to determine the next
state.
ACCEPT[s]::
If positive and there are no possible state
transitions, this is the alternative number
that has been predicted
EOF[s]::
If positive and the input stream has been exhausted,
this is the alternative number that has been predicted.
For more information on the prediction algorithm, check out the #predict method
below.
=end
class DFA
include Constants
include Error
attr_reader :recognizer, :decision_number, :eot, :eof, :min, :max,
:accept, :special, :transition, :special_block
class << self
attr_reader :decision, :eot, :eof, :min, :max,
:accept, :special, :transition
def unpack( *data )
data.empty? and return [].freeze
n = data.length / 2
size = 0
n.times { |i| size += data[ 2*i ] }
if size > 1024
values = Hash.new( 0 )
data.each_slice( 2 ) do |count, value|
values[ value ] += count
end
default = values.keys.max_by { |v| values[ v ] }
unpacked = Hash.new( default )
position = 0
data.each_slice( 2 ) do |count, value|
unless value == default
count.times { |i| unpacked[ position + i ] = value }
end
position += count
end
else
unpacked = []
data.each_slice( 2 ) do |count, value|
unpacked.fill( value, unpacked.length, count )
end
end
return unpacked
end
end
def initialize( recognizer, decision_number = nil,
eot = nil, eof = nil, min = nil, max = nil,
accept = nil, special = nil,
transition = nil, &special_block )
@recognizer = recognizer
@decision_number = decision_number || self.class.decision
@eot = eot || self.class::EOT #.eot
@eof = eof || self.class::EOF #.eof
@min = min || self.class::MIN #.min
@max = max || self.class::MAX #.max
@accept = accept || self.class::ACCEPT #.accept
@special = special || self.class::SPECIAL #.special
@transition = transition || self.class::TRANSITION #.transition
@special_block = special_block
rescue NameError => e
raise unless e.message =~ /uninitialized constant/
constant = e.name
message = Util.tidy( <<-END )
| No #{ constant } information provided.
| DFA cannot be instantiated without providing state array information.
| When DFAs are generated by ANTLR, this information should already be
| provided in the DFA subclass constants.
END
end
if RUBY_VERSION =~ /^1\.9/
def predict( input )
mark = input.mark
state = 0
50000.times do
special_state = @special[ state ]
if special_state >= 0
state = @special_block.call( special_state )
if state == -1
no_viable_alternative( state, input )
return 0
end
input.consume
next
end
@accept[ state ] >= 1 and return @accept[ state ]
# look for a normal char transition
c = input.peek.ord
# the @min and @max arrays contain the bounds of the character (or token type)
# ranges for the transition decisions
if c.between?( @min[ state ], @max[ state ] )
# c - @min[state] is the position of the character within the range
# so for a range like ?a..?z, a match of ?a would be 0,
# ?c would be 2, and ?z would be 25
next_state = @transition[ state ][ c - @min[ state ] ]
if next_state < 0
if @eot[ state ] >= 0
state = @eot[ state ]
input.consume
next
end
no_viable_alternative( state, input )
return 0
end
state = next_state
input.consume
next
end
if @eot[ state ] >= 0
state = @eot[ state ]
input.consume()
next
end
( c == EOF && @eof[ state ] >= 0 ) and return @accept[ @eof[ state ] ]
no_viable_alternative( state, input )
return 0
end
ANTLR3.bug!( Util.tidy( <<-END ) )
| DFA BANG!
| The prediction loop has exceeded a maximum limit of 50000 iterations
| ----
| decision: #@decision_number
| description: #{ description }
END
ensure
input.rewind( mark )
end
else
def predict( input )
mark = input.mark
state = 0
50000.times do
special_state = @special[ state ]
if special_state >= 0
state = @special_block.call( special_state )
if state == -1
no_viable_alternative( state, input )
return 0
end
input.consume
next
end
@accept[ state ] >= 1 and return @accept[ state ]
# look for a normal char transition
c = input.peek
# the @min and @max arrays contain the bounds of the character (or token type)
# ranges for the transition decisions
if c.between?( @min[ state ], @max[ state ] )
# c - @min[state] is the position of the character within the range
# so for a range like ?a..?z, a match of ?a would be 0,
# ?c would be 2, and ?z would be 25
next_state = @transition[ state ][ c - @min[ state ] ]
if next_state < 0
if @eot[ state ] >= 0
state = @eot[ state ]
input.consume
next
end
no_viable_alternative( state, input )
return 0
end
state = next_state
input.consume()
next
end
if @eot[ state ] >= 0
state = @eot[ state ]
input.consume()
next
end
( c == EOF && @eof[ state ] >= 0 ) and return @accept[ @eof[ state ] ]
no_viable_alternative( state, input )
return 0
end
ANTLR3.bug!( Util.tidy( <<-END ) )
| DFA BANG!
| The prediction loop has exceeded a maximum limit of 50000 iterations
| ----
| decision: #@decision_number
| description: #{ description }
END
ensure
input.rewind( mark )
end
end
def no_viable_alternative( state, input )
raise( BacktrackingFailed ) if @recognizer.state.backtracking > 0
except = NoViableAlternative.new( description, @decision_number, state, input )
error( except )
raise( except )
end
def error( except )
# overridable debugging hook
end
def special_state_transition( state, input )
return -1
end
def description
return "n/a"
end
end
end