#!/usr/bin/ruby # encoding: utf-8 =begin LICENSE [The "BSD licence"] Copyright (c) 2009-2010 Kyle Yetter All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =end module ANTLR3 =begin rdoc ANTLR3::DFA DFA is a class that implements a finite state machine that chooses between alternatives in a rule based upon lookahead symbols from an input stream. Deterministic Finite Automata (DFA) are finite state machines that are capable of recognizing regular languages. For more background on the subject, check out http://en.wikipedia.org/wiki/Deterministic_finite-state_machine or check out general ANTLR documentation at http://www.antlr.org ANTLR implements most of its decision logic directly using code branching structures in methods. However, for certain types of decisions, ANTLR will generate a special DFA class definition to implement a decision. Conceptually, these state machines are defined by a number of states, each state represented by an integer indexed upward from zero. State number +0+ is the start state of the machine; every prediction will begin in state +0+. At each step, the machine examines the next symbol on the input stream, checks the value against the transition parameters associated with the current state, and either moves to a new state number to repeat the process or decides that the machine cannot transition any further. If the machine cannot transition any further and the current state is defined as an accept state, an alternative has been chosen successfully and the prediction procedure ends. If the current state is not an accept state, the prediction has failed and there is no viable alternative. In generated code, ANTLR defines DFA states using seven parameters, each defined as a member of seven seperate array constants -- +MIN+, +MAX+, +EOT+, +EOF+, +SPECIAL+, +ACCEPT+, and +TRANSITION+. The parameters that characterize state +s+ are defined by the value of these lists at index +s+. MIN[s]:: The smallest value of the next input symbol that has a transition for state +s+ MAX[s]:: The largest value of the next input symbol that has a transition for state +s+ TRANSITION[s]:: A list that defines the next state number based upon the current input symbol. EOT[s]:: If positive, it specifies a state transition in situations where a non-matching input symbol does not indicate failure. SPECIAL[s]:: If positive, it indicates that the prediction algorithm must defer to a special code block to determine the next state. The value is used by the special state code to determine the next state. ACCEPT[s]:: If positive and there are no possible state transitions, this is the alternative number that has been predicted EOF[s]:: If positive and the input stream has been exhausted, this is the alternative number that has been predicted. For more information on the prediction algorithm, check out the #predict method below. =end class DFA include Constants include Error attr_reader :recognizer, :decision_number, :eot, :eof, :min, :max, :accept, :special, :transition, :special_block class << self attr_reader :decision, :eot, :eof, :min, :max, :accept, :special, :transition def unpack( *data ) data.empty? and return [].freeze n = data.length / 2 size = 0 n.times { |i| size += data[ 2*i ] } if size > 1024 values = Hash.new( 0 ) data.each_slice( 2 ) do |count, value| values[ value ] += count end default = values.keys.max_by { |v| values[ v ] } unpacked = Hash.new( default ) position = 0 data.each_slice( 2 ) do |count, value| unless value == default count.times { |i| unpacked[ position + i ] = value } end position += count end else unpacked = [] data.each_slice( 2 ) do |count, value| unpacked.fill( value, unpacked.length, count ) end end return unpacked end end def initialize( recognizer, decision_number = nil, eot = nil, eof = nil, min = nil, max = nil, accept = nil, special = nil, transition = nil, &special_block ) @recognizer = recognizer @decision_number = decision_number || self.class.decision @eot = eot || self.class::EOT #.eot @eof = eof || self.class::EOF #.eof @min = min || self.class::MIN #.min @max = max || self.class::MAX #.max @accept = accept || self.class::ACCEPT #.accept @special = special || self.class::SPECIAL #.special @transition = transition || self.class::TRANSITION #.transition @special_block = special_block rescue NameError => e raise unless e.message =~ /uninitialized constant/ constant = e.name message = Util.tidy( <<-END ) | No #{ constant } information provided. | DFA cannot be instantiated without providing state array information. | When DFAs are generated by ANTLR, this information should already be | provided in the DFA subclass constants. END end if RUBY_VERSION =~ /^1\.9/ def predict( input ) mark = input.mark state = 0 50000.times do special_state = @special[ state ] if special_state >= 0 state = @special_block.call( special_state ) if state == -1 no_viable_alternative( state, input ) return 0 end input.consume next end @accept[ state ] >= 1 and return @accept[ state ] # look for a normal char transition c = input.peek.ord # the @min and @max arrays contain the bounds of the character (or token type) # ranges for the transition decisions if c.between?( @min[ state ], @max[ state ] ) # c - @min[state] is the position of the character within the range # so for a range like ?a..?z, a match of ?a would be 0, # ?c would be 2, and ?z would be 25 next_state = @transition[ state ][ c - @min[ state ] ] if next_state < 0 if @eot[ state ] >= 0 state = @eot[ state ] input.consume next end no_viable_alternative( state, input ) return 0 end state = next_state input.consume next end if @eot[ state ] >= 0 state = @eot[ state ] input.consume() next end ( c == EOF && @eof[ state ] >= 0 ) and return @accept[ @eof[ state ] ] no_viable_alternative( state, input ) return 0 end ANTLR3.bug!( Util.tidy( <<-END ) ) | DFA BANG! | The prediction loop has exceeded a maximum limit of 50000 iterations | ---- | decision: #@decision_number | description: #{ description } END ensure input.rewind( mark ) end else def predict( input ) mark = input.mark state = 0 50000.times do special_state = @special[ state ] if special_state >= 0 state = @special_block.call( special_state ) if state == -1 no_viable_alternative( state, input ) return 0 end input.consume next end @accept[ state ] >= 1 and return @accept[ state ] # look for a normal char transition c = input.peek # the @min and @max arrays contain the bounds of the character (or token type) # ranges for the transition decisions if c.between?( @min[ state ], @max[ state ] ) # c - @min[state] is the position of the character within the range # so for a range like ?a..?z, a match of ?a would be 0, # ?c would be 2, and ?z would be 25 next_state = @transition[ state ][ c - @min[ state ] ] if next_state < 0 if @eot[ state ] >= 0 state = @eot[ state ] input.consume next end no_viable_alternative( state, input ) return 0 end state = next_state input.consume() next end if @eot[ state ] >= 0 state = @eot[ state ] input.consume() next end ( c == EOF && @eof[ state ] >= 0 ) and return @accept[ @eof[ state ] ] no_viable_alternative( state, input ) return 0 end ANTLR3.bug!( Util.tidy( <<-END ) ) | DFA BANG! | The prediction loop has exceeded a maximum limit of 50000 iterations | ---- | decision: #@decision_number | description: #{ description } END ensure input.rewind( mark ) end end def no_viable_alternative( state, input ) raise( BacktrackingFailed ) if @recognizer.state.backtracking > 0 except = NoViableAlternative.new( description, @decision_number, state, input ) error( except ) raise( except ) end def error( except ) # overridable debugging hook end def special_state_transition( state, input ) return -1 end def description return "n/a" end end end