• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1package ANTLR::Runtime::Lexer;
2
3use English qw( -no_match_vars );
4use Readonly;
5use Carp;
6use Switch;
7
8use ANTLR::Runtime::Token;
9use ANTLR::Runtime::CommonToken;
10use ANTLR::Runtime::CharStream;
11use ANTLR::Runtime::MismatchedTokenException;
12
13use Moose;
14
15extends 'ANTLR::Runtime::BaseRecognizer';
16with 'ANTLR::Runtime::TokenSource';
17
18has 'input' => (
19    is => 'rw',
20    does => 'ANTLR::Runtime::CharStream',
21);
22
23sub reset {
24    my ($self) = @_;
25
26    # reset all recognizer state variables
27    $self->SUPER::reset();
28
29    # wack Lexer state variables
30    if (defined $self->input) {
31        # rewind the input
32        $self->input->seek(0);
33    }
34
35    if (defined $self->state) {
36        $self->state->token(undef);
37        $self->state->type(ANTLR::Runtime::Token->INVALID_TOKEN_TYPE);
38        $self->state->channel(ANTLR::Runtime::Token->DEFAULT_CHANNEL);
39        $self->state->token_start_char_index(-1);
40        $self->state->token_start_char_position_in_line(-1);
41        $self->state->start_line(-1);
42        $self->state->text(undef);
43    }
44}
45
46# Return a token from this source; i.e., match a token on the char
47# stream.
48sub next_token {
49    my ($self) = @_;
50
51    while (1) {
52        $self->state->token(undef);
53        $self->state->channel(ANTLR::Runtime::Token->DEFAULT_CHANNEL);
54        $self->state->token_start_char_index($self->input->index());
55        $self->state->token_start_char_position_in_line($self->input->get_char_position_in_line());
56        $self->state->token_start_line($self->input->get_line());
57        $self->state->text(undef);
58
59        if ($self->input->LA(1) eq ANTLR::Runtime::CharStream->EOF) {
60            return ANTLR::Runtime::Token->EOF_TOKEN;
61        }
62
63        my $rv;
64        my $op = '';
65        eval {
66            $self->m_tokens();
67            if (!defined $self->state->token) {
68                $self->emit();
69            }
70            elsif ($self->state->token == ANTLR::Runtime::Token->SKIP_TOKEN) {
71                $op = 'next';
72                return;
73            }
74            $op = 'return';
75            $rv = $self->state->token;
76        };
77        return $rv if $op eq 'return';
78        next if $op eq 'next';
79
80        if ($EVAL_ERROR) {
81            my $exception = $EVAL_ERROR;
82            if ($exception->isa('ANTLR::Runtime::RecognitionException')) {
83                $self->report_error($exception);
84                $self->recover($exception);
85            } else {
86                croak $exception;
87            }
88        }
89    }
90}
91
92# Instruct the lexer to skip creating a token for current lexer rule
93# and look for another token.  nextToken() knows to keep looking when
94# a lexer rule finishes with token set to SKIP_TOKEN.  Recall that
95# if token==null at end of any token rule, it creates one for you
96# and emits it.
97sub skip {
98    my ($self) = @_;
99
100    $self->state->token(ANTLR::Runtime::Token->SKIP_TOKEN);
101    return;
102}
103
104# This is the lexer entry point that sets instance var 'token'
105sub m_tokens {
106    croak "Unimplemented";
107}
108
109# Set the char stream and reset the lexer
110sub set_char_stream {
111    my ($self, $input) = @_;
112
113    $self->input(undef);
114    $self->reset();
115    $self->input($input);
116}
117
118sub get_char_stream {
119    my ($self) = @_;
120    return $self->input;
121}
122
123sub get_source_name {
124    my ($self) = @_;
125    return $self->input->get_source_name();
126}
127
128sub emit {
129    if (@_ == 1) {
130        my ($self) = @_;
131	# The standard method called to automatically emit a token at the
132	# outermost lexical rule.  The token object should point into the
133	# char buffer start..stop.  If there is a text override in 'text',
134	# use that to set the token's text.  Override this method to emit
135	# custom Token objects.
136        my $t = ANTLR::Runtime::CommonToken->new({
137            input => $self->input,
138            type  => $self->state->type,
139            channel => $self->state->channel,
140            start => $self->state->token_start_char_index,
141            stop => $self->get_char_index() - 1
142        });
143
144        $t->set_line($self->state->token_start_line);
145        $t->set_text($self->state->text);
146        $t->set_char_position_in_line($self->state->token_start_char_position_in_line);
147        $self->emit($t);
148        return $t;
149    } elsif (@_ == 2) {
150        my ($self, $token) = @_;
151	# Currently does not support multiple emits per nextToken invocation
152	# for efficiency reasons.  Subclass and override this method and
153	# nextToken (to push tokens into a list and pull from that list rather
154	# than a single variable as this implementation does).
155        $self->state->token($token);
156    }
157}
158
159sub match {
160    my ($self, $s) = @_;
161
162    foreach my $c (split //, $s) {
163        if ($self->input->LA(1) ne $c) {
164            if ($self->state->backtracking > 0) {
165                $self->state->failed(1);
166                return;
167            }
168            my $mte = ANTLR::Runtime::MismatchedTokenException->new({
169                expecting => $c,
170                input => $self->input
171            });
172            $self->recover($mte);
173            croak $mte;
174        }
175        $self->input->consume();
176        $self->state->failed(0);
177    }
178}
179
180sub match_any {
181    my ($self) = @_;
182
183    $self->input->consume();
184}
185
186sub match_range {
187    my ($self, $a, $b) = @_;
188
189    if ($self->input->LA(1) lt $a || $self->input->LA(1) gt $b) {
190        if ($self->state->backtracking > 0) {
191            $self->state->failed(1);
192            return;
193        }
194
195        my $mre = ANTLR::Runtime::MismatchedRangeException($a, $b, $self->input);
196        $self->recover($mre);
197        croak $mre;
198    }
199
200    $self->input->consume();
201    $self->state->failed(0);
202}
203
204sub get_line {
205    my ($self) = @_;
206
207    return $self->input->get_line();
208}
209
210sub get_char_position_in_line {
211    my ($self) = @_;
212
213    return $self->input->get_char_position_in_line();
214}
215
216# What is the index of the current character of lookahead?
217sub get_char_index {
218    my ($self) = @_;
219
220    return $self->input->index();
221}
222
223# Return the text matched so far for the current token or any
224# text override.
225sub get_text {
226    my ($self) = @_;
227
228    if (defined $self->state->text) {
229        return $self->state->text;
230    }
231    return $self->input->substring($self->state->token_start_char_index, $self->get_char_index() - 1);
232}
233
234# Set the complete text of this token; it wipes any previous
235# changes to the text.
236sub set_text {
237    my ($self, $text) = @_;
238
239    $self->state->text($text);
240}
241
242sub report_error {
243    Readonly my $usage => 'void report_error(RecognitionException e)';
244    croak $usage if @_ != 2;
245    my ($self, $e) = @_;
246
247    $self->display_recognition_error($self->get_token_names(), $e);
248}
249
250sub get_error_message {
251    my ($self, $e, $token_names) = @_;
252
253    my $msg;
254    if ($e->isa('ANTLR::Runtime::MismatchedTokenException')) {
255        $msg = 'mismatched character '
256          . $self->get_char_error_display($e->get_c())
257          . ' expecting '
258          . $self->get_char_error_display($e->expecting);
259    } elsif ($e->isa('ANTLR::Runtime::NoViableAltException')) {
260        $msg = 'no viable alternative at character ' . $self->get_char_error_display($e->get_c());
261    } elsif ($e->isa('ANTLR::Runtime::EarlyExitException')) {
262        $msg = 'required (...)+ loop did not match anything at character '
263          . $self->get_char_error_display($e->get_c());
264    } elsif ($e->isa('ANTLR::Runtime::MismatchedSetException')) {
265        $msg = 'mismatched character ' . $self->get_char_error_display($e->get_c())
266          . ' expecting set ' . $e->expecting;
267    } elsif ($e->isa('ANTLR::Runtime::MismatchedNotSetException')) {
268        $msg = 'mismatched character ' . $self->get_char_error_display($e->get_c())
269          . ' expecting set ' . $e->expecting;
270    } elsif ($e->isa('ANTLR::Runtime::MismatchedRangeException')) {
271        $msg = 'mismatched character ' . $self->get_char_error_display($e->get_c())
272          . ' expecting set ' . $self->get_char_error_display($e->a)
273          . '..' . $self->get_char_error_display($e->b);
274    } else {
275        $msg = $self->SUPER::get_error_message($e, $token_names);
276    }
277    return $msg;
278}
279
280sub get_char_error_display {
281    my ($self, $c) = @_;
282
283    my $s;
284    if ($c eq ANTLR::Runtime::Token->EOF) {
285        $s = '<EOF>';
286    } elsif ($c eq "\n") {
287        $s = '\n';
288    } elsif ($c eq "\t") {
289        $s = '\t';
290    } elsif ($c eq "\r") {
291        $s = '\r';
292    } else {
293        $s = $c;
294    }
295
296    return "'$s'";
297}
298
299# Lexers can normally match any char in it's vocabulary after matching
300# a token, so do the easy thing and just kill a character and hope
301# it all works out.  You can instead use the rule invocation stack
302# to do sophisticated error recovery if you are in a fragment rule.
303sub recover {
304    my ($self, $re) = @_;
305
306    $self->input->consume();
307}
308
309sub trace_in {
310    my ($self, $rule_name, $rule_index) = @_;
311
312    my $input_symbol = $self->input->LT(1) . ' line=' . $self->get_line() . ':' . $self->get_char_position_in_line();
313    $self->SUPER::trace_in($rule_name, $rule_index, $input_symbol);
314}
315
316sub trace_out {
317    my ($self, $rule_name, $rule_index) = @_;
318
319    my $input_symbol = $self->input->LT(1) . ' line=' . $self->get_line() . ':' . $self->get_char_position_in_line();
320    $self->SUPER::trace_out($rule_name, $rule_index, $input_symbol);
321}
322
323no Moose;
324__PACKAGE__->meta->make_immutable();
3251;
326