1package ANTLR::Runtime::Lexer; 2 3use English qw( -no_match_vars ); 4use Readonly; 5use Carp; 6use Switch; 7 8use ANTLR::Runtime::Token; 9use ANTLR::Runtime::CommonToken; 10use ANTLR::Runtime::CharStream; 11use ANTLR::Runtime::MismatchedTokenException; 12 13use Moose; 14 15extends 'ANTLR::Runtime::BaseRecognizer'; 16with 'ANTLR::Runtime::TokenSource'; 17 18has 'input' => ( 19 is => 'rw', 20 does => 'ANTLR::Runtime::CharStream', 21); 22 23sub reset { 24 my ($self) = @_; 25 26 # reset all recognizer state variables 27 $self->SUPER::reset(); 28 29 # wack Lexer state variables 30 if (defined $self->input) { 31 # rewind the input 32 $self->input->seek(0); 33 } 34 35 if (defined $self->state) { 36 $self->state->token(undef); 37 $self->state->type(ANTLR::Runtime::Token->INVALID_TOKEN_TYPE); 38 $self->state->channel(ANTLR::Runtime::Token->DEFAULT_CHANNEL); 39 $self->state->token_start_char_index(-1); 40 $self->state->token_start_char_position_in_line(-1); 41 $self->state->start_line(-1); 42 $self->state->text(undef); 43 } 44} 45 46# Return a token from this source; i.e., match a token on the char 47# stream. 48sub next_token { 49 my ($self) = @_; 50 51 while (1) { 52 $self->state->token(undef); 53 $self->state->channel(ANTLR::Runtime::Token->DEFAULT_CHANNEL); 54 $self->state->token_start_char_index($self->input->index()); 55 $self->state->token_start_char_position_in_line($self->input->get_char_position_in_line()); 56 $self->state->token_start_line($self->input->get_line()); 57 $self->state->text(undef); 58 59 if ($self->input->LA(1) eq ANTLR::Runtime::CharStream->EOF) { 60 return ANTLR::Runtime::Token->EOF_TOKEN; 61 } 62 63 my $rv; 64 my $op = ''; 65 eval { 66 $self->m_tokens(); 67 if (!defined $self->state->token) { 68 $self->emit(); 69 } 70 elsif ($self->state->token == ANTLR::Runtime::Token->SKIP_TOKEN) { 71 $op = 'next'; 72 return; 73 } 74 $op = 'return'; 75 $rv = $self->state->token; 76 }; 77 return $rv if $op eq 'return'; 78 next if $op eq 'next'; 79 80 if ($EVAL_ERROR) { 81 my $exception = $EVAL_ERROR; 82 if ($exception->isa('ANTLR::Runtime::RecognitionException')) { 83 $self->report_error($exception); 84 $self->recover($exception); 85 } else { 86 croak $exception; 87 } 88 } 89 } 90} 91 92# Instruct the lexer to skip creating a token for current lexer rule 93# and look for another token. nextToken() knows to keep looking when 94# a lexer rule finishes with token set to SKIP_TOKEN. Recall that 95# if token==null at end of any token rule, it creates one for you 96# and emits it. 97sub skip { 98 my ($self) = @_; 99 100 $self->state->token(ANTLR::Runtime::Token->SKIP_TOKEN); 101 return; 102} 103 104# This is the lexer entry point that sets instance var 'token' 105sub m_tokens { 106 croak "Unimplemented"; 107} 108 109# Set the char stream and reset the lexer 110sub set_char_stream { 111 my ($self, $input) = @_; 112 113 $self->input(undef); 114 $self->reset(); 115 $self->input($input); 116} 117 118sub get_char_stream { 119 my ($self) = @_; 120 return $self->input; 121} 122 123sub get_source_name { 124 my ($self) = @_; 125 return $self->input->get_source_name(); 126} 127 128sub emit { 129 if (@_ == 1) { 130 my ($self) = @_; 131 # The standard method called to automatically emit a token at the 132 # outermost lexical rule. The token object should point into the 133 # char buffer start..stop. If there is a text override in 'text', 134 # use that to set the token's text. Override this method to emit 135 # custom Token objects. 136 my $t = ANTLR::Runtime::CommonToken->new({ 137 input => $self->input, 138 type => $self->state->type, 139 channel => $self->state->channel, 140 start => $self->state->token_start_char_index, 141 stop => $self->get_char_index() - 1 142 }); 143 144 $t->set_line($self->state->token_start_line); 145 $t->set_text($self->state->text); 146 $t->set_char_position_in_line($self->state->token_start_char_position_in_line); 147 $self->emit($t); 148 return $t; 149 } elsif (@_ == 2) { 150 my ($self, $token) = @_; 151 # Currently does not support multiple emits per nextToken invocation 152 # for efficiency reasons. Subclass and override this method and 153 # nextToken (to push tokens into a list and pull from that list rather 154 # than a single variable as this implementation does). 155 $self->state->token($token); 156 } 157} 158 159sub match { 160 my ($self, $s) = @_; 161 162 foreach my $c (split //, $s) { 163 if ($self->input->LA(1) ne $c) { 164 if ($self->state->backtracking > 0) { 165 $self->state->failed(1); 166 return; 167 } 168 my $mte = ANTLR::Runtime::MismatchedTokenException->new({ 169 expecting => $c, 170 input => $self->input 171 }); 172 $self->recover($mte); 173 croak $mte; 174 } 175 $self->input->consume(); 176 $self->state->failed(0); 177 } 178} 179 180sub match_any { 181 my ($self) = @_; 182 183 $self->input->consume(); 184} 185 186sub match_range { 187 my ($self, $a, $b) = @_; 188 189 if ($self->input->LA(1) lt $a || $self->input->LA(1) gt $b) { 190 if ($self->state->backtracking > 0) { 191 $self->state->failed(1); 192 return; 193 } 194 195 my $mre = ANTLR::Runtime::MismatchedRangeException($a, $b, $self->input); 196 $self->recover($mre); 197 croak $mre; 198 } 199 200 $self->input->consume(); 201 $self->state->failed(0); 202} 203 204sub get_line { 205 my ($self) = @_; 206 207 return $self->input->get_line(); 208} 209 210sub get_char_position_in_line { 211 my ($self) = @_; 212 213 return $self->input->get_char_position_in_line(); 214} 215 216# What is the index of the current character of lookahead? 217sub get_char_index { 218 my ($self) = @_; 219 220 return $self->input->index(); 221} 222 223# Return the text matched so far for the current token or any 224# text override. 225sub get_text { 226 my ($self) = @_; 227 228 if (defined $self->state->text) { 229 return $self->state->text; 230 } 231 return $self->input->substring($self->state->token_start_char_index, $self->get_char_index() - 1); 232} 233 234# Set the complete text of this token; it wipes any previous 235# changes to the text. 236sub set_text { 237 my ($self, $text) = @_; 238 239 $self->state->text($text); 240} 241 242sub report_error { 243 Readonly my $usage => 'void report_error(RecognitionException e)'; 244 croak $usage if @_ != 2; 245 my ($self, $e) = @_; 246 247 $self->display_recognition_error($self->get_token_names(), $e); 248} 249 250sub get_error_message { 251 my ($self, $e, $token_names) = @_; 252 253 my $msg; 254 if ($e->isa('ANTLR::Runtime::MismatchedTokenException')) { 255 $msg = 'mismatched character ' 256 . $self->get_char_error_display($e->get_c()) 257 . ' expecting ' 258 . $self->get_char_error_display($e->expecting); 259 } elsif ($e->isa('ANTLR::Runtime::NoViableAltException')) { 260 $msg = 'no viable alternative at character ' . $self->get_char_error_display($e->get_c()); 261 } elsif ($e->isa('ANTLR::Runtime::EarlyExitException')) { 262 $msg = 'required (...)+ loop did not match anything at character ' 263 . $self->get_char_error_display($e->get_c()); 264 } elsif ($e->isa('ANTLR::Runtime::MismatchedSetException')) { 265 $msg = 'mismatched character ' . $self->get_char_error_display($e->get_c()) 266 . ' expecting set ' . $e->expecting; 267 } elsif ($e->isa('ANTLR::Runtime::MismatchedNotSetException')) { 268 $msg = 'mismatched character ' . $self->get_char_error_display($e->get_c()) 269 . ' expecting set ' . $e->expecting; 270 } elsif ($e->isa('ANTLR::Runtime::MismatchedRangeException')) { 271 $msg = 'mismatched character ' . $self->get_char_error_display($e->get_c()) 272 . ' expecting set ' . $self->get_char_error_display($e->a) 273 . '..' . $self->get_char_error_display($e->b); 274 } else { 275 $msg = $self->SUPER::get_error_message($e, $token_names); 276 } 277 return $msg; 278} 279 280sub get_char_error_display { 281 my ($self, $c) = @_; 282 283 my $s; 284 if ($c eq ANTLR::Runtime::Token->EOF) { 285 $s = '<EOF>'; 286 } elsif ($c eq "\n") { 287 $s = '\n'; 288 } elsif ($c eq "\t") { 289 $s = '\t'; 290 } elsif ($c eq "\r") { 291 $s = '\r'; 292 } else { 293 $s = $c; 294 } 295 296 return "'$s'"; 297} 298 299# Lexers can normally match any char in it's vocabulary after matching 300# a token, so do the easy thing and just kill a character and hope 301# it all works out. You can instead use the rule invocation stack 302# to do sophisticated error recovery if you are in a fragment rule. 303sub recover { 304 my ($self, $re) = @_; 305 306 $self->input->consume(); 307} 308 309sub trace_in { 310 my ($self, $rule_name, $rule_index) = @_; 311 312 my $input_symbol = $self->input->LT(1) . ' line=' . $self->get_line() . ':' . $self->get_char_position_in_line(); 313 $self->SUPER::trace_in($rule_name, $rule_index, $input_symbol); 314} 315 316sub trace_out { 317 my ($self, $rule_name, $rule_index) = @_; 318 319 my $input_symbol = $self->input->LT(1) . ' line=' . $self->get_line() . ':' . $self->get_char_position_in_line(); 320 $self->SUPER::trace_out($rule_name, $rule_index, $input_symbol); 321} 322 323no Moose; 324__PACKAGE__->meta->make_immutable(); 3251; 326