#!/usr/bin/ruby
# encoding: utf-8

require 'antlr3'
require 'set'
require 'rake'
require 'rake/tasklib'
require 'shellwords'

module ANTLR3

=begin rdoc ANTLR3::CompileTask

A rake task-generating utility concerning ANTLR grammar file
compilation. This is a general utility -- the grammars do
not have to be targeted for Ruby output; it handles all
known ANTLR language targets.

  require 'antlr3/task'

  ANTLR3::CompileTask.define(
    :name => 'grammars', :output_directory => 'lib/parsers'
  ) do | t |
    t.grammar_set( 'antlr/MainParser.g', 'antlr/MainTree.g' )

    t.grammar_set( 'antlr/Template.g' ) do | gram |
      gram.output_directory = 'lib/parsers/template'
      gram.debug = true
    end
  end

The example above defines two tasks inside the rake namespace given by
:name -- <tt>grammars:compile</tt> and <tt>grammars:clobber</tt>.

TODO: finish documentation

=end

class CompileTask < Rake::TaskLib
  attr_reader :grammar_sets, :options
  attr_accessor :name

  # Builds a CompileTask from +grammar_files+ (see #initialize), yields it
  # for further configuration when a block is given, defines the rake tasks
  # and returns the task library object.
  def self.define( *grammar_files )
    lib = new( *grammar_files )
    yield( lib ) if block_given?
    lib.define
    return( lib )
  end

  # +grammar_files+ is any (possibly nested) list of grammar file paths,
  # optionally followed by an options hash. The :name option selects the
  # rake namespace of the generated tasks (default 'antlr-grammars'); the
  # whole hash is also used as the default options of every grammar set.
  def initialize( *grammar_files )
    # non-bang flatten: flatten! returns nil when nothing was flattened
    grammar_files = grammar_files.flatten
    options = Hash === grammar_files.last ? grammar_files.pop : {}
    @grammar_sets = []
    @name = options.fetch( :name, 'antlr-grammars' )
    @options = options
    # scope active when the library object is created; used to build the
    # fully qualified names of the compile / clobber tasks
    @namespace = Rake.application.current_scope
    grammar_set( grammar_files ) unless grammar_files.empty?
  end

  # All files generated by all grammar sets, as one flat array.
  def target_files
    @grammar_sets.map { | set | set.target_files }.flatten
  end

  # Registers a new GrammarSet built from +grammar_files+. A trailing hash
  # is merged over the task-wide options. The set is yielded when a block
  # is given, and returned.
  def grammar_set( *grammar_files )
    grammar_files = grammar_files.flatten
    overrides = Hash === grammar_files.last ? grammar_files.pop : {}
    set = GrammarSet.new( grammar_files, @options.merge( overrides ) )
    yield( set ) if block_given?
    @grammar_sets << set
    return( set )
  end

  # The Rake::Task object of the generated 'compile' task.
  def compile_task
    Rake::Task[ qualified_task_name( 'compile' ) ]
  end

  # Invokes the 'compile' task.
  def compile!
    compile_task.invoke
  end

  # The Rake::Task object of the generated 'clobber' task.
  def clobber_task
    Rake::Task[ qualified_task_name( 'clobber' ) ]
  end

  # Invokes the 'clobber' task.
  def clobber!
    clobber_task.invoke
  end

  # Defines the '<name>:clobber' and '<name>:compile' tasks along with the
  # file tasks of every grammar set.
  def define
    namespace( @name ) do
      desc( "trash all ANTLR-generated source code" )
      task( 'clobber' ) do
        @grammar_sets.each { | set | set.clean }
      end

      @grammar_sets.each { | set | set.define_tasks }

      desc( "compile ANTLR grammars" )
      task( 'compile' => target_files )
    end
  end

  # Joins the scope captured at construction time, the task namespace and
  # +leaf+ into a full task name. Older rake versions expose the scope as
  # an Array (outermost first); newer ones return a Rake::Scope, which
  # does not support Array#+ but offers #path.
  def qualified_task_name( leaf )
    scope =
      if @namespace.respond_to?( :path ) then @namespace.path.to_s.split( ':' )
      else Array( @namespace )
      end
    ( scope + [ @name, leaf ] ).join( ':' )
  end
  private :qualified_task_name

#class CompileTask::GrammarSet
# A group of grammar files sharing one output directory, one ANTLR jar and
# one set of compilation switches.
class GrammarSet
  # file / sh / rm / rmdir are rake DSL methods; recent rake versions no
  # longer define them on Object, so mix them in explicitly when available.
  include Rake::DSL if defined?( Rake::DSL )

  attr_accessor :antlr_jar, :debug, :trace, :profile,
                :compile_options, :java_options
  attr_reader :load_path, :grammars
  attr_writer :output_directory

  # +grammar_files+ is an array of grammar paths. Recognized +options+:
  # :output_directory, :load_path, :antlr_jar, :debug, :trace, :profile,
  # :compile_options and :java_options (the last two either an Array or a
  # shell-style string).
  def initialize( grammar_files, options = {} )
    # must be known before the load path is assembled, otherwise nil ends
    # up in the search list
    dir = options[ :output_directory ]
    @output_directory = dir ? dir.to_s : '.'

    @load_path = grammar_files.map { | f | File.dirname( f ) }
    @load_path.push( '.', @output_directory )
    if extra_load = options[ :load_path ]
      # splat so every extra directory becomes its own entry
      @load_path.unshift( *[ extra_load ].flatten )
    end
    @load_path.uniq!

    @grammars = grammar_files.map do | file |
      GrammarFile.new( self, file )
    end

    @antlr_jar = options.fetch( :antlr_jar, ANTLR3.antlr_jar )
    @debug = options.fetch( :debug, false )
    @trace = options.fetch( :trace, false )
    @profile = options.fetch( :profile, false )
    @compile_options = shell_words( options[ :compile_options ] )
    @java_options = shell_words( options[ :java_options ] )
  end

  # Generated files of every grammar in the set, flattened.
  def target_files
    @grammars.map { | gram | gram.target_files }.flatten
  end

  # Directory receiving generated code; defaults to the current directory.
  def output_directory
    @output_directory || '.'
  end

  # Defines file tasks for the ANTLR jar, for any .tokens file a grammar's
  # tokenVocab option refers to, and for the targets of each grammar.
  def define_tasks
    file( @antlr_jar )

    @grammars.each do | grammar |
      deps = [ @antlr_jar ]
      vocab = grammar.token_vocab
      tokens = vocab && find_tokens_file( vocab, grammar )
      if tokens
        file( tokens )
        deps << tokens
      end
      grammar.define_tasks( deps )
    end
  end

  # Removes the generated files of every grammar, then the output
  # directory itself if that leaves it empty.
  def clean
    @grammars.each { | grammar | grammar.clean }

    dir = output_directory
    if test( ?d, dir ) and ( Dir.entries( dir ) - %w( . .. ) ).empty?
      rmdir( dir )
    end
  end

  # Resolves the .tokens file for token vocabulary +vocab+: first a grammar
  # of this set with that name, then a file on the load path. Warns and
  # returns nil when neither exists.
  def find_tokens_file( vocab, grammar )
    provider = @grammars.find { | candidate | candidate.name == vocab }
    provider and return( provider.tokens_file )

    located = locate( "#{ vocab }.tokens" )
    located and return( located )

    warn( Util.tidy( <<-END, true ) )
    | unable to locate .tokens file `#{ vocab }' referenced in #{ grammar.path }
    | -- ignoring dependency
    END

    return( nil )
  end

  # Returns the path of +file_name+ inside the first load path directory
  # containing it, or nil. The current output directory is searched as
  # well, since it may have been reassigned after construction.
  def locate( file_name )
    search_path = ( @load_path + [ output_directory ] ).compact.uniq
    dir = search_path.find do | candidate |
      File.file?( File.join( candidate, file_name ) )
    end
    dir and File.join( dir, file_name )
  end

  # Runs the ANTLR tool on +grammar+, creating the output directory first
  # when necessary.
  def compile( grammar )
    dir = output_directory
    test( ?d, dir ) or FileUtils.mkpath( dir )
    sh( build_command( grammar ) )
  end

  # Assembles the shell command line (a single escaped string) that
  # compiles +grammar+.
  def build_command( grammar )
    parts = [ 'java', '-cp', @antlr_jar ]
    parts.concat( @java_options )
    parts << 'org.antlr.Tool' << '-fo' << output_directory
    parts << '-debug' if @debug
    parts << '-profile' if @profile
    parts << '-trace' if @trace
    parts.concat( @compile_options )
    parts << grammar.path
    return parts.map { | part | escape( part ) }.join( ' ' )
  end

  # Escapes +token+ for use as one word of a Bourne shell command line.
  def escape( token )
    Shellwords.escape( token.to_s )
  end

  # Normalizes an option given either as an Array of words or as a
  # shell-style string (nil yields an empty list).
  def shell_words( opts )
    Array === opts ? opts : Shellwords.shellwords( opts.to_s )
  end
  private :shell_words
end # class GrammarSet

# A single grammar file: reads the grammar source to learn its name, type,
# target language, token vocabulary and imports, and derives the names of
# the files ANTLR generates for it.
class GrammarFile
  # see the note in GrammarSet: file / rm / touch come from the rake DSL
  include Rake::DSL if defined?( Rake::DSL )

  # target language name => extensions of the files generated per class
  LANGUAGES = {
    "ActionScript" => [ ".as" ],
    "CSharp2" => [ ".cs" ],
    "C" => [ ".c", ".h" ],
    "ObjC" => [ ".m", ".h" ],
    "CSharp3" => [ ".cs" ],
    "Cpp" => [ ".cpp", ".h" ],
    "Ruby" => [ ".rb" ],
    "Java" => [ ".java" ],
    "JavaScript" => [ ".js" ],
    "Python" => [ ".py" ],
    "Delphi" => [ ".pas" ],
    "Perl5" => [ ".pm" ]
  }.freeze

  GRAMMAR_TYPES = %w(lexer parser tree combined).freeze

  ##################################################################
  ######## CONSTRUCTOR #############################################
  ##################################################################

  # +group+ is the owning GrammarSet, +path+ the grammar file. The only
  # recognized option is :extra_dependencies (one path or a list). The
  # grammar is read immediately; the new object is yielded before its
  # imports are resolved.
  def initialize( group, path, options = {} )
    @group = group
    @path = path.to_s
    @imports = []
    @language = 'Java'
    @token_vocab = nil
    @tasks_defined = false
    @extra_dependencies = []
    if extra = options[ :extra_dependencies ]
      @extra_dependencies.concat( [ extra ].flatten )
    end

    study
    yield( self ) if block_given?
    fetch_imports
  end

  ##################################################################
  ######## ATTRIBUTES AND ATTRIBUTE-ISH METHODS ####################
  ##################################################################
  attr_reader :type, :name, :language, :source,
              :token_vocab, :imports, :imported_grammars,
              :path, :group

  # settings shared by the whole set are read from the owning group
  [ :output_directory, :load_path, :antlr_jar ].each do | attr |
    define_method( attr ) { @group.public_send( attr ) }
  end

  # Files generated for the lexer: none for parser / tree grammars.
  def lexer_files
    if lexer? then file_names( @name )
    elsif combined? then file_names( @name + 'Lexer' )
    else []
    end
  end

  # Files generated for the parser: none for lexer / tree grammars.
  def parser_files
    if parser? then file_names( @name )
    elsif combined? then file_names( @name + 'Parser' )
    else []
    end
  end

  # Files generated for a tree grammar; empty for any other type.
  def tree_parser_files
    tree? ? file_names( @name ) : []
  end

  # Paths in the output directory of +base+ combined with each extension
  # of the grammar's target language.
  def file_names( base )
    LANGUAGES.fetch( @language ).map do | ext |
      File.join( output_directory, base + ext )
    end
  end

  # lexer?, parser?, tree? and combined? predicates
  GRAMMAR_TYPES.each do | kind |
    define_method( "#{ kind }?" ) { @type == kind }
  end

  # Files generated for a delegate (imported) grammar of this grammar.
  def delegate_files( delegate_suffix )
    file_names( "#{ name }_#{ delegate_suffix }" )
  end

  # Path of the .tokens file written next to the generated code.
  def tokens_file
    File.join( output_directory, name + '.tokens' )
  end

  # Every file generated for this grammar; with +all+ (the default) the
  # targets of imported grammars are included too.
  def target_files( all = true )
    targets = [ tokens_file ]
    targets.concat( lexer_files )
    targets.concat( parser_files )
    targets.concat( tree_parser_files )

    if all
      @imported_grammars.each do | grammar |
        targets.concat( grammar.target_files )
      end
    end

    return targets
  end

  # Touches the grammar file so its targets are considered out of date.
  def update
    touch( @path )
  end

  # Paths of all directly and transitively imported grammar files.
  def all_imported_files
    imported_files = []
    @imported_grammars.each do | grammar |
      imported_files.push( grammar.path, *grammar.all_imported_files )
    end
    return imported_files
  end

  # Deletes the existing generated files of this grammar and its imports;
  # returns the list of files removed.
  def clean
    deleted = []
    # checked one at a time so a path listed twice is only removed once
    target_files.each do | target |
      if test( ?f, target )
        rm( target )
        deleted << target
      end
    end

    @imported_grammars.each do | grammar |
      deleted.concat( grammar.clean )
    end

    return deleted
  end

  # Defines (once) a file task per target, depending on +shared_depends+,
  # the grammar file, every imported grammar file and any extra
  # dependencies; each such task recompiles the grammar through the group.
  def define_tasks( shared_depends )
    return if @tasks_defined

    depends = [ @path, *all_imported_files ] + Array( @extra_dependencies )
    depends.each { | f | file( f ) }
    depends = shared_depends + depends

    target_files.each do | target |
      file( target => ( depends - [ target ] ) ) do   # prevents recursive .tokens file dependencies
        @group.compile( self )
      end
    end

    @tasks_defined = true
  end

private

  # Locates every imported grammar on the load path and wraps it in an
  # Imported object; raises when an import cannot be found.
  def fetch_imports
    @imported_grammars = @imports.map do | imp |
      file = group.locate( "#{ imp }.g" ) or raise( Util.tidy( <<-END ) )
      | #{ @path }: unable to locate imported grammar file #{ imp }.g
      | search directories ( @load_path ):
      | - #{ load_path.join( "\n - " ) }
      END
      Imported.new( self, file )
    end
  end

  # Reads the grammar file and extracts its name, type, language and
  # tokenVocab options and the list of imported grammar names. Raises
  # FormatError when no grammar declaration is found or the target
  # language is unknown.
  def study
    @source = File.read( @path )
    declaration = @source.match( /^\s*(lexer|parser|tree)?\s*grammar\s*(\S+)\s*;/ ) or
      raise FormatError[ @source, @path ]
    @name = declaration[ 2 ]
    @type = declaration[ 1 ] || 'combined'

    if @source =~ /^\s*options\s*\{(.*?)\}/m
      option_block = $1
      if option_block =~ /\s*language\s*=\s*(\S+)\s*;/
        @language = $1
        # raising an instance together with a message keeps the file and
        # source attributes while replacing the generated message text
        LANGUAGES.has_key?( @language ) or
          raise( FormatError.new( @source, @path ),
                 "Unknown ANTLR target language: %p" % @language )
      end
      option_block =~ /\s*tokenVocab\s*=\s*(\S+)\s*;/ and
        @token_vocab = $1
    end

    @source.scan( /^\s*import\s+(\w+\s*(?:,\s*\w+\s*)*);/ ) do | captures |
      @imports.concat( captures.first.strip.split( /\s*,\s*/ ) )
    end
  end
end # class GrammarFile

# A grammar pulled in through an +import+ statement of another grammar
# (its owner). Shared settings are read from the owner.
class GrammarFile::Imported < GrammarFile
  def initialize( owner, path )
    @owner = owner
    @path = path.to_s
    @imports = []
    @language = 'Java'
    @token_vocab = nil
    study
    fetch_imports
  end

  # NOTE(review): no public GrammarFile#verbose is visible in this file;
  # the :verbose delegator is kept only so the set of methods is unchanged.
  [ :load_path, :output_directory, :antlr_jar, :verbose, :group ].each do | attr |
    define_method( attr ) { @owner.public_send( attr ) }
  end

  # Delegate files are named after the owner chain plus this grammar.
  def delegate_files( suffix )
    @owner.delegate_files( "#{ @name }_#{ suffix }" )
  end

  # The .tokens file plus the delegate files the owner generates for
  # this grammar.
  def target_files
    targets = [ tokens_file ]
    targets.concat( @owner.delegate_files( @name ) )
    return( targets )
  end
end # class GrammarFile::Imported

# Raised when a grammar's source lacks a recognizable grammar declaration
# or names an unknown target language.
class GrammarFile::FormatError < StandardError
  # single-quoted so the backslashes of the pattern are shown verbatim
  DECLARATION_PATTERN = '/^\s*(lexer|parser|tree)?\s*grammar\s*(\S+)\s*;/'

  attr_reader :file, :source

  def self.[]( *args )
    new( *args )
  end

  # +source+ is the offending grammar text, +file+ the path it was read
  # from (nil for inline grammar source).
  def initialize( source, file = nil )
    @file = file
    @source = source

    header =
      if file.nil? then "bad inline grammar source:"   # inline
      else "bad grammar source in file #{ @file.inspect }"
      end
    rule = "-" * 80
    text = @source.to_s
    # make sure the closing rule starts on a line of its own
    separator = ( text.empty? or text.end_with?( "\n" ) ) ? '' : "\n"

    message = [
      header, "\n",
      rule, "\n",
      text, separator,
      rule, "\n",
      "could not locate a grammar name and type declaration matching\n",
      DECLARATION_PATTERN
    ].join
    super( message )
  end
end # error GrammarFile::FormatError

end # class CompileTask
end # module ANTLR3