// Copyright © 2024 Igalia S.L.
// SPDX-License-Identifier: MIT

use crate::isa::{BitSetEnumValue, ISA};
use proc_macro::TokenStream;
use proc_macro2::TokenStream as TokenStream2;
use quote::quote;
use quote::ToTokens;
use roxmltree::Document;
use std::fs;
use std::path::Path;
use syn::{parse_macro_input, parse_quote, Attribute, DeriveInput, Expr, ExprLit, Lit, Meta};

mod isa;

/// Parses the derive input to extract file paths from attributes
///
/// # Returns
/// A tuple containing the paths to the ISA and static rules files
///
/// # Panics
/// Panics if the necessary attributes are not found or are in the wrong format
pub(crate) fn parse_derive(ast: &DeriveInput) -> (String, String) {
    // Collect attributes with the name "isa"
    let isa_attr = ast
        .attrs
        .iter()
        .find(|attr| {
            let path = attr.meta.path();
            path.is_ident("isa")
        })
        .expect("An ISA file needs to be provided with the #[isa = \"PATH\"] attribute");

    // Get the path from the "isa" attribute
    let isa_path = get_attribute(isa_attr);

    // Collect attributes with the name "static_rules_file"
    let static_rules_attr = ast
        .attrs
        .iter()
        .find(|attr| {
            let path = attr.meta.path();
            path.is_ident("static_rules_file")
        })
        .expect("A static pest rules file needs to be provided with the #[static_rules_file = \"PATH\"] attribute");

    // Get the path from the "static_rules_file" attribute
    let static_rules_path = get_attribute(static_rules_attr);

    (isa_path, static_rules_path)
}

/// Extracts the string value from a name-value attribute
///
/// # Panics
/// Panics if the attribute is not in the expected format
fn get_attribute(attr: &Attribute) -> String {
    match &attr.meta {
        Meta::NameValue(name_value) => match &name_value.value {
            Expr::Lit(ExprLit {
                lit: Lit::Str(string),
                ..
            }) => {
                if name_value.path.is_ident("isa")
                    || name_value.path.is_ident("static_rules_file")
                {
                    string.value()
                } else {
                    panic!("Attribute must be a file path")
                }
            }
            _ => panic!("Attribute must be a string"),
        },
        _ => panic!("Attribute must be of the form `key = \"...\"`"),
    }
}

/// Formats an enum value as a string in uppercase with underscores
fn format_enum_value_str(enum_name: &str, enum_value: &str) -> String {
    format!("{}_{}", enum_name, enum_value.replace(['.', '[', ']'], "")).to_ascii_uppercase()
}

/// Retrieves and formats the enum value string from a `BitSetEnumValue`
fn get_enum_value_str(enum_name: &str, enum_value: &BitSetEnumValue) -> String {
    format_enum_value_str(enum_name, enum_value.name.unwrap_or(enum_value.display))
}
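// Illustrative example (hypothetical names, not taken from the ISA file):
// format_enum_value_str("isa_swiz", ".x[0]") yields "ISA_SWIZ_X0". Dots and
// square brackets are stripped and the result is upper-cased so it matches the
// ISA_* variant naming of the generated ISA enums. get_enum_value_str()
// prefers a value's `name` and falls back to its `display` string.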
/// Generates the implementation of `FromPestRule` for enums in the ISA
fn generate_from_rule_impl_enums(isa: &ISA) -> TokenStream2 {
    isa.enums
        .values()
        .map(|e| {
            let enum_name_str = format!("isa_{}", e.name.trim_start_matches('#'));
            let enum_name = syn::Ident::new(&enum_name_str, proc_macro2::Span::call_site());

            let match_arms: Vec<_> = e
                .values
                .iter()
                .filter(|v| !v.display.is_empty() && v.display != ".____")
                .map(|v| {
                    let variant_name = syn::Ident::new(
                        get_enum_value_str(&enum_name_str, v).as_str(),
                        proc_macro2::Span::call_site(),
                    );
                    let rule_name = syn::Ident::new(
                        &to_upper_camel_case(v.name.unwrap_or(v.display), false),
                        proc_macro2::Span::call_site(),
                    );

                    quote! {
                        Rule::#rule_name => #enum_name::#variant_name
                    }
                })
                .collect();

            quote! {
                impl FromPestRule for #enum_name {
                    fn from_rule(rule: Rule) -> Self
                    where
                        Self: Sized,
                    {
                        match rule {
                            #(#match_arms),*,
                            _ => panic!("Unexpected rule: {:?}", rule),
                        }
                    }
                }
            }
        })
        .collect()
}

/// Generates the implementation of `FromPestRule` for ISA opcodes
fn generate_from_rule_impl_opc(isa: &ISA) -> TokenStream2 {
    let instr_name = syn::Ident::new("isa_opc", proc_macro2::Span::call_site());

    let match_arms: Vec<_> = isa
        .bitsets
        .values()
        .filter(|bitset| !bitset.name.starts_with('#'))
        .map(|instr| {
            let variant_name = syn::Ident::new(
                format_enum_value_str("isa_opc", instr.name).as_str(),
                proc_macro2::Span::call_site(),
            );
            let pest_rule = format!("Opc_{}", instr.name);
            let rule_name = syn::Ident::new(
                &to_upper_camel_case(pest_rule.as_str(), true),
                proc_macro2::Span::call_site(),
            );

            quote! {
                Rule::#rule_name => #instr_name::#variant_name
            }
        })
        .collect();

    quote! {
        impl FromPestRule for isa_opc {
            fn from_rule(rule: Rule) -> Self
            where
                Self: Sized,
            {
                match rule {
                    #(#match_arms),*,
                    _ => panic!("Unexpected rule: {:?}", rule),
                }
            }
        }
    }
}
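// Sketch of the code emitted by the two generators above for a hypothetical
// enum "enum1" with the values "val1" and "val2" (identifiers are built with
// format_enum_value_str()/to_upper_camel_case(), as shown earlier):
//
//   impl FromPestRule for isa_enum1 {
//       fn from_rule(rule: Rule) -> Self
//       where
//           Self: Sized,
//       {
//           match rule {
//               Rule::Val1 => isa_enum1::ISA_ENUM1_VAL1,
//               Rule::Val2 => isa_enum1::ISA_ENUM1_VAL2,
//               _ => panic!("Unexpected rule: {:?}", rule),
//           }
//       }
//   }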
/// Main derive function to generate the parser
fn derive_parser(input: TokenStream) -> TokenStream {
    let mut ast: DeriveInput = parse_macro_input!(input as DeriveInput);
    let root = "../src/etnaviv/isa/";

    let (isa_filename, static_rules_filename) = parse_derive(&ast);
    let isa_path = Path::new(&root).join(isa_filename);
    let static_rules_path = Path::new(&root).join(static_rules_filename);

    // Load the XML document
    let xml_content = fs::read_to_string(isa_path).expect("Failed to read XML file");
    let doc = Document::parse(&xml_content).expect("Failed to parse XML");
    let isa = ISA::new(&doc);

    // Load the static rules
    let mut grammar =
        fs::read_to_string(static_rules_path).expect("Failed to read static rules pest file");

    // Append generated grammar rules
    grammar.push_str(&generate_peg_grammar(&isa));

    // Add grammar as an attribute to the AST
    ast.attrs.push(parse_quote! {
        #[grammar_inline = #grammar]
    });

    // Generate the token streams for the parser, trait, and rule implementations
    let tokens_parser = pest_generator::derive_parser(ast.to_token_stream(), false);
    let tokens_from_rule_enums = generate_from_rule_impl_enums(&isa);
    let tokens_from_rule_opc = generate_from_rule_impl_opc(&isa);

    // Combine all token streams into one
    let tokens = quote! {
        #tokens_parser

        pub trait FromPestRule {
            fn from_rule(rule: Rule) -> Self
            where
                Self: Sized;
        }

        #tokens_from_rule_enums
        #tokens_from_rule_opc
    };

    tokens.into()
}

/// Generates PEG grammar rules for enums
fn generate_peg_grammar_enums(isa: &ISA) -> String {
    let mut grammar = String::new();

    for e in isa.enums.values() {
        let mut values: Vec<_> = e
            .values
            .iter()
            .filter(|v| !v.display.is_empty() && v.display != ".____")
            .collect();

        // From the pest docs:
        // The choice operator, written as a vertical line |, is ordered. The PEG
        // expression first | second means "try first; but if it fails, try second instead".
        //
        // We need to sort our enum values to be able to parse e.g. th1.xxxx and t1.xxxx.
        values.sort_by(|a, b| b.display.cmp(a.display));

        let rule_name = to_upper_camel_case(e.name.trim_start_matches('#'), true);
        let value_names: Vec<_> = values
            .iter()
            .map(|enum_value| {
                to_upper_camel_case(enum_value.name.unwrap_or(enum_value.display), false)
            })
            .collect();

        grammar.push_str(&format!(
            "{} = {{ {} }}\n",
            rule_name,
            value_names.join(" | ")
        ));

        for value in &values {
            let variant_name = to_upper_camel_case(value.name.unwrap_or(value.display), false);
            grammar.push_str(&format!(
                "  {} = {{ \"{}\" }}\n",
                variant_name, value.display
            ));
        }

        grammar.push('\n');
    }

    grammar
}
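// For a hypothetical enum "enum1" with the values "val1" and "val2" the
// generated pest fragment looks like this (compare test_generate_peg_grammar_enums
// below); the descending sort by display string makes the ordered choice try
// e.g. "th1" before "t1":
//
//   Enum1 = { Val2 | Val1 }
//     Val2 = { "val2" }
//     Val1 = { "val1" }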
/// Generates PEG grammar rules for instructions
fn generate_peg_grammar_instructions(isa: &ISA) -> String {
    let mut grammar = String::new();

    // Collect instructions that do not start with "#"
    let instructions: Vec<_> = isa
        .bitsets
        .values()
        .filter(|bitset| !bitset.name.starts_with('#'))
        .collect();

    // Generate instruction names
    let instruction_names: Vec<_> = instructions
        .iter()
        .map(|instruction| format!("Opc{}", to_upper_camel_case(instruction.name, true)))
        .collect();

    // Join instruction names and append to grammar
    grammar.push_str(&format!(
        "instruction = _{{ {} }}\n",
        instruction_names.join(" | ")
    ));

    for (instruction, opcode) in std::iter::zip(instructions, instruction_names) {
        let meta = isa.collect_meta(instruction.name);
        let type_ = meta.get("type").copied().unwrap_or("");

        // Prepare rule parts
        let mut rule_parts = Vec::new();
        rule_parts.push(format!(
            "\"{}\"",
            instruction.displayname.unwrap_or(instruction.name)
        ));

        let template_key = format!("INSTR_{}", type_.to_ascii_uppercase());
        let flags = isa
            .templates
            .get(template_key.as_str())
            .map_or("", |template| template.display.trim());

        // Process flags
        // Convert the XML template string into parts and filter out the unwanted NAME
        // placeholder, e.g. {NAME}{DST_FULL}{SAT}{COND}{SKPHP}{TYPE}{PMODE}{THREAD}{RMODE}
        // becomes ["Dst_full", "Sat", "Cond", "Skphp", "Type", "Pmode", "Thread", "Rounding"].
        flags
            .split(&['{', '}'])
            .filter(|part| !part.trim().is_empty() && *part != "NAME")
            .for_each(|part| {
                let part = if part == "RMODE" { "Rounding" } else { part };
                rule_parts.push(format!("{}?", to_upper_camel_case(part, false)));
            });

        let has_dest = meta
            .get("has_dest")
            .map(|b| b.parse::<bool>())
            .unwrap_or(Ok(false))
            .expect("has_dest must be a bool value (true|false)");

        let rule_part = match (has_dest, type_) {
            (true, "load_store") => "(Dest | DstMemAddr) ~ \",\"",
            (true, _) => "Dest ~ \",\"",
            (false, _) => "DestVoid ~ \",\"",
        };
        rule_parts.push(rule_part.to_string());

        if type_ == "tex" {
            rule_parts.push("TexSrc ~ \",\"".to_string());
        }

        let possible_srcs = if type_ == "cf" { 2 } else { 3 };
        let valid_srcs: Vec<_> = meta
            .get("valid_srcs")
            .unwrap_or(&"")
            .split('|')
            .filter_map(|s| s.parse::<u32>().ok())
            .collect();

        for i in 0..possible_srcs {
            if valid_srcs.contains(&i) {
                rule_parts.push("Src".to_string());
            } else {
                rule_parts.push("SrcVoid".to_string());
            }

            if i + 1 < possible_srcs {
                rule_parts.push("\",\"".to_string());
            }
        }

        if type_ == "cf" {
            rule_parts.push("\",\"".to_string());
            rule_parts.push("Target".to_string());
        }

        grammar.push_str(&format!(
            "  {} = {{ {} }}\n",
            opcode,
            rule_parts.join(" ~ ")
        ));
    }

    grammar
}

/// Combines the PEG grammar rules for enums and instructions
fn generate_peg_grammar(isa: &ISA) -> String {
    let mut grammar = String::new();

    grammar.push_str(&generate_peg_grammar_enums(isa));
    grammar.push_str(&generate_peg_grammar_instructions(isa));
    grammar.push_str("instructions = _{ SOI ~ (instruction ~ NEWLINE?)* ~ EOI }");

    grammar
}

/// Converts a string to UpperCamelCase
///
/// # Arguments
/// * `s` - The input string
/// * `replace_underscores` - Whether to treat underscores as word separators
///   (they are replaced with spaces before the string is split into words)
fn to_upper_camel_case(s: &str, replace_underscores: bool) -> String {
    // remove unwanted characters
    let mut s = s.replace(['.', '[', ']'], "");

    // optionally replace underscores with spaces
    if replace_underscores {
        s = s.replace('_', " ");
    }

    // capitalize the first letter of each word and join them
    s.split_whitespace()
        .map(|word| {
            let mut chars = word.chars();
            match chars.next() {
                Some(first) => first
                    .to_uppercase()
                    .chain(chars.flat_map(|c| c.to_lowercase()))
                    .collect(),
                None => String::new(),
            }
        })
        .collect()
}

/// Procedural macro to derive the ISA parser
#[proc_macro_derive(IsaParser, attributes(isa, static_rules_file))]
pub fn derive_isaspec_parser(input: TokenStream) -> TokenStream {
    derive_parser(input)
}
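// Typical usage of the derive macro (illustrative; the struct name and the
// ISA file name are hypothetical). Both paths are resolved relative to the
// root "../src/etnaviv/isa/" hard-coded in derive_parser():
//
//   #[derive(IsaParser)]
//   #[isa = "etnaviv.xml"]
//   #[static_rules_file = "static_rules.pest"]
//   struct IsaSpecParser;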
#[cfg(test)]
mod lib {
    use super::*;
    use crate::isa::{BitSetEnum, BitSetEnumValue, Bitset, BitsetTemplate, ISA};
    use indexmap::IndexMap;
    use std::collections::HashMap;

    #[test]
    fn derive_ok() {
        let definition = "
            #[other_attr]
            #[isa = \"myfile.isa\"]
            #[static_rules_file = \"static_rules.pest\"]
            pub struct MyParser<'a, T>;
        ";

        let ast = syn::parse_str(definition).unwrap();
        let (isa, static_rules) = parse_derive(&ast);
        assert_eq!(isa, "myfile.isa");
        assert_eq!(static_rules, "static_rules.pest");
    }

    #[test]
    #[should_panic(expected = "Attribute must be a string")]
    fn derive_wrong_arg_isa() {
        let definition = "
            #[other_attr]
            #[isa = 1]
            #[static_rules_file = \"static_rules.pest\"]
            pub struct MyParser<'a, T>;
        ";

        let ast = syn::parse_str(definition).unwrap();
        parse_derive(&ast);
    }

    #[test]
    #[should_panic(expected = "Attribute must be a string")]
    fn derive_wrong_arg_static_rules_file() {
        let definition = "
            #[other_attr]
            #[isa = \"test.xml\"]
            #[static_rules_file = 1]
            pub struct MyParser<'a, T>;
        ";

        let ast = syn::parse_str(definition).unwrap();
        parse_derive(&ast);
    }

    #[test]
    #[should_panic(
        expected = "An ISA file needs to be provided with the #[isa = \"PATH\"] attribute"
    )]
    fn derive_no_isa() {
        let definition = "
            #[other_attr]
            pub struct MyParser<'a, T>;
        ";

        let ast = syn::parse_str(definition).unwrap();
        parse_derive(&ast);
    }

    #[test]
    fn test_to_upper_camel_case() {
        assert_eq!(to_upper_camel_case("test_string", true), "TestString");
        assert_eq!(to_upper_camel_case("test_string", false), "Test_string");
        assert_eq!(to_upper_camel_case("[Test]_String", true), "TestString");
        assert_eq!(to_upper_camel_case("[Test]_String", false), "Test_string");
        assert_eq!(
            to_upper_camel_case("multiple_words_string", true),
            "MultipleWordsString"
        );
    }

    fn mock_isa() -> ISA<'static> {
        let mut bitsets = IndexMap::new();
        let mut enums = IndexMap::new();
        let mut templates = IndexMap::new();

        // Add mock data for bitsets, enums, and templates

        // Example for bitsets
        bitsets.insert(
            "bitset1",
            Bitset {
                name: "bitset1",
                displayname: None,
                extends: None,
                meta: HashMap::from([("type", "alu"), ("has_dest", "true"), ("valid_srcs", "0")]),
            },
        );

        // Example for enums
        enums.insert(
            "enum1",
            BitSetEnum {
                name: "enum1",
                values: vec![
                    BitSetEnumValue {
                        display: "val1",
                        name: None,
                    },
                    BitSetEnumValue {
                        display: "val2",
                        name: None,
                    },
                ],
            },
        );

        // Example for templates
        templates.insert(
            "INSTR_ALU",
            BitsetTemplate {
                display: "{DST_FULL}{SAT}{COND}",
            },
        );

        ISA {
            bitsets,
            enums,
            templates,
        }
    }

    #[test]
    fn test_generate_peg_grammar_enums() {
        let isa = mock_isa();
        let grammar = generate_peg_grammar_enums(&isa);
        assert!(grammar.contains("Enum1 = { Val2 | Val1 }"));
        assert!(grammar.contains("Val1 = { \"val1\" }"));
        assert!(grammar.contains("Val2 = { \"val2\" }"));
    }

    #[test]
    fn test_generate_peg_grammar_instructions() {
        let isa = mock_isa();
        let grammar = generate_peg_grammar_instructions(&isa);
        assert!(grammar.contains("instruction = _{ OpcBitset1 }"));
        assert!(grammar.contains("OpcBitset1 = { \"bitset1\" ~ Dst_full? ~ Sat? ~ Cond? ~ Dest ~ \",\" ~ Src ~ \",\" ~ SrcVoid ~ \",\" ~ SrcVoid }"));
    }

    #[test]
    fn test_generate_peg_grammar() {
        let isa = mock_isa();
        let grammar = generate_peg_grammar(&isa);
        assert!(grammar.contains("Enum1 = { Val2 | Val1 }"));
        assert!(grammar.contains("instruction = _{ OpcBitset1 }"));
        assert!(grammar.contains("OpcBitset1 = { \"bitset1\" ~ Dst_full? ~ Sat? ~ Cond? ~ Dest ~ \",\" ~ Src ~ \",\" ~ SrcVoid ~ \",\" ~ SrcVoid }"));
    }
}
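
// Illustrative sanity checks for the naming helpers (a minimal sketch; the
// module name and the example values are arbitrary, not taken from the ISA
// definition).
#[cfg(test)]
mod naming_examples {
    use super::*;

    #[test]
    fn format_enum_value_str_examples() {
        // '.', '[' and ']' are stripped and the result is upper-cased.
        assert_eq!(format_enum_value_str("isa_opc", "nop"), "ISA_OPC_NOP");
        assert_eq!(
            format_enum_value_str("isa_rounding", ".rtz"),
            "ISA_ROUNDING_RTZ"
        );
    }
}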