1 // Copyright © 2024 Igalia S.L.
2 // SPDX-License-Identifier: MIT
3 
4 use crate::isa::{BitSetEnumValue, ISA};
5 
6 use proc_macro::TokenStream;
7 use proc_macro2::TokenStream as TokenStream2;
8 use quote::quote;
9 use quote::ToTokens;
10 use roxmltree::Document;
11 use std::fs;
12 use std::path::Path;
13 use syn::{parse_macro_input, parse_quote, Attribute, DeriveInput, Expr, ExprLit, Lit, Meta};
14 
15 mod isa;
16 
17 /// Parses the derive input to extract file paths from attributes
18 ///
19 /// # Returns
20 /// A tuple containing the paths to the ISA and static rules files
21 ///
22 /// # Panics
23 /// Panics if the necessary attributes are not found or are in the wrong format
parse_derive(ast: &DeriveInput) -> (String, String)24 pub(crate) fn parse_derive(ast: &DeriveInput) -> (String, String) {
25     // Collect attributes with the name "isa"
26     let isa_attr = ast
27         .attrs
28         .iter()
29         .find(|attr| {
30             let path = attr.meta.path();
31             path.is_ident("isa")
32         })
33         .expect("An ISA file needs to be provided with the #[isa = \"PATH\"] attribute");
34 
35     // Get the path from the "isa" attribute
36     let isa_path = get_attribute(isa_attr);
37 
38     // Collect attributes with the name "static_rules_file"
39     let static_rules_attr = ast
40         .attrs
41         .iter()
42         .find(|attr| {
43             let path = attr.meta.path();
44             path.is_ident("static_rules_file")
45         })
46         .expect("A static pest rules file needs to be provided with the #[static_rules_file = \"PATH\"] attribute");
47 
48     // Get the path from the "static_rules_file" attribute
49     let static_rules_path = get_attribute(static_rules_attr);
50 
51     (isa_path, static_rules_path)
52 }
53 
54 /// Extracts the string value from a name-value attribute
55 ///
56 /// # Panics
57 /// Panics if the attribute is not in the expected format
get_attribute(attr: &Attribute) -> String58 fn get_attribute(attr: &Attribute) -> String {
59     match &attr.meta {
60         Meta::NameValue(name_value) => match &name_value.value {
61             Expr::Lit(ExprLit {
62                 lit: Lit::Str(string),
63                 ..
64             }) => {
65                 if name_value.path.is_ident("isa") || name_value.path.is_ident("static_rules_file")
66                 {
67                     string.value()
68                 } else {
69                     panic!("Attribute must be a file path")
70                 }
71             }
72             _ => panic!("Attribute must be a string"),
73         },
74         _ => panic!("Attribute must be of the form `key = \"...\"`"),
75     }
76 }
77 
/// Builds the identifier text `<ENUM_NAME>_<ENUM_VALUE>` for an enum variant.
///
/// The characters `.`, `[` and `]` are dropped from `enum_value`, and the
/// whole result is converted to ASCII uppercase.
fn format_enum_value_str(enum_name: &str, enum_value: &str) -> String {
    let mut formatted = String::with_capacity(enum_name.len() + 1 + enum_value.len());
    formatted.push_str(enum_name);
    formatted.push('_');
    formatted.extend(
        enum_value
            .chars()
            .filter(|c| !matches!(c, '.' | '[' | ']')),
    );
    formatted.make_ascii_uppercase();
    formatted
}
82 
83 /// Retrieves and formats the enum value string from a `BitSetEnumValue`
get_enum_value_str(enum_name: &str, enum_value: &BitSetEnumValue) -> String84 fn get_enum_value_str(enum_name: &str, enum_value: &BitSetEnumValue) -> String {
85     format_enum_value_str(enum_name, enum_value.name.unwrap_or(enum_value.display))
86 }
87 
88 /// Generates the implementation of `FromPestRule` for enums in the ISA
generate_from_rule_impl_enums(isa: &ISA) -> TokenStream289 fn generate_from_rule_impl_enums(isa: &ISA) -> TokenStream2 {
90     isa.enums
91         .values()
92         .map(|e| {
93             let enum_name_str = format!("isa_{}", e.name.trim_start_matches('#'));
94 
95             let enum_name = syn::Ident::new(&enum_name_str, proc_macro2::Span::call_site());
96             let match_arms: Vec<_> = e
97                 .values
98                 .iter()
99                 .filter(|v| !v.display.is_empty() && v.display != ".____")
100                 .map(|v| {
101                     let variant_name = syn::Ident::new(
102                         get_enum_value_str(&enum_name_str, v).as_str(),
103                         proc_macro2::Span::call_site(),
104                     );
105                     let rule_name = syn::Ident::new(
106                         &to_upper_camel_case(v.name.unwrap_or(v.display), false),
107                         proc_macro2::Span::call_site(),
108                     );
109                     quote! { Rule::#rule_name => #enum_name::#variant_name }
110                 })
111                 .collect();
112 
113             quote! {
114                 impl FromPestRule for #enum_name {
115                     fn from_rule(rule: Rule) -> Self where Self: Sized {
116                         match rule {
117                             #(#match_arms),*,
118                             _ => panic!("Unexpected rule: {:?}", rule),
119                         }
120                     }
121                 }
122             }
123         })
124         .collect()
125 }
126 
127 /// Generates the implementation of `FromPestRule` for ISA opcodes
generate_from_rule_impl_opc(isa: &ISA) -> TokenStream2128 fn generate_from_rule_impl_opc(isa: &ISA) -> TokenStream2 {
129     let instr_name = syn::Ident::new("isa_opc", proc_macro2::Span::call_site());
130 
131     let match_arms: Vec<_> = isa
132         .bitsets
133         .values()
134         .filter(|bitset| !bitset.name.starts_with('#'))
135         .map(|instr| {
136             let variant_name = syn::Ident::new(
137                 format_enum_value_str("isa_opc", instr.name).as_str(),
138                 proc_macro2::Span::call_site(),
139             );
140 
141             let pest_rule = format!("Opc_{}", instr.name);
142 
143             let rule_name = syn::Ident::new(
144                 &to_upper_camel_case(pest_rule.as_str(), true),
145                 proc_macro2::Span::call_site(),
146             );
147             quote! { Rule::#rule_name => #instr_name::#variant_name }
148         })
149         .collect();
150 
151     quote! {
152         impl FromPestRule for isa_opc {
153             fn from_rule(rule: Rule) -> Self where Self: Sized {
154                 match rule {
155                     #(#match_arms),*,
156                     _ => panic!("Unexpected rule: {:?}", rule),
157                 }
158             }
159         }
160     }
161 }
162 
163 /// Main derive function to generate the parser
derive_parser(input: TokenStream) -> TokenStream164 fn derive_parser(input: TokenStream) -> TokenStream {
165     let mut ast: DeriveInput = parse_macro_input!(input as DeriveInput);
166     let root = "../src/etnaviv/isa/";
167     let (isa_filename, static_rules_filename) = parse_derive(&ast);
168     let isa_path = Path::new(&root).join(isa_filename);
169     let static_rules_path = Path::new(&root).join(static_rules_filename);
170 
171     // Load the XML document
172     let xml_content = fs::read_to_string(isa_path).expect("Failed to read XML file");
173     let doc = Document::parse(&xml_content).expect("Failed to parse XML");
174     let isa = ISA::new(&doc);
175 
176     // Load the static rules
177     let mut grammar =
178         fs::read_to_string(static_rules_path).expect("Failed to read static rules pest file");
179 
180     // Append generated grammar rules
181     grammar.push_str(&generate_peg_grammar(&isa));
182 
183     // Add grammar as an attribute to the AST
184     ast.attrs.push(parse_quote! {
185         #[grammar_inline = #grammar]
186     });
187 
188     // Generate the token streams for the parser, trait, and rule implementations
189     let tokens_parser = pest_generator::derive_parser(ast.to_token_stream(), false);
190     let tokens_from_rule_enums = generate_from_rule_impl_enums(&isa);
191     let tokens_from_rule_opc = generate_from_rule_impl_opc(&isa);
192 
193     // Combine all token streams into one
194     let tokens = quote! {
195         #tokens_parser
196 
197         pub trait FromPestRule {
198             fn from_rule(rule: Rule) -> Self where Self: Sized;
199         }
200 
201         #tokens_from_rule_enums
202         #tokens_from_rule_opc
203     };
204 
205     tokens.into()
206 }
207 
208 /// Generates PEG grammar rules for enums
generate_peg_grammar_enums(isa: &ISA) -> String209 fn generate_peg_grammar_enums(isa: &ISA) -> String {
210     let mut grammar = String::new();
211 
212     for e in isa.enums.values() {
213         let mut values: Vec<_> = e
214             .values
215             .iter()
216             .filter(|v| !v.display.is_empty() && v.display != ".____")
217             .collect();
218 
219         // From the pest docs:
220         // The choice operator, written as a vertical line |, is ordered. The PEG
221         // expression first | second means "try first; but if it fails, try second instead".
222         //
223         // We need to sort our enum to be able to parse eg th1.xxxx and t1.xxxx
224         values.sort_by(|a, b| b.display.cmp(a.display));
225 
226         let rule_name = to_upper_camel_case(e.name.trim_start_matches('#'), true);
227 
228         let value_names: Vec<_> = values
229             .iter()
230             .map(|enum_value| {
231                 to_upper_camel_case(enum_value.name.unwrap_or(enum_value.display), false)
232             })
233             .collect();
234 
235         grammar.push_str(&format!(
236             "{} = {{ {} }}\n",
237             rule_name,
238             value_names.join(" | ")
239         ));
240 
241         for value in &values {
242             let variant_name = to_upper_camel_case(value.name.unwrap_or(value.display), false);
243             grammar.push_str(&format!(
244                 "    {} = {{ \"{}\" }}\n",
245                 variant_name, value.display
246             ));
247         }
248 
249         grammar.push('\n')
250     }
251 
252     grammar
253 }
254 
255 /// Generates PEG grammar rules for instructions
generate_peg_grammar_instructions(isa: &ISA) -> String256 fn generate_peg_grammar_instructions(isa: &ISA) -> String {
257     let mut grammar = String::new();
258 
259     // Collect instructions that do not start with "#"
260     let instructions: Vec<_> = isa
261         .bitsets
262         .values()
263         .filter(|bitset| !bitset.name.starts_with('#'))
264         .collect();
265 
266     // Generate instruction names
267     let instruction_names: Vec<_> = instructions
268         .iter()
269         .map(|instruction| format!("Opc{}", to_upper_camel_case(instruction.name, true)))
270         .collect();
271 
272     // Join instruction names and append to grammar
273     grammar.push_str(&format!(
274         "instruction = _{{ {} }}\n",
275         instruction_names.join(" | ")
276     ));
277 
278     for (instruction, opcode) in std::iter::zip(instructions, instruction_names) {
279         let meta = isa.collect_meta(instruction.name);
280         let type_ = meta.get("type").copied().unwrap_or("");
281 
282         // Prepare rule parts
283         let mut rule_parts = Vec::new();
284         rule_parts.push(format!(
285             "\"{}\"",
286             instruction.displayname.unwrap_or(instruction.name)
287         ));
288 
289         let template_key = format!("INSTR_{}", type_.to_ascii_uppercase());
290         let flags = isa
291             .templates
292             .get(template_key.as_str())
293             .map_or("", |template| template.display.trim());
294 
295         // Process flags
296         // Convert the XML string to a vec and filter out not wanted NAME.
297         // e.g.: {NAME}{DST_FULL}{SAT}{COND}{SKPHP}{TYPE}{PMODE}{THREAD}{RMODE} to
298         // ["Dst_full", "Sat", "Cond", "Skphp", "Type", "Pmode", "Thread", "Rounding"]
299         flags
300             .split(&['{', '}'])
301             .filter(|part| !part.trim().is_empty() && *part != "NAME")
302             .for_each(|part| {
303                 let part = if part == "RMODE" { "Rounding" } else { part };
304                 rule_parts.push(format!("{}?", to_upper_camel_case(part, false)));
305             });
306 
307         let has_dest = meta
308             .get("has_dest")
309             .map(|b| b.parse::<bool>())
310             .unwrap_or(Ok(false))
311             .expect("has_dest must be a bool value (true|false)");
312 
313         let rule_part = match (has_dest, type_) {
314             (true, "load_store") => "(Dest | DstMemAddr) ~ \",\"",
315             (true, _) => "Dest ~ \",\"",
316             (false, _) => "DestVoid ~ \",\"",
317         };
318 
319         rule_parts.push(rule_part.to_string());
320 
321         if type_ == "tex" {
322             rule_parts.push("TexSrc ~ \",\"".to_string());
323         }
324 
325         let possible_srcs = if type_ == "cf" { 2 } else { 3 };
326         let valid_srcs: Vec<_> = meta
327             .get("valid_srcs")
328             .unwrap_or(&"")
329             .split('|')
330             .filter_map(|s| s.parse::<usize>().ok())
331             .collect();
332 
333         for i in 0..possible_srcs {
334             if valid_srcs.contains(&i) {
335                 rule_parts.push("Src".to_string());
336             } else {
337                 rule_parts.push("SrcVoid".to_string());
338             }
339             if i + 1 < possible_srcs {
340                 rule_parts.push("\",\"".to_string());
341             }
342         }
343 
344         if type_ == "cf" {
345             rule_parts.push("\",\"".to_string());
346             rule_parts.push("Target".to_string());
347         }
348 
349         grammar.push_str(&format!(
350             "    {} = {{ {} }}\n",
351             opcode,
352             rule_parts.join(" ~ ")
353         ));
354     }
355 
356     grammar
357 }
358 
359 /// Combines the PEG grammar rules for enums and instructions
generate_peg_grammar(isa: &ISA) -> String360 fn generate_peg_grammar(isa: &ISA) -> String {
361     let mut grammar = String::new();
362 
363     grammar.push_str(&generate_peg_grammar_enums(isa));
364     grammar.push_str(&generate_peg_grammar_instructions(isa));
365     grammar.push_str("instructions = _{ SOI ~ (instruction ~ NEWLINE?)* ~ EOI }");
366 
367     grammar
368 }
369 
/// Converts a string to UpperCamelCase
///
/// The characters `.`, `[` and `]` are dropped. The remaining text is split
/// into words at whitespace; each word gets its first character uppercased
/// and the rest lowercased, and the words are concatenated.
///
/// # Arguments
/// * `s` - The input string
/// * `replace_underscores` - Whether underscores also act as word separators.
///   When `false`, underscores are kept as ordinary characters.
fn to_upper_camel_case(s: &str, replace_underscores: bool) -> String {
    let mut camel = String::with_capacity(s.len());
    let mut at_word_start = true;

    for ch in s.chars() {
        // dropped characters do not end the current word
        if matches!(ch, '.' | '[' | ']') {
            continue;
        }

        // separators are consumed and start a new word
        if ch.is_whitespace() || (replace_underscores && ch == '_') {
            at_word_start = true;
            continue;
        }

        if at_word_start {
            camel.extend(ch.to_uppercase());
            at_word_start = false;
        } else {
            camel.extend(ch.to_lowercase());
        }
    }

    camel
}
398 
/// Procedural macro to derive the ISA parser
///
/// Registers the `IsaParser` derive together with its `isa` and
/// `static_rules_file` helper attributes, and forwards the input token stream
/// unchanged to `derive_parser`.
#[proc_macro_derive(IsaParser, attributes(isa, static_rules_file))]
pub fn derive_isaspec_parser(input: TokenStream) -> TokenStream {
    derive_parser(input)
}
404 
/// Unit tests for attribute parsing, name conversion and grammar generation.
#[cfg(test)]
mod lib {
    use super::*;
    use crate::isa::{BitSetEnum, BitSetEnumValue, Bitset, BitsetTemplate, ISA};
    use indexmap::IndexMap;
    use std::collections::HashMap;

    /// Both attributes present and well-formed: their values are returned.
    #[test]
    fn derive_ok() {
        let definition = "
            #[other_attr]
            #[isa = \"myfile.isa\"]
            #[static_rules_file = \"static_rules.pest\"]
            pub struct MyParser<'a, T>;
        ";
        let ast = syn::parse_str(definition).unwrap();
        let (isa, static_rules) = parse_derive(&ast);
        assert_eq!(isa, "myfile.isa");
        assert_eq!(static_rules, "static_rules.pest");
    }

    /// A non-string `isa` value is rejected.
    #[test]
    #[should_panic(expected = "Attribute must be a string")]
    fn derive_wrong_arg_isa() {
        let definition = "
            #[other_attr]
            #[isa = 1]
            #[static_rules_file = \"static_rules.pest\"]
            pub struct MyParser<'a, T>;
        ";
        let ast = syn::parse_str(definition).unwrap();
        parse_derive(&ast);
    }

    /// A non-string `static_rules_file` value is rejected.
    #[test]
    #[should_panic(expected = "Attribute must be a string")]
    fn derive_wrong_arg_static_rules_file() {
        let definition = "
            #[other_attr]
            #[isa = \"test.xml\"]
            #[static_rules_file = 1]
            pub struct MyParser<'a, T>;
        ";
        let ast = syn::parse_str(definition).unwrap();
        parse_derive(&ast);
    }

    /// A missing `isa` attribute is reported with a dedicated message.
    #[test]
    #[should_panic(
        expected = "An ISA file needs to be provided with the #[isa = \"PATH\"] attribute"
    )]
    fn derive_no_isa() {
        let definition = "
            #[other_attr]
            pub struct MyParser<'a, T>;
        ";
        let ast = syn::parse_str(definition).unwrap();
        parse_derive(&ast);
    }

    #[test]
    fn test_to_upper_camel_case() {
        assert_eq!(to_upper_camel_case("test_string", true), "TestString");
        assert_eq!(to_upper_camel_case("test_string", false), "Test_string");
        assert_eq!(to_upper_camel_case("[Test]_String", true), "TestString");
        assert_eq!(to_upper_camel_case("[Test]_String", false), "Test_string");
        assert_eq!(
            to_upper_camel_case("multiple_words_string", true),
            "MultipleWordsString"
        );
    }

    /// Builds a minimal ISA with one ALU instruction, one enum with two named
    /// values, and the ALU display template.
    fn mock_isa() -> ISA<'static> {
        let mut bitsets = IndexMap::new();
        let mut enums = IndexMap::new();
        let mut templates = IndexMap::new();

        // Add mock data for bitsets, enums, and templates
        // Example for bitsets
        bitsets.insert(
            "bitset1",
            Bitset {
                name: "bitset1",
                // The instruction generator reads `displayname` and falls back
                // to `name` when it is absent, so the fixture must provide it.
                displayname: None,
                extends: None,
                meta: HashMap::from([("type", "alu"), ("has_dest", "true"), ("valid_srcs", "0")]),
            },
        );

        // Example for enums
        enums.insert(
            "enum1",
            BitSetEnum {
                name: "enum1",
                values: vec![
                    BitSetEnumValue {
                        display: "val1",
                        name: Some("val1_name"),
                    },
                    BitSetEnumValue {
                        display: "val2",
                        name: Some("val2_name"),
                    },
                ],
            },
        );

        // Example for templates
        templates.insert(
            "INSTR_ALU",
            BitsetTemplate {
                display: "{DST_FULL}{SAT}{COND}",
            },
        );

        ISA {
            bitsets,
            enums,
            templates,
        }
    }

    #[test]
    fn test_generate_peg_grammar_enums() {
        let isa = mock_isa();
        let grammar = generate_peg_grammar_enums(&isa);
        // Rule names come from `name` when it is set (underscores are kept),
        // while the matched literal is always the `display` string. Values
        // are ordered by descending display string.
        assert!(grammar.contains("Enum1 = { Val2_name | Val1_name }"));
        assert!(grammar.contains("Val1_name = { \"val1\" }"));
        assert!(grammar.contains("Val2_name = { \"val2\" }"));
    }

    #[test]
    fn test_generate_peg_grammar_instructions() {
        let isa = mock_isa();
        let grammar = generate_peg_grammar_instructions(&isa);
        // The per-instruction choice rule is the singular `instruction`.
        assert!(grammar.contains("instruction = _{ OpcBitset1 }"));
        assert!(grammar.contains("OpcBitset1 = { \"bitset1\" ~ Dst_full? ~ Sat? ~ Cond? ~ Dest ~ \",\" ~ Src ~ \",\" ~ SrcVoid ~ \",\" ~ SrcVoid }"));
    }

    #[test]
    fn test_generate_peg_grammar() {
        let isa = mock_isa();
        let grammar = generate_peg_grammar(&isa);
        assert!(grammar.contains("Enum1 = { Val2_name | Val1_name }"));
        assert!(grammar.contains("instruction = _{ OpcBitset1 }"));
        assert!(grammar.contains("OpcBitset1 = { \"bitset1\" ~ Dst_full? ~ Sat? ~ Cond? ~ Dest ~ \",\" ~ Src ~ \",\" ~ SrcVoid ~ \",\" ~ SrcVoid }"));
        // The top-level rule wrapping all instructions is appended last.
        assert!(grammar.ends_with("instructions = _{ SOI ~ (instruction ~ NEWLINE?)* ~ EOI }"));
    }
}
552