From f061fca9be62e41f7b25f39a70e8fd5b49f4f987 Mon Sep 17 00:00:00 2001 From: Henrique Lorenzi Date: Sun, 20 May 2018 18:44:45 -0300 Subject: [PATCH] add custom token defs; implement #2 --- Cargo.toml | 2 +- README.md | 11 +-- doc/cpudef.md | 37 ++++++++- src/asm/cpudef/cpudef.rs | 101 ++++++++++++++++++++++--- src/asm/cpudef/mod.rs | 2 + src/asm/cpudef/rule.rs | 16 +++- src/asm/cpudef/rule_pattern_matcher.rs | 77 +++++++++++++------ src/driver.rs | 13 ++-- src/expr/expression.rs | 9 +++ src/test/asm.rs | 21 +++++ 10 files changed, 237 insertions(+), 52 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8f77b5c0..8f0838fc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "customasm" -version = "0.7.0" +version = "0.8.0" authors = ["Henrique Lorenzi "] [lib] diff --git a/README.md b/README.md index 0db482ac..5969d9d1 100644 --- a/README.md +++ b/README.md @@ -15,23 +15,16 @@ Also, [check out an example project](/examples/nes/) which targets the NES! You can compile from source by simply doing `cargo build`. There's also a battery of tests available at `cargo test`. -## Upgrading from `v0.4` - -Starting from `v0.6`, if you don't want to `#include` a CPU file in the main assembly file, -you can specify separate files to process in the same assembly session -with the `-i` command line option. Just remember to enclose the old CPU definition in a -`#cpudef` directive. - ## Command Line Usage ``` -Usage: customasm [options] +Usage: customasm [options] ... Options: -f, --format FORMAT The format of the output file. Possible formats: binary, binstr, hexstr, bindump, hexdump -i, --include FILE Specifies an additional file for processing before the - main assembly. + given . -o, --output FILE The name of the output file. -p, --print Print output to stdout instead of writing to a file. -q, --quiet Suppress progress reports. diff --git a/doc/cpudef.md b/doc/cpudef.md index 40f12a31..8d968ab0 100644 --- a/doc/cpudef.md +++ b/doc/cpudef.md @@ -19,7 +19,7 @@ defines mnemonics for its instruction set. ## Configurations The syntax first expects a list of configuration directives, one per line. -The currently available configuration is: +The currently available configurations are: - `#align ` Sets the number of bits per byte for the target machine. @@ -32,6 +32,19 @@ Machine instructions must be aligned to a byte boundary, hence the directive's name. So, with 8-bit bytes, valid instruction sizes are 8 bits, 16 bits, 24 bits, and so on. +- `#tokendef ` +Creates a group of tokens with associated values, which can +be used in place of arguments (e.g. for named registers). +See below for usage in parameters. Syntax is as follows: +```asm +#tokendef reg +{ + a = 1 + b = 2 + c = 3 +} +``` + ## Rules The first line not starting with a `#` begins the list of rules. @@ -48,6 +61,9 @@ The pattern part of a rule defines its mnemonic and/or parameter slots. The pattern is a sequence of tokens: - For mnemonics, text, or punctuation: just write them out verbatim. - For parameter slots: write them as `{x}`, with `x` being any valid name. +- For custom token groups declared with `#tokendef`, write them as `{x: name}`, +with `name` being the name given at the `#tokendef` declaration (`reg` in the +example above). ### Output @@ -209,4 +225,23 @@ Rule | Used as | Output nop -> 0b110 halt -> 0b111 } +``` + +```asm +#cpudef +{ + ; example with named registers + + #align 8 + + #tokendef reg + { + r0 = 0 + r1 = 1 + r2 = 2 + r3 = 3 + } + + mov {dest: reg}, {value} -> 0b111100 @ dest[1:0] @ value[7:0] +} ``` \ No newline at end of file diff --git a/src/asm/cpudef/cpudef.rs b/src/asm/cpudef/cpudef.rs index 60437d64..d3249abc 100644 --- a/src/asm/cpudef/cpudef.rs +++ b/src/asm/cpudef/cpudef.rs @@ -1,6 +1,8 @@ use syntax::{Token, TokenKind, Parser}; -use expr::Expression; -use asm::cpudef::{Rule, RulePatternMatcher}; +use expr::{Expression, ExpressionValue}; +use asm::cpudef::{Rule, RuleParameterType, RulePatternMatcher}; +use num::BigInt; +use std::collections::HashMap; #[derive(Debug)] @@ -8,7 +10,8 @@ pub struct CpuDef { pub align: usize, pub rules: Vec, - pub pattern_matcher: RulePatternMatcher + pub pattern_matcher: RulePatternMatcher, + pub custom_token_defs: Vec } @@ -17,7 +20,16 @@ struct CpuDefParser<'t> parser: &'t mut Parser, align: Option, - rules: Vec + rules: Vec, + custom_token_defs: Vec +} + + +#[derive(Debug)] +pub struct CustomTokenDef +{ + pub name: String, + pub excerpt_to_value_map: HashMap } @@ -29,7 +41,8 @@ impl CpuDef { parser: parser, align: None, - rules: Vec::new() + rules: Vec::new(), + custom_token_defs: Vec::new() }; cpudef_parser.parse_directives()?; @@ -39,13 +52,14 @@ impl CpuDef cpudef_parser.parse_rules()?; - let pattern_matcher = RulePatternMatcher::new(&cpudef_parser.rules); + let pattern_matcher = RulePatternMatcher::new(&cpudef_parser.rules, &cpudef_parser.custom_token_defs); let cpudef = CpuDef { align: cpudef_parser.align.unwrap(), rules: cpudef_parser.rules, - pattern_matcher: pattern_matcher + pattern_matcher: pattern_matcher, + custom_token_defs: cpudef_parser.custom_token_defs }; Ok(cpudef) @@ -63,6 +77,7 @@ impl<'t> CpuDefParser<'t> match tk_name.excerpt.as_ref().unwrap().as_ref() { "align" => self.parse_directive_align(&tk_name)?, + "tokendef" => self.parse_directive_tokendef(&tk_name)?, _ => return Err(self.parser.report.error_span("unknown directive", &tk_name.span)) } @@ -89,6 +104,53 @@ impl<'t> CpuDefParser<'t> Ok(()) } + + fn parse_directive_tokendef(&mut self, _tk_name: &Token) -> Result<(), ()> + { + let tk_defname = self.parser.expect(TokenKind::Identifier)?; + + let defname = tk_defname.excerpt.unwrap().clone(); + + if self.custom_token_defs.iter().find(|def| def.name == defname).is_some() + { return Err(self.parser.report.error_span("duplicate custom token def name", &tk_defname.span)); } + + let mut tokendef = CustomTokenDef + { + name: defname, + excerpt_to_value_map: HashMap::new() + }; + + self.parser.expect(TokenKind::BraceOpen)?; + + while !self.parser.is_over() && !self.parser.next_is(0, TokenKind::BraceClose) + { + let tk_token = self.parser.expect(TokenKind::Identifier)?; + let token_excerpt = tk_token.excerpt.unwrap().clone(); + + if tokendef.excerpt_to_value_map.contains_key(&token_excerpt) + { return Err(self.parser.report.error_span("duplicate token in group", &tk_token.span)); } + + self.parser.expect(TokenKind::Equal)?; + let value = ExpressionValue::Integer(BigInt::from(self.parser.expect_usize()?.1)); + + tokendef.excerpt_to_value_map.insert(token_excerpt, value); + + if self.parser.maybe_expect_linebreak().is_some() + { continue; } + + if self.parser.next_is(0, TokenKind::BraceClose) + { continue; } + + self.parser.expect(TokenKind::Comma)?; + } + + self.parser.expect(TokenKind::BraceClose)?; + + self.custom_token_defs.push(tokendef); + + Ok(()) + } + fn parse_rules(&mut self) -> Result<(), ()> { @@ -188,7 +250,28 @@ impl<'t> CpuDefParser<'t> if rule.param_exists(&name) { return Err(self.parser.report.error_span("duplicate parameter name", &tk_name.span)); } - rule.pattern_add_param(name); + let typ = + if self.parser.maybe_expect(TokenKind::Colon).is_some() + { + let tk_type = self.parser.expect(TokenKind::Identifier)?; + let typename = tk_type.excerpt.unwrap().clone(); + + let mut tokendef_index = None; + for i in 0..self.custom_token_defs.len() + { + if typename == self.custom_token_defs[i].name + { tokendef_index = Some(i); } + } + + if tokendef_index.is_none() + { return Err(self.parser.report.error_span("unknown parameter type", &tk_type.span)); } + + RuleParameterType::CustomTokenDef(tokendef_index.unwrap()) + } + else + { RuleParameterType::Expression }; + + rule.pattern_add_param(name, typ); self.parser.expect(TokenKind::BraceClose)?; @@ -207,7 +290,7 @@ impl<'t> CpuDefParser<'t> }; if width % self.align.unwrap() != 0 - { return Err(self.parser.report.error_span(format!("production (width = {}) does not align with a word boundary", width), &expr.span())); } + { return Err(self.parser.report.error_span(format!("binary representation (width = {}) does not align with a word boundary", width), &expr.span())); } rule.production = expr; diff --git a/src/asm/cpudef/mod.rs b/src/asm/cpudef/mod.rs index 2a41ca38..50862ed9 100644 --- a/src/asm/cpudef/mod.rs +++ b/src/asm/cpudef/mod.rs @@ -4,7 +4,9 @@ mod rule_pattern_matcher; pub use self::cpudef::CpuDef; +pub use self::cpudef::CustomTokenDef; pub use self::rule::Rule; pub use self::rule::RulePatternPart; pub use self::rule::RuleParameter; +pub use self::rule::RuleParameterType; pub use self::rule_pattern_matcher::RulePatternMatcher; \ No newline at end of file diff --git a/src/asm/cpudef/rule.rs b/src/asm/cpudef/rule.rs index a20a57e6..1bb97a6f 100644 --- a/src/asm/cpudef/rule.rs +++ b/src/asm/cpudef/rule.rs @@ -20,10 +20,19 @@ pub enum RulePatternPart } +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub enum RuleParameterType +{ + Expression, + CustomTokenDef(usize) +} + + #[derive(Debug)] pub struct RuleParameter { - pub name: String + pub name: String, + pub typ: RuleParameterType } @@ -47,7 +56,7 @@ impl Rule } - pub fn pattern_add_param(&mut self, name: S) + pub fn pattern_add_param(&mut self, name: S, typ: RuleParameterType) where S: Into { let name_owned = name.into(); @@ -58,7 +67,8 @@ impl Rule let param = RuleParameter { - name: name_owned + name: name_owned, + typ: typ }; self.params.push(param); diff --git a/src/asm/cpudef/rule_pattern_matcher.rs b/src/asm/cpudef/rule_pattern_matcher.rs index 0f85b868..c19b7ad8 100644 --- a/src/asm/cpudef/rule_pattern_matcher.rs +++ b/src/asm/cpudef/rule_pattern_matcher.rs @@ -1,6 +1,6 @@ use syntax::{TokenKind, Parser}; -use expr::Expression; -use asm::cpudef::{Rule, RulePatternPart}; +use expr::{Expression, ExpressionValue}; +use asm::cpudef::{Rule, RuleParameterType, RulePatternPart, CustomTokenDef}; use std::collections::HashMap; @@ -15,10 +15,11 @@ pub struct RulePatternMatcher struct MatchStep { rule_indices: Vec, - children_exact: HashMap, + children_exact: HashMap, MatchStep)>, children_param: HashMap } + #[derive(Debug, Eq, PartialEq, Hash)] struct MatchStepExact(TokenKind, Option); @@ -37,12 +38,12 @@ pub struct Match impl RulePatternMatcher { - pub fn new(rules: &[Rule]) -> RulePatternMatcher + pub fn new(rules: &[Rule], custom_token_defs: &Vec) -> RulePatternMatcher { let mut root_step = MatchStep::new(); for i in 0..rules.len() - { RulePatternMatcher::build_step(&mut root_step, &rules[i].pattern_parts, i); } + { RulePatternMatcher::build_step(&mut root_step, &rules[i], &rules[i].pattern_parts, i, custom_token_defs); } RulePatternMatcher @@ -52,7 +53,7 @@ impl RulePatternMatcher } - fn build_step(step: &mut MatchStep, next_parts: &[RulePatternPart], rule_index: usize) + fn build_step(step: &mut MatchStep, rule: &Rule, next_parts: &[RulePatternPart], rule_index: usize, custom_token_defs: &Vec) { if next_parts.len() == 0 { @@ -67,30 +68,52 @@ impl RulePatternMatcher { let step_kind = MatchStepExact(kind, excerpt.as_ref().map(|s| s.to_ascii_lowercase())); - if let Some(next_step) = step.children_exact.get_mut(&step_kind) + if let Some(&mut (_, ref mut next_step)) = step.children_exact.get_mut(&step_kind) { - RulePatternMatcher::build_step(next_step, &next_parts[1..], rule_index); + RulePatternMatcher::build_step(next_step, rule, &next_parts[1..], rule_index, custom_token_defs); return; } let mut next_step = MatchStep::new(); - RulePatternMatcher::build_step(&mut next_step, &next_parts[1..], rule_index); - step.children_exact.insert(step_kind, next_step); + RulePatternMatcher::build_step(&mut next_step, rule, &next_parts[1..], rule_index, custom_token_defs); + step.children_exact.insert(step_kind, (None, next_step)); } - RulePatternPart::Parameter(_) => + RulePatternPart::Parameter(param_index) => { - let step_kind = MatchStepParameter; - - if let Some(next_step) = step.children_param.get_mut(&step_kind) + if let RuleParameterType::CustomTokenDef(tokendef_index) = rule.params[param_index].typ { - RulePatternMatcher::build_step(next_step, &next_parts[1..], rule_index); - return; + let custom_token_def = &custom_token_defs[tokendef_index]; + + for (excerpt, value) in &custom_token_def.excerpt_to_value_map + { + let step_kind = MatchStepExact(TokenKind::Identifier, Some(excerpt.to_ascii_lowercase())); + + if let Some(&mut (_, ref mut next_step)) = step.children_exact.get_mut(&step_kind) + { + RulePatternMatcher::build_step(next_step, rule, &next_parts[1..], rule_index, custom_token_defs); + return; + } + + let mut next_step = MatchStep::new(); + RulePatternMatcher::build_step(&mut next_step, rule, &next_parts[1..], rule_index, custom_token_defs); + step.children_exact.insert(step_kind, (Some(value.clone()), next_step)); + } + } + else + { + let step_kind = MatchStepParameter; + + if let Some(next_step) = step.children_param.get_mut(&step_kind) + { + RulePatternMatcher::build_step(next_step, rule, &next_parts[1..], rule_index, custom_token_defs); + return; + } + + let mut next_step = MatchStep::new(); + RulePatternMatcher::build_step(&mut next_step, rule, &next_parts[1..], rule_index, custom_token_defs); + step.children_param.insert(step_kind, next_step); } - - let mut next_step = MatchStep::new(); - RulePatternMatcher::build_step(&mut next_step, &next_parts[1..], rule_index); - step.children_param.insert(step_kind, next_step); } } } @@ -129,10 +152,18 @@ impl RulePatternMatcher let step_exact = MatchStepExact(tk.kind, tk.excerpt.map(|s| s.to_ascii_lowercase())); - if let Some(next_step) = step.children_exact.get(&step_exact) + if let Some(&(ref value, ref next_step)) = step.children_exact.get(&step_exact) { - if let Some(result) = self.parse_match_step(parser, &next_step, exprs) - { return Some(result); } + if value.is_some() + { exprs.push(value.as_ref().unwrap().make_literal()); } + + if let Some(result) = self.parse_match_step(parser, next_step, exprs) + { + return Some(result); + } + + if value.is_some() + { exprs.pop(); } } parser.restore(parser_state); diff --git a/src/driver.rs b/src/driver.rs index d01835db..62ff7154 100644 --- a/src/driver.rs +++ b/src/driver.rs @@ -77,17 +77,17 @@ fn drive_inner(report: RcReport, opts: &getopts::Options, args: &Vec, fi } }; - if matches.free.len() != 1 + if matches.free.len() < 1 { return Err(true); } - let asm_file = matches.free[0].clone(); + let main_asm_file = matches.free[0].clone(); let output_file = match matches.opt_str("o") { Some(f) => f, None => { - match get_default_output_filename(report.clone(), &asm_file) + match get_default_output_filename(report.clone(), &main_asm_file) { Ok(f) => f, Err(_) => return Err(true) @@ -96,7 +96,8 @@ fn drive_inner(report: RcReport, opts: &getopts::Options, args: &Vec, fi }; let mut filenames = matches.opt_strs("i"); - filenames.push(asm_file); + for filename in matches.free + { filenames.push(filename); } let assembled = assemble(report.clone(), fileserver, &filenames, quiet).map_err(|_| false)?; @@ -136,7 +137,7 @@ fn make_opts() -> getopts::Options { let mut opts = getopts::Options::new(); opts.optopt("f", "format", "The format of the output file. Possible formats: binary, binstr, hexstr, bindump, hexdump", "FORMAT"); - opts.optmulti("i", "include", "Specifies an additional file for processing before the main assembly.", "FILE"); + opts.optmulti("i", "include", "Specifies an additional file for processing before the given .", "FILE"); opts.optopt("o", "output", "The name of the output file.", "FILE"); opts.optflag("p", "print", "Print output to stdout instead of writing to a file."); opts.optflag("q", "quiet", "Suppress progress reports."); @@ -159,7 +160,7 @@ fn parse_opts(report: RcReport, opts: &getopts::Options, args: &Vec) -> fn print_usage(opts: &getopts::Options) { - println!("{}", opts.usage(&format!("Usage: {} [options] ", env!("CARGO_PKG_NAME")))); + println!("{}", opts.usage(&format!("Usage: {} [options] ... ", env!("CARGO_PKG_NAME")))); } diff --git a/src/expr/expression.rs b/src/expr/expression.rs index e742f899..f4cf6db5 100644 --- a/src/expr/expression.rs +++ b/src/expr/expression.rs @@ -70,4 +70,13 @@ impl Expression &Expression::Call (ref span, ..) => span.clone() } } +} + + +impl ExpressionValue +{ + pub fn make_literal(&self) -> Expression + { + Expression::Literal(Span::new_dummy(), self.clone()) + } } \ No newline at end of file diff --git a/src/test/asm.rs b/src/test/asm.rs index 73c99645..9c53e792 100644 --- a/src/test/asm.rs +++ b/src/test/asm.rs @@ -143,6 +143,27 @@ fn test_parameters() } +#[test] +fn test_tokendef() +{ + test("#tokendef reg { r1 = 1 } \n mov {a: reg} -> 0xff @ a[7:0]", "mov r1", Pass((4, "ff01"))); + test("#tokendef reg { r1 = 0xbc } \n mov {a: reg} -> 0xff @ a[7:0]", "mov r1", Pass((4, "ffbc"))); + + test("#tokendef reg { r1 = 1, r2 = 2 } \n mov {a: reg} -> 0xff @ a[7:0]", "mov r1 \n mov r2", Pass((4, "ff01ff02"))); + + test("#tokendef reg1 { r1 = 1 } \n #tokendef reg2 { r1 = 2 } \n mov1 {a: reg1} -> 0xff @ a[7:0] \n mov2 {a: reg2} -> 0xee @ a[7:0]", "mov1 r1 \n mov2 r1", Pass((4, "ff01ee02"))); + + test("#tokendef reg1 { r1 = 1 } \n #tokendef reg2 { r1 = 2 } \n mov {a: reg1} -> 0xff @ a[7:0] \n mov {a: reg2} -> 0xee @ a[7:0]", "mov r1 \n mov r1", Pass((4, "ff01ff01"))); + test("#tokendef reg1 { r1 = 1 } \n #tokendef reg2 { r2 = 2 } \n mov {a: reg1} -> 0xff @ a[7:0] \n mov {a: reg2} -> 0xee @ a[7:0]", "mov r1 \n mov r2", Pass((4, "ff01ee02"))); + + test("#tokendef reg { r1 = 0xbc } \n mov {a: reg} -> 0xff @ a[7:0]", "mov r2", Fail(("asm", 1, "no match"))); + + test("#tokendef reg { r1 = 1, r1 = 2 } \n mov {a: reg} -> 0xff @ a[7:0]", "mov r1", Fail(("cpu", 1, "duplicate token"))); + test("#tokendef 123 { r1 = 1, r2 = 2 } \n mov {a: reg} -> 0xff @ a[7:0]", "mov r1", Fail(("cpu", 1, "identifier"))); + test("#tokendef reg { r1 = 1 } \n #tokendef reg { r2 = 1 } \n mov {a: reg} -> 0xff @ a[7:0]", "mov r1", Fail(("cpu", 2, "duplicate custom token"))); +} + + #[test] fn test_assertions() {