From f061fca9be62e41f7b25f39a70e8fd5b49f4f987 Mon Sep 17 00:00:00 2001
From: Henrique Lorenzi <hlorenzi12@gmail.com>
Date: Sun, 20 May 2018 18:44:45 -0300
Subject: [PATCH] add custom token defs; implement #2

---
 Cargo.toml                             |   2 +-
 README.md                              |  11 +--
 doc/cpudef.md                          |  37 ++++++++-
 src/asm/cpudef/cpudef.rs               | 101 ++++++++++++++++++++++---
 src/asm/cpudef/mod.rs                  |   2 +
 src/asm/cpudef/rule.rs                 |  16 +++-
 src/asm/cpudef/rule_pattern_matcher.rs |  77 +++++++++++++------
 src/driver.rs                          |  13 ++--
 src/expr/expression.rs                 |   9 +++
 src/test/asm.rs                        |  21 +++++
 10 files changed, 237 insertions(+), 52 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index 8f77b5c0..8f0838fc 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "customasm"
-version = "0.7.0"
+version = "0.8.0"
 authors = ["Henrique Lorenzi <hlorenzi12@gmail.com>"]
 
 [lib]
diff --git a/README.md b/README.md
index 0db482ac..5969d9d1 100644
--- a/README.md
+++ b/README.md
@@ -15,23 +15,16 @@ Also, [check out an example project](/examples/nes/) which targets the NES!
 You can compile from source by simply doing `cargo build`. There's also a
 battery of tests available at `cargo test`.
 
-## Upgrading from `v0.4`
-
-Starting from `v0.6`, if you don't want to `#include` a CPU file in the main assembly file,
-you can specify separate files to process in the same assembly session
-with the `-i` command line option. Just remember to enclose the old CPU definition in a
-`#cpudef` directive.
-
 ## Command Line Usage
 
 ```
-Usage: customasm [options] <asm-file>
+Usage: customasm [options] <asm-file-1> ... <asm-file-N>
 
 Options:
     -f, --format FORMAT The format of the output file. Possible formats:
                         binary, binstr, hexstr, bindump, hexdump
     -i, --include FILE  Specifies an additional file for processing before the
-                        main assembly.
+                        given <asm-files>.
     -o, --output FILE   The name of the output file.
     -p, --print         Print output to stdout instead of writing to a file.
     -q, --quiet         Suppress progress reports.
diff --git a/doc/cpudef.md b/doc/cpudef.md
index 40f12a31..8d968ab0 100644
--- a/doc/cpudef.md
+++ b/doc/cpudef.md
@@ -19,7 +19,7 @@ defines mnemonics for its instruction set.
 ## Configurations
 
 The syntax first expects a list of configuration directives, one per line.
-The currently available configuration is:
+The currently available configurations are:
 
 - `#align <bit_num>`  
 Sets the number of bits per byte for the target machine.  
@@ -32,6 +32,19 @@ Machine instructions must be aligned to a byte boundary,
 hence the directive's name. So, with 8-bit bytes, valid
 instruction sizes are 8 bits, 16 bits, 24 bits, and so on.
 
+- `#tokendef <name>`
+Creates a group of tokens with associated values, which can
+be used in place of arguments (e.g. for named registers).
+See below for usage in parameters. Syntax is as follows:
+```asm
+#tokendef reg
+{
+	a = 1
+	b = 2
+	c = 3
+}
+```
+
 ## Rules
 
 The first line not starting with a `#` begins the list of rules.  
@@ -48,6 +61,9 @@ The pattern part of a rule defines its mnemonic and/or parameter slots.
 The pattern is a sequence of tokens:  
 - For mnemonics, text, or punctuation: just write them out verbatim.
 - For parameter slots: write them as `{x}`, with `x` being any valid name.
+- For custom token groups declared with `#tokendef`, write them as `{x: name}`,
+with `name` being the name given at the `#tokendef` declaration (`reg` in the
+example above).
 
 ### Output
 
@@ -209,4 +225,23 @@ Rule | Used as | Output
     nop          -> 0b110
     halt         -> 0b111
 }
+```
+
+```asm
+#cpudef
+{
+    ; example with named registers
+
+    #align 8
+
+    #tokendef reg
+    {
+        r0 = 0
+        r1 = 1
+        r2 = 2
+        r3 = 3
+    }
+
+    mov {dest: reg}, {value} -> 0b111100 @ dest[1:0] @ value[7:0]
+}
 ```
\ No newline at end of file
diff --git a/src/asm/cpudef/cpudef.rs b/src/asm/cpudef/cpudef.rs
index 60437d64..d3249abc 100644
--- a/src/asm/cpudef/cpudef.rs
+++ b/src/asm/cpudef/cpudef.rs
@@ -1,6 +1,8 @@
 use syntax::{Token, TokenKind, Parser};
-use expr::Expression;
-use asm::cpudef::{Rule, RulePatternMatcher};
+use expr::{Expression, ExpressionValue};
+use asm::cpudef::{Rule, RuleParameterType, RulePatternMatcher};
+use num::BigInt;
+use std::collections::HashMap;
 
 
 #[derive(Debug)]
@@ -8,7 +10,8 @@ pub struct CpuDef
 {
 	pub align: usize,
 	pub rules: Vec<Rule>,
-	pub pattern_matcher: RulePatternMatcher
+	pub pattern_matcher: RulePatternMatcher,
+	pub custom_token_defs: Vec<CustomTokenDef>
 }
 
 
@@ -17,7 +20,16 @@ struct CpuDefParser<'t>
 	parser: &'t mut Parser,
 	
 	align: Option<usize>,
-	rules: Vec<Rule>
+	rules: Vec<Rule>,
+	custom_token_defs: Vec<CustomTokenDef>
+}
+
+/// A named group of custom tokens declared with `#tokendef`, each mapped to a value.
+#[derive(Debug)]
+pub struct CustomTokenDef
+{
+	pub name: String, // group name, as written after `#tokendef`
+	pub excerpt_to_value_map: HashMap<String, ExpressionValue> // token text -> value supplied for the parameter when that token is matched
 }
 
 
@@ -29,7 +41,8 @@ impl CpuDef
 		{
 			parser: parser,
 			align: None,
-			rules: Vec::new()
+			rules: Vec::new(),
+			custom_token_defs: Vec::new()
 		};
 		
 		cpudef_parser.parse_directives()?;	
@@ -39,13 +52,14 @@ impl CpuDef
 		
 		cpudef_parser.parse_rules()?;
 		
-		let pattern_matcher = RulePatternMatcher::new(&cpudef_parser.rules);
+		let pattern_matcher = RulePatternMatcher::new(&cpudef_parser.rules, &cpudef_parser.custom_token_defs);
 		
 		let cpudef = CpuDef
 		{
 			align: cpudef_parser.align.unwrap(),
 			rules: cpudef_parser.rules,
-			pattern_matcher: pattern_matcher
+			pattern_matcher: pattern_matcher,
+			custom_token_defs: cpudef_parser.custom_token_defs
 		};
 		
 		Ok(cpudef)
@@ -63,6 +77,7 @@ impl<'t> CpuDefParser<'t>
 			match tk_name.excerpt.as_ref().unwrap().as_ref()
 			{
 				"align" => self.parse_directive_align(&tk_name)?,
+				"tokendef" => self.parse_directive_tokendef(&tk_name)?,
 				
 				_ => return Err(self.parser.report.error_span("unknown directive", &tk_name.span))
 			}
@@ -89,6 +104,53 @@ impl<'t> CpuDefParser<'t>
 		Ok(())
 	}
 	
+	/// Parses `#tokendef <name> { token = value ... }` and appends the resulting group to `custom_token_defs`.
+	fn parse_directive_tokendef(&mut self, _tk_name: &Token) -> Result<(), ()>
+	{
+		let tk_defname = self.parser.expect(TokenKind::Identifier)?;
+		
+		let defname = tk_defname.excerpt.unwrap().clone();
+		// Group names must be unique among the groups parsed so far.
+		if self.custom_token_defs.iter().find(|def| def.name == defname).is_some()
+			{ return Err(self.parser.report.error_span("duplicate custom token def name", &tk_defname.span)); }
+		
+		let mut tokendef = CustomTokenDef
+		{
+			name: defname,
+			excerpt_to_value_map: HashMap::new()
+		};
+		
+		self.parser.expect(TokenKind::BraceOpen)?;
+		// Read entries until the closing brace or the end of input.
+		while !self.parser.is_over() && !self.parser.next_is(0, TokenKind::BraceClose)
+		{
+			let tk_token = self.parser.expect(TokenKind::Identifier)?;
+			let token_excerpt = tk_token.excerpt.unwrap().clone();
+			// A token may be defined only once within its group.
+			if tokendef.excerpt_to_value_map.contains_key(&token_excerpt)
+				{ return Err(self.parser.report.error_span("duplicate token in group", &tk_token.span)); }
+			// The value is read with `expect_usize` and stored as an integer expression value.
+			self.parser.expect(TokenKind::Equal)?;
+			let value = ExpressionValue::Integer(BigInt::from(self.parser.expect_usize()?.1));
+			
+			tokendef.excerpt_to_value_map.insert(token_excerpt, value);
+			// Entries are separated by a line break or a comma; an entry directly before `}` needs neither.
+			if self.parser.maybe_expect_linebreak().is_some()
+				{ continue; }
+				
+			if self.parser.next_is(0, TokenKind::BraceClose)
+				{ continue; }
+				
+			self.parser.expect(TokenKind::Comma)?;
+		}
+		
+		self.parser.expect(TokenKind::BraceClose)?;
+		
+		self.custom_token_defs.push(tokendef);
+		
+		Ok(())
+	}
+	
 
 	fn parse_rules(&mut self) -> Result<(), ()>
 	{
@@ -188,7 +250,28 @@ impl<'t> CpuDefParser<'t>
 		if rule.param_exists(&name)
 			{ return Err(self.parser.report.error_span("duplicate parameter name", &tk_name.span)); }
 			
-		rule.pattern_add_param(name);
+		let typ =
+			if self.parser.maybe_expect(TokenKind::Colon).is_some()
+			{
+				let tk_type = self.parser.expect(TokenKind::Identifier)?;
+				let typename = tk_type.excerpt.unwrap().clone();
+				
+				let mut tokendef_index = None;
+				for i in 0..self.custom_token_defs.len()
+				{
+					if typename == self.custom_token_defs[i].name
+						{ tokendef_index = Some(i); }
+				}
+				
+				if tokendef_index.is_none()
+					{ return Err(self.parser.report.error_span("unknown parameter type", &tk_type.span)); }
+						
+				RuleParameterType::CustomTokenDef(tokendef_index.unwrap())
+			}
+			else
+				{ RuleParameterType::Expression };
+			
+		rule.pattern_add_param(name, typ);
 		
 		self.parser.expect(TokenKind::BraceClose)?;
 		
@@ -207,7 +290,7 @@ impl<'t> CpuDefParser<'t>
 		};
 		
 		if width % self.align.unwrap() != 0
-			{ return Err(self.parser.report.error_span(format!("production (width = {}) does not align with a word boundary", width), &expr.span())); }
+			{ return Err(self.parser.report.error_span(format!("binary representation (width = {}) does not align with a word boundary", width), &expr.span())); }
 		
 		rule.production = expr;
 		
diff --git a/src/asm/cpudef/mod.rs b/src/asm/cpudef/mod.rs
index 2a41ca38..50862ed9 100644
--- a/src/asm/cpudef/mod.rs
+++ b/src/asm/cpudef/mod.rs
@@ -4,7 +4,9 @@ mod rule_pattern_matcher;
 
 
 pub use self::cpudef::CpuDef;
+pub use self::cpudef::CustomTokenDef;
 pub use self::rule::Rule;
 pub use self::rule::RulePatternPart;
 pub use self::rule::RuleParameter;
+pub use self::rule::RuleParameterType;
 pub use self::rule_pattern_matcher::RulePatternMatcher;
\ No newline at end of file
diff --git a/src/asm/cpudef/rule.rs b/src/asm/cpudef/rule.rs
index a20a57e6..1bb97a6f 100644
--- a/src/asm/cpudef/rule.rs
+++ b/src/asm/cpudef/rule.rs
@@ -20,10 +20,19 @@ pub enum RulePatternPart
 }
 
 
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub enum RuleParameterType // kind of a rule parameter, chosen when the rule pattern is parsed
+{
+	Expression, // `{x}` with no type annotation
+	CustomTokenDef(usize) // `{x: name}`; the payload is the index of the group in the cpudef's `custom_token_defs`
+}
+
+
 #[derive(Debug)]
 pub struct RuleParameter
 {
-	pub name: String
+	pub name: String, // parameter name given in the rule pattern
+	pub typ: RuleParameterType // plain expression or a custom token group
 }
 
 
@@ -47,7 +56,7 @@ impl Rule
 	}
 	
 	
-	pub fn pattern_add_param<S>(&mut self, name: S)
+	pub fn pattern_add_param<S>(&mut self, name: S, typ: RuleParameterType)
 	where S: Into<String>
 	{
 		let name_owned = name.into();
@@ -58,7 +67,8 @@ impl Rule
 		
 		let param = RuleParameter
 		{
-			name: name_owned
+			name: name_owned,
+			typ: typ
 		};
 		
 		self.params.push(param);
diff --git a/src/asm/cpudef/rule_pattern_matcher.rs b/src/asm/cpudef/rule_pattern_matcher.rs
index 0f85b868..c19b7ad8 100644
--- a/src/asm/cpudef/rule_pattern_matcher.rs
+++ b/src/asm/cpudef/rule_pattern_matcher.rs
@@ -1,6 +1,6 @@
 use syntax::{TokenKind, Parser};
-use expr::Expression;
-use asm::cpudef::{Rule, RulePatternPart};
+use expr::{Expression, ExpressionValue};
+use asm::cpudef::{Rule, RuleParameterType, RulePatternPart, CustomTokenDef};
 use std::collections::HashMap;
 
 
@@ -15,10 +15,11 @@ pub struct RulePatternMatcher
 struct MatchStep
 {
 	rule_indices: Vec<usize>,
-	children_exact: HashMap<MatchStepExact, MatchStep>,
+	children_exact: HashMap<MatchStepExact, (Option<ExpressionValue>, MatchStep)>,
 	children_param: HashMap<MatchStepParameter, MatchStep>
 }
 
+
 #[derive(Debug, Eq, PartialEq, Hash)]
 struct MatchStepExact(TokenKind, Option<String>);
 
@@ -37,12 +38,12 @@ pub struct Match
 
 impl RulePatternMatcher
 {
-	pub fn new(rules: &[Rule]) -> RulePatternMatcher
+	pub fn new(rules: &[Rule], custom_token_defs: &Vec<CustomTokenDef>) -> RulePatternMatcher
 	{
 		let mut root_step = MatchStep::new();
 		
 		for i in 0..rules.len()
-			{ RulePatternMatcher::build_step(&mut root_step, &rules[i].pattern_parts, i); }
+			{ RulePatternMatcher::build_step(&mut root_step, &rules[i], &rules[i].pattern_parts, i, custom_token_defs); }
 		
 		
 		RulePatternMatcher
@@ -52,7 +53,7 @@ impl RulePatternMatcher
 	}
 	
 
-	fn build_step(step: &mut MatchStep, next_parts: &[RulePatternPart], rule_index: usize)
+	fn build_step(step: &mut MatchStep, rule: &Rule, next_parts: &[RulePatternPart], rule_index: usize, custom_token_defs: &Vec<CustomTokenDef>)
 	{
 		if next_parts.len() == 0
 		{
@@ -67,30 +68,52 @@ impl RulePatternMatcher
 			{
 				let step_kind = MatchStepExact(kind, excerpt.as_ref().map(|s| s.to_ascii_lowercase()));
 				
-				if let Some(next_step) = step.children_exact.get_mut(&step_kind)
+				if let Some(&mut (_, ref mut next_step)) = step.children_exact.get_mut(&step_kind)
 				{
-					RulePatternMatcher::build_step(next_step, &next_parts[1..], rule_index);
+					RulePatternMatcher::build_step(next_step, rule, &next_parts[1..], rule_index, custom_token_defs);
 					return;
 				}
 				
 				let mut next_step = MatchStep::new();
-				RulePatternMatcher::build_step(&mut next_step, &next_parts[1..], rule_index);
-				step.children_exact.insert(step_kind, next_step);
+				RulePatternMatcher::build_step(&mut next_step, rule, &next_parts[1..], rule_index, custom_token_defs);
+				step.children_exact.insert(step_kind, (None, next_step));
 			}
 			
-			RulePatternPart::Parameter(_) =>
+			RulePatternPart::Parameter(param_index) =>
 			{
-				let step_kind = MatchStepParameter;
-				
-				if let Some(next_step) = step.children_param.get_mut(&step_kind)
+				if let RuleParameterType::CustomTokenDef(tokendef_index) = rule.params[param_index].typ
 				{
-					RulePatternMatcher::build_step(next_step, &next_parts[1..], rule_index);
-					return;
+					let custom_token_def = &custom_token_defs[tokendef_index];
+					
+					for (excerpt, value) in &custom_token_def.excerpt_to_value_map
+					{
+						let step_kind = MatchStepExact(TokenKind::Identifier, Some(excerpt.to_ascii_lowercase()));
+						// Every token of the group gets its own exact-match child; an existing child (and its stored value) is reused.
+						if let Some(&mut (_, ref mut next_step)) = step.children_exact.get_mut(&step_kind)
+						{
+							RulePatternMatcher::build_step(next_step, rule, &next_parts[1..], rule_index, custom_token_defs);
+							continue; // not `return`: the remaining tokens of the group must still be registered for this rule
+						}
+						
+						let mut next_step = MatchStep::new();
+						RulePatternMatcher::build_step(&mut next_step, rule, &next_parts[1..], rule_index, custom_token_defs);
+						step.children_exact.insert(step_kind, (Some(value.clone()), next_step));
+					}
+				}
+				else
+				{			
+					let step_kind = MatchStepParameter;
+					
+					if let Some(next_step) = step.children_param.get_mut(&step_kind)
+					{
+						RulePatternMatcher::build_step(next_step, rule, &next_parts[1..], rule_index, custom_token_defs);
+						return;
+					}
+					
+					let mut next_step = MatchStep::new();
+					RulePatternMatcher::build_step(&mut next_step, rule, &next_parts[1..], rule_index, custom_token_defs);
+					step.children_param.insert(step_kind, next_step);
 				}
-				
-				let mut next_step = MatchStep::new();
-				RulePatternMatcher::build_step(&mut next_step, &next_parts[1..], rule_index);
-				step.children_param.insert(step_kind, next_step);
 			}
 		}
 	}
@@ -129,10 +152,18 @@ impl RulePatternMatcher
 			
 			let step_exact = MatchStepExact(tk.kind, tk.excerpt.map(|s| s.to_ascii_lowercase()));
 			
-			if let Some(next_step) = step.children_exact.get(&step_exact)
+			if let Some(&(ref value, ref next_step)) = step.children_exact.get(&step_exact)
 			{
-				if let Some(result) = self.parse_match_step(parser, &next_step, exprs)
-					{ return Some(result); }
+				if value.is_some()
+					{ exprs.push(value.as_ref().unwrap().make_literal()); }
+				
+				if let Some(result) = self.parse_match_step(parser, next_step, exprs)
+				{
+					return Some(result);
+				}
+				
+				if value.is_some()
+					{ exprs.pop(); }
 			}
 			
 			parser.restore(parser_state);
diff --git a/src/driver.rs b/src/driver.rs
index d01835db..62ff7154 100644
--- a/src/driver.rs
+++ b/src/driver.rs
@@ -77,17 +77,17 @@ fn drive_inner(report: RcReport, opts: &getopts::Options, args: &Vec<String>, fi
 		}
 	};
 	
-	if matches.free.len() != 1
+	if matches.free.len() < 1
 		{ return Err(true); }
 	
-	let asm_file = matches.free[0].clone();
+	let main_asm_file = matches.free[0].clone();
 	
 	let output_file = match matches.opt_str("o")
 	{
 		Some(f) => f,
 		None =>
 		{
-			match get_default_output_filename(report.clone(), &asm_file)
+			match get_default_output_filename(report.clone(), &main_asm_file)
 			{
 				Ok(f) => f,
 				Err(_) => return Err(true)
@@ -96,7 +96,8 @@ fn drive_inner(report: RcReport, opts: &getopts::Options, args: &Vec<String>, fi
 	};
 	
 	let mut filenames = matches.opt_strs("i");
-	filenames.push(asm_file);
+	for filename in matches.free
+		{ filenames.push(filename); }
 	
 	let assembled = assemble(report.clone(), fileserver, &filenames, quiet).map_err(|_| false)?;
 	
@@ -136,7 +137,7 @@ fn make_opts() -> getopts::Options
 {
     let mut opts = getopts::Options::new();
     opts.optopt("f", "format", "The format of the output file. Possible formats: binary, binstr, hexstr, bindump, hexdump", "FORMAT");
-    opts.optmulti("i", "include", "Specifies an additional file for processing before the main assembly.", "FILE");
+    opts.optmulti("i", "include", "Specifies an additional file for processing before the given <asm-files>.", "FILE");
     opts.optopt("o", "output", "The name of the output file.", "FILE");
     opts.optflag("p", "print", "Print output to stdout instead of writing to a file.");
     opts.optflag("q", "quiet", "Suppress progress reports.");
@@ -159,7 +160,7 @@ fn parse_opts(report: RcReport, opts: &getopts::Options, args: &Vec<String>) ->
 
 fn print_usage(opts: &getopts::Options)
 {
-	println!("{}", opts.usage(&format!("Usage: {} [options] <asm-file>", env!("CARGO_PKG_NAME"))));
+	println!("{}", opts.usage(&format!("Usage: {} [options] <asm-file-1> ... <asm-file-N>", env!("CARGO_PKG_NAME"))));
 }
 
 
diff --git a/src/expr/expression.rs b/src/expr/expression.rs
index e742f899..f4cf6db5 100644
--- a/src/expr/expression.rs
+++ b/src/expr/expression.rs
@@ -70,4 +70,13 @@ impl Expression
 			&Expression::Call     (ref span, ..) => span.clone()
 		}
 	}
+}
+
+// Lets an already-known value be used where an `Expression` is required.
+impl ExpressionValue
+{
+	pub fn make_literal(&self) -> Expression // literal expression holding a clone of `self`, with a dummy span
+	{
+		Expression::Literal(Span::new_dummy(), self.clone())
+	}
 }
\ No newline at end of file
diff --git a/src/test/asm.rs b/src/test/asm.rs
index 73c99645..9c53e792 100644
--- a/src/test/asm.rs
+++ b/src/test/asm.rs
@@ -143,6 +143,27 @@ fn test_parameters()
 }
 
 
+#[test]
+fn test_tokendef()
+{
+	test("#tokendef reg { r1 = 1    } \n mov {a: reg} -> 0xff @ a[7:0]", "mov r1", Pass((4, "ff01")));
+	test("#tokendef reg { r1 = 0xbc } \n mov {a: reg} -> 0xff @ a[7:0]", "mov r1", Pass((4, "ffbc")));
+	// Several comma-separated tokens in one group.
+	test("#tokendef reg { r1 = 1, r2 = 2 } \n mov {a: reg} -> 0xff @ a[7:0]", "mov r1 \n mov r2", Pass((4, "ff01ff02")));
+	// Two groups may define the same token; the group named in the rule supplies the value.
+	test("#tokendef reg1 { r1 = 1 } \n #tokendef reg2 { r1 = 2 } \n mov1 {a: reg1} -> 0xff @ a[7:0] \n mov2 {a: reg2} -> 0xee @ a[7:0]", "mov1 r1 \n mov2 r1", Pass((4, "ff01ee02")));
+	// Rules sharing a mnemonic but using different groups; when both groups define the token, the first rule's output is expected.
+	test("#tokendef reg1 { r1 = 1 } \n #tokendef reg2 { r1 = 2 } \n mov {a: reg1} -> 0xff @ a[7:0] \n mov {a: reg2} -> 0xee @ a[7:0]", "mov r1 \n mov r1", Pass((4, "ff01ff01")));
+	test("#tokendef reg1 { r1 = 1 } \n #tokendef reg2 { r2 = 2 } \n mov {a: reg1} -> 0xff @ a[7:0] \n mov {a: reg2} -> 0xee @ a[7:0]", "mov r1 \n mov r2", Pass((4, "ff01ee02")));
+	// A token that is not part of the group must not match.
+	test("#tokendef reg { r1 = 0xbc } \n mov {a: reg} -> 0xff @ a[7:0]", "mov r2", Fail(("asm", 1, "no match")));
+	// Invalid definitions: duplicate token, non-identifier group name, duplicate group name.
+	test("#tokendef reg { r1 = 1, r1 = 2 } \n mov {a: reg} -> 0xff @ a[7:0]", "mov r1", Fail(("cpu", 1, "duplicate token")));
+	test("#tokendef 123 { r1 = 1, r2 = 2 } \n mov {a: reg} -> 0xff @ a[7:0]", "mov r1", Fail(("cpu", 1, "identifier")));
+	test("#tokendef reg { r1 = 1 } \n #tokendef reg { r2 = 1 } \n mov {a: reg} -> 0xff @ a[7:0]", "mov r1", Fail(("cpu", 2, "duplicate custom token")));
+}
+
+
 #[test]
 fn test_assertions()
 {