diff --git a/Cargo.toml b/Cargo.toml index eb4fcc50..dc40b83a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "customasm" -version = "0.10.3" +version = "0.10.4" edition = "2018" authors = ["Henrique Lorenzi "] diff --git a/src/asm/cpudef/rule.rs b/src/asm/cpudef/rule.rs index 846d8050..e9e0c9b3 100644 --- a/src/asm/cpudef/rule.rs +++ b/src/asm/cpudef/rule.rs @@ -1,5 +1,5 @@ use crate::diagn::Span; -use crate::syntax::{Token, TokenKind}; +use crate::syntax::Token; use crate::expr::{Expression, ExpressionValue}; @@ -16,7 +16,7 @@ pub struct Rule #[derive(Debug, Eq, PartialEq, Hash)] pub enum RulePatternPart { - Exact(TokenKind, Option), + Exact(char), Parameter(usize) } @@ -53,8 +53,11 @@ impl Rule pub fn pattern_add_exact(&mut self, token: &Token) { - let part = RulePatternPart::Exact(token.kind, token.excerpt.clone()); - self.pattern_parts.push(part); + for c in token.text().chars() + { + let part = RulePatternPart::Exact(c); + self.pattern_parts.push(part); + } } diff --git a/src/asm/cpudef/rule_pattern_matcher.rs b/src/asm/cpudef/rule_pattern_matcher.rs index cce6f9e1..60bfa0fc 100644 --- a/src/asm/cpudef/rule_pattern_matcher.rs +++ b/src/asm/cpudef/rule_pattern_matcher.rs @@ -1,5 +1,5 @@ use crate::diagn::RcReport; -use crate::syntax::{TokenKind, Parser}; +use crate::syntax::Parser; use crate::expr::{Expression, ExpressionValue}; use crate::asm::cpudef::{Rule, RuleParameterType, RulePatternPart, CustomTokenDef}; use std::collections::HashMap; @@ -16,19 +16,11 @@ pub struct RulePatternMatcher struct MatchStep { rule_indices: Vec, - children_exact: HashMap, MatchStep)>>, - children_param: HashMap + children_exact: HashMap, Box)>>, + children_param: Option> } -#[derive(Debug, Eq, PartialEq, Hash)] -struct MatchStepExact(TokenKind, Option); - - -#[derive(Debug, Eq, PartialEq, Hash)] -struct MatchStepParameter; - - #[derive(Debug)] pub struct Match { @@ -44,8 +36,7 @@ impl RulePatternMatcher let mut root_step = MatchStep::new(); for i in 0..rules.len() - { RulePatternMatcher::build_step(report.clone(), &mut root_step, &rules[i], &rules[i].pattern_parts, i, custom_token_defs)?; } - + { RulePatternMatcher::add_tree_step(report.clone(), &mut root_step, &rules[i], &rules[i].pattern_parts, i, custom_token_defs)?; } Ok(RulePatternMatcher { @@ -54,7 +45,7 @@ impl RulePatternMatcher } - fn build_step(report: RcReport, step: &mut MatchStep, rule: &Rule, next_parts: &[RulePatternPart], rule_index: usize, custom_token_defs: &Vec) -> Result<(), ()> + fn add_tree_step(report: RcReport, step: &mut MatchStep, rule: &Rule, next_parts: &[RulePatternPart], rule_index: usize, custom_token_defs: &Vec) -> Result<(), ()> { if next_parts.len() == 0 { @@ -65,31 +56,10 @@ impl RulePatternMatcher match next_parts[0] { - RulePatternPart::Exact(kind, ref excerpt) => + RulePatternPart::Exact(c) => { - let step_kind = MatchStepExact(kind, excerpt.as_ref().map(|s| s.to_ascii_lowercase())); - - { - let mut maybe_next_steps = step.children_exact.get_mut(&step_kind); - - if let Some(ref mut next_steps) = maybe_next_steps - { - if let Some(&mut (_, ref mut next_step)) = next_steps.iter_mut().find(|s| s.0 == None) - { - RulePatternMatcher::build_step(report.clone(), next_step, rule, &next_parts[1..], rule_index, custom_token_defs)?; - return Ok(()); - } - - let mut next_step = MatchStep::new(); - RulePatternMatcher::build_step(report.clone(), &mut next_step, rule, &next_parts[1..], rule_index, custom_token_defs)?; - next_steps.push((None, next_step)); - return Ok(()); - } - } - - let mut next_step = MatchStep::new(); - RulePatternMatcher::build_step(report.clone(), &mut next_step, rule, &next_parts[1..], rule_index, custom_token_defs)?; - step.children_exact.insert(step_kind, vec![(None, next_step)]); + let next_step = Self::make_step_exact(step, c.to_ascii_lowercase(), None); + RulePatternMatcher::add_tree_step(report.clone(), next_step, rule, &next_parts[1..], rule_index, custom_token_defs)?; } RulePatternPart::Parameter(param_index) => @@ -100,49 +70,73 @@ impl RulePatternMatcher for (excerpt, value) in &custom_token_def.excerpt_to_value_map { - let step_kind = MatchStepExact(TokenKind::Identifier, Some(excerpt.to_ascii_lowercase())); - + let chars = excerpt.chars().collect::>(); + let mut next_step = Self::make_step_exact( + step, + chars[0].to_ascii_lowercase(), + Some(value.clone())); + + if chars.len() > 1 { - let mut maybe_next_steps = step.children_exact.get_mut(&step_kind); - - if let Some(ref mut next_steps) = maybe_next_steps + for c in &chars[1..chars.len()] { - if let Some(&mut (_, ref mut next_step)) = next_steps.iter_mut().find(|s| s.0.as_ref() == Some(value)) - { - RulePatternMatcher::build_step(report.clone(), next_step, rule, &next_parts[1..], rule_index, custom_token_defs)?; - continue; - } - - let mut next_step = MatchStep::new(); - RulePatternMatcher::build_step(report.clone(), &mut next_step, rule, &next_parts[1..], rule_index, custom_token_defs)?; - next_steps.push((Some(value.clone()), next_step)); - continue; + next_step = Self::make_step_exact( + next_step, + c.to_ascii_lowercase(), + None); } } - - let mut next_step = MatchStep::new(); - RulePatternMatcher::build_step(report.clone(), &mut next_step, rule, &next_parts[1..], rule_index, custom_token_defs)?; - step.children_exact.insert(step_kind, vec![(Some(value.clone()), next_step)]); + + RulePatternMatcher::add_tree_step(report.clone(), next_step, rule, &next_parts[1..], rule_index, custom_token_defs)?; } } else { - let step_kind = MatchStepParameter; - - if let Some(next_step) = step.children_param.get_mut(&step_kind) - { return RulePatternMatcher::build_step(report.clone(), next_step, rule, &next_parts[1..], rule_index, custom_token_defs); } - - let mut next_step = MatchStep::new(); - RulePatternMatcher::build_step(report.clone(), &mut next_step, rule, &next_parts[1..], rule_index, custom_token_defs)?; - step.children_param.insert(step_kind, next_step); + let next_step = Self::make_step_param(step); + RulePatternMatcher::add_tree_step(report.clone(), next_step, rule, &next_parts[1..], rule_index, custom_token_defs)?; } } } return Ok(()); } - - + + + fn make_step_exact(step: &mut MatchStep, c: char, value: Option) -> &mut MatchStep + { + if !step.children_exact.contains_key(&c) + { + step.children_exact.insert(c, vec![(value, Box::new(MatchStep::new()))]); + return &mut step.children_exact.get_mut(&c).unwrap().last_mut().unwrap().1; + } + + let next_steps = step.children_exact.get_mut(&c).unwrap(); + + if next_steps.iter_mut().find(|s| s.0 == value).is_none() + { + next_steps.push((value, Box::new(MatchStep::new()))); + return &mut next_steps.last_mut().unwrap().1; + } + + &mut next_steps.iter_mut().find(|s| s.0 == value).unwrap().1 + } + + + fn make_step_param(step: &mut MatchStep) -> &mut MatchStep + { + if step.children_param.is_none() + { + let next_step = MatchStep::new(); + step.children_param = Some(Box::new(next_step)); + step.children_param.as_mut().unwrap() + } + else + { + step.children_param.as_mut().unwrap() + } + } + + pub fn parse_match(&self, parser: &mut Parser) -> Option { let mut exprs = Vec::new(); @@ -172,11 +166,9 @@ impl RulePatternMatcher // Try to match fixed tokens first, if some rule accepts that. let parser_state = parser.save(); - let tk = parser.advance(); - - let step_exact = MatchStepExact(tk.kind, tk.excerpt.map(|s| s.to_ascii_lowercase())); + let c = parser.advance_partial(); - if let Some(ref next_steps) = step.children_exact.get(&step_exact) + if let Some(ref next_steps) = step.children_exact.get(&c.to_ascii_lowercase()) { for (ref value, ref next_step) in next_steps.iter() { @@ -196,7 +188,7 @@ impl RulePatternMatcher parser.restore(parser_state); // Then try to match argument expressions, if some rule accepts that. - if let Some(next_step) = step.children_param.get(&MatchStepParameter) + if let Some(ref next_step) = step.children_param { let parser_state = parser.save(); @@ -219,7 +211,7 @@ impl RulePatternMatcher // Finally, return a match if some rule ends here. if step.rule_indices.len() != 0 { - if !parser.next_is_linebreak() + if parser.is_at_partial() || !parser.next_is_linebreak() { return None } return Some(&step.rule_indices); @@ -255,7 +247,7 @@ impl RulePatternMatcher for _ in 0..indent { print!(" "); } - print!("{}", key.0.printable_excerpt(key.1.as_ref().map(|s| s as &str))); + print!("{}", key); if value.is_some() { @@ -272,7 +264,7 @@ impl RulePatternMatcher } } - for (_, next_step) in &step.children_param + if let Some(ref next_step) = step.children_param { for _ in 0..indent { print!(" "); } @@ -292,7 +284,7 @@ impl MatchStep { rule_indices: Vec::new(), children_exact: HashMap::new(), - children_param: HashMap::new() + children_param: None } } } \ No newline at end of file diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs index 24fa0f51..6409c99f 100644 --- a/src/syntax/parser.rs +++ b/src/syntax/parser.rs @@ -8,7 +8,8 @@ pub struct Parser tokens: Vec, index: usize, index_prev: usize, - read_linebreak: bool + read_linebreak: bool, + partial_index: usize } @@ -16,7 +17,8 @@ pub struct ParserState { index: usize, index_prev: usize, - read_linebreak: bool + read_linebreak: bool, + partial_index: usize } @@ -32,7 +34,8 @@ impl Parser tokens: tokens, index: 0, index_prev: 0, - read_linebreak: false + read_linebreak: false, + partial_index: 0 }; parser.skip_ignorable(); @@ -46,7 +49,8 @@ impl Parser { index: self.index, index_prev: self.index_prev, - read_linebreak: self.read_linebreak + read_linebreak: self.read_linebreak, + partial_index: self.partial_index } } @@ -56,6 +60,7 @@ impl Parser self.index = state.index; self.index_prev = state.index_prev; self.read_linebreak = state.read_linebreak; + self.partial_index = state.partial_index; } @@ -80,6 +85,9 @@ impl Parser pub fn advance(&mut self) -> Token { + if self.is_at_partial() + { panic!("at partial"); } + self.index_prev = self.index; let token = self.tokens[self.index].clone(); @@ -91,6 +99,29 @@ impl Parser self.skip_ignorable(); token } + + + pub fn advance_partial(&mut self) -> char + { + if self.tokens[self.index].kind == TokenKind::End + { return '\0'; } + + let sliced = unsafe { self.tokens[self.index].text().get_unchecked(self.partial_index..) }; + let mut char_indices = sliced.char_indices(); + let c = char_indices.next().unwrap().1; + + if let Some((index, _)) = char_indices.next() + { + self.partial_index += index; + } + else + { + self.partial_index = 0; + self.advance(); + } + + c + } pub fn skip_until_linebreak(&mut self) @@ -116,6 +147,12 @@ impl Parser { self.read_linebreak = false; } + + + pub fn is_at_partial(&self) -> bool + { + self.partial_index != 0 + } pub fn next_is(&self, mut nth: usize, kind: TokenKind) -> bool diff --git a/src/syntax/token.rs b/src/syntax/token.rs index 1d837a1c..3b0eaecb 100644 --- a/src/syntax/token.rs +++ b/src/syntax/token.rs @@ -176,6 +176,54 @@ impl TokenKind } +impl Token +{ + pub fn text(&self) -> &str + { + match self.kind + { + TokenKind::ParenOpen => "(", + TokenKind::ParenClose => ")", + TokenKind::BracketOpen => "[", + TokenKind::BracketClose => "]", + TokenKind::BraceOpen => "{", + TokenKind::BraceClose => "}", + TokenKind::Dot => ".", + TokenKind::Comma => ",", + TokenKind::Colon => ":", + TokenKind::ColonColon => "::", + TokenKind::Arrow => "->", + TokenKind::Hash => "#", + TokenKind::Equal => "=", + TokenKind::Plus => "+", + TokenKind::Minus => "-", + TokenKind::Asterisk => "*", + TokenKind::Slash => "/", + TokenKind::Percent => "%", + TokenKind::Question => "?", + TokenKind::Exclamation => "!", + TokenKind::Ampersand => "&", + TokenKind::VerticalBar => "|", + TokenKind::Circumflex => "^", + TokenKind::Tilde => "~", + TokenKind::At => "@", + TokenKind::AmpersandAmpersand => "&&", + TokenKind::VerticalBarVerticalBar => "||", + TokenKind::EqualEqual => "==", + TokenKind::ExclamationEqual => "!=", + TokenKind::LessThan => "<", + TokenKind::LessThanLessThan => "<<", + TokenKind::LessThanEqual => "<=", + TokenKind::GreaterThan => ">", + TokenKind::GreaterThanGreaterThan => ">>", + TokenKind::GreaterThanGreaterThanGreaterThan => ">>>", + TokenKind::GreaterThanEqual => ">=", + _ => self.excerpt.as_ref().unwrap() + } + } +} + + pub fn tokenize(report: RcReport, src_filename: S, src: &[char]) -> Result, ()> where S: Into { diff --git a/src/test/asm.rs b/src/test/asm.rs index 0591a63a..1de737cb 100644 --- a/src/test/asm.rs +++ b/src/test/asm.rs @@ -115,6 +115,9 @@ fn test_simple() test("HALT -> 0x00", "halt", Pass((4, "00"))); test("Halt -> 0x00", "hALT", Pass((4, "00"))); test("hALT -> 0x00", "Halt", Pass((4, "00"))); + + test("halt -> 0x00", "h a l t", Pass((4, "00"))); + test("halt -> 0x00", "ha lt", Pass((4, "00"))); test("halt -> pc % 2 == 0 ? 0x12 : 0x34", "halt \n halt \n halt", Pass((4, "123412"))); test("halt -> pc ? 0x12 : 0x34", "halt \n halt \n halt", Fail(("cpu", 1, "type"))); @@ -191,6 +194,12 @@ fn test_tokendef() test("#tokendef reg { r1 = 1 } \n mov r1, r1 -> 0x55 \n mov r1, {src: reg} -> 0x88 @ src[7:0]", "mov r1, r2", Fail(("asm", 1, "no match"))); test("#tokendef alu { add = 0x10, sub = 0x20 } \n {op: alu} {x} -> op[7:0] @ x[7:0]", "add 0xef \n sub 0xfe", Pass((4, "10ef20fe"))); + + test("#tokendef cond { z = 0x10, ne = 0x20 } \n j{c: cond} {x} -> c[7:0] @ x[7:0]", "jz 0x50 \n jne 0x60", Pass((4, "10502060"))); + test("#tokendef cond { test = 0x10 } \n j{c: cond} -> c[7:0]", "j t e s t", Pass((4, "10"))); + test("#tokendef cond { test = 0x10 } \n j{c: cond} -> c[7:0]", "jtestx", Fail(("asm", 1, "no match"))); + test("#tokendef cond { test = 0x10 } \n j{c: cond} -> c[7:0]", "jtes", Fail(("asm", 1, "no match"))); + test("#tokendef cond { test = 0x10 } \n j{c: cond} -> c[7:0]", "jtes\nt", Fail(("asm", 1, "no match"))); } diff --git a/web/customasm.gc.wasm b/web/customasm.gc.wasm index f6b78870..d0603086 100644 Binary files a/web/customasm.gc.wasm and b/web/customasm.gc.wasm differ