From d1b8c6407c390c289daa23b1a8e18689ef53a6e9 Mon Sep 17 00:00:00 2001 From: hlorenzi Date: Mon, 11 Jul 2022 20:04:56 -0300 Subject: [PATCH] fix #97: use `slice_until_char_or_nesting` in subrule matching --- src/asm/parser/rule_invocation.rs | 51 +++++++++++++++++++++++++------ src/syntax/parser.rs | 25 ++++++++++++++- tests/issue97/1.asm | 31 +++++++++++++++++++ tests/rule_nested_ambiguous/5.asm | 2 +- 4 files changed, 97 insertions(+), 12 deletions(-) create mode 100644 tests/issue97/1.asm diff --git a/src/asm/parser/rule_invocation.rs b/src/asm/parser/rule_invocation.rs index f9b54485..a595305d 100644 --- a/src/asm/parser/rule_invocation.rs +++ b/src/asm/parser/rule_invocation.rs @@ -374,20 +374,41 @@ pub fn match_rule<'a>( } } - asm::PatternParameterType::Ruleset(rule_group_ref) => + asm::PatternParameterType::Ruleset(ruleset_ref) => { - if DEBUG + let token_start = branch.parser.get_current_token_index(); + + let mut subparser = branch.parser.clone(); + let mut subparser_using_slice = false; + let mut subparser_offset = 0; + + let next_part = rule.pattern.get(index + 1); + + if let Some(asm::PatternPart::Exact(next_part_char)) = next_part { - println!("- branch {}, try match subrule {:?}", branch_index, rule_group_ref); + if let Some(slice_parser) = subparser.slice_until_char_or_nesting(*next_part_char) + { + subparser = slice_parser; + subparser_using_slice = true; + subparser_offset = token_start; + } } - let token_start = branch.parser.get_current_token_index(); + if DEBUG + { + println!( + " branch {}, try match subrule {:?}, parser {}at `{}`", + branch_index, + ruleset_ref, + if subparser_using_slice { "using slice " } else { "" }, + fileserver.get_excerpt(&subparser.get_next_spans(100))); + } let subcandidates = match_ruleset( asm_state, - rule_group_ref, - &mut branch.parser, - false, + ruleset_ref, + &mut subparser, + subparser_using_slice, fileserver, report.clone())?; @@ -400,19 +421,29 @@ pub fn match_rule<'a>( for subcandidate in subcandidates.into_iter() { - let token_end = subcandidate.1.get_current_token_index(); + let mut new_parser = branch.parser.clone(); + new_parser.restore_with_offset(subcandidate.1.save(), subparser_offset); + + let token_end = new_parser.get_current_token_index(); let mut args_clone = branch.args.clone(); args_clone.push(asm::RuleInvocationArgument::NestedRuleset(subcandidate.0)); let mut token_args_clone = branch.token_args.clone(); - token_args_clone.push(Some(branch.parser.get_cloned_tokens_by_index(token_start, token_end))); + token_args_clone.push(Some(new_parser.get_cloned_tokens_by_index(token_start, token_end))); + if DEBUG + { + println!( + " continue branch, parser at `{}`", + fileserver.get_excerpt(&new_parser.get_next_spans(100))); + } + new_branches.push(ParsingBranch { args: args_clone, token_args: token_args_clone, - parser: subcandidate.1, + parser: new_parser, dead: false, }); } diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs index 860cdfc7..6c72f2ec 100644 --- a/src/syntax/parser.rs +++ b/src/syntax/parser.rs @@ -262,8 +262,20 @@ impl<'a> Parser<'a> let mut paren_nesting = 0; - while !self.is_over() && (paren_nesting > 0 || self.next_partial() != c) + loop { + if self.is_over() + { + break; + } + + if self.next_partial() == c && + paren_nesting == 0 && + self.get_current_token_index() > start + { + break; + } + if self.next_is(0, TokenKind::ParenOpen) { paren_nesting += 1; @@ -318,6 +330,17 @@ impl<'a> Parser<'a> self.index_prev = state.index_prev; self.read_linebreak = state.read_linebreak; self.partial_index = state.partial_index; + self.skip_ignorable(); + } + + + pub fn restore_with_offset(&mut self, state: ParserState, offset: usize) + { + self.index = state.index + offset; + self.index_prev = state.index_prev + offset; + self.read_linebreak = state.read_linebreak; + self.partial_index = state.partial_index; + self.skip_ignorable(); } diff --git a/tests/issue97/1.asm b/tests/issue97/1.asm new file mode 100644 index 00000000..f48a6c64 --- /dev/null +++ b/tests/issue97/1.asm @@ -0,0 +1,31 @@ +#subruledef sub +{ + {a: u8} => a +} + +#ruledef +{ + test {a: sub} ({b: u8}) => 0x11 @ a @ b + test {a: sub} {b: u8}) => 0x22 @ a @ b + + test2 {a: u8} ({b: u8}) => 0x33 @ a @ b +} + +test 0(0) ; = 0x110000 +test 0 (0) ; = 0x110000 +test 0 0) ; = 0x220000 + +test (0)(0) ; = 0x110000 +test (0) (0) ; = 0x110000 +test (ascii("\0"))(0) ; = 0x110000 + +x = 0xee +test x(0) ; = 0x11ee00 +test (x)(0) ; = 0x11ee00 + + +test2 1(2) ; = 0x330102 +test2 (1)(2) ; = 0x330102 +test2 (ascii("a"))(2) ; = 0x336102 +test2 x(2) ; = 0x33ee02 +test2 (x)(2) ; = 0x33ee02 \ No newline at end of file diff --git a/tests/rule_nested_ambiguous/5.asm b/tests/rule_nested_ambiguous/5.asm index d08b98d1..a32f3ac7 100644 --- a/tests/rule_nested_ambiguous/5.asm +++ b/tests/rule_nested_ambiguous/5.asm @@ -9,4 +9,4 @@ ld {x: inner}$ => 0x55 @ x`16 } -ld 0xaa$$ ; = 0x5522aa \ No newline at end of file +ld 0xaa$$ ; error: no match \ No newline at end of file