From f7529b80f0958fd47a525f25a123f16438bbb892 Mon Sep 17 00:00:00 2001 From: jfecher Date: Fri, 22 Sep 2023 18:35:31 -0500 Subject: [PATCH] feat: Implement string escape sequences (#2803) --- compiler/noirc_frontend/src/lexer/errors.rs | 26 +++++++++++----- compiler/noirc_frontend/src/lexer/lexer.rs | 30 ++++++++++++++++--- .../execution_success/strings/src/main.nr | 2 +- 3 files changed, 46 insertions(+), 12 deletions(-) diff --git a/compiler/noirc_frontend/src/lexer/errors.rs b/compiler/noirc_frontend/src/lexer/errors.rs index 6b382d76f40..8b5aac8c787 100644 --- a/compiler/noirc_frontend/src/lexer/errors.rs +++ b/compiler/noirc_frontend/src/lexer/errors.rs @@ -21,6 +21,12 @@ pub enum LexerErrorKind { LogicalAnd { span: Span }, #[error("Unterminated block comment")] UnterminatedBlockComment { span: Span }, + #[error("Unterminated string literal")] + UnterminatedStringLiteral { span: Span }, + #[error( + "'\\{escaped}' is not a valid escape sequence. Use '\\\\' for a literal backslash character." + )] + InvalidEscape { escaped: char, span: Span }, } impl LexerErrorKind { @@ -33,6 +39,8 @@ impl LexerErrorKind { LexerErrorKind::TooManyBits { span, .. } => *span, LexerErrorKind::LogicalAnd { span } => *span, LexerErrorKind::UnterminatedBlockComment { span } => *span, + LexerErrorKind::UnterminatedStringLiteral { span } => *span, + LexerErrorKind::InvalidEscape { span, .. 
} => *span, } } @@ -46,30 +54,30 @@ impl LexerErrorKind { let found: String = found.map(Into::into).unwrap_or_else(|| "".into()); ( - "an unexpected character was found".to_string(), - format!(" expected {expected} , but got {found}"), + "An unexpected character was found".to_string(), + format!("Expected {expected}, but found {found}"), *span, ) }, LexerErrorKind::NotADoubleChar { span, found } => ( - format!("tried to parse {found} as double char"), + format!("Tried to parse {found} as double char"), format!( " {found:?} is not a double char, this is an internal error" ), *span, ), LexerErrorKind::InvalidIntegerLiteral { span, found } => ( - "invalid integer literal".to_string(), + "Invalid integer literal".to_string(), format!(" {found} is not an integer"), *span, ), LexerErrorKind::MalformedFuncAttribute { span, found } => ( - "malformed function attribute".to_string(), + "Malformed function attribute".to_string(), format!(" {found} is not a valid attribute"), *span, ), LexerErrorKind::TooManyBits { span, max, got } => ( - "integer literal too large".to_string(), + "Integer literal too large".to_string(), format!( "The maximum number of bits needed to represent a field is {max}, This integer type needs {got} bits" ), @@ -80,7 +88,11 @@ impl LexerErrorKind { "Try `&` instead, or use `if` only if you require short-circuiting".to_string(), *span, ), - LexerErrorKind::UnterminatedBlockComment { span } => ("unterminated block comment".to_string(), "Unterminated block comment".to_string(), *span), + LexerErrorKind::UnterminatedBlockComment { span } => ("Unterminated block comment".to_string(), "Unterminated block comment".to_string(), *span), + LexerErrorKind::UnterminatedStringLiteral { span } => + ("Unterminated string literal".to_string(), "Unterminated string literal".to_string(), *span), + LexerErrorKind::InvalidEscape { escaped, span } => + (format!("'\\{escaped}' is not a valid escape sequence. 
Use '\\\\' for a literal backslash character."), "Invalid escape sequence".to_string(), *span), } } } diff --git a/compiler/noirc_frontend/src/lexer/lexer.rs b/compiler/noirc_frontend/src/lexer/lexer.rs index c32b956b716..0b8922efce6 100644 --- a/compiler/noirc_frontend/src/lexer/lexer.rs +++ b/compiler/noirc_frontend/src/lexer/lexer.rs @@ -320,12 +320,34 @@ impl<'a> Lexer<'a> { fn eat_string_literal(&mut self) -> SpannedTokenResult { let start = self.position; + let mut string = String::new(); + + while let Some(next) = self.next_char() { + let char = match next { + '"' => break, + '\\' => match self.next_char() { + Some('r') => '\r', + Some('n') => '\n', + Some('t') => '\t', + Some('0') => '\0', + Some('"') => '"', + Some('\\') => '\\', + Some(escaped) => { + let span = Span::inclusive(start, self.position); + return Err(LexerErrorKind::InvalidEscape { escaped, span }); + } + None => { + let span = Span::inclusive(start, self.position); + return Err(LexerErrorKind::UnterminatedStringLiteral { span }); + } + }, + other => other, + }; - let str_literal = self.eat_while(None, |ch| ch != '"'); - - let str_literal_token = Token::Str(str_literal); + string.push(char); + } - self.next_char(); // Advance past the closing quote + let str_literal_token = Token::Str(string); let end = self.position; Ok(str_literal_token.into_span(start, end)) diff --git a/tooling/nargo_cli/tests/execution_success/strings/src/main.nr b/tooling/nargo_cli/tests/execution_success/strings/src/main.nr index 9f122c3a137..1d401260179 100644 --- a/tooling/nargo_cli/tests/execution_success/strings/src/main.nr +++ b/tooling/nargo_cli/tests/execution_success/strings/src/main.nr @@ -19,7 +19,7 @@ fn main(message : pub str<11>, y : Field, hex_as_string : str<4>, hex_as_field : assert(y == 5); // Change to y != 5 to see how the later print statements are not called std::println(array); - bad_message = "helld world"; + bad_message = "hell\0\"world"; std::println(bad_message); assert(message != bad_message);