diff --git a/src/asm/parser/include.rs b/src/asm/parser/include.rs index 83e32729..46dc941f 100644 --- a/src/asm/parser/include.rs +++ b/src/asm/parser/include.rs @@ -5,7 +5,7 @@ pub fn parse_directive_include( state: &mut asm::parser::State) -> Result<(), ()> { - let tk_filename = state.parser.expect(syntax::TokenKind::String)?; + let tk_filename = state.parser.expect(syntax::TokenKind::String(expr::StringEncoding::Utf8))?; let filename = syntax::excerpt_as_string_contents( state.report.clone(), tk_filename.excerpt.as_ref().unwrap(), diff --git a/src/asm/state.rs b/src/asm/state.rs index 7f0d8898..6187fa97 100644 --- a/src/asm/state.rs +++ b/src/asm/state.rs @@ -919,6 +919,23 @@ impl State Ok(()) } } + else if let expr::Value::String(value_s) = value + { + let mut value_int = value_s.to_bigint(); + + if value_int.min_size() > size + { + report.error_span( + &format!("argument out of range for type `u{}`", size), + &span); + Err(()) + } + else + { + value_int.size = Some(size); + Ok(()) + } + } else { report.error_span( @@ -948,6 +965,24 @@ impl State Ok(()) } } + else if let expr::Value::String(value_s) = value + { + let mut value_int = value_s.to_bigint(); + + if (value_int.sign() == 0 && size == 0) || + (value_int.min_size() >= size) + { + report.error_span( + &format!("argument out of range for type `s{}`", size), + &span); + Err(()) + } + else + { + value_int.size = Some(size); + Ok(()) + } + } else { report.error_span( @@ -975,6 +1010,23 @@ impl State Ok(()) } } + else if let expr::Value::String(value_s) = value + { + let mut value_int = value_s.to_bigint(); + + if value_int.min_size() > size + { + report.error_span( + &format!("argument out of range for type `i{}`", size), + &span); + Err(()) + } + else + { + value_int.size = Some(size); + Ok(()) + } + } else { report.error_span( @@ -1237,7 +1289,7 @@ impl State State::eval_fn_check_arg_number(info, 1)?; if State::eval_fn_check_unknown_arg(info, 0, self.is_first_pass) { - return Ok(expr::Value::make_integer(util::BigInt::new_from_str(""))); + return Ok(expr::Value::make_integer(util::BigInt::from_bytes_be(&"".bytes().collect::>()))); } let value_string = State::eval_fn_get_string_arg(info, 0)?; diff --git a/src/diagn/span.rs b/src/diagn/span.rs index b1d6cdd3..17ca3f89 100644 --- a/src/diagn/span.rs +++ b/src/diagn/span.rs @@ -84,11 +84,11 @@ impl Span let end = max(self.location.unwrap().1, other.location.unwrap().1); Some((start, end)) }; - + Span { file: self.file.clone(), - location: location + location } } } diff --git a/src/expr/expression.rs b/src/expr/expression.rs index 21cdf1a0..26aa7a86 100644 --- a/src/expr/expression.rs +++ b/src/expr/expression.rs @@ -29,11 +29,23 @@ pub enum Value } +#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] +pub enum StringEncoding +{ + Utf8, + Utf16BE, + Utf16LE, + UnicodeBE, + UnicodeLE, + Ascii, +} + + #[derive(Clone, Debug, Eq, PartialEq)] pub struct ValueString { pub utf8_contents: String, - pub encoding: String, + pub encoding: StringEncoding, } @@ -105,6 +117,15 @@ impl Value } + pub fn make_string(s: &str, encoding: expr::StringEncoding) -> Value + { + Value::String(ValueString { + utf8_contents: s.to_string(), + encoding + }) + } + + pub fn get_bigint(&self) -> Option { match self @@ -122,6 +143,15 @@ impl ValueString { pub fn to_bigint(&self) -> util::BigInt { - util::BigInt::new_from_str(&self.utf8_contents) + let bytes: Vec = match self.encoding + { + StringEncoding::Utf8 => self.utf8_contents.bytes().collect(), + StringEncoding::Utf16BE => self.utf8_contents.encode_utf16().flat_map(|v| v.to_be_bytes()).collect(), + StringEncoding::Utf16LE => self.utf8_contents.encode_utf16().flat_map(|v| v.to_le_bytes()).collect(), + StringEncoding::UnicodeBE => self.utf8_contents.chars().flat_map(|c| (c as u32).to_be_bytes()).collect(), + StringEncoding::UnicodeLE => self.utf8_contents.chars().flat_map(|c| (c as u32).to_le_bytes()).collect(), + StringEncoding::Ascii => self.utf8_contents.chars().map(|c| (c as u32) as u8).collect(), // can potentially contain invalid chars + }; + util::BigInt::from_bytes_be(&bytes) } } \ No newline at end of file diff --git a/src/expr/mod.rs b/src/expr/mod.rs index 290ed263..5a8265b6 100644 --- a/src/expr/mod.rs +++ b/src/expr/mod.rs @@ -6,6 +6,7 @@ mod eval; pub use self::expression::Expr; pub use self::expression::Value; +pub use self::expression::StringEncoding; pub use self::expression::ValueString; pub use self::expression::UnaryOp; pub use self::expression::BinaryOp; diff --git a/src/expr/parser.rs b/src/expr/parser.rs index 470e11c7..f2b851da 100644 --- a/src/expr/parser.rs +++ b/src/expr/parser.rs @@ -378,8 +378,8 @@ impl<'a, 'parser> ExpressionParser<'a, 'parser> else if self.parser.next_is(0, syntax::TokenKind::Number) { self.parse_number() } - else if self.parser.next_is(0, syntax::TokenKind::String) - { self.parse_string() } + else if let Some(encoding) = self.parser.next_is_string(0) + { self.parse_string(encoding) } else if self.parser.next_is(0, syntax::TokenKind::KeywordAsm) { self.parse_asm() } @@ -497,9 +497,9 @@ impl<'a, 'parser> ExpressionParser<'a, 'parser> } - fn parse_string(&mut self) -> Result + fn parse_string(&mut self, encoding: expr::StringEncoding) -> Result { - let tk_str = self.parser.expect(syntax::TokenKind::String)?; + let tk_str = self.parser.expect(syntax::TokenKind::String(encoding))?; let string = syntax::excerpt_as_string_contents( self.parser.report.clone().unwrap_or(diagn::RcReport::new()), @@ -511,7 +511,7 @@ impl<'a, 'parser> ExpressionParser<'a, 'parser> expr::Value::String(expr::ValueString { utf8_contents: string, - encoding: "utf8".to_string(), + encoding, })); Ok(expr) diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs index 860cdfc7..d9af4aa5 100644 --- a/src/syntax/parser.rs +++ b/src/syntax/parser.rs @@ -449,6 +449,27 @@ impl<'a> Parser<'a> self.tokens[index].kind == kind } + + + pub fn next_is_string(&self, mut nth: usize) -> Option + { + let mut index = self.index; + + while nth > 0 && index < self.tokens.len() + { + nth -= 1; + index += 1; + while index < self.tokens.len() && self.tokens[index].kind.ignorable() + { index += 1; } + } + + if index >= self.tokens.len() + { + return None; + } + + self.tokens[index].kind.is_string() + } pub fn maybe_expect(&mut self, kind: TokenKind) -> Option diff --git a/src/syntax/token.rs b/src/syntax/token.rs index 43131b51..8cdedc14 100644 --- a/src/syntax/token.rs +++ b/src/syntax/token.rs @@ -1,4 +1,5 @@ use crate::diagn::{Span, RcReport}; +use crate::expr::StringEncoding; use std::rc::Rc; @@ -20,7 +21,7 @@ pub enum TokenKind LineBreak, Identifier, Number, - String, + String(StringEncoding), KeywordAsm, ParenOpen, ParenClose, @@ -66,11 +67,19 @@ pub enum TokenKind impl TokenKind { + pub fn is_string(self) -> Option { + if let TokenKind::String(encoding) = self { + Some(encoding) + } else { + None + } + } + fn needs_excerpt(self) -> bool { self == TokenKind::Identifier || self == TokenKind::Number || - self == TokenKind::String + self.is_string().is_some() } @@ -130,7 +139,7 @@ impl TokenKind TokenKind::LineBreak => "line break", TokenKind::Identifier => "identifier", TokenKind::Number => "number", - TokenKind::String => "string", + TokenKind::String(_) => "string", TokenKind::KeywordAsm => "`asm` keyword", TokenKind::ParenOpen => "`(`", TokenKind::ParenClose => "`)`", @@ -256,13 +265,23 @@ where S: Into check_for_number (&src[index..]).unwrap_or_else(|| check_for_string (&src[index..]).unwrap_or_else(|| (TokenKind::Error, 1))))))); - - let span = Span::new(filename.clone(), index, index + length); + + let length_offset = if let Some(encoding) = kind.is_string() { + if encoding != StringEncoding::Utf8 { + 1 + } else { + 0 + } + } else { + 0 + }; + + let span = Span::new(filename.clone(), index, index + length - length_offset); // Get the source excerpt for variable tokens (e.g. identifiers). let excerpt = match kind.needs_excerpt() { - true => Some(src[index..].iter().cloned().take(length).collect()), + true => Some(src[index..].iter().cloned().take(length - length_offset).collect()), false => None }; @@ -401,10 +420,33 @@ fn check_for_string(src: &[char]) -> Option<(TokenKind, usize)> if src[length] != '\"' { return None; } - + length += 1; + + let encoding = if length >= src.len() { + StringEncoding::Utf8 + } else if src[length] == 'W' { + length += 1; + StringEncoding::Utf16BE + } else if src[length] == 'U' { + length += 1; + StringEncoding::UnicodeBE + } else if src[length] == 'w' { + length += 1; + StringEncoding::Utf16LE + } else if src[length] == 'u' { + length += 1; + StringEncoding::UnicodeLE + } else if src[length] == 'a' { + length += 1; + StringEncoding::Ascii + } else if src[length].is_alphanumeric() { + return None + } else { + StringEncoding::Utf8 + }; - Some((TokenKind::String, length)) + Some((TokenKind::String(encoding), length)) } diff --git a/src/test/expr.rs b/src/test/expr.rs index 9237fa07..3fdb641d 100644 --- a/src/test/expr.rs +++ b/src/test/expr.rs @@ -63,6 +63,27 @@ fn test_literals() } +#[test] +fn test_string_literals() +{ + test("\"\"", Pass(expr::Value::make_string("", expr::StringEncoding::Utf8))); + test("\"\"W", Pass(expr::Value::make_string("", expr::StringEncoding::Utf16BE))); + test("\"\"w", Pass(expr::Value::make_string("", expr::StringEncoding::Utf16LE))); + test("\"\"U", Pass(expr::Value::make_string("", expr::StringEncoding::UnicodeBE))); + test("\"\"u", Pass(expr::Value::make_string("", expr::StringEncoding::UnicodeLE))); + test("\"\"a", Pass(expr::Value::make_string("", expr::StringEncoding::Ascii))); + test("\"\"x", Fail(("test", 1, "unexpected character"))); + + test("\"abc\"", Pass(expr::Value::make_string("abc", expr::StringEncoding::Utf8))); + test("\"abc\"W", Pass(expr::Value::make_string("abc", expr::StringEncoding::Utf16BE))); + test("\"abc\"w", Pass(expr::Value::make_string("abc", expr::StringEncoding::Utf16LE))); + test("\"abc\"U", Pass(expr::Value::make_string("abc", expr::StringEncoding::UnicodeBE))); + test("\"abc\"u", Pass(expr::Value::make_string("abc", expr::StringEncoding::UnicodeLE))); + test("\"abc\"a", Pass(expr::Value::make_string("abc", expr::StringEncoding::Ascii))); + test("\"abc\"x", Fail(("test", 1, "unexpected character"))); +} + + #[test] fn test_variables() { diff --git a/src/util/bigint.rs b/src/util/bigint.rs index eed07022..a851e241 100644 --- a/src/util/bigint.rs +++ b/src/util/bigint.rs @@ -22,18 +22,6 @@ impl BigInt } - pub fn new_from_str(s: &str) -> BigInt - { - let bytes = s.bytes().collect::>(); - let bigint = num_bigint::BigInt::from_signed_bytes_be(&bytes); - BigInt - { - bigint, - size: Some(bytes.len() * 8), - } - } - - pub fn as_string(&self) -> String { String::from_utf8_lossy(&self.bigint.to_signed_bytes_be()).to_string() diff --git a/src/util/bitvec_format.rs b/src/util/bitvec_format.rs index 39fb68e5..92e961e7 100644 --- a/src/util/bitvec_format.rs +++ b/src/util/bitvec_format.rs @@ -1,4 +1,6 @@ use crate::*; +use crate::util::CharCounter; +use crate::diagn::RcReport; impl util::BitVec @@ -476,7 +478,6 @@ impl util::BitVec result } - pub fn format_addrspan(&self, fileserver: &dyn util::FileServer) -> String {