Skip to content

Commit

Permalink
add string literal directives
Browse files Browse the repository at this point in the history
  • Loading branch information
hlorenzi committed Mar 18, 2017
1 parent fd1d735 commit 6988382
Show file tree
Hide file tree
Showing 6 changed files with 282 additions and 8 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "customasm"
version = "0.2.0"
version = "0.3.0"
authors = ["Henrique Lorenzi <[email protected]>"]

[lib]
Expand Down
43 changes: 43 additions & 0 deletions doc/src.md
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,49 @@ lda 0x77
Note that the `#d32` directive's arguments, `0x1234, 0x5678`, were
extended with zeroes to match the directive's bit-size.

### String Directive

This directive copies the UTF-8 representation of a string to
the output. The representation is extended with zeroes at the end
until it matches the machine's alignment. Escape sequences and
Unicode characters are available. For example:

```
#str "abcd"
#str "\n\r\0"
#str "\x12\x34"
#str "木"
```

...would be assembled into:

```
0x61 0x62 0x63 0x64
0x0a 0x0d 0x00
0x12 0x34
0xe6 0x9c 0xa8
```

### String with Length Directive

Works like the previous directive, but prefixes the output with
the string length in bytes, expressed in the given number of bits.
For example:

```
#strl 8, "abcd"
#strl 16, "abcd"
#strl 32, "abcd"
```

...would be assembled into:

```
0x04 0x61 0x62 0x63 0x64
0x00 0x04 0x61 0x62 0x63 0x64
0x00 0x00 0x00 0x04 0x61 0x62 0x63 0x64
```

### Reserve Directive

This directive advances the instruction *and* output addresses by
Expand Down
52 changes: 52 additions & 0 deletions src/assembler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,12 @@ impl<'def> Assembler<'def>
self.advance_address(bits);
}

"str" =>
return self.parse_str_directive(parser),

"strl" =>
return self.parse_strl_directive(parser),

"include" =>
{
let (new_path, span) = try!(self.parse_relative_filename(parser, cur_path));
Expand Down Expand Up @@ -296,6 +302,52 @@ impl<'def> Assembler<'def>
}


/// Handles the `#str` directive.
///
/// Reads one string token, writes its bytes to the output eight bits at a
/// time, and appends zero bits until the emitted length is a multiple of
/// `self.def.align_bits`. The directive must be followed by a line break
/// or the end of the input.
///
/// # Errors
///
/// Propagates the parser's error when the next token is not a string or
/// when anything other than a line break / end follows it.
fn parse_str_directive(&mut self, parser: &mut Parser) -> Result<(), Error>
{
    let (text, _) = parser.expect_string()?;

    // One 8-bit group per byte of the string.
    let mut bits = BitVec::new();
    for byte in text.bytes()
    {
        bits.push(8, &BigInt::from_u8(byte));
    }

    // Zero-fill the tail up to the next alignment boundary.
    loop
    {
        if bits.len() % self.def.align_bits == 0
            { break; }

        bits.push_bit(false);
    }

    self.output_bitvec(&bits);
    parser.expect_linebreak_or_end()?;
    Ok(())
}


fn parse_strl_directive(&mut self, parser: &mut Parser) -> Result<(), Error>
{
let size_span = parser.current().span.clone();
let size = try!(self.parse_integer(parser));
if size % self.def.align_bits != 0
{ return Err(Error::new_with_span(format!("string length is not aligned"), size_span)); }

try!(parser.expect_operator(","));

let (string, _) = try!(parser.expect_string());
let mut bitvec = BitVec::new();

for c in string.bytes()
{ bitvec.push(8, &BigInt::from_u8(c)); }

while bitvec.len() % self.def.align_bits != 0
{ bitvec.push_bit(false); }

let strlen = BigInt::from_usize(string.len());
if strlen.width() > size
{ return Err(Error::new_with_span(format!("string length (`{}`) does not fit given width", string.len()), size_span)); }
self.output_integer(size, &strlen);

self.output_bitvec(&bitvec);
try!(parser.expect_linebreak_or_end());
Ok(())
}


fn parse_global_constant(&mut self, parser: &mut Parser) -> Result<(), Error>
{
let (label, label_span) = try!(parser.expect_identifier());
Expand Down
125 changes: 125 additions & 0 deletions src/tests/full.rs
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,131 @@ fn test_data_directive_with_variables()
}


/// Checks `#str` output for plain ASCII strings under a range of alignments.
///
/// Each table row pairs an `#align` header with the expected output text;
/// the numeric argument to `pass` appears to be the radix of that text
/// (16 for hex, 2 for binary) -- confirm against the helper's definition.
#[test]
fn test_str_directive_simple()
{
    // "abcd" is 32 bits long, so among these alignments only 64 adds padding.
    let abcd_cases = [
        ("#align 1", "61626364"),
        ("#align 2", "61626364"),
        ("#align 4", "61626364"),
        ("#align 8", "61626364"),
        ("#align 16", "61626364"),
        ("#align 32", "61626364"),
        ("#align 64", "6162636400000000"),
    ];

    for &(header, expected) in abcd_cases.iter()
    {
        pass(header, "#str \"abcd\"", 16, expected);
    }

    // "hello" is 40 bits long, so alignments of 16 and above add padding.
    let hello_cases = [
        ("#align 1", "68656c6c6f"),
        ("#align 2", "68656c6c6f"),
        ("#align 4", "68656c6c6f"),
        ("#align 8", "68656c6c6f"),
        ("#align 16", "68656c6c6f00"),
        ("#align 32", "68656c6c6f000000"),
        ("#align 64", "68656c6c6f000000"),
    ];

    for &(header, expected) in hello_cases.iter()
    {
        pass(header, "#str \"hello\"", 16, expected);
    }

    // Alignments that are not powers of two, compared bit by bit.
    let odd_cases = [
        ("#align 3", "011000010110001001100011011001000"),
        ("#align 5", "01100001011000100110001101100100000"),
        ("#align 7", "01100001011000100110001101100100000"),
        ("#align 9", "011000010110001001100011011001000000"),
    ];

    for &(header, expected) in odd_cases.iter()
    {
        pass(header, "#str \"abcd\"", 2, expected);
    }
}


/// Checks that `#str` emits the UTF-8 encoding of non-ASCII characters.
///
/// `木` (U+6728) encodes to the three bytes `e6 9c a8`; the expected
/// outputs below show that sequence, zero-padded to each alignment.
#[test]
fn test_str_directive_utf8()
{
    // A lone three-byte character (24 bits).
    pass("#align 1", "#str \"木\"", 16, "e69ca8");
    pass("#align 2", "#str \"木\"", 16, "e69ca8");
    pass("#align 4", "#str \"木\"", 16, "e69ca8");
    pass("#align 8", "#str \"木\"", 16, "e69ca8");
    pass("#align 16", "#str \"木\"", 16, "e69ca800");
    pass("#align 32", "#str \"木\"", 16, "e69ca800");
    pass("#align 64", "#str \"木\"", 16, "e69ca80000000000");

    // The same character surrounded by ASCII (7 bytes, 56 bits).
    pass("#align 1", "#str \"ab木cd\"", 16, "6162e69ca86364");
    pass("#align 2", "#str \"ab木cd\"", 16, "6162e69ca86364");
    pass("#align 4", "#str \"ab木cd\"", 16, "6162e69ca86364");
    pass("#align 8", "#str \"ab木cd\"", 16, "6162e69ca86364");
    pass("#align 16", "#str \"ab木cd\"", 16, "6162e69ca8636400");
    pass("#align 32", "#str \"ab木cd\"", 16, "6162e69ca8636400");
    pass("#align 64", "#str \"ab木cd\"", 16, "6162e69ca8636400");
}


/// Exercises `#str` with control characters, escape sequences, malformed
/// escapes, and invalid/unterminated string arguments.
///
/// Mind the two layers of escaping: the Rust compiler processes each literal
/// first, and only the resulting text is handed to the assembler.
#[test]
fn test_str_directive_escape()
{
// Here Rust itself resolves `\0`, `\t`, `\n`, `\r`, so the assembler receives
// the raw control character between the quotes (not a backslash sequence).
pass("#align 8", "#str \"\0\"", 16, "00");
pass("#align 8", "#str \"\t\"", 16, "09");
pass("#align 8", "#str \"\n\"", 16, "0a");
pass("#align 8", "#str \"\r\"", 16, "0d");
// The assembler receives `\\` and `\"`: escaped backslash and escaped quote.
pass("#align 8", "#str \"\\\\\"", 16, "5c");
pass("#align 8", "#str \"\\\"\"", 16, "22");

// The assembler receives `"\"`: a single backslash right before the final
// quote is expected to be kept as a literal backslash.
pass("#align 8", "#str \"\\\"", 16, "5c");

// `\xNN` hex escapes (either letter case). Values of 0x80 and above are
// expected as two bytes: the value is treated as a code point and
// UTF-8 encoded, not emitted as a raw byte.
pass("#align 8", "#str \"\\x00\"", 16, "00");
pass("#align 8", "#str \"\\x12\"", 16, "12");
pass("#align 8", "#str \"\\x7f\"", 16, "7f");
pass("#align 8", "#str \"\\x80\"", 16, "c280");
pass("#align 8", "#str \"\\xab\"", 16, "c2ab");
pass("#align 8", "#str \"\\xAB\"", 16, "c2ab");
// Only two hex digits are consumed; what follows is ordinary text.
pass("#align 8", "#str \"\\xabcd\"", 16, "c2ab6364");
pass("#align 8", "#str \"ab\\xcd\"", 16, "6162c38d");

// Incomplete hex escapes: the expected output is a backslash (without the
// `x`) followed by whatever characters remained.
pass("#align 8", "#str \"\\x\"", 16, "5c");
pass("#align 8", "#str \"\\x0\"", 16, "5c30");

// Rejected inputs: a non-string argument and strings with no closing quote.
// NOTE(review): the last argument looks like a fragment expected in the
// error message, and `1` like the reported line -- confirm against `fail`.
fail("#align 8", "#str 0", 1, "string");
fail("#align 8", "#str \"0", 1, "line break");
fail("#align 8", "#str \"\\", 1, "line break");
}


/// Checks `#strl` for ASCII strings: length-prefix widths, string lengths,
/// syntax errors, the largest length that fits, and misaligned widths.
#[test]
fn test_strl_directive_simple()
{
    // Same string, varying prefix width and machine alignment.
    let width_cases = [
        ("#align 8", "#strl 8, \"abcd\"", "0461626364"),
        ("#align 8", "#strl 16, \"abcd\"", "000461626364"),
        ("#align 8", "#strl 24, \"abcd\"", "00000461626364"),
        ("#align 8", "#strl 32, \"abcd\"", "0000000461626364"),
        ("#align 8", "#strl 64, \"abcd\"", "000000000000000461626364"),
        ("#align 16", "#strl 16, \"abcd\"", "000461626364"),
        ("#align 16", "#strl 32, \"abcd\"", "0000000461626364"),
        ("#align 16", "#strl 64, \"abcd\"", "000000000000000461626364"),
        ("#align 32", "#strl 32, \"abcd\"", "0000000461626364"),
        ("#align 32", "#strl 64, \"abcd\"", "000000000000000461626364"),
        ("#align 64", "#strl 64, \"abcd\"", "00000000000000046162636400000000"),
    ];

    for &(header, source, expected) in width_cases.iter()
    {
        pass(header, source, 16, expected);
    }

    // Fixed 8-bit prefix, varying string length (including empty).
    let length_cases = [
        ("#strl 8, \"\"", "00"),
        ("#strl 8, \"a\"", "0161"),
        ("#strl 8, \"ab\"", "026162"),
        ("#strl 8, \"abc\"", "03616263"),
        ("#strl 8, \"abcde\"", "056162636465"),
        ("#strl 8, \"abcdef\"", "06616263646566"),
        ("#strl 8, \"abcdefg\"", "0761626364656667"),
    ];

    for &(source, expected) in length_cases.iter()
    {
        pass("#align 8", source, 16, expected);
    }

    // Missing width argument, then missing comma.
    fail("#align 8", "#strl \"abcd\"", 1, "expected");
    fail("#align 8", "#strl 8 \"abcd\"", 1, "expected");

    // A 4-bit prefix holds lengths up to 15; a 16-byte string must be refused.
    pass("#align 4", "#strl 4, \"0123456789abcde\"", 16, "f303132333435363738396162636465");
    fail("#align 4", "#strl 4, \"0123456789abcdef\"", 1, "does not fit");

    // Prefix widths that are not a multiple of the 8-bit alignment.
    let misaligned_sources = [
        "#strl 1, \"abcd\"",
        "#strl 2, \"abcd\"",
        "#strl 3, \"abcd\"",
        "#strl 4, \"abcd\"",
        "#strl 5, \"abcd\"",
        "#strl 6, \"abcd\"",
        "#strl 7, \"abcd\"",
        "#strl 9, \"abcd\"",
    ];

    for &source in misaligned_sources.iter()
    {
        fail("#align 8", source, 1, "string length");
    }
}


/// Checks that the `#strl` length prefix counts UTF-8 bytes, not characters.
///
/// `木` (U+6728) is one character but three bytes (`e6 9c a8`), so the
/// prefixes below are 3 and 7 rather than 1 and 5.
#[test]
fn test_strl_directive_utf8()
{
    pass("#align 8", "#strl 8, \"木\"", 16, "03e69ca8");
    pass("#align 8", "#strl 8, \"ab木cd\"", 16, "076162e69ca86364");
}


#[test]
fn test_reserve_directive()
{
Expand Down
11 changes: 10 additions & 1 deletion src/util/bigint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ pub struct BigInt


impl BigInt
{
{
pub fn from_i64(value: i64) -> BigInt
{
BigInt
Expand All @@ -17,6 +17,15 @@ impl BigInt
}


/// Builds a `BigInt` holding the value of a single byte.
pub fn from_u8(value: u8) -> BigInt
{
    // u8 -> i64 always fits, so the lossless `From` conversion is used.
    let widened = i64::from(value);

    BigInt { value: widened }
}


pub fn from_usize(value: usize) -> BigInt
{
BigInt
Expand Down
57 changes: 51 additions & 6 deletions src/util/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -378,14 +378,59 @@ fn try_read_string(file: &Rc<String>, src: &[char], index: &mut CharIndex) -> Op
index.advance();

let mut s = String::new();
while index.linear < src.len() && src[index.linear] != '\"' // "
while index.linear + 1 < src.len() && src[index.linear] != '\"' // "
{
s.push(src[index.linear]);

if src[index.linear] == '\n'
{ index.advance_line(); }
// Parse escape sequences.
if src[index.linear] == '\\' && index.linear + 2 < src.len()
{
index.advance();

match src[index.linear]
{
'\\' => { s.push('\\'); index.advance(); }
'\"' => { s.push('\"'); index.advance(); } // "
'0' => { s.push('\0'); index.advance(); }
't' => { s.push('\t'); index.advance(); }
'n' => { s.push('\n'); index.advance(); }
'r' => { s.push('\r'); index.advance(); }
'x' =>
{
index.advance();

if index.linear + 2 < src.len()
{
let hex1 = src[index.linear + 0].to_digit(16);
let hex2 = src[index.linear + 1].to_digit(16);

if hex1.is_some() && hex2.is_some()
{
index.advance();
index.advance();

s.push(((hex1.unwrap() << 4) | hex2.unwrap()) as u8 as char);
}
// FIXME: Should return an error.
else
{ s.push('\\'); }
}
// FIXME: Should return an error.
else
{ s.push('\\'); }
}

// FIXME: Should return an error.
_ => { s.push('\\'); }
}
}
else
{ index.advance(); }
{
s.push(src[index.linear]);

if src[index.linear] == '\n'
{ index.advance_line(); }
else
{ index.advance(); }
}
}

if src[index.linear] == '\"' // "
Expand Down

0 comments on commit 6988382

Please sign in to comment.