Skip to content

Commit

Permalink
fix: Handle multi-byte utf8 characters in formatter (#6118)
Browse files Browse the repository at this point in the history
# Description

## Problem\*

Resolves #6108

## Summary\*



## Additional Context



## Documentation\*

Check one:
- [x] No documentation needed.
- [ ] Documentation included in this PR.
- [ ] **[For Experimental Features]** Documentation to be submitted in a
separate PR.

# PR Checklist\*

- [x] I have tested the changes locally.
- [x] I have formatted the changes with [Prettier](https://prettier.io/)
and/or `cargo fmt` on default settings.

---------

Co-authored-by: Michael Klein <[email protected]>
Co-authored-by: Michael J Klein <[email protected]>
Co-authored-by: TomAFrench <[email protected]>
  • Loading branch information
4 people authored Sep 24, 2024
1 parent 5b1c896 commit b1d0619
Show file tree
Hide file tree
Showing 11 changed files with 135 additions and 39 deletions.
69 changes: 61 additions & 8 deletions .github/workflows/test-js-packages.yml
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ jobs:
with:
name: acvm-js
path: ./acvm-repo/acvm_js

- name: Set up test environment
uses: ./.github/actions/setup

Expand Down Expand Up @@ -230,13 +230,13 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v4

- name: Download nargo binary
uses: actions/download-artifact@v4
with:
name: nargo
path: ./nargo

- name: Download artifact
uses: actions/download-artifact@v4
with:
Expand All @@ -248,7 +248,7 @@ jobs:
with:
name: noirc_abi_wasm
path: ./tooling/noirc_abi_wasm

- name: Set nargo on PATH
run: |
nargo_binary="${{ github.workspace }}/nargo/nargo"
Expand Down Expand Up @@ -336,13 +336,13 @@ jobs:
with:
name: acvm-js
path: ./acvm-repo/acvm_js

- name: Download noirc_abi package artifact
uses: actions/download-artifact@v4
with:
name: noirc_abi_wasm
path: ./tooling/noirc_abi_wasm

- name: Set nargo on PATH
run: |
nargo_binary="${{ github.workspace }}/nargo/nargo"
Expand Down Expand Up @@ -468,7 +468,7 @@ jobs:
working-directory: ./compiler/integration-tests
run: |
yarn test:browser
test-examples:
name: Example scripts
runs-on: ubuntu-latest
Expand Down Expand Up @@ -509,6 +509,59 @@ jobs:
working-directory: ./examples/codegen_verifier
run: ./test.sh

external-repo-checks:
needs: [build-nargo]
runs-on: ubuntu-latest
# Only run when 'run-external-checks' label is present
if: contains(github.event.pull_request.labels.*.name, 'run-external-checks')
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
project:
# Disabled as these are currently failing with many visibility errors
# - { repo: AztecProtocol/aztec-nr, path: ./ }
# - { repo: AztecProtocol/aztec-packages, path: ./noir-projects/noir-contracts }
# Disabled as aztec-packages requires a setup-step in order to generate a `Nargo.toml`
#- { repo: AztecProtocol/aztec-packages, path: ./noir-projects/noir-protocol-circuits }
- { repo: zac-williamson/noir-edwards, path: ./, ref: 037e44b2ee8557c51f6aef9bb9d63ea9e32722d1 }
# TODO: Enable these once they're passing against master again.
# - { repo: zac-williamson/noir-bignum, path: ./, ref: 030c2acce1e6b97c44a3bbbf3429ed96f20d72d3 }
# - { repo: vlayer-xyz/monorepo, path: ./, ref: ee46af88c025863872234eb05d890e1e447907cb }
# - { repo: hashcloak/noir-bigint, path: ./, ref: 940ddba3a5201b508e7b37a2ef643551afcf5ed8 }
name: Check external repo - ${{ matrix.project.repo }}
steps:
- name: Checkout
uses: actions/checkout@v4
with:
repository: ${{ matrix.project.repo }}
path: test-repo
ref: ${{ matrix.project.ref }}

- name: Download nargo binary
uses: actions/download-artifact@v4
with:
name: nargo
path: ./nargo

- name: Set nargo on PATH
run: |
nargo_binary="${{ github.workspace }}/nargo/nargo"
chmod +x $nargo_binary
echo "$(dirname $nargo_binary)" >> $GITHUB_PATH
export PATH="$PATH:$(dirname $nargo_binary)"
nargo -V
- name: Remove requirements on compiler version
working-directory: ./test-repo
run: |
# Github actions seems to not expand "**" in globs by default.
shopt -s globstar
sed -i '/^compiler_version/d' ./**/Nargo.toml
- name: Run nargo check
working-directory: ./test-repo/${{ matrix.project.path }}
run: nargo check

# This is a job which depends on all test jobs and reports the overall status.
# This allows us to add/remove test jobs without having to update the required workflows.
tests-end:
Expand All @@ -526,7 +579,7 @@ jobs:
- test-integration-node
- test-integration-browser
- test-examples

steps:
- name: Report overall success
run: |
Expand Down
6 changes: 6 additions & 0 deletions compiler/noirc_frontend/src/lexer/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ pub enum LexerErrorKind {
InvalidEscape { escaped: char, span: Span },
#[error("Invalid quote delimiter `{delimiter}`, valid delimiters are `{{`, `[`, and `(`")]
InvalidQuoteDelimiter { delimiter: SpannedToken },
#[error("Non-ASCII characters are invalid in comments")]
NonAsciiComment { span: Span },
#[error("Expected `{end_delim}` to close this {start_delim}")]
UnclosedQuote { start_delim: SpannedToken, end_delim: Token },
}
Expand Down Expand Up @@ -65,6 +67,7 @@ impl LexerErrorKind {
LexerErrorKind::UnterminatedStringLiteral { span } => *span,
LexerErrorKind::InvalidEscape { span, .. } => *span,
LexerErrorKind::InvalidQuoteDelimiter { delimiter } => delimiter.to_span(),
LexerErrorKind::NonAsciiComment { span, .. } => *span,
LexerErrorKind::UnclosedQuote { start_delim, .. } => start_delim.to_span(),
}
}
Expand Down Expand Up @@ -124,6 +127,9 @@ impl LexerErrorKind {
LexerErrorKind::InvalidQuoteDelimiter { delimiter } => {
(format!("Invalid quote delimiter `{delimiter}`"), "Valid delimiters are `{`, `[`, and `(`".to_string(), delimiter.to_span())
},
LexerErrorKind::NonAsciiComment { span } => {
("Non-ASCII character in comment".to_string(), "Invalid comment character: only ASCII is currently supported.".to_string(), *span)
}
LexerErrorKind::UnclosedQuote { start_delim, end_delim } => {
("Unclosed `quote` expression".to_string(), format!("Expected a `{end_delim}` to close this `{start_delim}`"), start_delim.to_span())
}
Expand Down
24 changes: 24 additions & 0 deletions compiler/noirc_frontend/src/lexer/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,11 @@ impl<'a> Lexer<'a> {
};
let comment = self.eat_while(None, |ch| ch != '\n');

if !comment.is_ascii() {
let span = Span::from(start..self.position);
return Err(LexerErrorKind::NonAsciiComment { span });
}

if doc_style.is_none() && self.skip_comments {
return self.next_token();
}
Expand Down Expand Up @@ -651,6 +656,11 @@ impl<'a> Lexer<'a> {
}

if depth == 0 {
if !content.is_ascii() {
let span = Span::from(start..self.position);
return Err(LexerErrorKind::NonAsciiComment { span });
}

if doc_style.is_none() && self.skip_comments {
return self.next_token();
}
Expand Down Expand Up @@ -1331,6 +1341,7 @@ mod tests {

Err(LexerErrorKind::InvalidIntegerLiteral { .. })
| Err(LexerErrorKind::UnexpectedCharacter { .. })
| Err(LexerErrorKind::NonAsciiComment { .. })
| Err(LexerErrorKind::UnterminatedBlockComment { .. }) => {
expected_token_found = true;
}
Expand Down Expand Up @@ -1389,4 +1400,17 @@ mod tests {
}
}
}

#[test]
fn test_non_ascii_comments() {
let cases = vec!["// 🙂", "// schön", "/* in the middle 🙂 of a comment */"];

for source in cases {
let mut lexer = Lexer::new(source);
assert!(
lexer.any(|token| matches!(token, Err(LexerErrorKind::NonAsciiComment { .. }))),
"Expected NonAsciiComment error"
);
}
}
}
4 changes: 2 additions & 2 deletions noir_stdlib/src/collections/map.nr
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::default::Default;
use crate::hash::{Hash, Hasher, BuildHasher};
use crate::collections::bounded_vec::BoundedVec;

// We use load factor α_max = 0.75.
// We use load factor alpha_max = 0.75.
// Upon exceeding it, assert will fail in order to inform the user
// about performance degradation, so that he can adjust the capacity.
global MAX_LOAD_FACTOR_NUMERATOR = 3;
Expand Down Expand Up @@ -624,7 +624,7 @@ impl<K, V, let N: u32, B> HashMap<K, V, N, B> {
(hash + (attempt + attempt * attempt) / 2) % N
}

// Amount of elements in the table in relation to available slots exceeds α_max.
// Amount of elements in the table in relation to available slots exceeds alpha_max.
// To avoid a comparatively more expensive division operation
// we conduct cross-multiplication instead.
// n / m >= MAX_LOAD_FACTOR_NUMERATOR / MAX_LOAD_FACTOR_DEN0MINATOR
Expand Down
14 changes: 7 additions & 7 deletions noir_stdlib/src/ec/mod.nr
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
// ========
// The following three elliptic curve representations are admissible:
mod tecurve; // Twisted Edwards curves
mod swcurve; // Elliptic curves in Short Weierstraß form
mod swcurve; // Elliptic curves in Short Weierstrass form
mod montcurve; // Montgomery curves
mod consts; // Commonly used curve presets
//
// Note that Twisted Edwards and Montgomery curves are (birationally) equivalent, so that
// they may be freely converted between one another, whereas Short Weierstraß curves are
// they may be freely converted between one another, whereas Short Weierstrass curves are
// more general. Diagramatically:
//
// tecurve == montcurve swcurve
// tecurve == montcurve `subset` swcurve
//
// Each module is further divided into two submodules, 'affine' and 'curvegroup', depending
// on the preferred coordinate representation. Affine coordinates are none other than the usual
Expand Down Expand Up @@ -47,7 +47,7 @@ mod consts; // Commonly used curve presets
// coordinates by calling the `into_group` (resp. `into_affine`) method on them. Finally,
// Points may be freely mapped between their respective Twisted Edwards and Montgomery
// representations by calling the `into_montcurve` or `into_tecurve` methods. For mappings
// between Twisted Edwards/Montgomery curves and Short Weierstraß curves, see the Curve section
// between Twisted Edwards/Montgomery curves and Short Weierstrass curves, see the Curve section
// below, as the underlying mappings are those of curves rather than ambient spaces.
// As a rule, Points in affine (or CurveGroup) coordinates are mapped to Points in affine
// (resp. CurveGroup) coordinates.
Expand Down Expand Up @@ -91,21 +91,21 @@ mod consts; // Commonly used curve presets
// Curve configurations may also be converted between different curve representations by calling the `into_swcurve`,
// `into_montcurve` and `into_tecurve` methods subject to the relation between the curve representations mentioned
// above. Note that it is possible to map Points from a Twisted Edwards/Montgomery curve to the corresponding
// Short Weierstraß representation and back, and the methods to do so are exposed as `map_into_swcurve` and
// Short Weierstrass representation and back, and the methods to do so are exposed as `map_into_swcurve` and
// `map_from_swcurve`, which each take one argument, the point to be mapped.
//
// Curve maps
// ==========
// There are a few different ways of mapping Field elements to elliptic curves. Here we provide the simplified
// Shallue-van de Woestijne-Ulas and Elligator 2 methods, the former being applicable to all curve types
// provided above subject to the constraint that the coefficients of the corresponding Short Weierstraß curve satisfies
// provided above subject to the constraint that the coefficients of the corresponding Short Weierstrass curve satisfies
// a*b != 0 and the latter being applicable to Montgomery and Twisted Edwards curves subject to the constraint that
// the coefficients of the corresponding Montgomery curve satisfy j*k != 0 and (j^2 - 4)/k^2 is non-square.
//
// The simplified Shallue-van de Woestijne-Ulas method is exposed as the method `swu_map` on the Curve configuration and
// depends on two parameters, a Field element z != -1 for which g(x) - z is irreducible over Field and g(b/(z*a)) is
// square, where g(x) = x^3 + a*x + b is the right-hand side of the defining equation of the corresponding Short
// Weierstraß curve, and a Field element u to be mapped onto the curve. For example, in the case of bjj_affine above,
// Weierstrass curve, and a Field element u to be mapped onto the curve. For example, in the case of bjj_affine above,
// it may be determined using the scripts provided at <https://github.com/cfrg/draft-irtf-cfrg-hash-to-curve> that z = 5.
//
// The Elligator 2 method is exposed as the method `elligator2_map` on the Curve configurations of Montgomery and
Expand Down
12 changes: 6 additions & 6 deletions noir_stdlib/src/ec/montcurve.nr
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ mod affine {
TECurve::new((j + 2) / k, (j - 2) / k, gen.into_tecurve())
}

// Conversion to equivalent Short Weierstraß curve
// Conversion to equivalent Short Weierstrass curve
pub fn into_swcurve(self) -> SWCurve {
let j = self.j;
let k = self.k;
Expand All @@ -155,7 +155,7 @@ mod affine {
SWCurve::new(a0, b0, self.map_into_swcurve(self.gen))
}

// Point mapping into equivalent Short Weierstraß curve
// Point mapping into equivalent Short Weierstrass curve
pub fn map_into_swcurve(self, p: Point) -> SWPoint {
if p.is_zero() {
SWPoint::zero()
Expand All @@ -164,7 +164,7 @@ mod affine {
}
}

// Point mapping from equivalent Short Weierstraß curve
// Point mapping from equivalent Short Weierstrass curve
fn map_from_swcurve(self, p: SWPoint) -> Point {
let SWPoint {x, y, infty} = p;
let j = self.j;
Expand Down Expand Up @@ -347,7 +347,7 @@ mod curvegroup {
TECurve::new((j + 2) / k, (j - 2) / k, gen.into_tecurve())
}

// Conversion to equivalent Short Weierstraß curve
// Conversion to equivalent Short Weierstrass curve
fn into_swcurve(self) -> SWCurve {
let j = self.j;
let k = self.k;
Expand All @@ -357,12 +357,12 @@ mod curvegroup {
SWCurve::new(a0, b0, self.map_into_swcurve(self.gen))
}

// Point mapping into equivalent Short Weierstraß curve
// Point mapping into equivalent Short Weierstrass curve
pub fn map_into_swcurve(self, p: Point) -> SWPoint {
self.into_affine().map_into_swcurve(p.into_affine()).into_group()
}

// Point mapping from equivalent Short Weierstraß curve
// Point mapping from equivalent Short Weierstrass curve
fn map_from_swcurve(self, p: SWPoint) -> Point {
self.into_affine().map_from_swcurve(p.into_affine()).into_group()
}
Expand Down
8 changes: 4 additions & 4 deletions noir_stdlib/src/ec/swcurve.nr
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
mod affine {
// Affine representation of Short Weierstraß curves
// Affine representation of Short Weierstrass curves
// Points are represented by two-dimensional Cartesian coordinates.
// Group operations are implemented in terms of those in CurveGroup (in this case, extended Twisted Edwards) coordinates
// for reasons of efficiency, cf. <https://en.wikibooks.org/wiki/Cryptography/Prime_Curve/Jacobian_Coordinates>.
Expand All @@ -10,7 +10,7 @@ mod affine {
use crate::cmp::Eq;

// Curve specification
pub struct Curve { // Short Weierstraß curve
pub struct Curve { // Short Weierstrass curve
// Coefficients in defining equation y^2 = x^3 + ax + b
a: Field,
b: Field,
Expand Down Expand Up @@ -187,14 +187,14 @@ mod affine {
}

mod curvegroup {
// CurveGroup representation of Weierstraß curves
// CurveGroup representation of Weierstrass curves
// Points are represented by three-dimensional Jacobian coordinates.
// See <https://en.wikibooks.org/wiki/Cryptography/Prime_Curve/Jacobian_Coordinates> for details.
use crate::ec::swcurve::affine;
use crate::cmp::Eq;

// Curve specification
pub struct Curve { // Short Weierstraß curve
pub struct Curve { // Short Weierstrass curve
// Coefficients in defining equation y^2 = x^3 + axz^4 + bz^6
a: Field,
b: Field,
Expand Down
Loading

0 comments on commit b1d0619

Please sign in to comment.