diff --git a/CHANGELOG.md b/CHANGELOG.md index e408d4f..504a045 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ ## Unreleased +## v1.4.0 - 29 March 2024 +- Fix bug where trailing comma was causing error + ## v1.3.1 - 1 February 2024 - Update to gleam_stdlib = "~> 0.34 or ~> 1.0" in preparation for 1.0 diff --git a/gleam.toml b/gleam.toml index a73f87d..f1d7b54 100644 --- a/gleam.toml +++ b/gleam.toml @@ -1,5 +1,5 @@ name = "gsv" -version = "1.3.1" +version = "1.4.0" gleam = ">= 0.32.0" description = "A simple csv parser and generator written in gleam " diff --git a/src/gsv.gleam b/src/gsv.gleam index 297a819..60dae51 100644 --- a/src/gsv.gleam +++ b/src/gsv.gleam @@ -1,9 +1,9 @@ -import gsv/internal/ast.{ParseError} -import gsv/internal/token.{Location} +import gleam/int import gleam/list -import gleam/string import gleam/result -import gleam/int +import gleam/string +import gsv/internal/ast.{ParseError} +import gsv/internal/token.{Location} /// Parses a csv string to a list of lists of strings. /// Automatically handles Windows and Unix line endings. diff --git a/src/gsv/internal/ast.gleam b/src/gsv/internal/ast.gleam index 7a526a6..0bf657f 100644 --- a/src/gsv/internal/ast.gleam +++ b/src/gsv/internal/ast.gleam @@ -92,33 +92,46 @@ fn parse_p( // If we just parsed a comma, we're expecting an Escaped or Non-Escaped string, or another comma // (indicating an empty string) - [#(Textdata(str), _), ..remaining_tokens], JustParsedComma, [ - curr_line, - ..previously_parsed_lines - ] -> + [#(Textdata(str), _), ..remaining_tokens], + JustParsedComma, + [curr_line, ..previously_parsed_lines] -> parse_p(remaining_tokens, JustParsedField, [ [str, ..curr_line], ..previously_parsed_lines ]) - [#(Doublequote, _), ..remaining_tokens], JustParsedComma, [ - curr_line, - ..previously_parsed_lines - ] -> + [#(Doublequote, _), ..remaining_tokens], + JustParsedComma, + [curr_line, ..previously_parsed_lines] -> parse_p(remaining_tokens, InsideEscapedString, [ ["", ..curr_line], ..previously_parsed_lines ]) - [#(Comma, _), ..remaining_tokens], JustParsedComma, [ - curr_line, - ..previously_parsed_lines - ] -> + [#(Comma, _), ..remaining_tokens], + JustParsedComma, + [curr_line, ..previously_parsed_lines] -> parse_p(remaining_tokens, JustParsedComma, [ ["", ..curr_line], ..previously_parsed_lines ]) + [#(CR, _), ..remaining_tokens], + JustParsedComma, + [curr_line, ..previously_parsed_lines] -> + parse_p(remaining_tokens, JustParsedCR, [ + ["", ..curr_line], + ..previously_parsed_lines + ]) + + [#(LF, _), ..remaining_tokens], + JustParsedComma, + [curr_line, ..previously_parsed_lines] -> + parse_p(remaining_tokens, JustParsedNewline, [ + ["", ..curr_line], + ..previously_parsed_lines + ]) + [#(tok, loc), ..], JustParsedComma, _ -> Error(ParseError( loc, @@ -130,10 +143,9 @@ fn parse_p( [#(Textdata(str), _), ..remaining_tokens], JustParsedNewline, llf -> parse_p(remaining_tokens, JustParsedField, [[str], ..llf]) - [#(Doublequote, _), ..remaining_tokens], JustParsedNewline, [ - curr_line, - ..previously_parsed_lines - ] -> + [#(Doublequote, _), ..remaining_tokens], + JustParsedNewline, + [curr_line, ..previously_parsed_lines] -> parse_p(remaining_tokens, InsideEscapedString, [ ["", ..curr_line], ..previously_parsed_lines @@ -148,10 +160,9 @@ fn parse_p( // If we're inside an escaped string, we can take anything until we get a double quote, // but a double double quote "" escapes the double quote and we keep parsing - [#(Doublequote, _), #(Doublequote, _), ..remaining_tokens], InsideEscapedString, [ - [str, ..rest_curr_line], - ..previously_parsed_lines - ] -> + [#(Doublequote, _), #(Doublequote, _), ..remaining_tokens], + InsideEscapedString, + [[str, ..rest_curr_line], ..previously_parsed_lines] -> parse_p(remaining_tokens, InsideEscapedString, [ [str <> "\"", ..rest_curr_line], ..previously_parsed_lines @@ -160,10 +171,9 @@ fn parse_p( [#(Doublequote, _), ..remaining_tokens], InsideEscapedString, llf -> parse_p(remaining_tokens, JustParsedField, llf) - [#(other_token, _), ..remaining_tokens], InsideEscapedString, [ - [str, ..rest_curr_line], - ..previously_parsed_lines - ] -> + [#(other_token, _), ..remaining_tokens], + InsideEscapedString, + [[str, ..rest_curr_line], ..previously_parsed_lines] -> parse_p(remaining_tokens, InsideEscapedString, [ [str <> token.to_lexeme(other_token), ..rest_curr_line], ..previously_parsed_lines diff --git a/src/gsv/internal/token.gleam b/src/gsv/internal/token.gleam index 4bf03ab..98f2aef 100644 --- a/src/gsv/internal/token.gleam +++ b/src/gsv/internal/token.gleam @@ -8,8 +8,8 @@ //// escaped = DQUOTE *(TEXTDATA / COMMA / CR / LF / 2DQUOTE) DQUOTE //// non-escaped = *TEXTDATA -import gleam/string import gleam/list +import gleam/string pub type CsvToken { Comma diff --git a/test/gsv_test.gleam b/test/gsv_test.gleam index ea1042c..81b61a7 100644 --- a/test/gsv_test.gleam +++ b/test/gsv_test.gleam @@ -1,14 +1,14 @@ +import gleam/int +import gleam/list +import gleam/result +import gleam/string import gleeunit import gleeunit/should +import gsv.{Unix, Windows} +import gsv/internal/ast.{ParseError, parse} import gsv/internal/token.{ CR, Comma, Doublequote, LF, Location, Textdata, scan, with_location, } -import gsv/internal/ast.{ParseError, parse} -import gsv.{Unix, Windows} -import gleam/list -import gleam/result -import gleam/int -import gleam/string pub fn main() { gleeunit.main() @@ -161,13 +161,13 @@ pub fn error_cases_test() { produce_error("Ben, 25,\n, TRUE") |> should.equal(#( - Location(1, 9), - "Expected escaped or non-escaped string after comma, found: \n", + Location(2, 1), + "Expected escaped or non-escaped string after newline, found: ,", )) produce_error("Austin, 25, FALSE\n\"Ben Peinhardt\", 25,\n, TRUE") |> should.equal(#( - Location(2, 21), - "Expected escaped or non-escaped string after comma, found: \n", + Location(3, 1), + "Expected escaped or non-escaped string after newline, found: ,", )) } @@ -180,3 +180,11 @@ pub fn totally_errors_test() { |> gsv.to_lists_or_error |> should.equal(Ok([["Ben", " 25", "", " TRUE"]])) } + +pub fn trailing_commas_fine_test() { + "Ben, 25, TRUE, Hello\nAustin, 25,\n" + |> gsv.to_lists + |> should.equal( + Ok([["Ben", " 25", " TRUE", " Hello"], ["Austin", " 25", ""]]), + ) +}