Skip to content

Commit

Permalink
v2
Browse files Browse the repository at this point in the history
  • Loading branch information
bcpeinhardt committed Sep 24, 2024
1 parent 0dc40ea commit 117880f
Show file tree
Hide file tree
Showing 6 changed files with 158 additions and 72 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

## Unreleased

## v2.0.0 - 24 September 2024
- Now there are four public functions, `to_lists`, `to_dicts`, `from_lists` and `from_dicts`.

## v1.4.0 - 29 March 2024
- Fix bug where trailing comma was causing error

Expand Down
15 changes: 10 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,8 @@
[![Package Version](https://img.shields.io/hexpm/v/gsv)](https://hex.pm/packages/gsv)
[![Hex Docs](https://img.shields.io/badge/hex-docs-ffaff3)](https://hexdocs.pm/gsv/)

This is a simple csv parser and writer for gleam. It will get more performant in the future,
but if you're looking for high performance now, I'd recommend doing ffi to an existing parser
in your target runtime.

We are using the grammar from [rfc 4180](https://datatracker.ietf.org/doc/html/rfc4180#section-2)
This is a simple CSV parser and writer for Gleam. It will become more performant and battle-tested in the future,
but if you need that now, I'd recommend using FFI to call an existing parser in your target runtime.

#### Example

Expand All @@ -23,6 +20,14 @@ pub fn main() {
// Write a List(List(String)) to a CSV string
let csv_str = records
|> gsv.from_lists(separator: ",", line_ending: Windows)
// Parse a CSV string with headers to a List(Dict(String, String))
let assert Ok(records) = gsv.to_dicts(csv_str)
// => [ dict.from_list([ #("Hello", "Goodbye"), #("World", "Mars") ]) ]
// Write a List(Dict(String, String)) to a CSV string, treating the keys as the header row
let csv_str = records
|> gsv.from_dicts(separator: ",", line_ending: Windows)
}
```

Expand Down
2 changes: 1 addition & 1 deletion gleam.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name = "gsv"
version = "1.4.0"
version = "2.0.0"
gleam = ">= 0.32.0"
description = "A simple csv parser and generator written in gleam "

Expand Down
114 changes: 76 additions & 38 deletions src/gsv.gleam
Original file line number Diff line number Diff line change
@@ -1,51 +1,18 @@
import gleam/dict.{type Dict}
import gleam/int
import gleam/list
import gleam/pair
import gleam/result
import gleam/string
import gsv/internal/ast.{ParseError}
import gsv/internal/token.{Location}

/// Parses a csv string to a list of lists of strings.
/// Automatically handles Windows and Unix line endings.
pub fn to_lists(input: String) -> Result(List(List(String)), Nil) {
input
|> token.scan
|> token.with_location
|> ast.parse
|> result.nil_error
}

/// Parses a csv string to a list of lists of strings.
/// Automatically handles Windows and Unix line endings.
/// Panics with an error msg if the string is not valid csv.
pub fn to_lists_or_panic(input: String) -> List(List(String)) {
let res =
input
|> token.scan
|> token.with_location
|> ast.parse

case res {
Ok(lol) -> lol
Error(ParseError(Location(line, column), msg)) -> {
panic as {
"["
<> "line "
<> int.to_string(line)
<> " column "
<> int.to_string(column)
<> "] of csv: "
<> msg
}
[[]]
}
}
}

/// Parses a csv string to a list of lists of strings.
/// Automatically handles Windows and Unix line endings.
/// Returns a string error msg if the string is not valid csv.
pub fn to_lists_or_error(input: String) -> Result(List(List(String)), String) {
/// Unquoted strings are trimmed, while quoted strings have leading and trailing
/// whitespace preserved.
pub fn to_lists(input: String) -> Result(List(List(String)), String) {
input
|> token.scan
|> token.with_location
Expand All @@ -62,6 +29,43 @@ pub fn to_lists_or_error(input: String) -> Result(List(List(String)), String) {
})
}

/// Parses a csv string to a list of dicts.
/// The first row is used as the header row; every following row becomes one
/// dict that maps header name to the field found in that column.
/// Automatically handles Windows and Unix line endings.
/// Returns a string error msg if the string is not valid csv.
/// Unquoted strings are trimmed (by `to_lists`), while quoted values have
/// leading and trailing whitespace preserved.
/// Header names are always trimmed before being used as dict keys.
/// Whitespace only or empty strings are not valid headers and their columns
/// will be ignored.
/// Whitespace only or empty strings are not considered "present" in the csv row and
/// are not inserted into the row dict.
/// Fields in columns beyond the last header are ignored as well.
pub fn to_dicts(input: String) -> Result(List(Dict(String, String)), String) {
  use lol <- result.map(to_lists(input))
  case lol {
    [] -> []
    [headers, ..rows] -> {
      // Column index -> trimmed header name. Blank headers get no entry, so
      // their columns are skipped by the lookup below. Trimming happens once
      // here rather than once per cell.
      let headers =
        list.index_fold(headers, dict.new(), fn(acc, header, i) {
          case string.trim(header) {
            "" -> acc
            name -> dict.insert(acc, i, name)
          }
        })

      list.map(rows, fn(row) {
        use acc, x, i <- list.index_fold(row, dict.new())
        case dict.get(headers, i), string.trim(x) {
          // No usable header for this column.
          Error(Nil), _ -> acc
          // Blank field: treated as absent from the row.
          Ok(_), "" -> acc
          // Trimming is only used for the blank check above; the field itself
          // is stored untouched so quoted whitespace survives.
          Ok(name), _ -> dict.insert(acc, name, x)
        }
      })
    }
  }
}

/// Option for using "\n = LF = Unix" or "\r\n = CRLF = Windows"
/// line endings. Use with the `from_lists` function when
/// writing to a csv string.
Expand Down Expand Up @@ -107,3 +111,37 @@ pub fn from_lists(
|> list.map(fn(row) { string.join(row, separator) })
|> string.join(le_to_string(line_ending))
}

/// Takes a list of dicts and writes it to a csv string, treating the dict
/// keys as the header row.
/// The header row is the alphabetically sorted union of the keys of every
/// dict in the input, so the dicts do not all need to have the same keys.
/// A key that is missing from a dict is written as an empty field, which
/// keeps every value in the column of its own header.
/// An empty input list produces an empty string.
/// The header and rows are written with `from_lists`, which
/// will automatically escape strings that contain double quotes or
/// line endings with double quotes (in csv, a double quote gets escaped by
/// doubling it).
/// The string `he"llo\n` becomes `"he""llo\n"`
pub fn from_dicts(
  input: List(Dict(String, String)),
  separator separator: String,
  line_ending line_ending: LineEnding,
) -> String {
  case input {
    [] -> ""
    _ -> {
      // Merging every row into one dict leaves exactly one entry per distinct
      // key, which gives the union of all column names.
      let headers =
        input
        |> list.fold(dict.new(), dict.merge)
        |> dict.to_list
        |> list.map(pair.first)
        |> list.sort(string.compare)

      // Project each row onto the shared header order so that columns line up
      // even when a row lacks some keys.
      let rows =
        list.map(input, fn(row) {
          list.map(headers, fn(header) {
            row
            |> dict.get(header)
            |> result.unwrap("")
          })
        })

      from_lists([headers, ..rows], separator, line_ending)
    }
  }
}
31 changes: 20 additions & 11 deletions src/gsv/internal/ast.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import gleam/list
import gleam/result
import gleam/string
import gsv/internal/token.{
type CsvToken, type Location, CR, Comma, Doublequote, LF, Location, Textdata,
}
Expand Down Expand Up @@ -53,7 +54,7 @@ fn parse_p(

// File should begin with either Escaped or Nonescaped string
[#(Textdata(str), _), ..remaining_tokens], Beginning, [] ->
parse_p(remaining_tokens, JustParsedField, [[str]])
parse_p(remaining_tokens, JustParsedField, [[string.trim(str)]])

[#(Doublequote, _), ..remaining_tokens], Beginning, [] ->
parse_p(remaining_tokens, InsideEscapedString, [[""]])
Expand Down Expand Up @@ -94,39 +95,44 @@ fn parse_p(
// (indicating an empty string)
[#(Textdata(str), _), ..remaining_tokens],
JustParsedComma,
[curr_line, ..previously_parsed_lines] ->
[curr_line, ..previously_parsed_lines]
->
parse_p(remaining_tokens, JustParsedField, [
[str, ..curr_line],
[string.trim(str), ..curr_line],
..previously_parsed_lines
])

[#(Doublequote, _), ..remaining_tokens],
JustParsedComma,
[curr_line, ..previously_parsed_lines] ->
[curr_line, ..previously_parsed_lines]
->
parse_p(remaining_tokens, InsideEscapedString, [
["", ..curr_line],
..previously_parsed_lines
])

[#(Comma, _), ..remaining_tokens],
JustParsedComma,
[curr_line, ..previously_parsed_lines] ->
[curr_line, ..previously_parsed_lines]
->
parse_p(remaining_tokens, JustParsedComma, [
["", ..curr_line],
..previously_parsed_lines
])

[#(CR, _), ..remaining_tokens],
JustParsedComma,
[curr_line, ..previously_parsed_lines] ->
[curr_line, ..previously_parsed_lines]
->
parse_p(remaining_tokens, JustParsedCR, [
["", ..curr_line],
..previously_parsed_lines
])

[#(LF, _), ..remaining_tokens],
JustParsedComma,
[curr_line, ..previously_parsed_lines] ->
[curr_line, ..previously_parsed_lines]
->
parse_p(remaining_tokens, JustParsedNewline, [
["", ..curr_line],
..previously_parsed_lines
Expand All @@ -141,11 +147,12 @@ fn parse_p(

// If we just parsed a new line, we're expecting an escaped or non-escaped string
[#(Textdata(str), _), ..remaining_tokens], JustParsedNewline, llf ->
parse_p(remaining_tokens, JustParsedField, [[str], ..llf])
parse_p(remaining_tokens, JustParsedField, [[string.trim(str)], ..llf])

[#(Doublequote, _), ..remaining_tokens],
JustParsedNewline,
[curr_line, ..previously_parsed_lines] ->
[curr_line, ..previously_parsed_lines]
->
parse_p(remaining_tokens, InsideEscapedString, [
["", ..curr_line],
..previously_parsed_lines
Expand All @@ -162,7 +169,8 @@ fn parse_p(
// but a double double quote "" escapes the double quote and we keep parsing
[#(Doublequote, _), #(Doublequote, _), ..remaining_tokens],
InsideEscapedString,
[[str, ..rest_curr_line], ..previously_parsed_lines] ->
[[str, ..rest_curr_line], ..previously_parsed_lines]
->
parse_p(remaining_tokens, InsideEscapedString, [
[str <> "\"", ..rest_curr_line],
..previously_parsed_lines
Expand All @@ -173,7 +181,8 @@ fn parse_p(

[#(other_token, _), ..remaining_tokens],
InsideEscapedString,
[[str, ..rest_curr_line], ..previously_parsed_lines] ->
[[str, ..rest_curr_line], ..previously_parsed_lines]
->
parse_p(remaining_tokens, InsideEscapedString, [
[str <> token.to_lexeme(other_token), ..rest_curr_line],
..previously_parsed_lines
Expand Down
Loading

0 comments on commit 117880f

Please sign in to comment.