Skip to content

Commit

Permalink
imp: Support tsv and ssv prefixes (simonmichael#2164)
Browse files Browse the repository at this point in the history
  • Loading branch information
reesmichael1 authored and adept committed Mar 8, 2024
1 parent 91f4da1 commit 3e33c94
Show file tree
Hide file tree
Showing 10 changed files with 118 additions and 34 deletions.
31 changes: 30 additions & 1 deletion hledger-lib/Hledger/Data/Types.hs
Original file line number Diff line number Diff line change
Expand Up @@ -622,9 +622,38 @@ data Journal = Journal {
-- The data is partial, and list fields are in reverse order.
type ParsedJournal = Journal

-- | One of the standard *-separated value file types known by hledger.
data SepFormat
  = Csv  -- ^ comma-separated values
  | Tsv  -- ^ tab-separated values
  | Ssv  -- ^ semicolon-separated values
  deriving Eq

-- | The id of a data format understood by hledger, eg @journal@ or @csv@.
-- The --output-format option selects one of these for output.
--
-- This is a closed enumeration rather than a free-form string, so only
-- the formats listed here can be named.
data StorageFormat
  = Rules          -- ^ the @rules@ format
  | Journal'       -- ^ the @journal@ format (primed, presumably to avoid
                   --   clashing with the existing @Journal@ constructor)
  | Ledger'        -- ^ the @ledger@ format
  | Timeclock      -- ^ the @timeclock@ format
  | Timedot        -- ^ the @timedot@ format
  | Sep SepFormat  -- ^ one of the character-separated formats (csv, tsv, ssv)
  deriving Eq

-- | Render a separated-values format as its lowercase name:
-- @csv@, @tsv@ or @ssv@.
instance Show SepFormat where
  show sep = case sep of
    Csv -> "csv"
    Tsv -> "tsv"
    Ssv -> "ssv"

-- | Render a storage format as its lowercase name, eg @journal@ or @csv@.
instance Show StorageFormat where
  show fmt = case fmt of
    Rules     -> "rules"
    Journal'  -> "journal"
    Ledger'   -> "ledger"
    Timeclock -> "timeclock"
    Timedot   -> "timedot"
    -- The separated-values formats already know their own names.
    Sep sep   -> show sep

-- | Extra information found in a payee directive.
data PayeeDeclarationInfo = PayeeDeclarationInfo {
Expand Down
2 changes: 1 addition & 1 deletion hledger-lib/Hledger/Read/Common.hs
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ data Reader m = Reader {
,rParser :: MonadIO m => ErroringJournalParser m ParsedJournal
}

-- | Show a reader as its format name followed by " reader", eg "csv reader".
-- 'rFormat' is a 'StorageFormat', not a String, so it must be shown first.
instance Show (Reader m) where
  show r = show (rFormat r) ++ " reader"

-- | Parse an InputOpts from a RawOpts and a provided date.
-- This will fail with a usage error if the forecast period expression cannot be parsed.
Expand Down
16 changes: 8 additions & 8 deletions hledger-lib/Hledger/Read/CsvReader.hs
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@ import Hledger.Read.RulesReader (readJournalFromCsv)

--- ** reader

-- | Build the reader for one of the character-separated formats.
-- The given 'SepFormat' determines the reader's format id, its single
-- file extension (the format's name, eg "tsv"), and the separator
-- format passed on to 'parse'.
reader :: MonadIO m => SepFormat -> Reader m
reader sep = Reader
  {rFormat     = Sep sep
  ,rExtensions = [show sep]
  ,rReadFn     = parse sep
  ,rParser     = error' "sorry, CSV files can't be included yet"  -- PARTIAL:
  }

Expand All @@ -54,10 +54,10 @@ reader = Reader
-- This file path is normally the CSV(/SSV/TSV) data file, and a corresponding rules file is inferred.
-- But it can also be the rules file, in which case the corresponding data file is inferred.
-- This does not check balance assertions.
parse :: InputOpts -> FilePath -> Text -> ExceptT String IO Journal
parse iopts f t = do
parse :: SepFormat -> InputOpts -> FilePath -> Text -> ExceptT String IO Journal
parse sep iopts f t = do
let mrulesfile = mrules_file_ iopts
readJournalFromCsv (Right <$> mrulesfile) f t
readJournalFromCsv (Right <$> mrulesfile) f t (Just sep)
-- apply any command line account aliases. Can fail with a bad replacement pattern.
>>= liftEither . journalApplyAliases (aliasesFromOpts iopts)
-- journalFinalise assumes the journal's items are
Expand Down
31 changes: 23 additions & 8 deletions hledger-lib/Hledger/Read/JournalReader.hs
Original file line number Diff line number Diff line change
Expand Up @@ -139,21 +139,25 @@ readers' = [
,TimeclockReader.reader
,TimedotReader.reader
,RulesReader.reader
,CsvReader.reader
,CsvReader.reader Csv
,CsvReader.reader Tsv
,CsvReader.reader Ssv
-- ,LedgerReader.reader
]

-- | The names of all the readers in @readers'@, eg "journal" or "csv".
-- Each name is the shown form of that reader's 'rFormat'.
readerNames :: [String]
readerNames = map (show . rFormat) (readers'::[Reader IO])

-- | @findReader mformat mpath@
--
-- Find the reader named by @mformat@, if provided.
-- ("csv", "tsv" and "ssv" are separate format names, each selecting
-- the separated-values reader for that format.)
-- Or, if a file path is provided, find the first reader that handles
-- its file extension, if any.
findReader :: MonadIO m => Maybe StorageFormat -> Maybe FilePath -> Maybe (Reader m)
findReader Nothing Nothing = Nothing
findReader (Just fmt) _ = headMay [r | r <- readers', rFormat r == fmt]
findReader (Just fmt) _ = headMay [r | r <- readers', let rname = rFormat r, rname == fmt]
findReader Nothing (Just path) =
case prefix of
Just fmt -> headMay [r | r <- readers', rFormat r == fmt]
Expand All @@ -168,16 +172,27 @@ type PrefixedFilePath = FilePath

-- | If a filepath is prefixed by one of the reader names and a colon,
-- split that off. Eg "csv:-" -> (Just (Sep Csv), "-").
-- These reader prefixes can be used to force a specific reader,
-- overriding the file extension.
-- A path with no recognised prefix is returned unchanged, with Nothing.
splitReaderPrefix :: PrefixedFilePath -> (Maybe StorageFormat, FilePath)
splitReaderPrefix f =
  headDef (Nothing, f)
    [ (Just fmt, drop (length prefix) f)
    | fmt <- map rFormat (readers' :: [Reader IO])
      -- Derive each prefix from the reader's own format, so every reader
      -- in readers' (including "rules") gets a working prefix without a
      -- hand-maintained table mapping names back to formats.
    , let prefix = show fmt ++ ":"
    , prefix `isPrefixOf` f
    ]

--- ** reader

reader :: MonadIO m => Reader m
reader = Reader
{rFormat = "journal"
{rFormat = Journal'
,rExtensions = ["journal", "j", "hledger", "ledger"]
,rReadFn = parse
,rParser = journalp -- no need to add command line aliases like journalp'
Expand Down Expand Up @@ -282,7 +297,7 @@ includedirectivep = do
paths <- getFilePaths parentoff parentpos glb
let prefixedpaths = case mprefix of
Nothing -> paths
Just fmt -> map ((fmt++":")++) paths
Just fmt -> map ((show fmt++":")++) paths
forM_ prefixedpaths $ parseChild parentpos
void newline

Expand Down
31 changes: 18 additions & 13 deletions hledger-lib/Hledger/Read/RulesReader.hs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ _READER__________________________________________ = undefined -- VSCode outline

reader :: MonadIO m => Reader m
reader = Reader
{rFormat = "rules"
{rFormat = Rules
,rExtensions = ["rules"]
,rReadFn = parse
,rParser = error' "sorry, rules files can't be included" -- PARTIAL:
Expand Down Expand Up @@ -135,7 +135,7 @@ parse iopts f _ = do
then return nulljournal -- data file inferred from rules file name was not found
else do
t <- liftIO $ readFileOrStdinPortably dat
readJournalFromCsv (Just $ Left rules) dat t
readJournalFromCsv (Just $ Left rules) dat t Nothing
-- apply any command line account aliases. Can fail with a bad replacement pattern.
>>= liftEither . journalApplyAliases (aliasesFromOpts iopts)
-- journalFinalise assumes the journal's items are
Expand Down Expand Up @@ -855,9 +855,9 @@ _CSV_READING__________________________________________ = undefined
--
-- 4. Return the transactions as a Journal.
--
readJournalFromCsv :: Maybe (Either CsvRules FilePath) -> FilePath -> Text -> ExceptT String IO Journal
readJournalFromCsv Nothing "-" _ = throwError "please use --rules-file when reading CSV from stdin"
readJournalFromCsv merulesfile csvfile csvtext = do
readJournalFromCsv :: Maybe (Either CsvRules FilePath) -> FilePath -> Text -> Maybe SepFormat -> ExceptT String IO Journal
readJournalFromCsv Nothing "-" _ _ = throwError "please use --rules-file when reading CSV from stdin"
readJournalFromCsv merulesfile csvfile csvtext sep = do
-- for now, correctness is the priority here, efficiency not so much

rules <- case merulesfile of
Expand All @@ -879,14 +879,19 @@ readJournalFromCsv merulesfile csvfile csvtext = do
-- convert back to text and parse as csv records
let
csvtext1 = T.unlines csvlines2
separator =
case getDirective "separator" rules >>= parseSeparator of
Just c -> c
_ | ext == "ssv" -> ';'
_ | ext == "tsv" -> '\t'
_ -> ','
where
ext = map toLower $ drop 1 $ takeExtension csvfile
-- The separator in the rules file takes precedence over the extension or prefix
separator = case getDirective "separator" rules >>= parseSeparator of
Just c -> c
_ | ext == "ssv" -> ';'
_ | ext == "tsv" -> '\t'
_ ->
case sep of
Just Csv -> ','
Just Ssv -> ';'
Just Tsv -> '\t'
Nothing -> ','
where
ext = map toLower $ drop 1 $ takeExtension csvfile
-- parsec seemed to fail if you pass it "-" here -- TODO: try again with megaparsec
parsecfilename = if csvfile == "-" then "(stdin)" else csvfile
dbg6IO "using separator" separator
Expand Down
2 changes: 1 addition & 1 deletion hledger-lib/Hledger/Read/TimeclockReader.hs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ import Data.Text as T (strip)

reader :: MonadIO m => Reader m
reader = Reader
{rFormat = "timeclock"
{rFormat = Timeclock
,rExtensions = ["timeclock"]
,rReadFn = parse
,rParser = timeclockfilep
Expand Down
2 changes: 1 addition & 1 deletion hledger-lib/Hledger/Read/TimedotReader.hs
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ import Data.List (group)

reader :: MonadIO m => Reader m
reader = Reader
{rFormat = "timedot"
{rFormat = Timedot
,rExtensions = ["timedot"]
,rReadFn = parse
,rParser = timedotp
Expand Down
2 changes: 1 addition & 1 deletion hledger/Hledger/Cli/CliOptions.hs
Original file line number Diff line number Diff line change
Expand Up @@ -625,7 +625,7 @@ expandPathPreservingPrefix d prefixedf = do
let (p,f) = splitReaderPrefix prefixedf
f' <- expandPath d f
return $ case p of
Just p' -> p' ++ ":" ++ f'
Just p' -> (show p') ++ ":" ++ f'
Nothing -> f'

-- | Get the expanded, absolute output file path specified by an
Expand Down
13 changes: 13 additions & 0 deletions hledger/test/csv.test
Original file line number Diff line number Diff line change
Expand Up @@ -1131,6 +1131,19 @@ $ ./csvtest.sh

>=

# ** 59. specify ssv prefix and no extension
<
12/11/2019;Foo;123;10.23
RULES
fields date, description, , amount
date-format %d/%m/%Y
$ ./ssvtest.sh
2019-11-12 Foo
expenses:unknown 10.23
income:unknown -10.23

>=

# ** .
#<
#$ ./csvtest.sh
Expand Down
22 changes: 22 additions & 0 deletions hledger/test/ssvtest.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/sh
#
# sh version, ported from bash so freebsd users can run these tests.
# This script expects stdin formatted like this:
# <multi-line ssv file (at least one line required, even if blank)>
# RULES
# <multi-line rules>
#
# Here, unlike in csvtest.sh, the ssv extension is intentionally NOT set.
# This allows us to verify that the prefix detection is working.

# Save all of stdin, then split it at the first line starting with RULES:
# the lines after it go to t.$$.rules (presumably the rules file name
# hledger infers for the data file t.$$), the lines before it go to t.$$.
cat > t.$$.input
sed '1,/^RULES/d' t.$$.input > t.$$.rules
sed '/^RULES/,$d' t.$$.input > t.$$

# Remove the temporary files and the fifo when the script exits.
trap 'rm -f t.$$.input t.$$ t.$$.rules t.$$.stderr' EXIT

# Remove variable file name from error messages:
# a background sed reads the fifo, rewrites "t.<pid>" to "input",
# and passes the result on to the real stderr.
mkfifo t.$$.stderr
sed -Ee "s/t\.$$/input/" t.$$.stderr >&2 &

# Read the extensionless data file, selecting the reader with the ssv: prefix.
# Extra script arguments are passed through to hledger's print command.
hledger -f ssv:t.$$ print "$@" 2> t.$$.stderr

0 comments on commit 3e33c94

Please sign in to comment.