Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Making ByteString.hGetLine behave like System.IO.hGetLine #327

Draft
wants to merge 6 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 27 additions & 10 deletions Data/ByteString.hs
Original file line number Diff line number Diff line change
Expand Up @@ -1816,31 +1816,48 @@ hGetLine h =
else ioe_EOF
else haveBuf h_ buf' len xss

haveBuf h_@Handle__{haByteBuffer}
haveBuf h_@Handle__{haByteBuffer, haInputNL}
buf@Buffer{ bufRaw=raw, bufR=w, bufL=r }
len xss =
do
off <- findEOL r w raw
(off, sizeNewline) <- findEOL haInputNL r w raw
let new_len = len + off - r
xs <- mkPS raw r off

-- if eol == True, then off is the offset of the '\n'
-- otherwise off == w and the buffer is now empty.
if off /= w
then do if w == off + 1
then writeIORef haByteBuffer buf{ bufL=0, bufR=0 }
else writeIORef haByteBuffer buf{ bufL = off + 1 }
mkBigPS new_len (xs:xss)
then do
-- If off + sizeNewline == w then the remaining buffer is empty
if (off + sizeNewline) == w
then writeIORef haByteBuffer buf{ bufL=0, bufR=0 }
else writeIORef haByteBuffer buf{ bufL = off + sizeNewline }
mkBigPS new_len (xs:xss)
else fill h_ buf{ bufL=0, bufR=0 } new_len (xs:xss)

-- find the end-of-line character, if there is one
findEOL r w raw
| r == w = return w
findEOL haInputNL r w raw
| r == w = return (w, 0)
| otherwise = do
c <- readWord8Buf raw r
if c == fromIntegral (ord '\n')
then return r -- NB. not r+1: don't include the '\n'
else findEOL (r+1) w raw
then do
-- NB. not r+1: don't include the '\n'
-- Also, it is important that it ends the line in both modes
-- To match System.IO.hGetLine's behavior
return (r, 1)
else if haInputNL == CRLF && c == fromIntegral (ord '\r') && r+1 < w
then do
c' <- readWord8Buf raw (r+1)
if c' == fromIntegral (ord '\n')
then return (r, 2) -- NB. not r+1 or r+2: don't include the '\r\n'
else do
-- We cannot jump 2 characters ahead
-- because if we encountered '\r\r\n'
-- We would miss the pattern starting on the second '\r'
findEOL haInputNL (r+1) w raw
else findEOL haInputNL (r+1) w raw


mkPS :: RawBuffer Word8 -> Int -> Int -> IO ByteString
mkPS buf start end =
Expand Down
24 changes: 23 additions & 1 deletion tests/Properties.hs
Original file line number Diff line number Diff line change
Expand Up @@ -1614,6 +1614,27 @@ prop_read_write_file_D x = unsafePerformIO $ do
(const $ do y <- D.readFile f
return (x==y))

prop_hgetline_like_s8_hgetline (LinedASCII filetext) (crlfIn, crlfOut) = idempotentIOProperty $ do
(fn, h) <- openTempFile "." "hgetline-prop-test.tmp"
hSetNewlineMode h noNewlineTranslation -- This is to ensure strings like \n are covered on Windows.
hPutStr h filetext
hClose h
bsLines <- readFileByLines C.hGetLine fn
sLines <- readFileByLines System.IO.hGetLine fn
removeFile fn
return (map C.unpack bsLines === sLines)
where
newlineMode = NewlineMode (if crlfIn then CRLF else LF) (if crlfOut then CRLF else LF)
readFileByLines getLine fn = withFile fn ReadMode $ \h -> do
hSetNewlineMode h newlineMode
readByLines getLine h
readByLines getLine h = do
isEnd <- hIsEOF h
if isEnd
then return []
else (:) <$> getLine h <*> readByLines getLine h


------------------------------------------------------------------------

prop_append_file_P x y = unsafePerformIO $ do
Expand Down Expand Up @@ -1791,7 +1812,8 @@ io_tests =
, testProperty "appendFile " prop_append_file_D

, testProperty "packAddress " prop_packAddress


, testProperty "pack.hGetLine=hGetLine" prop_hgetline_like_s8_hgetline
]

misc_tests =
Expand Down
16 changes: 16 additions & 0 deletions tests/QuickCheckUtils.hs
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,22 @@ instance Arbitrary String8 where
toChar :: Word8 -> Char
toChar = toEnum . fromIntegral

-- | Strings, but each char is ASCII and there are a lot of newlines generated
--
newtype LinedASCII = LinedASCII String
deriving (Eq, Ord, Show)

instance Arbitrary LinedASCII where
arbitrary = fmap LinedASCII . listOf . oneof $
[ arbitraryASCIIChar
, elements ['\n', '\r']
]

shrink (LinedASCII s) = fmap LinedASCII (shrink s)

instance CoArbitrary LinedASCII where
coarbitrary (LinedASCII s) = coarbitrary s

------------------------------------------------------------------------
--
-- We're doing two forms of testing here. Firstly, model based testing.
Expand Down