Skip to content

Commit

Permalink
Improve benchmarks for small Builders (#680)
Browse files Browse the repository at this point in the history
* Improve benchmarks for small Builders

   * Do not measure the overhead of allocating destination chunks
   * Add several more benchmarks for P.cstring and P.cstringUtf8

* More benchmark fiddling

* Update "since" markers for new NFData instances
  • Loading branch information
clyring authored and Bodigrim committed Oct 9, 2024
1 parent f0edbfe commit 8feb70e
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 19 deletions.
12 changes: 12 additions & 0 deletions Data/ByteString/Builder/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ module Data.ByteString.Builder.Internal (
) where

import Control.Arrow (second)
import Control.DeepSeq (NFData(..))

import Data.Semigroup (Semigroup(..))
import Data.List.NonEmpty (NonEmpty(..))
Expand Down Expand Up @@ -155,11 +156,22 @@ import System.IO.Unsafe (unsafeDupablePerformIO)
data BufferRange = BufferRange {-# UNPACK #-} !(Ptr Word8) -- First byte of range
{-# UNPACK #-} !(Ptr Word8) -- First byte /after/ range

-- | @since 0.12.2.0
instance NFData BufferRange where
rnf !_ = ()

-- | A 'Buffer' together with the 'BufferRange' of free bytes. The filled
-- space starts at offset 0 and ends at the first free byte.
data Buffer = Buffer {-# UNPACK #-} !(ForeignPtr Word8)
{-# UNPACK #-} !BufferRange

-- | Like the @NFData@ instance for @StrictByteString@,
-- this does not force the @ForeignPtrContents@ field
-- of the underlying @ForeignPtr@.
--
-- @since 0.12.2.0
instance NFData Buffer where
rnf !_ = ()

-- | Combined size of the filled and free space in the buffer.
{-# INLINE bufferSize #-}
Expand Down
106 changes: 87 additions & 19 deletions bench/BenchAll.hs
Original file line number Diff line number Diff line change
Expand Up @@ -14,26 +14,29 @@ import Data.Monoid
import Data.Semigroup
import Data.String
import Test.Tasty.Bench

import Prelude hiding (words)
import qualified Data.List as List
import Control.DeepSeq
import Control.Exception

import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Char8 as L8

import Data.ByteString.Builder
import Data.ByteString.Builder.Extra (byteStringCopy,
byteStringInsert,
intHost)
import Data.ByteString.Builder.Internal (ensureFree)
import qualified Data.ByteString.Builder.Extra as Extra
import qualified Data.ByteString.Builder.Internal as BI
import Data.ByteString.Builder.Prim (BoundedPrim, FixedPrim,
(>$<))
import qualified Data.ByteString.Builder.Prim as P
import qualified Data.ByteString.Builder.Prim.Internal as PI

import Foreign
import Foreign.ForeignPtr
import qualified GHC.Exts as Exts
import GHC.Ptr (Ptr(..))

import System.Random

Expand Down Expand Up @@ -121,15 +124,45 @@ loremIpsum = S8.unlines $ map S8.pack
-- benchmark wrappers
---------------------

{-# INLINE benchB #-}
benchB :: String -> a -> (a -> Builder) -> Benchmark
benchB name x b =
bench (name ++" (" ++ show nRepl ++ ")") $
whnf (L.length . toLazyByteString . b) x
{-# INLINE benchB #-}
benchB name x b = benchB' (name ++" (" ++ show nRepl ++ ")") x b

{-# INLINE benchB' #-}
benchB' :: String -> a -> (a -> Builder) -> Benchmark
benchB' name x b = bench name $ whnf (L.length . toLazyByteString . b) x
{-# INLINE benchB' #-}
benchB' name x mkB =
env (BI.newBuffer BI.defaultChunkSize) $ \buf ->
bench name $ whnfAppIO (runBuildStepOn buf . BI.runBuilder . mkB) x

benchB'_ :: String -> Builder -> Benchmark
{-# INLINE benchB'_ #-}
benchB'_ name b =
env (BI.newBuffer BI.defaultChunkSize) $ \buf ->
bench name $ whnfIO (runBuildStepOn buf (BI.runBuilder b))

-- | @runBuilderOn@ runs a @BuildStep@'s actions all on the same @Buffer@.
-- It is used to avoid measuring driver allocation overhead.
runBuildStepOn :: BI.Buffer -> BI.BuildStep () -> IO ()
{-# NOINLINE runBuildStepOn #-}
runBuildStepOn (BI.Buffer fp br@(BI.BufferRange op ope)) b = go b
where
!len = ope `minusPtr` op

go :: BI.BuildStep () -> IO ()
go bs = BI.fillWithBuildStep bs doneH fullH insertChunkH br

doneH :: Ptr Word8 -> () -> IO ()
doneH _ _ = touchForeignPtr fp
-- 'touchForeignPtr' is adequate because the given BuildStep
-- will always terminate. (We won't measure an infinite loop!)

fullH :: Ptr Word8 -> Int -> BI.BuildStep () -> IO ()
fullH _ minLen nextStep
| len < minLen = throwIO (ErrorCall "runBuilderOn: action expects too long of a BufferRange")
| otherwise = go nextStep

insertChunkH :: Ptr Word8 -> S.ByteString -> BI.BuildStep () -> IO ()
insertChunkH _ _ nextStep = go nextStep

{-# INLINE benchBInts #-}
benchBInts :: String -> ([Int] -> Builder) -> Benchmark
Expand Down Expand Up @@ -247,18 +280,53 @@ largeTraversalInput = S.concat (replicate 10 byteStringData)
smallTraversalInput :: S.ByteString
smallTraversalInput = S8.pack "The quick brown fox"

asciiBuf, utf8Buf, halfNullBuf, allNullBuf :: Ptr Word8
asciiBuf = Ptr "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"#
utf8Buf = Ptr "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\xc0\x80xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"#
halfNullBuf = Ptr "\xc0\x80xx\xc0\x80x\xc0\x80\xc0\x80x\xc0\x80\xc0\x80xx\xc0\x80\xc0\x80xxx\xc0\x80x\xc0\x80x\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80xxx\xc0\x80x\xc0\x80xx\xc0\x80\xc0\x80xxxxxxxxxx\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80x\xc0\x80\xc0\x80x\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80xxx"#
allNullBuf = Ptr "\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80"#

asciiLit, utf8Lit :: Ptr Word8 -> Builder
asciiLit (Ptr p#) = P.cstring p#
utf8Lit (Ptr p#) = P.cstringUtf8 p#

asciiStr, utf8Str :: String
asciiStr = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
utf8Str = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"

main :: IO ()
main = do
defaultMain
[ bgroup "Data.ByteString.Builder"
[ bgroup "Small payload"
[ benchB' "mempty" () (const mempty)
, benchB' "ensureFree 8" () (const (ensureFree 8))
, benchB' "intHost 1" 1 intHost
, benchB' "UTF-8 String (naive)" "hello world\0" fromString
, benchB' "UTF-8 String" () $ \() -> P.cstringUtf8 "hello world\0"#
, benchB' "String (naive)" "hello world!" fromString
, benchB' "String" () $ \() -> P.cstring "hello world!"#
[ benchB'_ "mempty" mempty
, bench "toLazyByteString mempty" $ nf toLazyByteString mempty
, benchB'_ "empty (10000 times)" $
stimes (10000 :: Int) (Exts.lazy BI.empty)
, benchB'_ "ensureFree 8" (BI.ensureFree 8)
, benchB' "intHost 1" 1 Extra.intHost
, benchB' "UTF-8 String (12B, naive)" "hello world\0" fromString
, benchB'_ "UTF-8 String (12B)" $ utf8Lit (Ptr "hello world\xc0\x80"#)
, benchB' "UTF-8 String (64B, naive)" utf8Str fromString
, benchB'_ "UTF-8 String (64B, one null)" $ utf8Lit utf8Buf
, benchB'
"UTF-8 String (64B, one null, no shared work)"
utf8Buf
utf8Lit
, benchB'_ "UTF-8 String (64B, half nulls)" $ utf8Lit halfNullBuf
, benchB'_ "UTF-8 String (64B, all nulls)" $ utf8Lit allNullBuf
, benchB'
"UTF-8 String (64B, all nulls, no shared work)"
allNullBuf
utf8Lit
, benchB'
"UTF-8 String (1 byte, no shared work)"
(Ptr "\xc0\x80"#)
utf8Lit
, benchB' "ASCII String (12B, naive)" "hello world!" fromString
, benchB'_ "ASCII String (12B)" $ asciiLit (Ptr "hello wurld!"#)
, benchB' "ASCII String (64B, naive)" asciiStr fromString
, benchB'_ "ASCII String (64B)" $ asciiLit asciiBuf
]

, bgroup "Encoding wrappers"
Expand All @@ -275,11 +343,11 @@ main = do
]
, bgroup "ByteString insertion" $
[ benchB "foldMap byteStringInsert" byteStringChunksData
(foldMap byteStringInsert)
(foldMap Extra.byteStringInsert)
, benchB "foldMap byteString" byteStringChunksData
(foldMap byteString)
, benchB "foldMap byteStringCopy" byteStringChunksData
(foldMap byteStringCopy)
(foldMap Extra.byteStringCopy)
]

, bgroup "Non-bounded encodings"
Expand Down

0 comments on commit 8feb70e

Please sign in to comment.