From 8a0e096bfb86ac9bb2a6df29d102e6afd1380e0c Mon Sep 17 00:00:00 2001 From: Matthew Craven Date: Thu, 15 Feb 2024 14:46:33 -0500 Subject: [PATCH] Remove remaining uses of FFI under -fpure-haskell (#660) All of these were standard C functions that GHC's JS backend actually somewhat supports; their shims can be found in the compiler source at "rts/js/mem.js". But it seems simpler to just get rid of all FFI uses with -fpure-haskell rather than try to keep track of which functions GHC supports. The pure Haskell implementation of memcmp runs about 6-7x as fast as the simple one-byte-at-a-time implementation for long equal buffers, which makes it... about the same speed as the pre-existing shim, even though the latter is also a one-byte- at-a-time implementation! Apparently GHC's JS backend is not yet able to produce efficient code for tight loops like these yet; the biggest problem is that it does not perform any loopification so each iteration must go through a generic-call indirection. Unfortunately that means that this patch probably makes 'strlen' and 'memchr' much slower with the JS backend. (cherry picked from commit 305604c4cfa6111c1d256053fb54c745afc95e5e) --- .gitignore | 1 + Data/ByteString/Builder/ASCII.hs | 3 - Data/ByteString/Builder/Prim/ASCII.hs | 5 +- Data/ByteString/Builder/Prim/Binary.hs | 2 +- Data/ByteString/Builder/RealFloat/Internal.hs | 2 +- Data/ByteString/Internal/Pure.hs | 66 +++++++++++++++++-- Data/ByteString/Internal/Type.hs | 61 +++++++++++------ Data/ByteString/Short/Internal.hs | 1 - .../{UnalignedWrite.hs => UnalignedAccess.hs} | 21 +++++- bytestring.cabal | 2 +- cbits/fpstring.c | 6 ++ 11 files changed, 134 insertions(+), 36 deletions(-) rename Data/ByteString/Utils/{UnalignedWrite.hs => UnalignedAccess.hs} (76%) diff --git a/.gitignore b/.gitignore index 56259fbd1..f5f4fd37e 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ cabal.sandbox.config dist-newstyle/ cabal.project.local* .nvimrc +.ghc.environment* diff --git a/Data/ByteString/Builder/ASCII.hs b/Data/ByteString/Builder/ASCII.hs index c00912996..437339e09 100644 --- a/Data/ByteString/Builder/ASCII.hs +++ b/Data/ByteString/Builder/ASCII.hs @@ -1,6 +1,3 @@ -{-# LANGUAGE ForeignFunctionInterface #-} -{-# LANGUAGE Trustworthy #-} - {-# OPTIONS_HADDOCK not-home #-} -- | Copyright : (c) 2010 - 2011 Simon Meier diff --git a/Data/ByteString/Builder/Prim/ASCII.hs b/Data/ByteString/Builder/Prim/ASCII.hs index 359c67220..8f764200f 100644 --- a/Data/ByteString/Builder/Prim/ASCII.hs +++ b/Data/ByteString/Builder/Prim/ASCII.hs @@ -1,5 +1,4 @@ -{-# LANGUAGE ScopedTypeVariables, ForeignFunctionInterface #-} -{-# LANGUAGE Trustworthy #-} +{-# LANGUAGE ScopedTypeVariables #-} -- | Copyright : (c) 2010 Jasper Van der Jeugt -- (c) 2010 - 2011 Simon Meier -- License : BSD3-style (see LICENSE) @@ -82,7 +81,7 @@ import Data.ByteString.Builder.Prim.Binary import Data.ByteString.Builder.Prim.Internal import Data.ByteString.Builder.Prim.Internal.Floating import Data.ByteString.Builder.Prim.Internal.Base16 -import Data.ByteString.Utils.UnalignedWrite +import Data.ByteString.Utils.UnalignedAccess import Data.Char (ord) diff --git a/Data/ByteString/Builder/Prim/Binary.hs b/Data/ByteString/Builder/Prim/Binary.hs index 437f6ab90..a0ed9996a 100644 --- a/Data/ByteString/Builder/Prim/Binary.hs +++ b/Data/ByteString/Builder/Prim/Binary.hs @@ -57,7 +57,7 @@ module Data.ByteString.Builder.Prim.Binary ( import Data.ByteString.Builder.Prim.Internal import Data.ByteString.Builder.Prim.Internal.Floating import Data.ByteString.Utils.ByteOrder -import Data.ByteString.Utils.UnalignedWrite +import Data.ByteString.Utils.UnalignedAccess import Foreign diff --git a/Data/ByteString/Builder/RealFloat/Internal.hs b/Data/ByteString/Builder/RealFloat/Internal.hs index 1d3ec08d4..636830f7b 100644 --- a/Data/ByteString/Builder/RealFloat/Internal.hs +++ b/Data/ByteString/Builder/RealFloat/Internal.hs @@ -73,7 +73,7 @@ import Data.ByteString.Internal (c2w) import Data.ByteString.Builder.Prim.Internal (BoundedPrim, boundedPrim) import Data.ByteString.Builder.RealFloat.TableGenerator import Data.ByteString.Utils.ByteOrder -import Data.ByteString.Utils.UnalignedWrite +import Data.ByteString.Utils.UnalignedAccess #if PURE_HASKELL import qualified Data.ByteString.Internal.Pure as Pure #else diff --git a/Data/ByteString/Internal/Pure.hs b/Data/ByteString/Internal/Pure.hs index b36927036..699bc358a 100644 --- a/Data/ByteString/Internal/Pure.hs +++ b/Data/ByteString/Internal/Pure.hs @@ -7,8 +7,12 @@ -- | Haskell implementation of C bits module Data.ByteString.Internal.Pure - ( -- * fpstring.c - intersperse + ( -- * standard string.h functions + strlen + , memchr + , memcmp + -- * fpstring.c + , intersperse , countOcc , countOccBA , reverseBytes @@ -38,13 +42,65 @@ import GHC.Int (Int8(..)) import Data.Bits (Bits(..), shiftR, (.&.)) import Data.Word -import Foreign.Ptr (plusPtr) +import Foreign.Ptr (plusPtr, nullPtr) import Foreign.Storable (Storable(..)) import Control.Monad (when) import Control.Exception (assert) +import Data.ByteString.Utils.ByteOrder +import Data.ByteString.Utils.UnalignedAccess + +---------------------------------------------------------------- +-- Haskell versions of standard functions in string.h +---------------------------------------------------------------- + +strlen :: Ptr Word8 -> IO Int +strlen = go 0 where + go :: Int -> Ptr Word8 -> IO Int + go !acc !p = do + c <- peek p + if | c == 0 -> pure acc + | nextAcc <- acc + 1 + , nextAcc >= 0 -> go nextAcc (p `plusPtr` 1) + | otherwise -> errorWithoutStackTrace + "bytestring: strlen: String length does not fit in a Haskell Int" + +memchr :: Ptr Word8 -> Word8 -> Int -> IO (Ptr Word8) +memchr !p !target !len + | len == 0 = pure nullPtr + | otherwise = assert (len > 0) $ do + c <- peek p + if c == target + then pure p + else memchr (p `plusPtr` 1) target (len - 1) + +memcmp :: Ptr Word8 -> Ptr Word8 -> Int -> IO Int +memcmp !p1 !p2 !len + | len >= 8 = do + w1 <- unalignedReadU64 p1 + w2 <- unalignedReadU64 p2 + let toBigEndian = whenLittleEndian byteSwap64 + if | w1 == w2 + -> memcmp (p1 `plusPtr` 8) (p2 `plusPtr` 8) (len - 8) + | toBigEndian w1 < toBigEndian w2 + -> pure (0-1) + | otherwise -> pure 1 + | otherwise = memcmp1 p1 p2 len + +-- | Like 'memcmp', but definitely scans one byte at a time +memcmp1 :: Ptr Word8 -> Ptr Word8 -> Int -> IO Int +memcmp1 !p1 !p2 !len + | len == 0 = pure 0 + | otherwise = assert (len > 0) $ do + c1 <- peek p1 + c2 <- peek p2 + if | c1 == c2 -> memcmp1 (p1 `plusPtr` 1) (p2 `plusPtr` 1) (len - 1) + | c1 < c2 -> pure (0-1) + | otherwise -> pure 1 + + ---------------------------------------------------------------- --- Haskell version of functions in fpstring.c +-- Haskell versions of functions in fpstring.c ---------------------------------------------------------------- -- | duplicate a string, interspersing the character through the elements of the @@ -232,7 +288,7 @@ isValidUtf8' idx !len = go 0 ---------------------------------------------------------------- --- Haskell version of functions in itoa.c +-- Haskell versions of functions in itoa.c ---------------------------------------------------------------- diff --git a/Data/ByteString/Internal/Type.hs b/Data/ByteString/Internal/Type.hs index 5bbce468f..625dcd2ff 100644 --- a/Data/ByteString/Internal/Type.hs +++ b/Data/ByteString/Internal/Type.hs @@ -1,11 +1,16 @@ -{-# LANGUAGE CPP, ForeignFunctionInterface, BangPatterns #-} -{-# LANGUAGE UnliftedFFITypes, MagicHash, - UnboxedTuples #-} -{-# LANGUAGE TupleSections #-} -{-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE PatternSynonyms, ViewPatterns #-} +{-# LANGUAGE CPP #-} {-# LANGUAGE Unsafe #-} + +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE TemplateHaskellQuotes #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE UnboxedTuples #-} +{-# LANGUAGE UnliftedFFITypes #-} +{-# LANGUAGE ViewPatterns #-} + {-# OPTIONS_HADDOCK not-home #-} -- | @@ -129,11 +134,12 @@ import Prelude hiding (concat, null) import qualified Data.List as List import Foreign.ForeignPtr (ForeignPtr, withForeignPtr) -import Foreign.Ptr (Ptr, FunPtr, plusPtr) +import Foreign.Ptr import Foreign.Storable (Storable(..)) import Foreign.C.Types import Foreign.C.String (CString) import Foreign.Marshal.Utils +import Foreign.Marshal.Alloc (finalizerFree) #if PURE_HASKELL import qualified Data.ByteString.Internal.Pure as Pure @@ -1104,24 +1110,42 @@ accursedUnutterablePerformIO (IO m) = case m realWorld# of (# _, r #) -> r -- Standard C functions -- +memchr :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8) +memcmp :: Ptr Word8 -> Ptr Word8 -> Int -> IO CInt +{-# DEPRECATED memset "Use Foreign.Marshal.Utils.fillBytes instead" #-} +-- | deprecated since @bytestring-0.11.5.0@ +memset :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8) + +#if !PURE_HASKELL + foreign import ccall unsafe "string.h strlen" c_strlen :: CString -> IO CSize -foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer - :: FunPtr (Ptr Word8 -> IO ()) - foreign import ccall unsafe "string.h memchr" c_memchr :: Ptr Word8 -> CInt -> CSize -> IO (Ptr Word8) - -memchr :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8) memchr p w sz = c_memchr p (fromIntegral w) sz foreign import ccall unsafe "string.h memcmp" c_memcmp :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt - -memcmp :: Ptr Word8 -> Ptr Word8 -> Int -> IO CInt memcmp p q s = c_memcmp p q (fromIntegral s) +foreign import ccall unsafe "string.h memset" c_memset + :: Ptr Word8 -> CInt -> CSize -> IO (Ptr Word8) +memset p w sz = c_memset p (fromIntegral w) sz + +#else + +c_strlen :: CString -> IO CSize +c_strlen p = checkedCast <$!> Pure.strlen (castPtr p) + +memchr p w len = Pure.memchr p w (checkedCast len) + +memcmp p q s = checkedCast <$!> Pure.memcmp p q s + +memset p w len = p <$ fillBytes p w (checkedCast len) + +#endif + {-# DEPRECATED memcpy "Use Foreign.Marshal.Utils.copyBytes instead" #-} -- | deprecated since @bytestring-0.11.5.0@ memcpy :: Ptr Word8 -> Ptr Word8 -> Int -> IO () @@ -1131,13 +1155,10 @@ memcpyFp :: ForeignPtr Word8 -> ForeignPtr Word8 -> Int -> IO () memcpyFp fp fq s = unsafeWithForeignPtr fp $ \p -> unsafeWithForeignPtr fq $ \q -> copyBytes p q s -foreign import ccall unsafe "string.h memset" c_memset - :: Ptr Word8 -> CInt -> CSize -> IO (Ptr Word8) +c_free_finalizer :: FunPtr (Ptr Word8 -> IO ()) +c_free_finalizer = finalizerFree + -{-# DEPRECATED memset "Use Foreign.Marshal.Utils.fillBytes instead" #-} --- | deprecated since @bytestring-0.11.5.0@ -memset :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8) -memset p w sz = c_memset p (fromIntegral w) sz -- --------------------------------------------------------------------- -- diff --git a/Data/ByteString/Short/Internal.hs b/Data/ByteString/Short/Internal.hs index 0ed51ed1e..b0a91b3fd 100644 --- a/Data/ByteString/Short/Internal.hs +++ b/Data/ByteString/Short/Internal.hs @@ -3,7 +3,6 @@ {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DeriveLift #-} -{-# LANGUAGE ForeignFunctionInterface #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE MagicHash #-} diff --git a/Data/ByteString/Utils/UnalignedWrite.hs b/Data/ByteString/Utils/UnalignedAccess.hs similarity index 76% rename from Data/ByteString/Utils/UnalignedWrite.hs rename to Data/ByteString/Utils/UnalignedAccess.hs index 60daffbe0..67ab5fdbb 100644 --- a/Data/ByteString/Utils/UnalignedWrite.hs +++ b/Data/ByteString/Utils/UnalignedAccess.hs @@ -1,3 +1,13 @@ +-- | +-- Module : Data.ByteString.Utils.UnalignedAccess +-- Copyright : (c) Matthew Craven 2023-2024 +-- License : BSD-style +-- Maintainer : clyring@gmail.com +-- Stability : internal +-- Portability : non-portable +-- +-- Primitives for reading and writing at potentially-unaligned memory locations + {-# LANGUAGE CPP #-} {-# LANGUAGE MagicHash #-} @@ -5,12 +15,13 @@ #include "bytestring-cpp-macros.h" -module Data.ByteString.Utils.UnalignedWrite +module Data.ByteString.Utils.UnalignedAccess ( unalignedWriteU16 , unalignedWriteU32 , unalignedWriteU64 , unalignedWriteFloat , unalignedWriteDouble + , unalignedReadU64 ) where import Foreign.Ptr @@ -42,6 +53,10 @@ unalignedWriteDouble :: Double -> Ptr Word8 -> IO () unalignedWriteDouble = coerce $ \(D# x#) (Ptr p#) s -> (# writeWord8OffAddrAsDouble# p# 0# x# s, () #) +unalignedReadU64 :: Ptr Word8 -> IO Word64 +unalignedReadU64 = coerce $ \(Ptr p#) s + -> case readWord8OffAddrAsWord64# p# 0# s of + (# s', w64# #) -> (# s', W64# w64# #) #elif HS_UNALIGNED_POKES_OK import Foreign.Storable @@ -61,6 +76,8 @@ unalignedWriteFloat x p = poke (castPtr p) x unalignedWriteDouble :: Double -> Ptr Word8 -> IO () unalignedWriteDouble x p = poke (castPtr p) x +unalignedReadU64 :: Ptr Word8 -> IO Word64 +unalignedReadU64 p = peek (castPtr p) #else foreign import ccall unsafe "static fpstring.h fps_unaligned_write_u16" @@ -73,5 +90,7 @@ foreign import ccall unsafe "static fpstring.h fps_unaligned_write_HsFloat" unalignedWriteFloat :: Float -> Ptr Word8 -> IO () foreign import ccall unsafe "static fpstring.h fps_unaligned_write_HsDouble" unalignedWriteDouble :: Double -> Ptr Word8 -> IO () +foreign import ccall unsafe "static fpstring.h fps_unaligned_read_u64" + unalignedReadU64 :: Ptr Word8 -> IO Word64 #endif diff --git a/bytestring.cabal b/bytestring.cabal index 1de5c78b4..d4bdb04b5 100644 --- a/bytestring.cabal +++ b/bytestring.cabal @@ -121,7 +121,7 @@ library Data.ByteString.ReadInt Data.ByteString.ReadNat Data.ByteString.Utils.ByteOrder - Data.ByteString.Utils.UnalignedWrite + Data.ByteString.Utils.UnalignedAccess default-language: Haskell2010 other-extensions: CPP, diff --git a/cbits/fpstring.c b/cbits/fpstring.c index 1c363e992..febec8011 100644 --- a/cbits/fpstring.c +++ b/cbits/fpstring.c @@ -134,6 +134,12 @@ void fps_unaligned_write_HsDouble(HsDouble x, uint8_t *p) { memcpy(p, &x, SIZEOF_HSDOUBLE); } +uint64_t fps_unaligned_read_u64(uint8_t *p) { + uint64_t ans; + memcpy(&ans, p, 8); + return ans; +} + /* count the number of occurrences of a char in a string */ size_t fps_count_naive(unsigned char *str, size_t len, unsigned char w) { size_t c;