From 7e879110e1c5bcfce6eaeac499751002ee68b878 Mon Sep 17 00:00:00 2001
From: Matthew Craven <clyring@gmail.com>
Date: Wed, 14 Feb 2024 17:21:50 -0500
Subject: [PATCH 1/3] Move all endianness/byte-order CPP into one module

---
 Data/ByteString/Builder/Prim/Binary.hs        | 42 ++++---------------
 Data/ByteString/Builder/RealFloat/Internal.hs | 29 ++++++-------
 Data/ByteString/Utils/ByteOrder.hs            | 42 +++++++++++++++++++
 bytestring.cabal                              |  3 +-
 4 files changed, 64 insertions(+), 52 deletions(-)
 create mode 100644 Data/ByteString/Utils/ByteOrder.hs

diff --git a/Data/ByteString/Builder/Prim/Binary.hs b/Data/ByteString/Builder/Prim/Binary.hs
index 06fc77845..437f6ab90 100644
--- a/Data/ByteString/Builder/Prim/Binary.hs
+++ b/Data/ByteString/Builder/Prim/Binary.hs
@@ -1,12 +1,7 @@
-{-# LANGUAGE CPP #-}
 {-# LANGUAGE Trustworthy #-}
 
 {-# LANGUAGE TypeApplications #-}
 
-#include "MachDeps.h"
-#include "bytestring-cpp-macros.h"
-
-
 -- | Copyright   : (c) 2010-2011 Simon Meier
 -- License       : BSD3-style (see LICENSE)
 --
@@ -61,6 +56,7 @@ module Data.ByteString.Builder.Prim.Binary (
 
 import Data.ByteString.Builder.Prim.Internal
 import Data.ByteString.Builder.Prim.Internal.Floating
+import Data.ByteString.Utils.ByteOrder
 import Data.ByteString.Utils.UnalignedWrite
 
 import Foreign
@@ -86,38 +82,22 @@ word8 = fixedPrim 1 (flip poke) -- Word8 is always aligned
 -- | Encoding 'Word16's in big endian format.
 {-# INLINE word16BE #-}
 word16BE :: FixedPrim Word16
-#ifdef WORDS_BIGENDIAN
-word16BE = word16Host
-#else
-word16BE = byteSwap16 >$< word16Host
-#endif
+word16BE = whenLittleEndian byteSwap16 >$< word16Host
 
 -- | Encoding 'Word16's in little endian format.
 {-# INLINE word16LE #-}
 word16LE :: FixedPrim Word16
-#ifdef WORDS_BIGENDIAN
-word16LE = byteSwap16 >$< word16Host
-#else
-word16LE = word16Host
-#endif
+word16LE = whenBigEndian byteSwap16 >$< word16Host
 
 -- | Encoding 'Word32's in big endian format.
 {-# INLINE word32BE #-}
 word32BE :: FixedPrim Word32
-#ifdef WORDS_BIGENDIAN
-word32BE = word32Host
-#else
-word32BE = byteSwap32 >$< word32Host
-#endif
+word32BE = whenLittleEndian byteSwap32 >$< word32Host
 
 -- | Encoding 'Word32's in little endian format.
 {-# INLINE word32LE #-}
 word32LE :: FixedPrim Word32
-#ifdef WORDS_BIGENDIAN
-word32LE = byteSwap32 >$< word32Host
-#else
-word32LE = word32Host
-#endif
+word32LE = whenBigEndian byteSwap32 >$< word32Host
 
 -- on a little endian machine:
 -- word32LE w32 = fixedPrim 4 (\w p -> poke (castPtr p) w32)
@@ -125,20 +105,12 @@ word32LE = word32Host
 -- | Encoding 'Word64's in big endian format.
 {-# INLINE word64BE #-}
 word64BE :: FixedPrim Word64
-#ifdef WORDS_BIGENDIAN
-word64BE = word64Host
-#else
-word64BE = byteSwap64 >$< word64Host
-#endif
+word64BE = whenLittleEndian byteSwap64 >$< word64Host
 
 -- | Encoding 'Word64's in little endian format.
 {-# INLINE word64LE #-}
 word64LE :: FixedPrim Word64
-#ifdef WORDS_BIGENDIAN
-word64LE = byteSwap64 >$< word64Host
-#else
-word64LE = word64Host
-#endif
+word64LE = whenBigEndian byteSwap64 >$< word64Host
 
 
 -- | Encode a single native machine 'Word'. The 'Word's is encoded in host order,
diff --git a/Data/ByteString/Builder/RealFloat/Internal.hs b/Data/ByteString/Builder/RealFloat/Internal.hs
index 1b6b7f1d8..1d3ec08d4 100644
--- a/Data/ByteString/Builder/RealFloat/Internal.hs
+++ b/Data/ByteString/Builder/RealFloat/Internal.hs
@@ -72,6 +72,7 @@ import Data.Bits (Bits(..), FiniteBits(..))
 import Data.ByteString.Internal (c2w)
 import Data.ByteString.Builder.Prim.Internal (BoundedPrim, boundedPrim)
 import Data.ByteString.Builder.RealFloat.TableGenerator
+import Data.ByteString.Utils.ByteOrder
 import Data.ByteString.Utils.UnalignedWrite
 #if PURE_HASKELL
 import qualified Data.ByteString.Internal.Pure as Pure
@@ -408,25 +409,23 @@ wrapped f (I# w) = I# (f w)
 #if WORD_SIZE_IN_BITS == 32
 -- | Packs 2 32-bit system words (hi, lo) into a Word64
 packWord64 :: Word# -> Word# -> Word64#
-packWord64 hi lo =
-#if defined(WORDS_BIGENDIAN)
+packWord64 hi lo = case hostByteOrder of
+  BigEndian ->
     ((wordToWord64# lo) `uncheckedShiftL64#` 32#) `or64#` (wordToWord64# hi)
-#else
+  LittleEndian ->
     ((wordToWord64# hi) `uncheckedShiftL64#` 32#) `or64#` (wordToWord64# lo)
-#endif
 
 -- | Unpacks a Word64 into 2 32-bit words (hi, lo)
 unpackWord64 :: Word64# -> (# Word#, Word# #)
-unpackWord64 w =
-#if defined(WORDS_BIGENDIAN)
+unpackWord64 w = case hostByteOrder of
+  BigEndian ->
     (# word64ToWord# w
      , word64ToWord# (w `uncheckedShiftRL64#` 32#)
      #)
-#else
+  LittleEndian ->
     (# word64ToWord# (w `uncheckedShiftRL64#` 32#)
      , word64ToWord# w
      #)
-#endif
 
 -- | Adds 2 Word64's with 32-bit addition and manual carrying
 plusWord64 :: Word64# -> Word64# -> Word64#
@@ -731,21 +730,19 @@ getWord128At (Ptr arr) (I# i) = let
 
 -- | Packs 2 bytes [lsb, msb] into 16-bit word
 packWord16 :: Word# -> Word# -> Word#
-packWord16 l h =
-#if defined(WORDS_BIGENDIAN)
+packWord16 l h = case hostByteOrder of
+  BigEndian ->
     (h `uncheckedShiftL#` 8#) `or#` l
-#else
+  LittleEndian ->
     (l `uncheckedShiftL#` 8#) `or#` h
-#endif
 
 -- | Unpacks a 16-bit word into 2 bytes [lsb, msb]
 unpackWord16 :: Word# -> (# Word#, Word# #)
-unpackWord16 w =
-#if defined(WORDS_BIGENDIAN)
+unpackWord16 w = case hostByteOrder of
+  BigEndian ->
     (# w `and#` 0xff##, w `uncheckedShiftRL#` 8# #)
-#else
+  LittleEndian ->
     (# w `uncheckedShiftRL#` 8#, w `and#` 0xff## #)
-#endif
 
 
 -- | Static array of 2-digit pairs 00..99 for faster ascii rendering
diff --git a/Data/ByteString/Utils/ByteOrder.hs b/Data/ByteString/Utils/ByteOrder.hs
new file mode 100644
index 000000000..9248b7cad
--- /dev/null
+++ b/Data/ByteString/Utils/ByteOrder.hs
@@ -0,0 +1,42 @@
+{-# LANGUAGE CPP #-}
+
+-- | Why does this module exist? There is "GHC.ByteOrder" in base.
+-- But that module is /broken/ until base-4.14/ghc-8.10, so we
+-- can't rely on it until we drop support for older ghcs.
+-- See https://gitlab.haskell.org/ghc/ghc/-/issues/20338
+-- and https://gitlab.haskell.org/ghc/ghc/-/issues/18445
+
+#include "MachDeps.h"
+
+module Data.ByteString.Utils.ByteOrder
+  ( ByteOrder(..)
+  , hostByteOrder
+  , whenLittleEndian
+  , whenBigEndian
+  ) where
+
+data ByteOrder
+  = LittleEndian
+  | BigEndian
+
+hostByteOrder :: ByteOrder
+hostByteOrder =
+#ifdef WORDS_BIGENDIAN
+  BigEndian
+#else
+  LittleEndian
+#endif
+
+-- | If the host is little-endian, applies the given function to the given arg.
+-- If the host is big-endian, returns the second argument unchanged.
+whenLittleEndian :: (a -> a) -> a -> a
+whenLittleEndian fun val = case hostByteOrder of
+  LittleEndian -> fun val
+  BigEndian    -> val
+
+-- | If the host is little-endian, returns the second argument unchanged.
+-- If the host is big-endian, applies the given function to the given arg.
+whenBigEndian :: (a -> a) -> a -> a
+whenBigEndian fun val = case hostByteOrder of
+  LittleEndian -> val
+  BigEndian    -> fun val
diff --git a/bytestring.cabal b/bytestring.cabal
index a680197ca..1de5c78b4 100644
--- a/bytestring.cabal
+++ b/bytestring.cabal
@@ -120,6 +120,7 @@ library
                      Data.ByteString.Lazy.ReadNat
                      Data.ByteString.ReadInt
                      Data.ByteString.ReadNat
+                     Data.ByteString.Utils.ByteOrder
                      Data.ByteString.Utils.UnalignedWrite
 
   default-language:  Haskell2010
@@ -140,7 +141,7 @@ library
                     -fmax-simplifier-iterations=10
                     -fdicts-cheap
                     -fspec-constr-count=6
-  
+
   if arch(javascript) || flag(pure-haskell)
     cpp-options: -DPURE_HASKELL=1
     other-modules: Data.ByteString.Internal.Pure

From 5f9c1441cc298cffbc05050c6e356c221bf4f6b8 Mon Sep 17 00:00:00 2001
From: Matthew Craven <clyring@gmail.com>
Date: Wed, 14 Feb 2024 17:34:59 -0500
Subject: [PATCH 2/3] Remove remaining uses of FFI under -fpure-haskell

All of these were standard C functions that GHC's JS backend
actually somewhat supports; their shims can be found in the
compiler source at "rts/js/mem.js".  But it seems simpler to
just get rid of all FFI uses with -fpure-haskell rather than
try to keep track of which functions GHC supports.

The pure Haskell implementation of memcmp runs about 6-7x as fast
as the simple one-byte-at-a-time implementation for long equal
buffers, which makes it...  about the same speed as the
pre-existing shim, even though the latter is also a one-byte-
at-a-time implementation!

Apparently GHC's JS backend is not yet able to produce efficient
code for tight loops like these yet; the biggest problem is that
it does not perform any loopification so each iteration must go
through a generic-call indirection.

Unfortunately that means that this patch probably makes 'strlen'
and 'memchr' much slower with the JS backend.
---
 .gitignore                                    |  1 +
 Data/ByteString/Builder/ASCII.hs              |  3 -
 Data/ByteString/Builder/Prim/ASCII.hs         |  5 +-
 Data/ByteString/Builder/Prim/Binary.hs        |  2 +-
 Data/ByteString/Builder/RealFloat/Internal.hs |  2 +-
 Data/ByteString/Internal/Pure.hs              | 66 +++++++++++++++++--
 Data/ByteString/Internal/Type.hs              | 61 +++++++++++------
 Data/ByteString/Short/Internal.hs             |  1 -
 .../{UnalignedWrite.hs => UnalignedAccess.hs} | 21 +++++-
 bytestring.cabal                              |  2 +-
 cbits/fpstring.c                              |  6 ++
 11 files changed, 134 insertions(+), 36 deletions(-)
 rename Data/ByteString/Utils/{UnalignedWrite.hs => UnalignedAccess.hs} (76%)

diff --git a/.gitignore b/.gitignore
index 56259fbd1..f5f4fd37e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,4 @@ cabal.sandbox.config
 dist-newstyle/
 cabal.project.local*
 .nvimrc
+.ghc.environment*
diff --git a/Data/ByteString/Builder/ASCII.hs b/Data/ByteString/Builder/ASCII.hs
index c00912996..437339e09 100644
--- a/Data/ByteString/Builder/ASCII.hs
+++ b/Data/ByteString/Builder/ASCII.hs
@@ -1,6 +1,3 @@
-{-# LANGUAGE ForeignFunctionInterface #-}
-{-# LANGUAGE Trustworthy #-}
-
 {-# OPTIONS_HADDOCK not-home #-}
 
 -- | Copyright : (c) 2010 - 2011 Simon Meier
diff --git a/Data/ByteString/Builder/Prim/ASCII.hs b/Data/ByteString/Builder/Prim/ASCII.hs
index 359c67220..8f764200f 100644
--- a/Data/ByteString/Builder/Prim/ASCII.hs
+++ b/Data/ByteString/Builder/Prim/ASCII.hs
@@ -1,5 +1,4 @@
-{-# LANGUAGE ScopedTypeVariables, ForeignFunctionInterface #-}
-{-# LANGUAGE Trustworthy #-}
+{-# LANGUAGE ScopedTypeVariables #-}
 -- | Copyright   : (c) 2010 Jasper Van der Jeugt
 --                 (c) 2010 - 2011 Simon Meier
 -- License       : BSD3-style (see LICENSE)
@@ -82,7 +81,7 @@ import Data.ByteString.Builder.Prim.Binary
 import Data.ByteString.Builder.Prim.Internal
 import Data.ByteString.Builder.Prim.Internal.Floating
 import Data.ByteString.Builder.Prim.Internal.Base16
-import Data.ByteString.Utils.UnalignedWrite
+import Data.ByteString.Utils.UnalignedAccess
 
 import Data.Char (ord)
 
diff --git a/Data/ByteString/Builder/Prim/Binary.hs b/Data/ByteString/Builder/Prim/Binary.hs
index 437f6ab90..a0ed9996a 100644
--- a/Data/ByteString/Builder/Prim/Binary.hs
+++ b/Data/ByteString/Builder/Prim/Binary.hs
@@ -57,7 +57,7 @@ module Data.ByteString.Builder.Prim.Binary (
 import Data.ByteString.Builder.Prim.Internal
 import Data.ByteString.Builder.Prim.Internal.Floating
 import Data.ByteString.Utils.ByteOrder
-import Data.ByteString.Utils.UnalignedWrite
+import Data.ByteString.Utils.UnalignedAccess
 
 import Foreign
 
diff --git a/Data/ByteString/Builder/RealFloat/Internal.hs b/Data/ByteString/Builder/RealFloat/Internal.hs
index 1d3ec08d4..636830f7b 100644
--- a/Data/ByteString/Builder/RealFloat/Internal.hs
+++ b/Data/ByteString/Builder/RealFloat/Internal.hs
@@ -73,7 +73,7 @@ import Data.ByteString.Internal (c2w)
 import Data.ByteString.Builder.Prim.Internal (BoundedPrim, boundedPrim)
 import Data.ByteString.Builder.RealFloat.TableGenerator
 import Data.ByteString.Utils.ByteOrder
-import Data.ByteString.Utils.UnalignedWrite
+import Data.ByteString.Utils.UnalignedAccess
 #if PURE_HASKELL
 import qualified Data.ByteString.Internal.Pure as Pure
 #else
diff --git a/Data/ByteString/Internal/Pure.hs b/Data/ByteString/Internal/Pure.hs
index b36927036..699bc358a 100644
--- a/Data/ByteString/Internal/Pure.hs
+++ b/Data/ByteString/Internal/Pure.hs
@@ -7,8 +7,12 @@
 
 -- | Haskell implementation of C bits
 module Data.ByteString.Internal.Pure
-  ( -- * fpstring.c
-    intersperse
+  ( -- * standard string.h functions
+    strlen
+  , memchr
+  , memcmp
+    -- * fpstring.c
+  , intersperse
   , countOcc
   , countOccBA
   , reverseBytes
@@ -38,13 +42,65 @@ import GHC.Int                  (Int8(..))
 
 import Data.Bits                (Bits(..), shiftR, (.&.))
 import Data.Word
-import Foreign.Ptr              (plusPtr)
+import Foreign.Ptr              (plusPtr, nullPtr)
 import Foreign.Storable         (Storable(..))
 import Control.Monad            (when)
 import Control.Exception        (assert)
 
+import Data.ByteString.Utils.ByteOrder
+import Data.ByteString.Utils.UnalignedAccess
+
+----------------------------------------------------------------
+-- Haskell versions of standard functions in string.h
+----------------------------------------------------------------
+
+strlen :: Ptr Word8 -> IO Int
+strlen = go 0 where
+  go :: Int -> Ptr Word8 -> IO Int
+  go !acc !p = do
+    c <- peek p
+    if | c == 0 -> pure acc
+       | nextAcc <- acc + 1
+       , nextAcc >= 0 -> go nextAcc (p `plusPtr` 1)
+       | otherwise -> errorWithoutStackTrace
+           "bytestring: strlen: String length does not fit in a Haskell Int"
+
+memchr :: Ptr Word8 -> Word8 -> Int -> IO (Ptr Word8)
+memchr !p !target !len
+  | len == 0 = pure nullPtr
+  | otherwise = assert (len > 0) $ do
+      c <- peek p
+      if c == target
+        then pure p
+        else memchr (p `plusPtr` 1) target (len - 1)
+
+memcmp :: Ptr Word8 -> Ptr Word8 -> Int -> IO Int
+memcmp !p1 !p2 !len
+  | len >= 8 = do
+      w1 <- unalignedReadU64 p1
+      w2 <- unalignedReadU64 p2
+      let toBigEndian = whenLittleEndian byteSwap64
+      if | w1 == w2
+           -> memcmp (p1 `plusPtr` 8) (p2 `plusPtr` 8) (len - 8)
+         | toBigEndian w1 < toBigEndian w2
+           -> pure (0-1)
+         | otherwise -> pure 1
+  | otherwise = memcmp1 p1 p2 len
+
+-- | Like 'memcmp', but definitely scans one byte at a time
+memcmp1 :: Ptr Word8 -> Ptr Word8 -> Int -> IO Int
+memcmp1 !p1 !p2 !len
+  | len == 0 = pure 0
+  | otherwise = assert (len > 0) $ do
+      c1 <- peek p1
+      c2 <- peek p2
+      if | c1 == c2 -> memcmp1 (p1 `plusPtr` 1) (p2 `plusPtr` 1) (len - 1)
+         | c1 < c2   -> pure (0-1)
+         | otherwise -> pure 1
+
+
 ----------------------------------------------------------------
--- Haskell version of functions in fpstring.c
+-- Haskell versions of functions in fpstring.c
 ----------------------------------------------------------------
 
 -- | duplicate a string, interspersing the character through the elements of the
@@ -232,7 +288,7 @@ isValidUtf8' idx !len = go 0
 
 
 ----------------------------------------------------------------
--- Haskell version of functions in itoa.c
+-- Haskell versions of functions in itoa.c
 ----------------------------------------------------------------
 
 
diff --git a/Data/ByteString/Internal/Type.hs b/Data/ByteString/Internal/Type.hs
index 52dede330..9ba018a6e 100644
--- a/Data/ByteString/Internal/Type.hs
+++ b/Data/ByteString/Internal/Type.hs
@@ -1,11 +1,16 @@
-{-# LANGUAGE CPP, ForeignFunctionInterface, BangPatterns #-}
-{-# LANGUAGE UnliftedFFITypes, MagicHash,
-            UnboxedTuples #-}
-{-# LANGUAGE TupleSections #-}
-{-# LANGUAGE TypeFamilies #-}
-{-# LANGUAGE PatternSynonyms, ViewPatterns #-}
+{-# LANGUAGE CPP #-}
 {-# LANGUAGE Unsafe #-}
+
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE PatternSynonyms #-}
 {-# LANGUAGE TemplateHaskellQuotes #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE UnboxedTuples #-}
+{-# LANGUAGE UnliftedFFITypes #-}
+{-# LANGUAGE ViewPatterns #-}
+
 {-# OPTIONS_HADDOCK not-home #-}
 
 -- |
@@ -129,11 +134,12 @@ import Prelude hiding (concat, null)
 import qualified Data.List as List
 
 import Foreign.ForeignPtr       (ForeignPtr, withForeignPtr)
-import Foreign.Ptr              (Ptr, FunPtr, plusPtr)
+import Foreign.Ptr              (Ptr, FunPtr, plusPtr, castPtr)
 import Foreign.Storable         (Storable(..))
 import Foreign.C.Types
 import Foreign.C.String         (CString)
 import Foreign.Marshal.Utils
+import Foreign.Marshal.Alloc    (finalizerFree)
 
 #if PURE_HASKELL
 import qualified Data.ByteString.Internal.Pure as Pure
@@ -1104,24 +1110,42 @@ accursedUnutterablePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
 -- Standard C functions
 --
 
+memchr :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
+memcmp :: Ptr Word8 -> Ptr Word8 -> Int -> IO CInt
+{-# DEPRECATED memset "Use Foreign.Marshal.Utils.fillBytes instead" #-}
+-- | deprecated since @bytestring-0.11.5.0@
+memset :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
+
+#if !PURE_HASKELL
+
 foreign import ccall unsafe "string.h strlen" c_strlen
     :: CString -> IO CSize
 
-foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
-    :: FunPtr (Ptr Word8 -> IO ())
-
 foreign import ccall unsafe "string.h memchr" c_memchr
     :: Ptr Word8 -> CInt -> CSize -> IO (Ptr Word8)
-
-memchr :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
 memchr p w sz = c_memchr p (fromIntegral w) sz
 
 foreign import ccall unsafe "string.h memcmp" c_memcmp
     :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt
-
-memcmp :: Ptr Word8 -> Ptr Word8 -> Int -> IO CInt
 memcmp p q s = c_memcmp p q (fromIntegral s)
 
+foreign import ccall unsafe "string.h memset" c_memset
+    :: Ptr Word8 -> CInt -> CSize -> IO (Ptr Word8)
+memset p w sz = c_memset p (fromIntegral w) sz
+
+#else
+
+c_strlen :: CString -> IO CSize
+c_strlen p = checkedCast <$!> Pure.strlen (castPtr p)
+
+memchr p w len = Pure.memchr p w (checkedCast len)
+
+memcmp p q s = checkedCast <$!> Pure.memcmp p q s
+
+memset p w len = p <$ fillBytes p w (checkedCast len)
+
+#endif
+
 {-# DEPRECATED memcpy "Use Foreign.Marshal.Utils.copyBytes instead" #-}
 -- | deprecated since @bytestring-0.11.5.0@
 memcpy :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
@@ -1131,13 +1155,10 @@ memcpyFp :: ForeignPtr Word8 -> ForeignPtr Word8 -> Int -> IO ()
 memcpyFp fp fq s = unsafeWithForeignPtr fp $ \p ->
                      unsafeWithForeignPtr fq $ \q -> copyBytes p q s
 
-foreign import ccall unsafe "string.h memset" c_memset
-    :: Ptr Word8 -> CInt -> CSize -> IO (Ptr Word8)
+c_free_finalizer :: FunPtr (Ptr Word8 -> IO ())
+c_free_finalizer = finalizerFree
+
 
-{-# DEPRECATED memset "Use Foreign.Marshal.Utils.fillBytes instead" #-}
--- | deprecated since @bytestring-0.11.5.0@
-memset :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
-memset p w sz = c_memset p (fromIntegral w) sz
 
 -- ---------------------------------------------------------------------
 --
diff --git a/Data/ByteString/Short/Internal.hs b/Data/ByteString/Short/Internal.hs
index 3475eca75..af18a0587 100644
--- a/Data/ByteString/Short/Internal.hs
+++ b/Data/ByteString/Short/Internal.hs
@@ -3,7 +3,6 @@
 {-# LANGUAGE DeriveDataTypeable       #-}
 {-# LANGUAGE DeriveGeneric            #-}
 {-# LANGUAGE DeriveLift               #-}
-{-# LANGUAGE ForeignFunctionInterface #-}
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
 {-# LANGUAGE LambdaCase               #-}
 {-# LANGUAGE MagicHash                #-}
diff --git a/Data/ByteString/Utils/UnalignedWrite.hs b/Data/ByteString/Utils/UnalignedAccess.hs
similarity index 76%
rename from Data/ByteString/Utils/UnalignedWrite.hs
rename to Data/ByteString/Utils/UnalignedAccess.hs
index 60daffbe0..67ab5fdbb 100644
--- a/Data/ByteString/Utils/UnalignedWrite.hs
+++ b/Data/ByteString/Utils/UnalignedAccess.hs
@@ -1,3 +1,13 @@
+-- |
+-- Module      : Data.ByteString.Utils.UnalignedAccess
+-- Copyright   : (c) Matthew Craven 2023-2024
+-- License     : BSD-style
+-- Maintainer  : clyring@gmail.com
+-- Stability   : internal
+-- Portability : non-portable
+--
+-- Primitives for reading and writing at potentially-unaligned memory locations
+
 {-# LANGUAGE CPP #-}
 
 {-# LANGUAGE MagicHash #-}
@@ -5,12 +15,13 @@
 
 #include "bytestring-cpp-macros.h"
 
-module Data.ByteString.Utils.UnalignedWrite
+module Data.ByteString.Utils.UnalignedAccess
   ( unalignedWriteU16
   , unalignedWriteU32
   , unalignedWriteU64
   , unalignedWriteFloat
   , unalignedWriteDouble
+  , unalignedReadU64
   ) where
 
 import Foreign.Ptr
@@ -42,6 +53,10 @@ unalignedWriteDouble :: Double -> Ptr Word8 -> IO ()
 unalignedWriteDouble = coerce $ \(D# x#) (Ptr p#) s
   -> (# writeWord8OffAddrAsDouble# p# 0# x# s, () #)
 
+unalignedReadU64 :: Ptr Word8 -> IO Word64
+unalignedReadU64 = coerce $ \(Ptr p#) s
+  -> case readWord8OffAddrAsWord64# p# 0# s of
+       (# s', w64# #) -> (# s', W64# w64# #)
 
 #elif HS_UNALIGNED_POKES_OK
 import Foreign.Storable
@@ -61,6 +76,8 @@ unalignedWriteFloat x p = poke (castPtr p) x
 unalignedWriteDouble :: Double -> Ptr Word8 -> IO ()
 unalignedWriteDouble x p = poke (castPtr p) x
 
+unalignedReadU64 :: Ptr Word8 -> IO Word64
+unalignedReadU64 p = peek (castPtr p)
 
 #else
 foreign import ccall unsafe "static fpstring.h fps_unaligned_write_u16"
@@ -73,5 +90,7 @@ foreign import ccall unsafe "static fpstring.h fps_unaligned_write_HsFloat"
   unalignedWriteFloat :: Float -> Ptr Word8 -> IO ()
 foreign import ccall unsafe "static fpstring.h fps_unaligned_write_HsDouble"
   unalignedWriteDouble :: Double -> Ptr Word8 -> IO ()
+foreign import ccall unsafe "static fpstring.h fps_unaligned_read_u64"
+  unalignedReadU64 :: Ptr Word8 -> IO Word64
 #endif
 
diff --git a/bytestring.cabal b/bytestring.cabal
index 1de5c78b4..d4bdb04b5 100644
--- a/bytestring.cabal
+++ b/bytestring.cabal
@@ -121,7 +121,7 @@ library
                      Data.ByteString.ReadInt
                      Data.ByteString.ReadNat
                      Data.ByteString.Utils.ByteOrder
-                     Data.ByteString.Utils.UnalignedWrite
+                     Data.ByteString.Utils.UnalignedAccess
 
   default-language:  Haskell2010
   other-extensions:  CPP,
diff --git a/cbits/fpstring.c b/cbits/fpstring.c
index 1c363e992..febec8011 100644
--- a/cbits/fpstring.c
+++ b/cbits/fpstring.c
@@ -134,6 +134,12 @@ void fps_unaligned_write_HsDouble(HsDouble x, uint8_t *p) {
   memcpy(p, &x, SIZEOF_HSDOUBLE);
 }
 
+uint64_t fps_unaligned_read_u64(uint8_t *p) {
+  uint64_t ans;
+  memcpy(&ans, p, 8);
+  return ans;
+}
+
 /* count the number of occurrences of a char in a string */
 size_t fps_count_naive(unsigned char *str, size_t len, unsigned char w) {
     size_t c;

From 747324f63b7c9ecb910d0e7899181604ea3ad3a6 Mon Sep 17 00:00:00 2001
From: Matthew Craven <clyring@gmail.com>
Date: Wed, 14 Feb 2024 21:54:03 -0500
Subject: [PATCH 3/3] Eliminate unused-import warning

---
 Data/ByteString/Internal/Type.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Data/ByteString/Internal/Type.hs b/Data/ByteString/Internal/Type.hs
index 9ba018a6e..f10236e4d 100644
--- a/Data/ByteString/Internal/Type.hs
+++ b/Data/ByteString/Internal/Type.hs
@@ -134,7 +134,7 @@ import Prelude hiding (concat, null)
 import qualified Data.List as List
 
 import Foreign.ForeignPtr       (ForeignPtr, withForeignPtr)
-import Foreign.Ptr              (Ptr, FunPtr, plusPtr, castPtr)
+import Foreign.Ptr
 import Foreign.Storable         (Storable(..))
 import Foreign.C.Types
 import Foreign.C.String         (CString)