Skip to content

Commit f3ad3d7

Browse files
committed
Add encodingToStrictByteString
This runs Builder to produce Strict ByteString directly, by making a mutable buffer and growing it exponentially. This might be good or bad, better or worse than LBS.toStrict . encodingToLazyByteString. Latter allocates many small chunks, and copies once; encodingToStrictByteString makes a buffer exponentially, but copies data everytime.
1 parent 105fe14 commit f3ad3d7

File tree

6 files changed

+110
-4
lines changed

6 files changed

+110
-4
lines changed

aeson.cabal

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ library
8181
Data.Aeson.Encoding.Builder
8282
Data.Aeson.Internal.ByteString
8383
Data.Aeson.Internal.Functions
84+
Data.Aeson.Internal.StrictBuilder
8485
Data.Aeson.Internal.Text
8586
Data.Aeson.Internal.TH
8687
Data.Aeson.Parser.Time
@@ -99,10 +100,10 @@ library
99100
, bytestring >=0.10.8.1 && <0.12
100101
, containers >=0.5.7.1 && <0.7
101102
, deepseq >=1.4.2.0 && <1.5
103+
, exceptions >=0.10.4 && <0.11
102104
, ghc-prim >=0.5.0.0 && <0.10
103105
, template-haskell >=2.11.0.0 && <2.20
104-
, text >=1.2.3.0 && <1.3 || >=2.0 && <2.1
105-
, exceptions >=0.10.4 && <0.11
106+
, text >=1.2.3.0 && <1.3 || >=2.0 && <2.1
106107
, time >=1.6.0.1 && <1.13
107108

108109
-- Compat

benchmarks/aeson-benchmarks.cabal

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ library
7878
Data.Aeson.Internal
7979
Data.Aeson.Internal.ByteString
8080
Data.Aeson.Internal.Functions
81+
Data.Aeson.Internal.StrictBuilder
8182
Data.Aeson.Internal.Text
8283
Data.Aeson.Internal.TH
8384
Data.Aeson.Internal.Time

src/Data/Aeson/Encoding.hs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ module Data.Aeson.Encoding
1111
Encoding
1212
, Encoding'
1313
, encodingToLazyByteString
14+
, encodingToStrictByteString
1415
, fromEncoding
1516
, unsafeToEncoding
1617
, Series

src/Data/Aeson/Encoding/Internal.hs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ module Data.Aeson.Encoding.Internal
1010
Encoding' (..)
1111
, Encoding
1212
, encodingToLazyByteString
13+
, encodingToStrictByteString
1314
, unsafeToEncoding
1415
, retagEncoding
1516
, Series (..)
@@ -65,6 +66,7 @@ module Data.Aeson.Encoding.Internal
6566
import Prelude.Compat
6667

6768
import Data.Aeson.Types.Internal (Value, Key)
69+
import Data.Aeson.Internal.StrictBuilder (toStrictByteString)
6870
import Data.ByteString.Builder (Builder, char7, toLazyByteString)
6971
import Data.ByteString.Short (ShortByteString)
7072
import qualified Data.Aeson.Key as Key
@@ -77,6 +79,7 @@ import Data.Time.Calendar.Quarter.Compat (Quarter)
7779
import Data.Typeable (Typeable)
7880
import Data.Word (Word8, Word16, Word32, Word64)
7981
import qualified Data.Aeson.Encoding.Builder as EB
82+
import qualified Data.ByteString as BS
8083
import qualified Data.ByteString.Builder as B
8184
import qualified Data.ByteString.Lazy as BSL
8285
import qualified Data.Text.Lazy as LT
@@ -101,10 +104,21 @@ type Encoding = Encoding' Value
101104
unsafeToEncoding :: Builder -> Encoding' a
102105
unsafeToEncoding = Encoding
103106

107+
-- | Convert 'Encoding' to /lazy/ 'BSL.ByteString'.
104108
encodingToLazyByteString :: Encoding' a -> BSL.ByteString
105109
encodingToLazyByteString = toLazyByteString . fromEncoding
106110
{-# INLINE encodingToLazyByteString #-}
107111

112+
-- | Convert 'Encoding' to /strict/ 'BS.ByteString'.
113+
--
114+
-- This might or might not be more efficient than @'BSL.toStrict' . 'encodingToLazyByteString'@
115+
--
116+
-- @since 2.1.2.0
117+
--
118+
encodingToStrictByteString :: Encoding' a -> BS.ByteString
119+
encodingToStrictByteString = toStrictByteString . fromEncoding
120+
{-# INLINE encodingToStrictByteString #-}
121+
108122
retagEncoding :: Encoding' a -> Encoding' b
109123
retagEncoding = Encoding . fromEncoding
110124

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
{-# LANGUAGE BangPatterns #-}
2+
{-# LANGUAGE CPP #-}
3+
{-# LANGUAGE MagicHash #-}
4+
{-# LANGUAGE UnboxedTuples #-}
5+
module Data.Aeson.Internal.StrictBuilder (
6+
toStrictByteString,
7+
toStrictByteStringIO,
8+
) where
9+
10+
import Data.ByteString.Builder.Internal (BufferRange (BufferRange), BuildStep, Builder, fillWithBuildStep, runBuilder)
11+
import Data.ByteString.Internal (ByteString (..))
12+
import Data.Word (Word8)
13+
import GHC.Exts (Addr#, Ptr (..), minusAddr#, plusAddr#)
14+
import GHC.Exts (Int (I#), Int#, orI#, (+#))
15+
import GHC.Exts (MutableByteArray#, RealWorld, newPinnedByteArray#, resizeMutableByteArray#, shrinkMutableByteArray#)
16+
import GHC.ForeignPtr (ForeignPtr (ForeignPtr), ForeignPtrContents (PlainPtr))
17+
import GHC.IO (IO (IO), unIO, unsafePerformIO)
18+
19+
#if MIN_VERSION_base(4,16,0)
20+
import GHC.Exts (mutableByteArrayContents#)
21+
#else
22+
import GHC.Exts (byteArrayContents#, unsafeCoerce#)
23+
24+
mutableByteArrayContents# :: MutableByteArray# s -> Addr#
25+
mutableByteArrayContents# mba = byteArrayContents# (unsafeCoerce# mba)
26+
#endif
27+
28+
toStrictByteString :: Builder -> ByteString
29+
toStrictByteString b = unsafePerformIO (toStrictByteStringIO b)
30+
{-# NOINLINE toStrictByteString #-}
31+
32+
toStrictByteStringIO :: Builder -> IO ByteString
33+
toStrictByteStringIO b = IO $ \s ->
34+
case newPinnedByteArray# 4096# s of
35+
(# s', mba #) -> case mutableByteArrayContents# mba of
36+
start -> unIO (toStrictByteStringWorker mba 4096# start start (plusAddr# start 4096#) (runBuilder b)) s'
37+
38+
-- Progressively double the buffer size if it's reported to be full.
39+
-- (convertion to lazy bytestring allocates new buffer chunks).
40+
toStrictByteStringWorker
41+
:: MutableByteArray# RealWorld -- ^ the buffer bytearray
42+
-> Int# -- ^ size of the bytearray
43+
-> Addr# -- ^ beginning of the bytearray
44+
-> Addr# -- ^ current write position
45+
-> Addr# -- ^ end of the bytearray
46+
-> BuildStep ()
47+
-> IO ByteString
48+
toStrictByteStringWorker mba size start begin end !curr =
49+
fillWithBuildStep curr kDone kFull kChunk (BufferRange (Ptr begin) (Ptr end))
50+
where
51+
kDone :: Ptr Word8 -> () -> IO ByteString
52+
kDone (Ptr pos) _ = IO $ \s1 ->
53+
case minusAddr# pos start of { len ->
54+
case shrinkMutableByteArray# mba len s1 of { s2 ->
55+
#if MIN_VERSION_bytestring(0,11,0)
56+
(# s2 , BS (ForeignPtr start (PlainPtr mba)) (I# len) #)
57+
#else
58+
(# s2 , PS (ForeignPtr start (PlainPtr mba)) 0 (I# len) #)
59+
#endif
60+
}}
61+
62+
kFull :: Ptr Word8 -> Int -> BuildStep () -> IO ByteString
63+
kFull (Ptr pos) (I# nsize) next = IO $ \s1 ->
64+
-- orI# is an approximation of max
65+
case size +# orI# size nsize of { size' ->
66+
case resizeMutableByteArray# mba size' s1 of { (# s2, mba' #) ->
67+
case mutableByteArrayContents# mba' of { start' ->
68+
unIO (toStrictByteStringWorker mba' size' start' (plusAddr# start' (minusAddr# pos start)) (plusAddr# start' size') next) s2
69+
}}}
70+
71+
kChunk :: Ptr Word8 -> ByteString -> BuildStep () -> IO ByteString
72+
#if MIN_VERSION_bytestring(0,11,0)
73+
kChunk (Ptr pos) (BS _ 0) next = toStrictByteStringWorker mba size start pos end next
74+
#else
75+
kChunk (Ptr pos) (PS _ _ 0) next = toStrictByteStringWorker mba size start pos end next
76+
#endif
77+
kChunk _ _ _ = fail "TODO: non-empty chunk"

tests/PropUtils.hs

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ module PropUtils (module PropUtils) where
77
import Prelude.Compat
88

99
import Data.Aeson (eitherDecode, encode)
10-
import Data.Aeson.Encoding (encodingToLazyByteString)
10+
import Data.Aeson.Encoding (encodingToLazyByteString, encodingToStrictByteString)
1111
import Data.Aeson.Internal (IResult(..), formatError, ifromJSON, iparse)
1212
import qualified Data.Aeson.Internal as I
1313
import Data.Aeson.Parser (value)
@@ -25,6 +25,7 @@ import Test.QuickCheck (Arbitrary(..), Property, Testable, (===), (.&&.), counte
2525
import Types
2626
import Text.Read (readMaybe)
2727
import qualified Data.Attoparsec.Lazy as L
28+
import qualified Data.Attoparsec.ByteString as S
2829
import qualified Data.ByteString.Lazy.Char8 as L
2930
import qualified Data.Map as Map
3031
import qualified Data.Text as T
@@ -66,6 +67,14 @@ roundTripEnc eq _ i =
6667
L.Done _ (IError path err) -> failure "fromJSON" (formatError path err) i
6768
L.Fail _ _ err -> failure "parse" err i
6869

70+
roundTripStrictEnc :: (FromJSON a, ToJSON a, Show a) =>
71+
(a -> a -> Property) -> a -> a -> Property
72+
roundTripStrictEnc eq _ i =
73+
case fmap ifromJSON . S.parseOnly value . encodingToStrictByteString . toEncoding $ i of
74+
Right (ISuccess v) -> v `eq` i
75+
Right (IError path err) -> failure "fromJSON" (formatError path err) i
76+
Left err -> failure "parse" err i
77+
6978
roundTripNoEnc :: (FromJSON a, ToJSON a, Show a) =>
7079
(a -> a -> Property) -> a -> a -> Property
7180
roundTripNoEnc eq _ i =
@@ -74,7 +83,10 @@ roundTripNoEnc eq _ i =
7483
(IError path err) -> failure "fromJSON" (formatError path err) i
7584

7685
roundTripEq :: (Eq a, FromJSON a, ToJSON a, Show a) => a -> a -> Property
77-
roundTripEq x y = roundTripEnc (===) x y .&&. roundTripNoEnc (===) x y
86+
roundTripEq x y =
87+
roundTripEnc (===) x y .&&.
88+
roundTripStrictEnc (===) x y .&&.
89+
roundTripNoEnc (===) x y
7890

7991
roundtripReadShow :: Value -> Property
8092
roundtripReadShow v = readMaybe (show v) === Just v

0 commit comments

Comments
 (0)