Skip to content

Commit f05a1de

Browse files
vdukhovnihs-viktor
andauthored
Implemented TH splices for validated ByteString literals (#712)
literalFromOctetString :: Quote m => String -> Code m ByteString literalFromHex :: Quote m => String -> Code m ByteString The former rejects inputs with non-octet code points above 0xFF. The latter rejects odd-length inputs or inputs with characters other than non-hexadecimal digits. Co-authored-by: Viktor Dukhovni <[email protected]>
1 parent 72b1552 commit f05a1de

File tree

4 files changed

+124
-3
lines changed

4 files changed

+124
-3
lines changed

Changelog.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@
77

88
* API additions and behavior changes:
99
* Data.ByteString.Short now provides `lazyToShort` and `lazyFromShort`.
10-
10+
* New TH splices: `Data.ByteString.literalFromOctetString` and `Data.ByteString.literalFromHex`
11+
* These validate input strings prior to generating corresponding
12+
compile-time literal ByteStrings.
1113
<!--
1214
* Bug fixes:
1315
* Deprecations:

Data/ByteString.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ module Data.ByteString (
6767
toStrict,
6868
fromFilePath,
6969
toFilePath,
70+
literalFromOctetString,
71+
literalFromHex,
7072

7173
-- * Basic interface
7274
cons,

Data/ByteString/Internal/Type.hs

Lines changed: 111 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
{-# OPTIONS_HADDOCK not-home #-}
55

6+
{-# LANGUAGE ConstraintKinds #-}
67
{-# LANGUAGE TemplateHaskellQuotes #-}
78
{-# LANGUAGE TypeFamilies #-}
89
{-# LANGUAGE UnliftedFFITypes #-}
@@ -42,6 +43,7 @@ module Data.ByteString.Internal.Type (
4243
unpackChars, unpackAppendCharsLazy, unpackAppendCharsStrict,
4344
unsafePackAddress, unsafePackLenAddress,
4445
unsafePackLiteral, unsafePackLenLiteral,
46+
literalFromOctetString, literalFromHex,
4547

4648
-- * Low level imperative construction
4749
empty,
@@ -152,8 +154,9 @@ import Data.String (IsString(..))
152154

153155
import Control.Exception (assert, throw, Exception)
154156

155-
import Data.Bits ((.&.))
157+
import Data.Bits ((.|.), (.&.), complement, shiftL)
156158
import Data.Char (ord)
159+
import Data.Foldable (foldr')
157160
import Data.Word
158161

159162
import Data.Data (Data(..), mkConstr, mkDataType, Constr, DataType, Fixity(Prefix), constrIndex)
@@ -197,6 +200,14 @@ import GHC.ForeignPtr (unsafeWithForeignPtr)
197200

198201
import qualified Language.Haskell.TH.Lib as TH
199202
import qualified Language.Haskell.TH.Syntax as TH
203+
import Language.Haskell.TH.Syntax (Lift, TExp)
204+
#if __GLASGOW_HASKELL__ >= 900
205+
import Language.Haskell.TH.Syntax (Code, Quote)
206+
#endif
207+
208+
#if !MIN_VERSION_base(4,13,0)
209+
import Control.Monad.Fail (MonadFail)
210+
#endif
200211

201212
#if !HS_unsafeWithForeignPtr_AVAILABLE
202213
unsafeWithForeignPtr :: ForeignPtr a -> (Ptr a -> IO b) -> IO b
@@ -359,7 +370,7 @@ byteStringDataType :: DataType
359370
byteStringDataType = mkDataType "Data.ByteString.ByteString" [packConstr]
360371

361372
-- | @since 0.11.2.0
362-
instance TH.Lift ByteString where
373+
instance Lift ByteString where
363374
#if MIN_VERSION_template_haskell(2,16,0)
364375
-- template-haskell-2.16 first ships with ghc-8.10
365376
lift (BS ptr len) = [| unsafePackLenLiteral |]
@@ -530,6 +541,104 @@ packUptoLenChars len cs0 =
530541
go !p (c:cs) = pokeFp p (c2w c) >> go (p `plusForeignPtr` 1) cs
531542
in go p0 cs0
532543

544+
#if __GLASGOW_HASKELL__ < 900
545+
type Quote m = (TH.Q ~ m)
546+
type Code m a = m (TExp a)
547+
#endif
548+
549+
liftTyped :: forall a m. (MonadFail m, Quote m, Lift a) => a -> Code m a
550+
#if MIN_VERSION_template_haskell(2,17,0)
551+
liftTyped = TH.liftTyped
552+
553+
liftCode :: forall a m. (MonadFail m, Quote m) => m (TExp a) -> Code m a
554+
liftCode = TH.liftCode
555+
#else
556+
liftTyped = TH.unsafeTExpCoerce . TH.lift
557+
558+
liftCode :: forall a m. (MonadFail m, Quote m) => m TH.Exp -> Code m a
559+
liftCode = TH.unsafeTExpCoerce
560+
#endif
561+
562+
data S2W = Octets {-# UNPACK #-} !Int [Word8]
563+
-- ^ Decoded some octets (<= 0xFF)
564+
| Hichar {-# UNPACK #-} !Int {-# UNPACK #-} !Word
565+
-- ^ Found a high char (> 0xFF)
566+
567+
data H2W = Hex {-# UNPACK #-} !Int [Word8]
568+
-- ^ Decoded some full bytes (nibble pairs)
569+
| Odd {-# UNPACK #-} !Int {-# UNPACK #-} !Word [Word8]
570+
-- ^ Decoded a nibble plus some full bytes
571+
| Bad {-# UNPACK #-} !Int {-# UNPACK #-} !Word
572+
-- ^ Found a non hex-digit character
573+
574+
-- | Template Haskell splice to convert string constants to compile-time
575+
-- ByteString literals. Unlike the 'IsString' instance, the input string
576+
-- is validated to ensure that each character is a valid /octet/, i.e. is
577+
-- at most @0xFF@ (255).
578+
--
579+
-- Example:
580+
--
581+
-- > :set -XTemplateHaskell
582+
-- > ehloCmd :: ByteString
583+
-- > ehloCmd = $$(literalFromOctetString "EHLO")
584+
--
585+
literalFromOctetString :: (MonadFail m, Quote m) => String -> Code m ByteString
586+
literalFromOctetString "" = [||empty||]
587+
literalFromOctetString s = case foldr' op (Octets 0 []) s of
588+
Octets n ws -> liftTyped (unsafePackLenBytes n ws)
589+
Hichar i w -> liftCode $ fail $ "non-octet character '\\" ++
590+
show w ++ "' at offset: " ++ show i
591+
where
592+
op :: Char -> S2W -> S2W
593+
op (fromIntegral . fromEnum -> !(w :: Word)) acc
594+
| w <= 0xff = case acc of
595+
Octets i ws -> Octets (i + 1) (fromIntegral w : ws)
596+
Hichar i w' -> Hichar (i + 1) w'
597+
| otherwise = Hichar 0 w
598+
599+
-- | Template Haskell splice to convert hex-encoded string constants to compile-time
600+
-- ByteString literals. The input string is validated to ensure that it consists of
601+
-- an even number of valid hexadecimal digits (case insensitive).
602+
--
603+
-- Example:
604+
--
605+
-- > :set -XTemplateHaskell
606+
-- > ehloCmd :: ByteString
607+
-- > ehloCmd = $$(literalFromHex "45484c4F")
608+
--
609+
literalFromHex :: (MonadFail m, Quote m) => String -> Code m ByteString
610+
literalFromHex "" = [||empty||]
611+
literalFromHex s =
612+
case foldr' op (Hex 0 []) s of
613+
Hex n ws -> liftTyped (unsafePackLenBytes n ws)
614+
Odd i _ _ -> liftCode $ fail $ "Odd input length: " ++ show (1 + 2 * i)
615+
Bad i w -> liftCode $ fail $ "Non-hexadecimal character '\\" ++
616+
show w ++ "' at offset: " ++ show i
617+
where
618+
-- Convert char to decimal digit value if result in [0, 9].
619+
-- Otherwise, for hex digits, it remains to:
620+
-- - fold upper and lower case by masking 0x20,
621+
-- - subtract another 0x11 (0x41 total),
622+
-- - check that result in [0,5]
623+
-- - add 0xa
624+
--
625+
c2d :: Char -> Word
626+
c2d c = fromIntegral (fromEnum c) - 0x30
627+
628+
op :: Char -> H2W -> H2W
629+
op (c2d -> !(d :: Word)) acc
630+
| d <= 9 = case acc of
631+
Hex i ws -> Odd i d ws
632+
Odd i lo ws -> Hex (i+1) $ fromIntegral ((d `shiftL` 4 .|. lo)) : ws
633+
Bad i w -> Bad (i + 1) w
634+
| l <- (d .&. complement 0x20) - 0x11
635+
, l <= 5
636+
, x <- l + 0xa = case acc of
637+
Hex i ws -> Odd i (l + 0xa) ws
638+
Odd i lo ws -> Hex (i+ 1) $ fromIntegral (x `shiftL` 4 .|. lo) : ws
639+
Bad i w -> Bad (i + 1) w
640+
| otherwise = Bad 0 (d + 0x30)
641+
533642
-- Unpacking bytestrings into lists efficiently is a tradeoff: on the one hand
534643
-- we would like to write a tight loop that just blasts the list into memory, on
535644
-- the other hand we want it to be unpacked lazily so we don't end up with a

tests/Lift.hs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,14 @@ testSuite = testGroup "Lift"
3131
let bs = "\0\1\2\3\0\1\2\3" :: BS.ByteString in
3232
bs === $$(TH.liftTyped $ BS.pack [0,1,2,3,0,1,2,3])
3333
#endif
34+
35+
, testProperty "literalFromOctetString" $
36+
let bs = "EHLO" :: BS.ByteString in
37+
bs === $$(BS.literalFromOctetString "EHLO")
38+
39+
, testProperty "literalFromHex" $
40+
let bs = "EHLO" :: BS.ByteString in
41+
bs === $$(BS.literalFromHex "45484c4F")
3442
]
3543

3644
, testGroup "lazy"

0 commit comments

Comments
 (0)