|
3 | 3 |
|
4 | 4 | {-# OPTIONS_HADDOCK not-home #-} |
5 | 5 |
|
| 6 | +{-# LANGUAGE ConstraintKinds #-} |
6 | 7 | {-# LANGUAGE TemplateHaskellQuotes #-} |
7 | 8 | {-# LANGUAGE TypeFamilies #-} |
8 | 9 | {-# LANGUAGE UnliftedFFITypes #-} |
@@ -42,6 +43,7 @@ module Data.ByteString.Internal.Type ( |
42 | 43 | unpackChars, unpackAppendCharsLazy, unpackAppendCharsStrict, |
43 | 44 | unsafePackAddress, unsafePackLenAddress, |
44 | 45 | unsafePackLiteral, unsafePackLenLiteral, |
| 46 | + literalFromOctetString, literalFromHex, |
45 | 47 |
|
46 | 48 | -- * Low level imperative construction |
47 | 49 | empty, |
@@ -152,8 +154,9 @@ import Data.String (IsString(..)) |
152 | 154 |
|
153 | 155 | import Control.Exception (assert, throw, Exception) |
154 | 156 |
|
155 | | -import Data.Bits ((.&.)) |
| 157 | +import Data.Bits ((.|.), (.&.), complement, shiftL) |
156 | 158 | import Data.Char (ord) |
| 159 | +import Data.Foldable (foldr') |
157 | 160 | import Data.Word |
158 | 161 |
|
159 | 162 | import Data.Data (Data(..), mkConstr, mkDataType, Constr, DataType, Fixity(Prefix), constrIndex) |
@@ -197,6 +200,14 @@ import GHC.ForeignPtr (unsafeWithForeignPtr) |
197 | 200 |
|
198 | 201 | import qualified Language.Haskell.TH.Lib as TH |
199 | 202 | import qualified Language.Haskell.TH.Syntax as TH |
| 203 | +import Language.Haskell.TH.Syntax (Lift, TExp) |
| 204 | +#if __GLASGOW_HASKELL__ >= 900 |
| 205 | +import Language.Haskell.TH.Syntax (Code, Quote) |
| 206 | +#endif |
| 207 | + |
| 208 | +#if !MIN_VERSION_base(4,13,0) |
| 209 | +import Control.Monad.Fail (MonadFail) |
| 210 | +#endif |
200 | 211 |
|
201 | 212 | #if !HS_unsafeWithForeignPtr_AVAILABLE |
202 | 213 | unsafeWithForeignPtr :: ForeignPtr a -> (Ptr a -> IO b) -> IO b |
@@ -359,7 +370,7 @@ byteStringDataType :: DataType |
359 | 370 | byteStringDataType = mkDataType "Data.ByteString.ByteString" [packConstr] |
360 | 371 |
|
361 | 372 | -- | @since 0.11.2.0 |
362 | | -instance TH.Lift ByteString where |
| 373 | +instance Lift ByteString where |
363 | 374 | #if MIN_VERSION_template_haskell(2,16,0) |
364 | 375 | -- template-haskell-2.16 first ships with ghc-8.10 |
365 | 376 | lift (BS ptr len) = [| unsafePackLenLiteral |] |
@@ -530,6 +541,104 @@ packUptoLenChars len cs0 = |
530 | 541 | go !p (c:cs) = pokeFp p (c2w c) >> go (p `plusForeignPtr` 1) cs |
531 | 542 | in go p0 cs0 |
532 | 543 |
|
| 544 | +#if __GLASGOW_HASKELL__ < 900 |
| 545 | +type Quote m = (TH.Q ~ m) |
| 546 | +type Code m a = m (TExp a) |
| 547 | +#endif |
| 548 | + |
| 549 | +liftTyped :: forall a m. (MonadFail m, Quote m, Lift a) => a -> Code m a |
| 550 | +#if MIN_VERSION_template_haskell(2,17,0) |
| 551 | +liftTyped = TH.liftTyped |
| 552 | + |
| 553 | +liftCode :: forall a m. (MonadFail m, Quote m) => m (TExp a) -> Code m a |
| 554 | +liftCode = TH.liftCode |
| 555 | +#else |
| 556 | +liftTyped = TH.unsafeTExpCoerce . TH.lift |
| 557 | + |
| 558 | +liftCode :: forall a m. (MonadFail m, Quote m) => m TH.Exp -> Code m a |
| 559 | +liftCode = TH.unsafeTExpCoerce |
| 560 | +#endif |
| 561 | + |
| 562 | +data S2W = Octets {-# UNPACK #-} !Int [Word8] |
| 563 | + -- ^ Decoded some octets (<= 0xFF) |
| 564 | + | Hichar {-# UNPACK #-} !Int {-# UNPACK #-} !Word |
| 565 | + -- ^ Found a high char (> 0xFF) |
| 566 | + |
| 567 | +data H2W = Hex {-# UNPACK #-} !Int [Word8] |
| 568 | + -- ^ Decoded some full bytes (nibble pairs) |
| 569 | + | Odd {-# UNPACK #-} !Int {-# UNPACK #-} !Word [Word8] |
| 570 | + -- ^ Decoded a nibble plus some full bytes |
| 571 | + | Bad {-# UNPACK #-} !Int {-# UNPACK #-} !Word |
| 572 | + -- ^ Found a non hex-digit character |
| 573 | + |
| 574 | +-- | Template Haskell splice to convert string constants to compile-time |
| 575 | +-- ByteString literals. Unlike the 'IsString' instance, the input string |
| 576 | +-- is validated to ensure that each character is a valid /octet/, i.e. is |
| 577 | +-- at most @0xFF@ (255). |
| 578 | +-- |
| 579 | +-- Example: |
| 580 | +-- |
| 581 | +-- > :set -XTemplateHaskell |
| 582 | +-- > ehloCmd :: ByteString |
| 583 | +-- > ehloCmd = $$(literalFromOctetString "EHLO") |
| 584 | +-- |
| 585 | +literalFromOctetString :: (MonadFail m, Quote m) => String -> Code m ByteString |
| 586 | +literalFromOctetString "" = [||empty||] |
| 587 | +literalFromOctetString s = case foldr' op (Octets 0 []) s of |
| 588 | + Octets n ws -> liftTyped (unsafePackLenBytes n ws) |
| 589 | + Hichar i w -> liftCode $ fail $ "non-octet character '\\" ++ |
| 590 | + show w ++ "' at offset: " ++ show i |
| 591 | + where |
| 592 | + op :: Char -> S2W -> S2W |
| 593 | + op (fromIntegral . fromEnum -> !(w :: Word)) acc |
| 594 | + | w <= 0xff = case acc of |
| 595 | + Octets i ws -> Octets (i + 1) (fromIntegral w : ws) |
| 596 | + Hichar i w' -> Hichar (i + 1) w' |
| 597 | + | otherwise = Hichar 0 w |
| 598 | + |
| 599 | +-- | Template Haskell splice to convert hex-encoded string constants to compile-time |
| 600 | +-- ByteString literals. The input string is validated to ensure that it consists of |
| 601 | +-- an even number of valid hexadecimal digits (case insensitive). |
| 602 | +-- |
| 603 | +-- Example: |
| 604 | +-- |
| 605 | +-- > :set -XTemplateHaskell |
| 606 | +-- > ehloCmd :: ByteString |
| 607 | +-- > ehloCmd = $$(literalFromHex "45484c4F") |
| 608 | +-- |
| 609 | +literalFromHex :: (MonadFail m, Quote m) => String -> Code m ByteString |
| 610 | +literalFromHex "" = [||empty||] |
| 611 | +literalFromHex s = |
| 612 | + case foldr' op (Hex 0 []) s of |
| 613 | + Hex n ws -> liftTyped (unsafePackLenBytes n ws) |
| 614 | + Odd i _ _ -> liftCode $ fail $ "Odd input length: " ++ show (1 + 2 * i) |
| 615 | + Bad i w -> liftCode $ fail $ "Non-hexadecimal character '\\" ++ |
| 616 | + show w ++ "' at offset: " ++ show i |
| 617 | + where |
| 618 | + -- Convert char to decimal digit value if result in [0, 9]. |
| 619 | + -- Otherwise, for hex digits, it remains to: |
| 620 | + -- - fold upper and lower case by masking 0x20, |
| 621 | + -- - subtract another 0x11 (0x41 total), |
| 622 | + -- - check that result in [0,5] |
| 623 | + -- - add 0xa |
| 624 | + -- |
| 625 | + c2d :: Char -> Word |
| 626 | + c2d c = fromIntegral (fromEnum c) - 0x30 |
| 627 | + |
| 628 | + op :: Char -> H2W -> H2W |
| 629 | + op (c2d -> !(d :: Word)) acc |
| 630 | + | d <= 9 = case acc of |
| 631 | + Hex i ws -> Odd i d ws |
| 632 | + Odd i lo ws -> Hex (i+1) $ fromIntegral ((d `shiftL` 4 .|. lo)) : ws |
| 633 | + Bad i w -> Bad (i + 1) w |
| 634 | + | l <- (d .&. complement 0x20) - 0x11 |
| 635 | + , l <= 5 |
| 636 | + , x <- l + 0xa = case acc of |
| 637 | + Hex i ws -> Odd i (l + 0xa) ws |
| 638 | + Odd i lo ws -> Hex (i+ 1) $ fromIntegral (x `shiftL` 4 .|. lo) : ws |
| 639 | + Bad i w -> Bad (i + 1) w |
| 640 | + | otherwise = Bad 0 (d + 0x30) |
| 641 | + |
533 | 642 | -- Unpacking bytestrings into lists efficiently is a tradeoff: on the one hand |
534 | 643 | -- we would like to write a tight loop that just blasts the list into memory, on |
535 | 644 | -- the other hand we want it to be unpacked lazily so we don't end up with a |
|
0 commit comments