{-# LANGUAGE
DoAndIfThenElse
, OverloadedStrings
+ , RecordWildCards
+ , ScopedTypeVariables
, UnicodeSyntax
#-}
-- |Provide facilities to encode/decode MIME parameter values in
)
where
import Control.Applicative
+import Control.Monad.Unicode
import Data.Ascii (Ascii, CIAscii, AsciiBuilder)
import qualified Data.Ascii as A
import Data.Attoparsec.Char8 as P
import Data.Bits
import qualified Data.ByteString.Char8 as BS
import Data.Char
+import Data.Foldable
import Data.Map (Map)
import qualified Data.Map as M
import Data.Monoid.Unicode
+import qualified Data.Sequence as S
+import Data.Sequence.Unicode hiding ((∅))
import Data.Text (Text)
import qualified Data.Text as T
import Data.Text.Encoding
+import Data.Traversable
import Data.Word
import Network.HTTP.Lucu.Parser.Http
import Network.HTTP.Lucu.Utils
-import Prelude hiding (takeWhile)
+import Prelude hiding (concat, mapM, takeWhile)
import Prelude.Unicode
printParams ∷ Map CIAscii Text → AsciiBuilder
| M.null params = (∅)
| otherwise = A.toAsciiBuilder "; " ⊕
joinWith "; " (map printPair $ M.toList params)
- where
- printPair ∷ (CIAscii, Text) → AsciiBuilder
- printPair (name, value)
- | T.any (> '\xFF') value
- = printPairInUTF8 name value
- | otherwise
- = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value)
-
- printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder
- printPairInUTF8 name value
- = A.toAsciiBuilder (A.fromCIAscii name) ⊕
- A.toAsciiBuilder "*=utf-8''" ⊕
- escapeUnsafeChars (encodeUtf8 value) (∅)
-
- printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder
- printPairInAscii name value
- = A.toAsciiBuilder (A.fromCIAscii name) ⊕
- A.toAsciiBuilder "=" ⊕
- if BS.any ((¬) ∘ isToken) (A.toByteString value) then
- quoteStr value
- else
- A.toAsciiBuilder value
-
- escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder
- escapeUnsafeChars bs b
- = case BS.uncons bs of
- Nothing → b
- Just (c, bs')
- | isToken c → escapeUnsafeChars bs' $
- b ⊕ A.toAsciiBuilder (A.unsafeFromString [c])
- | otherwise → escapeUnsafeChars bs' $
- b ⊕ toHex (fromIntegral $ fromEnum c)
-
- toHex ∷ Word8 → AsciiBuilder
- toHex o = A.toAsciiBuilder "%" ⊕
- A.toAsciiBuilder (A.unsafeFromString [ toHex' (o `shiftR` 8)
- , toHex' (o .&. 0x0F) ])
-
- toHex' ∷ Word8 → Char
- toHex' o
- | o ≤ 0x09 = toEnum $ fromIntegral $ fromEnum '0' + fromIntegral o
- | otherwise = toEnum $ fromIntegral $ fromEnum 'A' + fromIntegral (o - 0x0A)
+
+printPair ∷ (CIAscii, Text) → AsciiBuilder
+printPair (name, value)
+ | T.any (> '\xFF') value
+ = printPairInUTF8 name value
+ | otherwise
+ = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value)
+
+printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder
+printPairInUTF8 name value
+ = A.toAsciiBuilder (A.fromCIAscii name) ⊕
+ A.toAsciiBuilder "*=utf-8''" ⊕
+ escapeUnsafeChars (encodeUtf8 value) (∅)
+
+printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder
+printPairInAscii name value
+ = A.toAsciiBuilder (A.fromCIAscii name) ⊕
+ A.toAsciiBuilder "=" ⊕
+ if BS.any ((¬) ∘ isToken) (A.toByteString value) then
+ quoteStr value
+ else
+ A.toAsciiBuilder value
+
+escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder
+escapeUnsafeChars bs b
+ = case BS.uncons bs of
+ Nothing → b
+ Just (c, bs')
+ | isToken c → escapeUnsafeChars bs' $
+ b ⊕ A.toAsciiBuilder (A.unsafeFromString [c])
+ | otherwise → escapeUnsafeChars bs' $
+ b ⊕ toHex (fromIntegral $ fromEnum c)
+
+toHex ∷ Word8 → AsciiBuilder
+toHex o = A.toAsciiBuilder "%" ⊕
+ A.toAsciiBuilder (A.unsafeFromString [ toHex' (o `shiftR` 8)
+ , toHex' (o .&. 0x0F) ])
+
+toHex' ∷ Word8 → Char
+toHex' o
+ | o ≤ 0x09 = toEnum $ fromIntegral $ fromEnum '0' + fromIntegral o
+ | otherwise = toEnum $ fromIntegral $ fromEnum 'A' + fromIntegral (o - 0x0A)
data ExtendedParam
, apPayload ∷ !Ascii
}
+section ∷ ExtendedParam → Integer
+section (InitialEncodedParam {..}) = 0
+section ep = epSection ep
+
paramsP ∷ Parser (Map CIAscii Text)
-paramsP = decodeParams <$> P.many (try paramP)
+paramsP = decodeParams =≪ P.many (try paramP)
+
+paramP ∷ Parser ExtendedParam
+paramP = do skipMany lws
+ _ ← char ';'
+ skipMany lws
+ epm ← nameP
+ _ ← char '='
+ case epm of
+ (name, 0, True)
+ → do (charset, payload) ← initialEncodedValue
+ return $ InitialEncodedParam name charset payload
+ (name, sect, True)
+ → do payload ← encodedPayload
+ return $ ContinuedEncodedParam name sect payload
+ (name, sect, False)
+ → do payload ← token <|> quotedStr
+ return $ AsciiParam name sect payload
+
+nameP ∷ Parser (CIAscii, Integer, Bool)
+nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$>
+ takeWhile1 (\c → isToken c ∧ c ≢ '*')
+ sect ← option 0 $
+ try $
+ do _ ← char '*'
+ n ← decimal
+ return n
+ isEncoded ← option False $
+ do _ ← char '*'
+ return True
+ return (name, sect, isEncoded)
+
+initialEncodedValue ∷ Parser (CIAscii, BS.ByteString)
+initialEncodedValue = do charset ← metadata
+ _ ← char '\''
+ _ ← metadata -- Ignore the language tag
+ _ ← char '\''
+ payload ← encodedPayload
+ return (charset, payload)
+ where
+ metadata ∷ Parser CIAscii
+ metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$>
+ takeWhile (\c → isToken c ∧ c ≢ '\'')
+
+encodedPayload ∷ Parser BS.ByteString
+encodedPayload = BS.concat <$> P.many (hexChar <|> rawChars)
+
+hexChar ∷ Parser BS.ByteString
+hexChar = do _ ← char '%'
+ h ← satisfy isHexChar
+ l ← satisfy isHexChar
+ return $ BS.singleton $ hexToChar h l
+
+isHexChar ∷ Char → Bool
+isHexChar = inClass "0-9a-fA-F"
+
+hexToChar ∷ Char → Char → Char
+hexToChar h l
+ = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l
+
+hexToInt ∷ Char → Int
+hexToInt c
+ | c ≤ '9' = ord c - ord '0'
+ | c ≤ 'F' = ord c - ord 'A' + 10
+ | otherwise = ord c - ord 'a' + 10
+
+rawChars ∷ Parser BS.ByteString
+rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%')
+
+decodeParams ∷ Monad m ⇒ [ExtendedParam] → m (Map CIAscii Text)
+decodeParams = (mapM decodeSections =≪) ∘ sortBySection
+
+sortBySection ∷ ∀m. Monad m
+ ⇒ [ExtendedParam]
+ → m (Map CIAscii (Map Integer ExtendedParam))
+sortBySection = flip go (∅)
+ where
+ go ∷ [ExtendedParam]
+ → Map CIAscii (Map Integer ExtendedParam)
+ → m (Map CIAscii (Map Integer ExtendedParam))
+ go [] m = return m
+ go (x:xs) m
+ = case M.lookup (epName x) m of
+ Nothing
+ → let s = M.singleton (section x) x
+ m' = M.insert (epName x) s m
+ in
+ go xs m'
+ Just s
+ → case M.insertLookupWithKey (\_ s' _ → s') (section x) x s of
+ (Nothing, s')
+ → let m' = M.insert (epName x) s' m
+ in
+ go xs m'
+ (Just _, _)
+ → fail (concat [ "Duplicate section "
+ , show $ section x
+ , " for parameter '"
+ , A.toString $ A.fromCIAscii $ epName x
+ , "'"
+ ])
+
+decodeSections ∷ ∀m. Monad m ⇒ Map Integer ExtendedParam → m Text
+decodeSections = flip (flip go 0) (∅)
where
- paramP ∷ Parser ExtendedParam
- paramP = do skipMany lws
- _ ← char ';'
- skipMany lws
- epm ← nameP
- _ ← char '='
- case epm of
- (name, 0, True)
- → do (charset, payload) ← initialEncodedValue
- return $ InitialEncodedParam name charset payload
- (name, section, True)
- → do payload ← encodedPayload
- return $ ContinuedEncodedParam name section payload
- (name, section, False)
- → do payload ← token <|> quotedStr
- return $ AsciiParam name section payload
-
- nameP ∷ Parser (CIAscii, Integer, Bool)
- nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$>
- takeWhile1 (\c → isToken c ∧ c ≢ '*')
- section ← option 0 $
- try $
- do _ ← char '*'
- n ← decimal
- return n
- isEncoded ← option False $
- do _ ← char '*'
- return True
- return (name, section, isEncoded)
-
- initialEncodedValue ∷ Parser (CIAscii, BS.ByteString)
- initialEncodedValue = do charset ← metadata
- _ ← char '\''
- _ ← metadata -- Ignore the language tag
- _ ← char '\''
- payload ← encodedPayload
- return (charset, payload)
- where
- metadata ∷ Parser CIAscii
- metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$>
- takeWhile (\c → isToken c ∧ c ≢ '\'')
-
- encodedPayload ∷ Parser BS.ByteString
- encodedPayload = BS.concat <$> P.many (hexChar <|> literal)
- where
- hexChar ∷ Parser BS.ByteString
- hexChar = do _ ← char '%'
- h ← satisfy isHexChar
- l ← satisfy isHexChar
- return $ BS.singleton $ hexToChar h l
-
- isHexChar ∷ Char → Bool
- isHexChar = inClass "0-9a-fA-F"
-
- hexToChar ∷ Char → Char → Char
- hexToChar h l
- = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l
-
- hexToInt ∷ Char → Int
- hexToInt c
- | c ≤ '9' = ord c - ord '0'
- | c ≤ 'F' = ord c - ord 'A' + 10
- | otherwise = ord c - ord 'a' + 10
-
- literal ∷ Parser BS.ByteString
- literal = takeWhile1 (\c → isToken c ∧ c ≢ '%')
-
- decodeParams ∷ [ExtendedParam] → Map CIAscii Text
- decodeParams = error "FIXME"
+ go ∷ Map Integer ExtendedParam → Integer → S.Seq Text → m Text
+ go m expectedSect chunks
+ = case M.minViewWithKey m of
+ Nothing
+ → return $ T.concat $ toList chunks
+ Just ((sect, p), m')
+ | sect ≡ expectedSect
+ → error "FIXME"
+ | otherwise
+ → fail (concat [ "Missing section "
+ , show $ section p
+ , " for parameter '"
+ , A.toString $ A.fromCIAscii $ epName p
+ , "'"
+ ])