--- /dev/null
+{-# LANGUAGE
+ DeriveDataTypeable
+ , DoAndIfThenElse
+ , GeneralizedNewtypeDeriving
+ , OverloadedStrings
+ , RecordWildCards
+ , TemplateHaskell
+ , UnicodeSyntax
+ #-}
+-- |Parsing and printing MIME parameter values
+-- (<http://tools.ietf.org/html/rfc2231>).
+module Network.HTTP.Lucu.MIMEParams
+ ( MIMEParams(..)
+ , printMIMEParams
+ , mimeParams
+ )
+ where
+import Control.Applicative
+import Control.Monad hiding (mapM)
+import Control.Monad.Unicode
+import Data.Ascii (Ascii, CIAscii, AsciiBuilder)
+import qualified Data.Ascii as A
+import Data.Attoparsec.Char8 as P
+import Data.Bits
+import qualified Data.ByteString.Char8 as BS
+import Data.Char
+import Data.Data
+import Data.Foldable
+import Data.Map (Map)
+import qualified Data.Map as M
+import Data.Monoid
+import Data.Monoid.Unicode
+import Data.Sequence (Seq, ViewL(..))
+import qualified Data.Sequence as S
+import Data.Sequence.Unicode hiding ((∅))
+import Data.Text (Text)
+import qualified Data.Text as T
+import Data.Text.Encoding
+import Data.Text.Encoding.Error
+import Data.Traversable
+import Data.Word
+import Language.Haskell.TH.Syntax
+import Network.HTTP.Lucu.Parser.Http
+import Network.HTTP.Lucu.Utils
+import Prelude hiding (concat, mapM, takeWhile)
+import Prelude.Unicode
+
+-- |A map from MIME parameter attributes to values. Attributes are
+-- always case-insensitive according to RFC 2045
+-- (<http://tools.ietf.org/html/rfc2045#section-5.1>).
+newtype MIMEParams
+ = MIMEParams (Map CIAscii Text)
+ deriving (Eq, Show, Read, Monoid, Typeable)
+
+instance Lift MIMEParams where
+ lift (MIMEParams m) = [| MIMEParams $(liftParams m) |]
+ where
+ liftParams ∷ Map CIAscii Text → Q Exp
+ liftParams = liftMap liftCIAscii liftText
+
+-- |Convert MIME parameter values to an 'AsciiBuilder'.
+printMIMEParams ∷ MIMEParams → AsciiBuilder
+{-# INLINEABLE printMIMEParams #-}
+printMIMEParams (MIMEParams m) = M.foldlWithKey f (∅) m
+ -- THINKME: Use foldlWithKey' for newer Data.Map
+ where
+ f ∷ AsciiBuilder → CIAscii → Text → AsciiBuilder
+ {-# INLINE f #-}
+ f ab k v = ab ⊕ A.toAsciiBuilder "; " ⊕ printPair k v
+
+printPair ∷ CIAscii → Text → AsciiBuilder
+{-# INLINEABLE printPair #-}
+printPair name value
+ | T.any (> '\xFF') value
+ = printPairInUTF8 name value
+ | otherwise
+ = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value)
+
+printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder
+{-# INLINEABLE printPairInUTF8 #-}
+printPairInUTF8 name value
+ = A.toAsciiBuilder (A.fromCIAscii name) ⊕
+ A.toAsciiBuilder "*=utf-8''" ⊕
+ escapeUnsafeChars (encodeUtf8 value) (∅)
+
+printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder
+{-# INLINEABLE printPairInAscii #-}
+printPairInAscii name value
+ = A.toAsciiBuilder (A.fromCIAscii name) ⊕
+ A.toAsciiBuilder "=" ⊕
+ if BS.any ((¬) ∘ isToken) (A.toByteString value) then
+ quoteStr value
+ else
+ A.toAsciiBuilder value
+
+escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder
+{-# INLINEABLE escapeUnsafeChars #-}
+escapeUnsafeChars bs b
+ = case BS.uncons bs of
+ Nothing → b
+ Just (c, bs')
+ | isToken c → escapeUnsafeChars bs' $
+ b ⊕ A.toAsciiBuilder (A.unsafeFromString [c])
+ | otherwise → escapeUnsafeChars bs' $
+ b ⊕ toHex (fromIntegral $ fromEnum c)
+
+toHex ∷ Word8 → AsciiBuilder
+{-# INLINEABLE toHex #-}
+toHex o = A.toAsciiBuilder "%" ⊕
+ A.toAsciiBuilder (A.unsafeFromString [ toHex' (o `shiftR` 8)
+ , toHex' (o .&. 0x0F) ])
+ where
+ toHex' ∷ Word8 → Char
+ {-# INLINEABLE toHex' #-}
+ toHex' h
+ | h ≤ 0x09 = toEnum $ fromIntegral
+ $ fromEnum '0' + fromIntegral h
+ | otherwise = toEnum $ fromIntegral
+ $ fromEnum 'A' + fromIntegral (h - 0x0A)
+
+data ExtendedParam
+ = InitialEncodedParam {
+ epName ∷ !CIAscii
+ , epCharset ∷ !CIAscii
+ , epPayload ∷ !BS.ByteString
+ }
+ | ContinuedEncodedParam {
+ epName ∷ !CIAscii
+ , epSection ∷ !Integer
+ , epPayload ∷ !BS.ByteString
+ }
+ | AsciiParam {
+ epName ∷ !CIAscii
+ , epSection ∷ !Integer
+ , apPayload ∷ !Ascii
+ }
+
+section ∷ ExtendedParam → Integer
+{-# INLINE section #-}
+section (InitialEncodedParam {..}) = 0
+section ep = epSection ep
+
+-- |'Parser' for MIME parameter values.
+mimeParams ∷ Parser MIMEParams
+{-# INLINEABLE mimeParams #-}
+mimeParams = decodeParams =≪ P.many (try paramP)
+
+paramP ∷ Parser ExtendedParam
+paramP = do skipMany lws
+ void $ char ';'
+ skipMany lws
+ epm ← nameP
+ void $ char '='
+ case epm of
+ (name, 0, True)
+ → do (charset, payload) ← initialEncodedValue
+ return $ InitialEncodedParam name charset payload
+ (name, sect, True)
+ → do payload ← encodedPayload
+ return $ ContinuedEncodedParam name sect payload
+ (name, sect, False)
+ → do payload ← token <|> quotedStr
+ return $ AsciiParam name sect payload
+
+nameP ∷ Parser (CIAscii, Integer, Bool)
+nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$>
+ takeWhile1 (\c → isToken c ∧ c ≢ '*')
+ sect ← option 0 $ try (char '*' *> decimal )
+ isEncoded ← option False $ try (char '*' *> pure True)
+ return (name, sect, isEncoded)
+
+initialEncodedValue ∷ Parser (CIAscii, BS.ByteString)
+initialEncodedValue
+ = do charset ← metadata
+ void $ char '\''
+ void $ metadata -- Ignore the language tag
+ void $ char '\''
+ payload ← encodedPayload
+ if charset ≡ "" then
+ -- NOTE: I'm not sure this is the right thing, but RFC
+ -- 2231 doesn't tell us what we should do when the
+ -- charset is omitted.
+ return ("US-ASCII", payload)
+ -- FIXME: Rethink about this behaviour.
+ else
+ return (charset, payload)
+ where
+ metadata ∷ Parser CIAscii
+ metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$>
+ takeWhile (\c → c ≢ '\'' ∧ isToken c)
+
+encodedPayload ∷ Parser BS.ByteString
+{-# INLINE encodedPayload #-}
+encodedPayload = BS.concat <$> P.many (hexChar <|> rawChars)
+
+hexChar ∷ Parser BS.ByteString
+{-# INLINEABLE hexChar #-}
+hexChar = do void $ char '%'
+ h ← satisfy isHexChar
+ l ← satisfy isHexChar
+ return $ BS.singleton $ hexToChar h l
+
+isHexChar ∷ Char → Bool
+isHexChar = inClass "0-9a-fA-F"
+
+hexToChar ∷ Char → Char → Char
+{-# INLINE hexToChar #-}
+hexToChar h l
+ = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l
+
+hexToInt ∷ Char → Int
+{-# INLINEABLE hexToInt #-}
+hexToInt c
+ | c ≤ '9' = ord c - ord '0'
+ | c ≤ 'F' = ord c - ord 'A' + 10
+ | otherwise = ord c - ord 'a' + 10
+
+rawChars ∷ Parser BS.ByteString
+{-# INLINE rawChars #-}
+rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%')
+
+decodeParams ∷ (Functor m, Monad m) ⇒ [ExtendedParam] → m MIMEParams
+{-# INLINE decodeParams #-}
+decodeParams = (MIMEParams <$>) ∘ (mapM decodeSections =≪) ∘ sortBySection
+
+sortBySection ∷ Monad m
+ ⇒ [ExtendedParam]
+ → m (Map CIAscii (Map Integer ExtendedParam))
+sortBySection = flip go (∅)
+ where
+ go ∷ Monad m
+ ⇒ [ExtendedParam]
+ → Map CIAscii (Map Integer ExtendedParam)
+ → m (Map CIAscii (Map Integer ExtendedParam))
+ go [] m = return m
+ go (x:xs) m
+ = case M.lookup (epName x) m of
+ Nothing
+ → let s = M.singleton (section x) x
+ m' = M.insert (epName x) s m
+ in
+ go xs m'
+ Just s
+ → case M.lookup (section x) s of
+ Nothing
+ → let s' = M.insert (section x) x s
+ m' = M.insert (epName x) s' m
+ in
+ go xs m'
+ Just _
+ → fail (concat [ "Duplicate section "
+ , show $ section x
+ , " for parameter '"
+ , A.toString $ A.fromCIAscii $ epName x
+ , "'"
+ ])
+
+decodeSections ∷ Monad m ⇒ Map Integer ExtendedParam → m Text
+decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅)
+ where
+ toSeq ∷ Monad m
+ ⇒ Map Integer ExtendedParam
+ → Integer
+ → Seq ExtendedParam
+ → m (Seq ExtendedParam)
+ toSeq m expectedSect sects
+ = case M.minViewWithKey m of
+ Nothing
+ → return sects
+ Just ((sect, p), m')
+ | sect ≡ expectedSect
+ → toSeq m' (expectedSect + 1) (sects ⊳ p)
+ | otherwise
+ → fail (concat [ "Missing section "
+ , show $ section p
+ , " for parameter '"
+ , A.toString $ A.fromCIAscii $ epName p
+ , "'"
+ ])
+
+ decodeSeq ∷ Monad m ⇒ Seq ExtendedParam → m Text
+ decodeSeq sects
+ = case S.viewl sects of
+ EmptyL
+ → fail "decodeSeq: internal error: empty seq"
+ InitialEncodedParam {..} :< xs
+ → do d ← getDecoder epCharset
+ t ← decodeStr d epPayload
+ decodeSeq' (Just d) xs $ S.singleton t
+ ContinuedEncodedParam {..} :< _
+ → fail "decodeSeq: internal error: CEP at section 0"
+ AsciiParam {..} :< xs
+ → let t = A.toText apPayload
+ in
+ decodeSeq' Nothing xs $ S.singleton t
+
+ decodeSeq' ∷ Monad m
+ ⇒ Maybe Decoder
+ → Seq ExtendedParam
+ → Seq Text
+ → m Text
+ decodeSeq' decoder sects chunks
+ = case S.viewl sects of
+ EmptyL
+ → return $ T.concat $ toList chunks
+ InitialEncodedParam {..} :< _
+ → fail "decodeSeq': internal error: IEP at section > 0"
+ ContinuedEncodedParam {..} :< xs
+ → case decoder of
+ Just d
+ → do t ← decodeStr d epPayload
+ decodeSeq' decoder xs $ chunks ⊳ t
+ Nothing
+ → fail (concat [ "Section "
+ , show epSection
+ , " for parameter '"
+ , A.toString $ A.fromCIAscii epName
+ , "' is encoded but its first section is not"
+ ])
+ AsciiParam {..} :< xs
+ → let t = A.toText apPayload
+ in
+ decodeSeq' decoder xs $ chunks ⊳ t
+
+type Decoder = BS.ByteString → Either UnicodeException Text
+
+decodeStr ∷ Monad m ⇒ Decoder → BS.ByteString → m Text
+decodeStr decoder str
+ = case decoder str of
+ Right t → return t
+ Left e → fail $ show e
+
+getDecoder ∷ Monad m ⇒ CIAscii → m Decoder
+getDecoder charset
+ | charset ≡ "UTF-8" = return decodeUtf8'
+ | charset ≡ "US-ASCII" = return decodeUtf8'
+ | otherwise = fail $ "No decoders found for charset: "
+ ⧺ A.toString (A.fromCIAscii charset)