{-# LANGUAGE DeriveDataTypeable , DoAndIfThenElse , GeneralizedNewtypeDeriving , OverloadedStrings , RecordWildCards , TemplateHaskell , UnicodeSyntax #-} -- |Parsing and printing MIME parameter values -- (). module Network.HTTP.Lucu.MIMEParams ( MIMEParams(..) , printMIMEParams , mimeParams ) where import Control.Applicative import Control.Monad hiding (mapM) import Control.Monad.Unicode import Data.Ascii (Ascii, CIAscii, AsciiBuilder) import qualified Data.Ascii as A import Data.Attoparsec.Char8 as P import Data.Bits import qualified Data.ByteString.Char8 as BS import Data.Char import Data.Data import Data.Foldable import Data.Map (Map) import qualified Data.Map as M import Data.Monoid import Data.Monoid.Unicode import Data.Sequence (Seq, ViewL(..)) import qualified Data.Sequence as S import Data.Sequence.Unicode hiding ((∅)) import Data.Text (Text) import qualified Data.Text as T import Data.Text.Encoding import Data.Text.Encoding.Error import Data.Traversable import Data.Word import Language.Haskell.TH.Syntax import Network.HTTP.Lucu.Parser.Http import Network.HTTP.Lucu.Utils import Prelude hiding (concat, mapM, takeWhile) import Prelude.Unicode -- |A 'Map' from MIME parameter attributes to values. Attributes are -- always case-insensitive according to RFC 2045 -- (). newtype MIMEParams = MIMEParams (Map CIAscii Text) deriving (Eq, Show, Read, Monoid, Typeable) instance Lift MIMEParams where lift (MIMEParams m) = [| MIMEParams $(liftParams m) |] where liftParams ∷ Map CIAscii Text → Q Exp liftParams = liftMap liftCIAscii liftText -- |Convert MIME parameter values to an 'AsciiBuilder'. printMIMEParams ∷ MIMEParams → AsciiBuilder {-# INLINEABLE printMIMEParams #-} printMIMEParams (MIMEParams m) = M.foldlWithKey f (∅) m -- THINKME: Use foldlWithKey' for newer Data.Map where f ∷ AsciiBuilder → CIAscii → Text → AsciiBuilder {-# INLINE f #-} f ab k v = ab ⊕ A.toAsciiBuilder "; " ⊕ printPair k v printPair ∷ CIAscii → Text → AsciiBuilder {-# INLINEABLE printPair #-} printPair name value | T.any (> '\xFF') value = printPairInUTF8 name value | otherwise = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value) printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder {-# INLINEABLE printPairInUTF8 #-} printPairInUTF8 name value = A.toAsciiBuilder (A.fromCIAscii name) ⊕ A.toAsciiBuilder "*=utf-8''" ⊕ escapeUnsafeChars (encodeUtf8 value) (∅) printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder {-# INLINEABLE printPairInAscii #-} printPairInAscii name value = A.toAsciiBuilder (A.fromCIAscii name) ⊕ A.toAsciiBuilder "=" ⊕ if BS.any ((¬) ∘ isToken) (A.toByteString value) then quoteStr value else A.toAsciiBuilder value escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder {-# INLINEABLE escapeUnsafeChars #-} escapeUnsafeChars bs b = case BS.uncons bs of Nothing → b Just (c, bs') | isToken c → escapeUnsafeChars bs' $ b ⊕ A.toAsciiBuilder (A.unsafeFromString [c]) | otherwise → escapeUnsafeChars bs' $ b ⊕ toHex (fromIntegral $ fromEnum c) toHex ∷ Word8 → AsciiBuilder {-# INLINEABLE toHex #-} toHex o = A.toAsciiBuilder "%" ⊕ A.toAsciiBuilder (A.unsafeFromString [ toHex' (o `shiftR` 8) , toHex' (o .&. 0x0F) ]) where toHex' ∷ Word8 → Char {-# INLINEABLE toHex' #-} toHex' h | h ≤ 0x09 = toEnum $ fromIntegral $ fromEnum '0' + fromIntegral h | otherwise = toEnum $ fromIntegral $ fromEnum 'A' + fromIntegral (h - 0x0A) data ExtendedParam = InitialEncodedParam { epName ∷ !CIAscii , epCharset ∷ !CIAscii , epPayload ∷ !BS.ByteString } | ContinuedEncodedParam { epName ∷ !CIAscii , epSection ∷ !Integer , epPayload ∷ !BS.ByteString } | AsciiParam { epName ∷ !CIAscii , epSection ∷ !Integer , apPayload ∷ !Ascii } section ∷ ExtendedParam → Integer {-# INLINE section #-} section (InitialEncodedParam {..}) = 0 section ep = epSection ep -- |'Parser' for MIME parameter values. mimeParams ∷ Parser MIMEParams {-# INLINEABLE mimeParams #-} mimeParams = decodeParams =≪ P.many (try paramP) paramP ∷ Parser ExtendedParam paramP = do skipMany lws void $ char ';' skipMany lws epm ← nameP void $ char '=' case epm of (name, 0, True) → do (charset, payload) ← initialEncodedValue return $ InitialEncodedParam name charset payload (name, sect, True) → do payload ← encodedPayload return $ ContinuedEncodedParam name sect payload (name, sect, False) → do payload ← token <|> quotedStr return $ AsciiParam name sect payload nameP ∷ Parser (CIAscii, Integer, Bool) nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$> takeWhile1 (\c → isToken c ∧ c ≢ '*') sect ← option 0 $ try (char '*' *> decimal ) isEncoded ← option False $ try (char '*' *> pure True) return (name, sect, isEncoded) initialEncodedValue ∷ Parser (CIAscii, BS.ByteString) initialEncodedValue = do charset ← metadata void $ char '\'' void $ metadata -- Ignore the language tag void $ char '\'' payload ← encodedPayload if charset ≡ "" then -- NOTE: I'm not sure this is the right thing, but RFC -- 2231 doesn't tell us what we should do when the -- charset is omitted. return ("US-ASCII", payload) -- FIXME: Rethink about this behaviour. else return (charset, payload) where metadata ∷ Parser CIAscii metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$> takeWhile (\c → c ≢ '\'' ∧ isToken c) encodedPayload ∷ Parser BS.ByteString {-# INLINE encodedPayload #-} encodedPayload = BS.concat <$> P.many (hexChar <|> rawChars) hexChar ∷ Parser BS.ByteString {-# INLINEABLE hexChar #-} hexChar = do void $ char '%' h ← satisfy isHexChar l ← satisfy isHexChar return $ BS.singleton $ hexToChar h l isHexChar ∷ Char → Bool isHexChar = inClass "0-9a-fA-F" hexToChar ∷ Char → Char → Char {-# INLINE hexToChar #-} hexToChar h l = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l hexToInt ∷ Char → Int {-# INLINEABLE hexToInt #-} hexToInt c | c ≤ '9' = ord c - ord '0' | c ≤ 'F' = ord c - ord 'A' + 10 | otherwise = ord c - ord 'a' + 10 rawChars ∷ Parser BS.ByteString {-# INLINE rawChars #-} rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%') decodeParams ∷ (Functor m, Monad m) ⇒ [ExtendedParam] → m MIMEParams {-# INLINE decodeParams #-} decodeParams = (MIMEParams <$>) ∘ (mapM decodeSections =≪) ∘ sortBySection sortBySection ∷ Monad m ⇒ [ExtendedParam] → m (Map CIAscii (Map Integer ExtendedParam)) sortBySection = flip go (∅) where go ∷ Monad m ⇒ [ExtendedParam] → Map CIAscii (Map Integer ExtendedParam) → m (Map CIAscii (Map Integer ExtendedParam)) go [] m = return m go (x:xs) m = case M.lookup (epName x) m of Nothing → let s = M.singleton (section x) x m' = M.insert (epName x) s m in go xs m' Just s → case M.lookup (section x) s of Nothing → let s' = M.insert (section x) x s m' = M.insert (epName x) s' m in go xs m' Just _ → fail (concat [ "Duplicate section " , show $ section x , " for parameter '" , A.toString $ A.fromCIAscii $ epName x , "'" ]) decodeSections ∷ Monad m ⇒ Map Integer ExtendedParam → m Text decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅) where toSeq ∷ Monad m ⇒ Map Integer ExtendedParam → Integer → Seq ExtendedParam → m (Seq ExtendedParam) toSeq m expectedSect sects = case M.minViewWithKey m of Nothing → return sects Just ((sect, p), m') | sect ≡ expectedSect → toSeq m' (expectedSect + 1) (sects ⊳ p) | otherwise → fail (concat [ "Missing section " , show $ section p , " for parameter '" , A.toString $ A.fromCIAscii $ epName p , "'" ]) decodeSeq ∷ Monad m ⇒ Seq ExtendedParam → m Text decodeSeq sects = case S.viewl sects of EmptyL → fail "decodeSeq: internal error: empty seq" InitialEncodedParam {..} :< xs → do d ← getDecoder epCharset t ← decodeStr d epPayload decodeSeq' (Just d) xs $ S.singleton t ContinuedEncodedParam {..} :< _ → fail "decodeSeq: internal error: CEP at section 0" AsciiParam {..} :< xs → let t = A.toText apPayload in decodeSeq' Nothing xs $ S.singleton t decodeSeq' ∷ Monad m ⇒ Maybe Decoder → Seq ExtendedParam → Seq Text → m Text decodeSeq' decoder sects chunks = case S.viewl sects of EmptyL → return $ T.concat $ toList chunks InitialEncodedParam {..} :< _ → fail "decodeSeq': internal error: IEP at section > 0" ContinuedEncodedParam {..} :< xs → case decoder of Just d → do t ← decodeStr d epPayload decodeSeq' decoder xs $ chunks ⊳ t Nothing → fail (concat [ "Section " , show epSection , " for parameter '" , A.toString $ A.fromCIAscii epName , "' is encoded but its first section is not" ]) AsciiParam {..} :< xs → let t = A.toText apPayload in decodeSeq' decoder xs $ chunks ⊳ t type Decoder = BS.ByteString → Either UnicodeException Text decodeStr ∷ Monad m ⇒ Decoder → BS.ByteString → m Text decodeStr decoder str = case decoder str of Right t → return t Left e → fail $ show e getDecoder ∷ Monad m ⇒ CIAscii → m Decoder getDecoder charset | charset ≡ "UTF-8" = return decodeUtf8' | charset ≡ "US-ASCII" = return decodeUtf8' | otherwise = fail $ "No decoders found for charset: " ⧺ A.toString (A.fromCIAscii charset)