X-Git-Url: http://git.cielonegro.org/gitweb.cgi?p=Lucu.git;a=blobdiff_plain;f=Network%2FHTTP%2FLucu%2FMIMEParams.hs;fp=Network%2FHTTP%2FLucu%2FMIMEParams.hs;h=b3edeb5836745779d8f2820eae92b90d19f98fcf;hp=0000000000000000000000000000000000000000;hb=5e561403ba8ad9c440cc2bf2bacb61ebc3c7a111;hpb=7eed467cbc7ed48c1b88766f0c7eb6bb77be09ef diff --git a/Network/HTTP/Lucu/MIMEParams.hs b/Network/HTTP/Lucu/MIMEParams.hs new file mode 100644 index 0000000..b3edeb5 --- /dev/null +++ b/Network/HTTP/Lucu/MIMEParams.hs @@ -0,0 +1,338 @@ +{-# LANGUAGE + DeriveDataTypeable + , DoAndIfThenElse + , GeneralizedNewtypeDeriving + , OverloadedStrings + , RecordWildCards + , TemplateHaskell + , UnicodeSyntax + #-} +-- |Parsing and printing MIME parameter values +-- (). +module Network.HTTP.Lucu.MIMEParams + ( MIMEParams(..) + , printMIMEParams + , mimeParams + ) + where +import Control.Applicative +import Control.Monad hiding (mapM) +import Control.Monad.Unicode +import Data.Ascii (Ascii, CIAscii, AsciiBuilder) +import qualified Data.Ascii as A +import Data.Attoparsec.Char8 as P +import Data.Bits +import qualified Data.ByteString.Char8 as BS +import Data.Char +import Data.Data +import Data.Foldable +import Data.Map (Map) +import qualified Data.Map as M +import Data.Monoid +import Data.Monoid.Unicode +import Data.Sequence (Seq, ViewL(..)) +import qualified Data.Sequence as S +import Data.Sequence.Unicode hiding ((∅)) +import Data.Text (Text) +import qualified Data.Text as T +import Data.Text.Encoding +import Data.Text.Encoding.Error +import Data.Traversable +import Data.Word +import Language.Haskell.TH.Syntax +import Network.HTTP.Lucu.Parser.Http +import Network.HTTP.Lucu.Utils +import Prelude hiding (concat, mapM, takeWhile) +import Prelude.Unicode + +-- |A map from MIME parameter attributes to values. Attributes are +-- always case-insensitive according to RFC 2045 +-- (). +newtype MIMEParams + = MIMEParams (Map CIAscii Text) + deriving (Eq, Show, Read, Monoid, Typeable) + +instance Lift MIMEParams where + lift (MIMEParams m) = [| MIMEParams $(liftParams m) |] + where + liftParams ∷ Map CIAscii Text → Q Exp + liftParams = liftMap liftCIAscii liftText + +-- |Convert MIME parameter values to an 'AsciiBuilder'. +printMIMEParams ∷ MIMEParams → AsciiBuilder +{-# INLINEABLE printMIMEParams #-} +printMIMEParams (MIMEParams m) = M.foldlWithKey f (∅) m + -- THINKME: Use foldlWithKey' for newer Data.Map + where + f ∷ AsciiBuilder → CIAscii → Text → AsciiBuilder + {-# INLINE f #-} + f ab k v = ab ⊕ A.toAsciiBuilder "; " ⊕ printPair k v + +printPair ∷ CIAscii → Text → AsciiBuilder +{-# INLINEABLE printPair #-} +printPair name value + | T.any (> '\xFF') value + = printPairInUTF8 name value + | otherwise + = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value) + +printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder +{-# INLINEABLE printPairInUTF8 #-} +printPairInUTF8 name value + = A.toAsciiBuilder (A.fromCIAscii name) ⊕ + A.toAsciiBuilder "*=utf-8''" ⊕ + escapeUnsafeChars (encodeUtf8 value) (∅) + +printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder +{-# INLINEABLE printPairInAscii #-} +printPairInAscii name value + = A.toAsciiBuilder (A.fromCIAscii name) ⊕ + A.toAsciiBuilder "=" ⊕ + if BS.any ((¬) ∘ isToken) (A.toByteString value) then + quoteStr value + else + A.toAsciiBuilder value + +escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder +{-# INLINEABLE escapeUnsafeChars #-} +escapeUnsafeChars bs b + = case BS.uncons bs of + Nothing → b + Just (c, bs') + | isToken c → escapeUnsafeChars bs' $ + b ⊕ A.toAsciiBuilder (A.unsafeFromString [c]) + | otherwise → escapeUnsafeChars bs' $ + b ⊕ toHex (fromIntegral $ fromEnum c) + +toHex ∷ Word8 → AsciiBuilder +{-# INLINEABLE toHex #-} +toHex o = A.toAsciiBuilder "%" ⊕ + A.toAsciiBuilder (A.unsafeFromString [ toHex' (o `shiftR` 8) + , toHex' (o .&. 0x0F) ]) + where + toHex' ∷ Word8 → Char + {-# INLINEABLE toHex' #-} + toHex' h + | h ≤ 0x09 = toEnum $ fromIntegral + $ fromEnum '0' + fromIntegral h + | otherwise = toEnum $ fromIntegral + $ fromEnum 'A' + fromIntegral (h - 0x0A) + +data ExtendedParam + = InitialEncodedParam { + epName ∷ !CIAscii + , epCharset ∷ !CIAscii + , epPayload ∷ !BS.ByteString + } + | ContinuedEncodedParam { + epName ∷ !CIAscii + , epSection ∷ !Integer + , epPayload ∷ !BS.ByteString + } + | AsciiParam { + epName ∷ !CIAscii + , epSection ∷ !Integer + , apPayload ∷ !Ascii + } + +section ∷ ExtendedParam → Integer +{-# INLINE section #-} +section (InitialEncodedParam {..}) = 0 +section ep = epSection ep + +-- |'Parser' for MIME parameter values. +mimeParams ∷ Parser MIMEParams +{-# INLINEABLE mimeParams #-} +mimeParams = decodeParams =≪ P.many (try paramP) + +paramP ∷ Parser ExtendedParam +paramP = do skipMany lws + void $ char ';' + skipMany lws + epm ← nameP + void $ char '=' + case epm of + (name, 0, True) + → do (charset, payload) ← initialEncodedValue + return $ InitialEncodedParam name charset payload + (name, sect, True) + → do payload ← encodedPayload + return $ ContinuedEncodedParam name sect payload + (name, sect, False) + → do payload ← token <|> quotedStr + return $ AsciiParam name sect payload + +nameP ∷ Parser (CIAscii, Integer, Bool) +nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$> + takeWhile1 (\c → isToken c ∧ c ≢ '*') + sect ← option 0 $ try (char '*' *> decimal ) + isEncoded ← option False $ try (char '*' *> pure True) + return (name, sect, isEncoded) + +initialEncodedValue ∷ Parser (CIAscii, BS.ByteString) +initialEncodedValue + = do charset ← metadata + void $ char '\'' + void $ metadata -- Ignore the language tag + void $ char '\'' + payload ← encodedPayload + if charset ≡ "" then + -- NOTE: I'm not sure this is the right thing, but RFC + -- 2231 doesn't tell us what we should do when the + -- charset is omitted. + return ("US-ASCII", payload) + -- FIXME: Rethink about this behaviour. + else + return (charset, payload) + where + metadata ∷ Parser CIAscii + metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$> + takeWhile (\c → c ≢ '\'' ∧ isToken c) + +encodedPayload ∷ Parser BS.ByteString +{-# INLINE encodedPayload #-} +encodedPayload = BS.concat <$> P.many (hexChar <|> rawChars) + +hexChar ∷ Parser BS.ByteString +{-# INLINEABLE hexChar #-} +hexChar = do void $ char '%' + h ← satisfy isHexChar + l ← satisfy isHexChar + return $ BS.singleton $ hexToChar h l + +isHexChar ∷ Char → Bool +isHexChar = inClass "0-9a-fA-F" + +hexToChar ∷ Char → Char → Char +{-# INLINE hexToChar #-} +hexToChar h l + = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l + +hexToInt ∷ Char → Int +{-# INLINEABLE hexToInt #-} +hexToInt c + | c ≤ '9' = ord c - ord '0' + | c ≤ 'F' = ord c - ord 'A' + 10 + | otherwise = ord c - ord 'a' + 10 + +rawChars ∷ Parser BS.ByteString +{-# INLINE rawChars #-} +rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%') + +decodeParams ∷ (Functor m, Monad m) ⇒ [ExtendedParam] → m MIMEParams +{-# INLINE decodeParams #-} +decodeParams = (MIMEParams <$>) ∘ (mapM decodeSections =≪) ∘ sortBySection + +sortBySection ∷ Monad m + ⇒ [ExtendedParam] + → m (Map CIAscii (Map Integer ExtendedParam)) +sortBySection = flip go (∅) + where + go ∷ Monad m + ⇒ [ExtendedParam] + → Map CIAscii (Map Integer ExtendedParam) + → m (Map CIAscii (Map Integer ExtendedParam)) + go [] m = return m + go (x:xs) m + = case M.lookup (epName x) m of + Nothing + → let s = M.singleton (section x) x + m' = M.insert (epName x) s m + in + go xs m' + Just s + → case M.lookup (section x) s of + Nothing + → let s' = M.insert (section x) x s + m' = M.insert (epName x) s' m + in + go xs m' + Just _ + → fail (concat [ "Duplicate section " + , show $ section x + , " for parameter '" + , A.toString $ A.fromCIAscii $ epName x + , "'" + ]) + +decodeSections ∷ Monad m ⇒ Map Integer ExtendedParam → m Text +decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅) + where + toSeq ∷ Monad m + ⇒ Map Integer ExtendedParam + → Integer + → Seq ExtendedParam + → m (Seq ExtendedParam) + toSeq m expectedSect sects + = case M.minViewWithKey m of + Nothing + → return sects + Just ((sect, p), m') + | sect ≡ expectedSect + → toSeq m' (expectedSect + 1) (sects ⊳ p) + | otherwise + → fail (concat [ "Missing section " + , show $ section p + , " for parameter '" + , A.toString $ A.fromCIAscii $ epName p + , "'" + ]) + + decodeSeq ∷ Monad m ⇒ Seq ExtendedParam → m Text + decodeSeq sects + = case S.viewl sects of + EmptyL + → fail "decodeSeq: internal error: empty seq" + InitialEncodedParam {..} :< xs + → do d ← getDecoder epCharset + t ← decodeStr d epPayload + decodeSeq' (Just d) xs $ S.singleton t + ContinuedEncodedParam {..} :< _ + → fail "decodeSeq: internal error: CEP at section 0" + AsciiParam {..} :< xs + → let t = A.toText apPayload + in + decodeSeq' Nothing xs $ S.singleton t + + decodeSeq' ∷ Monad m + ⇒ Maybe Decoder + → Seq ExtendedParam + → Seq Text + → m Text + decodeSeq' decoder sects chunks + = case S.viewl sects of + EmptyL + → return $ T.concat $ toList chunks + InitialEncodedParam {..} :< _ + → fail "decodeSeq': internal error: IEP at section > 0" + ContinuedEncodedParam {..} :< xs + → case decoder of + Just d + → do t ← decodeStr d epPayload + decodeSeq' decoder xs $ chunks ⊳ t + Nothing + → fail (concat [ "Section " + , show epSection + , " for parameter '" + , A.toString $ A.fromCIAscii epName + , "' is encoded but its first section is not" + ]) + AsciiParam {..} :< xs + → let t = A.toText apPayload + in + decodeSeq' decoder xs $ chunks ⊳ t + +type Decoder = BS.ByteString → Either UnicodeException Text + +decodeStr ∷ Monad m ⇒ Decoder → BS.ByteString → m Text +decodeStr decoder str + = case decoder str of + Right t → return t + Left e → fail $ show e + +getDecoder ∷ Monad m ⇒ CIAscii → m Decoder +getDecoder charset + | charset ≡ "UTF-8" = return decodeUtf8' + | charset ≡ "US-ASCII" = return decodeUtf8' + | otherwise = fail $ "No decoders found for charset: " + ⧺ A.toString (A.fromCIAscii charset)