{-# LANGUAGE DoAndIfThenElse , OverloadedStrings , RecordWildCards , ScopedTypeVariables , UnicodeSyntax #-} -- |Provide facilities to encode/decode MIME parameter values in -- character sets other than US-ASCII. See: -- http://www.faqs.org/rfcs/rfc2231.html module Network.HTTP.Lucu.RFC2231 ( printParams , paramsP ) where import Control.Applicative import Control.Monad.Unicode import Data.Ascii (Ascii, CIAscii, AsciiBuilder) import qualified Data.Ascii as A import Data.Attoparsec.Char8 as P import Data.Bits import qualified Data.ByteString.Char8 as BS import Data.Char import Data.Foldable import Data.Map (Map) import qualified Data.Map as M import Data.Monoid.Unicode import qualified Data.Sequence as S import Data.Sequence.Unicode hiding ((∅)) import Data.Text (Text) import qualified Data.Text as T import Data.Text.Encoding import Data.Traversable import Data.Word import Network.HTTP.Lucu.Parser.Http import Network.HTTP.Lucu.Utils import Prelude hiding (concat, mapM, takeWhile) import Prelude.Unicode printParams ∷ Map CIAscii Text → AsciiBuilder printParams params | M.null params = (∅) | otherwise = A.toAsciiBuilder "; " ⊕ joinWith "; " (map printPair $ M.toList params) printPair ∷ (CIAscii, Text) → AsciiBuilder printPair (name, value) | T.any (> '\xFF') value = printPairInUTF8 name value | otherwise = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value) printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder printPairInUTF8 name value = A.toAsciiBuilder (A.fromCIAscii name) ⊕ A.toAsciiBuilder "*=utf-8''" ⊕ escapeUnsafeChars (encodeUtf8 value) (∅) printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder printPairInAscii name value = A.toAsciiBuilder (A.fromCIAscii name) ⊕ A.toAsciiBuilder "=" ⊕ if BS.any ((¬) ∘ isToken) (A.toByteString value) then quoteStr value else A.toAsciiBuilder value escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder escapeUnsafeChars bs b = case BS.uncons bs of Nothing → b Just (c, bs') | isToken c → escapeUnsafeChars bs' $ b ⊕ A.toAsciiBuilder (A.unsafeFromString [c]) | otherwise → escapeUnsafeChars bs' $ b ⊕ toHex (fromIntegral $ fromEnum c) toHex ∷ Word8 → AsciiBuilder toHex o = A.toAsciiBuilder "%" ⊕ A.toAsciiBuilder (A.unsafeFromString [ toHex' (o `shiftR` 8) , toHex' (o .&. 0x0F) ]) toHex' ∷ Word8 → Char toHex' o | o ≤ 0x09 = toEnum $ fromIntegral $ fromEnum '0' + fromIntegral o | otherwise = toEnum $ fromIntegral $ fromEnum 'A' + fromIntegral (o - 0x0A) data ExtendedParam = InitialEncodedParam { epName ∷ !CIAscii , epCharset ∷ !CIAscii , epPayload ∷ !BS.ByteString } | ContinuedEncodedParam { epName ∷ !CIAscii , epSection ∷ !Integer , epPayload ∷ !BS.ByteString } | AsciiParam { epName ∷ !CIAscii , epSection ∷ !Integer , apPayload ∷ !Ascii } section ∷ ExtendedParam → Integer section (InitialEncodedParam {..}) = 0 section ep = epSection ep paramsP ∷ Parser (Map CIAscii Text) paramsP = decodeParams =≪ P.many (try paramP) paramP ∷ Parser ExtendedParam paramP = do skipMany lws _ ← char ';' skipMany lws epm ← nameP _ ← char '=' case epm of (name, 0, True) → do (charset, payload) ← initialEncodedValue return $ InitialEncodedParam name charset payload (name, sect, True) → do payload ← encodedPayload return $ ContinuedEncodedParam name sect payload (name, sect, False) → do payload ← token <|> quotedStr return $ AsciiParam name sect payload nameP ∷ Parser (CIAscii, Integer, Bool) nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$> takeWhile1 (\c → isToken c ∧ c ≢ '*') sect ← option 0 $ try $ do _ ← char '*' n ← decimal return n isEncoded ← option False $ do _ ← char '*' return True return (name, sect, isEncoded) initialEncodedValue ∷ Parser (CIAscii, BS.ByteString) initialEncodedValue = do charset ← metadata _ ← char '\'' _ ← metadata -- Ignore the language tag _ ← char '\'' payload ← encodedPayload return (charset, payload) where metadata ∷ Parser CIAscii metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$> takeWhile (\c → isToken c ∧ c ≢ '\'') encodedPayload ∷ Parser BS.ByteString encodedPayload = BS.concat <$> P.many (hexChar <|> rawChars) hexChar ∷ Parser BS.ByteString hexChar = do _ ← char '%' h ← satisfy isHexChar l ← satisfy isHexChar return $ BS.singleton $ hexToChar h l isHexChar ∷ Char → Bool isHexChar = inClass "0-9a-fA-F" hexToChar ∷ Char → Char → Char hexToChar h l = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l hexToInt ∷ Char → Int hexToInt c | c ≤ '9' = ord c - ord '0' | c ≤ 'F' = ord c - ord 'A' + 10 | otherwise = ord c - ord 'a' + 10 rawChars ∷ Parser BS.ByteString rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%') decodeParams ∷ Monad m ⇒ [ExtendedParam] → m (Map CIAscii Text) decodeParams = (mapM decodeSections =≪) ∘ sortBySection sortBySection ∷ ∀m. Monad m ⇒ [ExtendedParam] → m (Map CIAscii (Map Integer ExtendedParam)) sortBySection = flip go (∅) where go ∷ [ExtendedParam] → Map CIAscii (Map Integer ExtendedParam) → m (Map CIAscii (Map Integer ExtendedParam)) go [] m = return m go (x:xs) m = case M.lookup (epName x) m of Nothing → let s = M.singleton (section x) x m' = M.insert (epName x) s m in go xs m' Just s → case M.insertLookupWithKey (\_ s' _ → s') (section x) x s of (Nothing, s') → let m' = M.insert (epName x) s' m in go xs m' (Just _, _) → fail (concat [ "Duplicate section " , show $ section x , " for parameter '" , A.toString $ A.fromCIAscii $ epName x , "'" ]) decodeSections ∷ ∀m. Monad m ⇒ Map Integer ExtendedParam → m Text decodeSections = flip (flip go 0) (∅) where go ∷ Map Integer ExtendedParam → Integer → S.Seq Text → m Text go m expectedSect chunks = case M.minViewWithKey m of Nothing → return $ T.concat $ toList chunks Just ((sect, p), m') | sect ≡ expectedSect → error "FIXME" | otherwise → fail (concat [ "Missing section " , show $ section p , " for parameter '" , A.toString $ A.fromCIAscii $ epName p , "'" ])