{-# LANGUAGE DoAndIfThenElse , OverloadedStrings , UnicodeSyntax #-} -- |Provide facilities to encode/decode MIME parameter values in -- character sets other than US-ASCII. See: -- http://www.faqs.org/rfcs/rfc2231.html module Network.HTTP.Lucu.RFC2231 ( printParams , paramsP ) where import Control.Applicative import Data.Ascii (Ascii, CIAscii, AsciiBuilder) import qualified Data.Ascii as A import Data.Attoparsec.Char8 as P import Data.Bits import qualified Data.ByteString.Char8 as BS import Data.Char import Data.Map (Map) import qualified Data.Map as M import Data.Monoid.Unicode import Data.Text (Text) import qualified Data.Text as T import Data.Text.Encoding import Data.Word import Network.HTTP.Lucu.Parser.Http import Network.HTTP.Lucu.Utils import Prelude hiding (takeWhile) import Prelude.Unicode printParams ∷ Map CIAscii Text → AsciiBuilder printParams params | M.null params = (∅) | otherwise = A.toAsciiBuilder "; " ⊕ joinWith "; " (map printPair $ M.toList params) where printPair ∷ (CIAscii, Text) → AsciiBuilder printPair (name, value) | T.any (> '\xFF') value = printPairInUTF8 name value | otherwise = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value) printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder printPairInUTF8 name value = A.toAsciiBuilder (A.fromCIAscii name) ⊕ A.toAsciiBuilder "*=utf-8''" ⊕ escapeUnsafeChars (encodeUtf8 value) (∅) printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder printPairInAscii name value = A.toAsciiBuilder (A.fromCIAscii name) ⊕ A.toAsciiBuilder "=" ⊕ if BS.any ((¬) ∘ isToken) (A.toByteString value) then quoteStr value else A.toAsciiBuilder value escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder escapeUnsafeChars bs b = case BS.uncons bs of Nothing → b Just (c, bs') | isToken c → escapeUnsafeChars bs' $ b ⊕ A.toAsciiBuilder (A.unsafeFromString [c]) | otherwise → escapeUnsafeChars bs' $ b ⊕ toHex (fromIntegral $ fromEnum c) toHex ∷ Word8 → AsciiBuilder toHex o = A.toAsciiBuilder "%" ⊕ A.toAsciiBuilder (A.unsafeFromString [ toHex' (o `shiftR` 8) , toHex' (o .&. 0x0F) ]) toHex' ∷ Word8 → Char toHex' o | o ≤ 0x09 = toEnum $ fromIntegral $ fromEnum '0' + fromIntegral o | otherwise = toEnum $ fromIntegral $ fromEnum 'A' + fromIntegral (o - 0x0A) data ExtendedParam = InitialEncodedParam { epName ∷ !CIAscii , epCharset ∷ !CIAscii , epPayload ∷ !BS.ByteString } | ContinuedEncodedParam { epName ∷ !CIAscii , epSection ∷ !Integer , epPayload ∷ !BS.ByteString } | AsciiParam { epName ∷ !CIAscii , epSection ∷ !Integer , apPayload ∷ !Ascii } paramsP ∷ Parser (Map CIAscii Text) paramsP = decodeParams <$> P.many (try paramP) where paramP ∷ Parser ExtendedParam paramP = do skipMany lws _ ← char ';' skipMany lws epm ← nameP _ ← char '=' case epm of (name, 0, True) → do (charset, payload) ← initialEncodedValue return $ InitialEncodedParam name charset payload (name, section, True) → do payload ← encodedPayload return $ ContinuedEncodedParam name section payload (name, section, False) → do payload ← token <|> quotedStr return $ AsciiParam name section payload nameP ∷ Parser (CIAscii, Integer, Bool) nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$> takeWhile1 (\c → isToken c ∧ c ≢ '*') section ← option 0 $ try $ do _ ← char '*' n ← decimal return n isEncoded ← option False $ do _ ← char '*' return True return (name, section, isEncoded) initialEncodedValue ∷ Parser (CIAscii, BS.ByteString) initialEncodedValue = do charset ← metadata _ ← char '\'' _ ← metadata -- Ignore the language tag _ ← char '\'' payload ← encodedPayload return (charset, payload) where metadata ∷ Parser CIAscii metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$> takeWhile (\c → isToken c ∧ c ≢ '\'') encodedPayload ∷ Parser BS.ByteString encodedPayload = BS.concat <$> P.many (hexChar <|> literal) where hexChar ∷ Parser BS.ByteString hexChar = do _ ← char '%' h ← satisfy isHexChar l ← satisfy isHexChar return $ BS.singleton $ hexToChar h l isHexChar ∷ Char → Bool isHexChar = inClass "0-9a-fA-F" hexToChar ∷ Char → Char → Char hexToChar h l = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l hexToInt ∷ Char → Int hexToInt c | c ≤ '9' = ord c - ord '0' | c ≤ 'F' = ord c - ord 'A' + 10 | otherwise = ord c - ord 'a' + 10 literal ∷ Parser BS.ByteString literal = takeWhile1 (\c → isToken c ∧ c ≢ '%') decodeParams ∷ [ExtendedParam] → Map CIAscii Text decodeParams = error "FIXME"