6 -- |Provide facilities to encode/decode MIME parameter values in
7 -- character sets other than US-ASCII. See:
8 -- http://www.faqs.org/rfcs/rfc2231.html
9 module Network.HTTP.Lucu.RFC2231
14 import Control.Applicative
15 import Data.Ascii (Ascii, CIAscii, AsciiBuilder)
16 import qualified Data.Ascii as A
17 import Data.Attoparsec.Char8 as P
19 import qualified Data.ByteString.Char8 as BS
22 import qualified Data.Map as M
23 import Data.Monoid.Unicode
24 import Data.Text (Text)
25 import qualified Data.Text as T
26 import Data.Text.Encoding
28 import Network.HTTP.Lucu.Parser.Http
29 import Network.HTTP.Lucu.Utils
30 import Prelude hiding (takeWhile)
31 import Prelude.Unicode
33 printParams ∷ Map CIAscii Text → AsciiBuilder
36 | otherwise = A.toAsciiBuilder "; " ⊕
37 joinWith "; " (map printPair $ M.toList params)
39 printPair ∷ (CIAscii, Text) → AsciiBuilder
40 printPair (name, value)
41 | T.any (> '\xFF') value
42 = printPairInUTF8 name value
44 = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value)
46 printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder
47 printPairInUTF8 name value
48 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
49 A.toAsciiBuilder "*=utf-8''" ⊕
50 escapeUnsafeChars (encodeUtf8 value) (∅)
52 printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder
53 printPairInAscii name value
54 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
55 A.toAsciiBuilder "=" ⊕
56 if BS.any ((¬) ∘ isToken) (A.toByteString value) then
59 A.toAsciiBuilder value
61 escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder
62 escapeUnsafeChars bs b
63 = case BS.uncons bs of
66 | isToken c → escapeUnsafeChars bs' $
67 b ⊕ A.toAsciiBuilder (A.unsafeFromString [c])
68 | otherwise → escapeUnsafeChars bs' $
69 b ⊕ toHex (fromIntegral $ fromEnum c)
71 toHex ∷ Word8 → AsciiBuilder
72 toHex o = A.toAsciiBuilder "%" ⊕
73 A.toAsciiBuilder (A.unsafeFromString [ toHex' (o `shiftR` 8)
74 , toHex' (o .&. 0x0F) ])
78 | o ≤ 0x09 = toEnum $ fromIntegral $ fromEnum '0' + fromIntegral o
79 | otherwise = toEnum $ fromIntegral $ fromEnum 'A' + fromIntegral (o - 0x0A)
83 = InitialEncodedParam {
85 , epCharset ∷ !CIAscii
86 , epPayload ∷ !BS.ByteString
88 | ContinuedEncodedParam {
90 , epSection ∷ !Integer
91 , epPayload ∷ !BS.ByteString
95 , epSection ∷ !Integer
99 paramsP ∷ Parser (Map CIAscii Text)
100 paramsP = decodeParams <$> P.many (try paramP)
102 paramP ∷ Parser ExtendedParam
103 paramP = do skipMany lws
110 → do (charset, payload) ← initialEncodedValue
111 return $ InitialEncodedParam name charset payload
112 (name, section, True)
113 → do payload ← encodedPayload
114 return $ ContinuedEncodedParam name section payload
115 (name, section, False)
116 → do payload ← token <|> quotedStr
117 return $ AsciiParam name section payload
119 nameP ∷ Parser (CIAscii, Integer, Bool)
120 nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$>
121 takeWhile1 (\c → isToken c ∧ c ≢ '*')
127 isEncoded ← option False $
130 return (name, section, isEncoded)
132 initialEncodedValue ∷ Parser (CIAscii, BS.ByteString)
133 initialEncodedValue = do charset ← metadata
135 _ ← metadata -- Ignore the language tag
137 payload ← encodedPayload
138 return (charset, payload)
140 metadata ∷ Parser CIAscii
141 metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$>
142 takeWhile (\c → isToken c ∧ c ≢ '\'')
144 encodedPayload ∷ Parser BS.ByteString
145 encodedPayload = BS.concat <$> P.many (hexChar <|> literal)
147 hexChar ∷ Parser BS.ByteString
148 hexChar = do _ ← char '%'
149 h ← satisfy isHexChar
150 l ← satisfy isHexChar
151 return $ BS.singleton $ hexToChar h l
153 isHexChar ∷ Char → Bool
154 isHexChar = inClass "0-9a-fA-F"
156 hexToChar ∷ Char → Char → Char
158 = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l
160 hexToInt ∷ Char → Int
162 | c ≤ '9' = ord c - ord '0'
163 | c ≤ 'F' = ord c - ord 'A' + 10
164 | otherwise = ord c - ord 'a' + 10
166 literal ∷ Parser BS.ByteString
167 literal = takeWhile1 (\c → isToken c ∧ c ≢ '%')
169 decodeParams ∷ [ExtendedParam] → Map CIAscii Text
170 decodeParams = error "FIXME"