8 -- |Provide facilities to encode/decode MIME parameter values in
9 -- character sets other than US-ASCII. See:
10 -- http://www.faqs.org/rfcs/rfc2231.html
11 module Network.HTTP.Lucu.RFC2231
16 import Control.Applicative
17 import Control.Monad.Unicode
18 import Data.Ascii (Ascii, CIAscii, AsciiBuilder)
19 import qualified Data.Ascii as A
20 import Data.Attoparsec.Char8 as P
22 import qualified Data.ByteString.Char8 as BS
26 import qualified Data.Map as M
27 import Data.Monoid.Unicode
28 import Data.Sequence (Seq, ViewL(..))
29 import qualified Data.Sequence as S
30 import Data.Sequence.Unicode hiding ((∅))
31 import Data.Text (Text)
32 import qualified Data.Text as T
33 import qualified Data.Text.ICU.Convert as TC
34 import Data.Text.Encoding
35 import Data.Traversable
37 import Network.HTTP.Lucu.Parser.Http
38 import Network.HTTP.Lucu.Utils
39 import Prelude hiding (concat, mapM, takeWhile)
40 import Prelude.Unicode
42 printParams ∷ Map CIAscii Text → AsciiBuilder
45 | otherwise = A.toAsciiBuilder "; " ⊕
46 joinWith "; " (map printPair $ M.toList params)
48 printPair ∷ (CIAscii, Text) → AsciiBuilder
49 printPair (name, value)
50 | T.any (> '\xFF') value
51 = printPairInUTF8 name value
53 = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value)
55 printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder
56 printPairInUTF8 name value
57 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
58 A.toAsciiBuilder "*=utf-8''" ⊕
59 escapeUnsafeChars (encodeUtf8 value) (∅)
61 printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder
62 printPairInAscii name value
63 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
64 A.toAsciiBuilder "=" ⊕
65 if BS.any ((¬) ∘ isToken) (A.toByteString value) then
68 A.toAsciiBuilder value
70 escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder
71 escapeUnsafeChars bs b
72 = case BS.uncons bs of
75 | isToken c → escapeUnsafeChars bs' $
76 b ⊕ A.toAsciiBuilder (A.unsafeFromString [c])
77 | otherwise → escapeUnsafeChars bs' $
78 b ⊕ toHex (fromIntegral $ fromEnum c)
80 toHex ∷ Word8 → AsciiBuilder
81 toHex o = A.toAsciiBuilder "%" ⊕
82 A.toAsciiBuilder (A.unsafeFromString [ toHex' (o `shiftR` 8)
83 , toHex' (o .&. 0x0F) ])
87 | o ≤ 0x09 = toEnum $ fromIntegral $ fromEnum '0' + fromIntegral o
88 | otherwise = toEnum $ fromIntegral $ fromEnum 'A' + fromIntegral (o - 0x0A)
92 = InitialEncodedParam {
94 , epCharset ∷ !CIAscii
95 , epPayload ∷ !BS.ByteString
97 | ContinuedEncodedParam {
99 , epSection ∷ !Integer
100 , epPayload ∷ !BS.ByteString
104 , epSection ∷ !Integer
108 section ∷ ExtendedParam → Integer
109 section (InitialEncodedParam {..}) = 0
110 section ep = epSection ep
112 paramsP ∷ Parser (Map CIAscii Text)
113 paramsP = decodeParams =≪ P.many (try paramP)
115 paramP ∷ Parser ExtendedParam
116 paramP = do skipMany lws
123 → do (charset, payload) ← initialEncodedValue
124 return $ InitialEncodedParam name charset payload
126 → do payload ← encodedPayload
127 return $ ContinuedEncodedParam name sect payload
129 → do payload ← token <|> quotedStr
130 return $ AsciiParam name sect payload
132 nameP ∷ Parser (CIAscii, Integer, Bool)
133 nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$>
134 takeWhile1 (\c → isToken c ∧ c ≢ '*')
140 isEncoded ← option False $
143 return (name, sect, isEncoded)
145 initialEncodedValue ∷ Parser (CIAscii, BS.ByteString)
147 = do charset ← metadata
149 _ ← metadata -- Ignore the language tag
151 payload ← encodedPayload
153 -- NOTE: I'm not sure this is the right thing, but RFC
154 -- 2231 doesn't tell us what should we do when the
155 -- charset is omitted.
156 return ("US-ASCII", payload)
158 return (charset, payload)
160 metadata ∷ Parser CIAscii
161 metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$>
162 takeWhile (\c → isToken c ∧ c ≢ '\'')
164 encodedPayload ∷ Parser BS.ByteString
165 encodedPayload = BS.concat <$> P.many (hexChar <|> rawChars)
167 hexChar ∷ Parser BS.ByteString
168 hexChar = do _ ← char '%'
169 h ← satisfy isHexChar
170 l ← satisfy isHexChar
171 return $ BS.singleton $ hexToChar h l
173 isHexChar ∷ Char → Bool
174 isHexChar = inClass "0-9a-fA-F"
176 hexToChar ∷ Char → Char → Char
178 = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l
180 hexToInt ∷ Char → Int
182 | c ≤ '9' = ord c - ord '0'
183 | c ≤ 'F' = ord c - ord 'A' + 10
184 | otherwise = ord c - ord 'a' + 10
186 rawChars ∷ Parser BS.ByteString
187 rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%')
189 decodeParams ∷ Monad m ⇒ [ExtendedParam] → m (Map CIAscii Text)
190 decodeParams = (mapM decodeSections =≪) ∘ sortBySection
192 sortBySection ∷ ∀m. Monad m
194 → m (Map CIAscii (Map Integer ExtendedParam))
195 sortBySection = flip go (∅)
198 → Map CIAscii (Map Integer ExtendedParam)
199 → m (Map CIAscii (Map Integer ExtendedParam))
202 = case M.lookup (epName x) m of
204 → let s = M.singleton (section x) x
205 m' = M.insert (epName x) s m
209 → case M.insertLookupWithKey (\_ s' _ → s') (section x) x s of
211 → let m' = M.insert (epName x) s' m
215 → fail (concat [ "Duplicate section "
218 , A.toString $ A.fromCIAscii $ epName x
222 decodeSections ∷ ∀m. Monad m ⇒ Map Integer ExtendedParam → m Text
223 decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅)
225 toSeq ∷ Map Integer ExtendedParam
228 → m (Seq ExtendedParam)
229 toSeq m expectedSect sects
230 = case M.minViewWithKey m of
234 | sect ≡ expectedSect
235 → toSeq m' (expectedSect + 1) (sects ⊳ p)
237 → fail (concat [ "Missing section "
240 , A.toString $ A.fromCIAscii $ epName p
244 decodeSeq ∷ Seq ExtendedParam → m Text
246 = case S.viewl sects of
248 → fail "decodeSeq: internal error: empty seq"
249 InitialEncodedParam {..} :< xs
250 → do conv ← openConv epCharset
251 let t = TC.toUnicode conv epPayload
252 decodeSeq' (Just conv) xs $ S.singleton t
253 ContinuedEncodedParam {..} :< _
254 → fail "decodeSeq: internal error: ContinuedEncodedParam at section 0"
255 AsciiParam {..} :< xs
256 → let t = A.toText apPayload
258 decodeSeq' Nothing xs $ S.singleton t
260 decodeSeq' ∷ Maybe (TC.Converter)
264 decodeSeq' convM sects chunks
265 = case S.viewl sects of
267 → return $ T.concat $ toList chunks
268 InitialEncodedParam {..} :< _
269 → fail "decodeSeq': internal error: InitialEncodedParam at section > 0"
270 ContinuedEncodedParam {..} :< xs
273 → let t = TC.toUnicode conv epPayload
275 decodeSeq' convM xs $ chunks ⊳ t
277 → fail (concat [ "Section "
280 , A.toString $ A.fromCIAscii epName
281 , "' is encoded but its section 0 is not"
283 AsciiParam {..} :< xs
284 → let t = A.toText apPayload
286 decodeSeq' convM xs $ chunks ⊳ t
288 openConv ∷ CIAscii → m TC.Converter