7 -- |Provide functionalities to encode/decode MIME parameter values in
8 -- character sets other than US-ASCII. See:
9 -- <http://www.faqs.org/rfcs/rfc2231.html>
11 -- You usually don't have to use this module directly.
12 module Network.HTTP.Lucu.RFC2231
17 import Control.Applicative
18 import Control.Monad hiding (mapM)
19 import Control.Monad.Unicode
20 import Data.Ascii (Ascii, CIAscii, AsciiBuilder)
21 import qualified Data.Ascii as A
22 import Data.Attoparsec.Char8 as P
24 import qualified Data.ByteString.Char8 as BS
28 import qualified Data.Map as M
29 import Data.Monoid.Unicode
30 import Data.Sequence (Seq, ViewL(..))
31 import qualified Data.Sequence as S
32 import Data.Sequence.Unicode hiding ((∅))
33 import Data.Text (Text)
34 import qualified Data.Text as T
35 import Data.Text.Encoding
36 import Data.Text.Encoding.Error
37 import Data.Traversable
39 import Network.HTTP.Lucu.Parser.Http
40 import Network.HTTP.Lucu.Utils
41 import Prelude hiding (concat, mapM, takeWhile)
42 import Prelude.Unicode
44 -- |Convert MIME parameter values to an 'AsciiBuilder'.
45 printMIMEParams ∷ Map CIAscii Text → AsciiBuilder
46 {-# INLINEABLE printMIMEParams #-}
47 printMIMEParams m = M.foldlWithKey f (∅) m
48 -- THINKME: Use foldlWithKey' for newer Data.Map
50 f ∷ AsciiBuilder → CIAscii → Text → AsciiBuilder
52 f ab k v = ab ⊕ A.toAsciiBuilder "; " ⊕ printPair k v
54 printPair ∷ CIAscii → Text → AsciiBuilder
55 {-# INLINEABLE printPair #-}
57 | T.any (> '\xFF') value
58 = printPairInUTF8 name value
60 = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value)
62 printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder
63 {-# INLINEABLE printPairInUTF8 #-}
64 printPairInUTF8 name value
65 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
66 A.toAsciiBuilder "*=utf-8''" ⊕
67 escapeUnsafeChars (encodeUtf8 value) (∅)
69 printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder
70 {-# INLINEABLE printPairInAscii #-}
71 printPairInAscii name value
72 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
73 A.toAsciiBuilder "=" ⊕
74 if BS.any ((¬) ∘ isToken) (A.toByteString value) then
77 A.toAsciiBuilder value
79 escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder
80 {-# INLINEABLE escapeUnsafeChars #-}
81 escapeUnsafeChars bs b
82 = case BS.uncons bs of
85 | isToken c → escapeUnsafeChars bs' $
86 b ⊕ A.toAsciiBuilder (A.unsafeFromString [c])
87 | otherwise → escapeUnsafeChars bs' $
88 b ⊕ toHex (fromIntegral $ fromEnum c)
90 toHex ∷ Word8 → AsciiBuilder
91 {-# INLINEABLE toHex #-}
92 toHex o = A.toAsciiBuilder "%" ⊕
93 A.toAsciiBuilder (A.unsafeFromString [ toHex' (o `shiftR` 8)
94 , toHex' (o .&. 0x0F) ])
97 {-# INLINEABLE toHex' #-}
99 | h ≤ 0x09 = toEnum $ fromIntegral
100 $ fromEnum '0' + fromIntegral h
101 | otherwise = toEnum $ fromIntegral
102 $ fromEnum 'A' + fromIntegral (h - 0x0A)
105 = InitialEncodedParam {
107 , epCharset ∷ !CIAscii
108 , epPayload ∷ !BS.ByteString
110 | ContinuedEncodedParam {
112 , epSection ∷ !Integer
113 , epPayload ∷ !BS.ByteString
117 , epSection ∷ !Integer
121 section ∷ ExtendedParam → Integer
122 {-# INLINE section #-}
123 section (InitialEncodedParam {..}) = 0
124 section ep = epSection ep
126 -- |'Parser' for MIME parameter values.
127 mimeParams ∷ Parser (Map CIAscii Text)
128 {-# INLINEABLE mimeParams #-}
129 mimeParams = decodeParams =≪ P.many (try paramP)
131 paramP ∷ Parser ExtendedParam
132 paramP = do skipMany lws
139 → do (charset, payload) ← initialEncodedValue
140 return $ InitialEncodedParam name charset payload
142 → do payload ← encodedPayload
143 return $ ContinuedEncodedParam name sect payload
145 → do payload ← token <|> quotedStr
146 return $ AsciiParam name sect payload
148 nameP ∷ Parser (CIAscii, Integer, Bool)
149 nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$>
150 takeWhile1 (\c → isToken c ∧ c ≢ '*')
151 sect ← option 0 $ try (char '*' *> decimal )
152 isEncoded ← option False $ try (char '*' *> pure True)
153 return (name, sect, isEncoded)
155 initialEncodedValue ∷ Parser (CIAscii, BS.ByteString)
157 = do charset ← metadata
159 void $ metadata -- Ignore the language tag
161 payload ← encodedPayload
163 -- NOTE: I'm not sure this is the right thing, but RFC
164 -- 2231 doesn't tell us what we should do when the
165 -- charset is omitted.
166 return ("US-ASCII", payload)
167 -- FIXME: Rethink about this behaviour.
169 return (charset, payload)
171 metadata ∷ Parser CIAscii
172 metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$>
173 takeWhile (\c → c ≢ '\'' ∧ isToken c)
175 encodedPayload ∷ Parser BS.ByteString
176 {-# INLINE encodedPayload #-}
177 encodedPayload = BS.concat <$> P.many (hexChar <|> rawChars)
179 hexChar ∷ Parser BS.ByteString
180 {-# INLINEABLE hexChar #-}
181 hexChar = do void $ char '%'
182 h ← satisfy isHexChar
183 l ← satisfy isHexChar
184 return $ BS.singleton $ hexToChar h l
186 isHexChar ∷ Char → Bool
187 isHexChar = inClass "0-9a-fA-F"
189 hexToChar ∷ Char → Char → Char
190 {-# INLINE hexToChar #-}
192 = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l
194 hexToInt ∷ Char → Int
195 {-# INLINEABLE hexToInt #-}
197 | c ≤ '9' = ord c - ord '0'
198 | c ≤ 'F' = ord c - ord 'A' + 10
199 | otherwise = ord c - ord 'a' + 10
201 rawChars ∷ Parser BS.ByteString
202 {-# INLINE rawChars #-}
203 rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%')
205 decodeParams ∷ Monad m ⇒ [ExtendedParam] → m (Map CIAscii Text)
206 {-# INLINE decodeParams #-}
207 decodeParams = (mapM decodeSections =≪) ∘ sortBySection
209 sortBySection ∷ Monad m
211 → m (Map CIAscii (Map Integer ExtendedParam))
212 sortBySection = flip go (∅)
216 → Map CIAscii (Map Integer ExtendedParam)
217 → m (Map CIAscii (Map Integer ExtendedParam))
220 = case M.lookup (epName x) m of
222 → let s = M.singleton (section x) x
223 m' = M.insert (epName x) s m
227 → case M.lookup (section x) s of
229 → let s' = M.insert (section x) x s
230 m' = M.insert (epName x) s' m
234 → fail (concat [ "Duplicate section "
237 , A.toString $ A.fromCIAscii $ epName x
241 decodeSections ∷ Monad m ⇒ Map Integer ExtendedParam → m Text
242 decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅)
245 ⇒ Map Integer ExtendedParam
248 → m (Seq ExtendedParam)
249 toSeq m expectedSect sects
250 = case M.minViewWithKey m of
254 | sect ≡ expectedSect
255 → toSeq m' (expectedSect + 1) (sects ⊳ p)
257 → fail (concat [ "Missing section "
260 , A.toString $ A.fromCIAscii $ epName p
264 decodeSeq ∷ Monad m ⇒ Seq ExtendedParam → m Text
266 = case S.viewl sects of
268 → fail "decodeSeq: internal error: empty seq"
269 InitialEncodedParam {..} :< xs
270 → do d ← getDecoder epCharset
271 t ← decodeStr d epPayload
272 decodeSeq' (Just d) xs $ S.singleton t
273 ContinuedEncodedParam {..} :< _
274 → fail "decodeSeq: internal error: CEP at section 0"
275 AsciiParam {..} :< xs
276 → let t = A.toText apPayload
278 decodeSeq' Nothing xs $ S.singleton t
285 decodeSeq' decoder sects chunks
286 = case S.viewl sects of
288 → return $ T.concat $ toList chunks
289 InitialEncodedParam {..} :< _
290 → fail "decodeSeq': internal error: IEP at section > 0"
291 ContinuedEncodedParam {..} :< xs
294 → do t ← decodeStr d epPayload
295 decodeSeq' decoder xs $ chunks ⊳ t
297 → fail (concat [ "Section "
300 , A.toString $ A.fromCIAscii epName
301 , "' is encoded but its first section is not"
303 AsciiParam {..} :< xs
304 → let t = A.toText apPayload
306 decodeSeq' decoder xs $ chunks ⊳ t
308 type Decoder = BS.ByteString → Either UnicodeException Text
310 decodeStr ∷ Monad m ⇒ Decoder → BS.ByteString → m Text
311 decodeStr decoder str
312 = case decoder str of
314 Left e → fail $ show e
316 getDecoder ∷ Monad m ⇒ CIAscii → m Decoder
318 | charset ≡ "UTF-8" = return decodeUtf8'
319 | charset ≡ "US-ASCII" = return decodeUtf8'
320 | otherwise = fail $ "No decoders found for charset: "
321 ⧺ A.toString (A.fromCIAscii charset)