5 , GeneralizedNewtypeDeriving
6 , MultiParamTypeClasses
10 , TypeSynonymInstances
13 {-# OPTIONS_GHC -ddump-splices #-} -- FIXME
14 -- GHC 7.0.3 gives us a false warning.
15 {-# OPTIONS_GHC -fno-warn-missing-methods #-}
16 -- |Parsing and printing MIME parameter values
17 -- (<http://tools.ietf.org/html/rfc2231>).
18 module Network.HTTP.Lucu.MIMEParams
24 import Control.Applicative hiding (empty)
26 import Control.Monad hiding (mapM)
27 import Control.Monad.Unicode
28 import Data.Ascii (Ascii, CIAscii, AsciiBuilder)
29 import qualified Data.Ascii as A
30 import Data.Attoparsec.Char8 as P
32 import qualified Data.ByteString.Char8 as BS
34 import Data.Collections
35 import Data.Collections.BaseInstances ()
36 import qualified Data.Collections.Newtype.TH as C
37 import qualified Data.Map as M (Map)
39 import Data.Monoid.Unicode
40 import Data.Sequence (Seq)
41 import Data.Text (Text)
42 import qualified Data.Text as T
43 import Data.Text.Encoding
44 import Data.Text.Encoding.Error
47 import Network.HTTP.Lucu.OrphanInstances ()
48 import Network.HTTP.Lucu.Parser.Http
49 import Network.HTTP.Lucu.Utils
50 import Prelude hiding (concat, filter, foldr, lookup, mapM, null, takeWhile)
51 import Prelude.Unicode
53 -- |A 'Map' from MIME parameter attributes to values. Attributes are
54 -- always case-insensitive according to RFC 2045
55 -- (<http://tools.ietf.org/html/rfc2045#section-5.1>).
57 = MIMEParams (M.Map CIAscii Text)
58 deriving (Eq, Show, Read, Monoid, Typeable)
60 C.derive [d| instance Unfoldable MIMEParams (CIAscii, Text)
64 instance Foldable MIMEParams (CIAscii, Text) where
66 null (MIMEParams m) = null m
68 size (MIMEParams m) = size m
70 foldr f b (MIMEParams m) = foldr f b m
73 instance Collection MIMEParams (CIAscii, Text) where
75 filter f (MIMEParams m) = MIMEParams $ filter f m
78 instance Indexed MIMEParams CIAscii Text where
80 index k (MIMEParams m) = index k m
82 adjust f k (MIMEParams m) = MIMEParams $ adjust f k m
83 {-# INLINE inDomain #-}
84 inDomain k (MIMEParams m) = inDomain k m
87 instance Map MIMEParams CIAscii Text where
89 lookup k (MIMEParams m) = lookup k m
90 {-# INLINE mapWithKey #-}
91 mapWithKey f (MIMEParams m)
92 = MIMEParams $ mapWithKey f m
93 {-# INLINE unionWith #-}
94 unionWith f (MIMEParams α) (MIMEParams β)
95 = MIMEParams $ unionWith f α β
96 {-# INLINE intersectionWith #-}
97 intersectionWith f (MIMEParams α) (MIMEParams β)
98 = MIMEParams $ intersectionWith f α β
99 {-# INLINE differenceWith #-}
100 differenceWith f (MIMEParams α) (MIMEParams β)
101 = MIMEParams $ differenceWith f α β
102 {-# INLINE isSubmapBy #-}
103 isSubmapBy f (MIMEParams α) (MIMEParams β)
105 {-# INLINE isProperSubmapBy #-}
106 isProperSubmapBy f (MIMEParams α) (MIMEParams β)
107 = isProperSubmapBy f α β
109 -- FIXME: auto-derive
110 instance SortingCollection MIMEParams (CIAscii, Text) where
111 {-# INLINE minView #-}
112 minView (MIMEParams m) = second MIMEParams <$> minView m
114 -- |Convert MIME parameter values to an 'AsciiBuilder'.
115 printMIMEParams ∷ MIMEParams → AsciiBuilder
116 {-# INLINEABLE printMIMEParams #-}
117 printMIMEParams = foldl' f (∅)
119 f ∷ AsciiBuilder → (CIAscii, Text) → AsciiBuilder
121 f ab (k, v) = ab ⊕ A.toAsciiBuilder "; " ⊕ printPair k v
123 printPair ∷ CIAscii → Text → AsciiBuilder
124 {-# INLINEABLE printPair #-}
126 | T.any (> '\xFF') value
127 = printPairInUTF8 name value
129 = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value)
131 printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder
132 {-# INLINEABLE printPairInUTF8 #-}
133 printPairInUTF8 name value
134 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
135 A.toAsciiBuilder "*=utf-8''" ⊕
136 escapeUnsafeChars (encodeUtf8 value) (∅)
138 printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder
139 {-# INLINEABLE printPairInAscii #-}
140 printPairInAscii name value
141 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
142 A.toAsciiBuilder "=" ⊕
143 if BS.any ((¬) ∘ isToken) (A.toByteString value) then
146 A.toAsciiBuilder value
148 escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder
149 {-# INLINEABLE escapeUnsafeChars #-}
150 escapeUnsafeChars bs b
151 = case BS.uncons bs of
154 | isToken c → escapeUnsafeChars bs' $
155 b ⊕ A.toAsciiBuilder (A.unsafeFromString [c])
156 | otherwise → escapeUnsafeChars bs' $
157 b ⊕ toHex (fromIntegral $ fromEnum c)
159 toHex ∷ Word8 → AsciiBuilder
160 {-# INLINEABLE toHex #-}
161 toHex o = A.toAsciiBuilder "%" ⊕
162 A.toAsciiBuilder (A.unsafeFromString [ toHex' (o `shiftR` 8)
163 , toHex' (o .&. 0x0F) ])
165 toHex' ∷ Word8 → Char
166 {-# INLINEABLE toHex' #-}
168 | h ≤ 0x09 = toEnum $ fromIntegral
169 $ fromEnum '0' + fromIntegral h
170 | otherwise = toEnum $ fromIntegral
171 $ fromEnum 'A' + fromIntegral (h - 0x0A)
174 = InitialEncodedParam {
176 , epCharset ∷ !CIAscii
177 , epPayload ∷ !BS.ByteString
179 | ContinuedEncodedParam {
181 , epSection ∷ !Integer
182 , epPayload ∷ !BS.ByteString
186 , epSection ∷ !Integer
190 section ∷ ExtendedParam → Integer
191 {-# INLINE section #-}
192 section (InitialEncodedParam {..}) = 0
193 section ep = epSection ep
195 -- |'Parser' for MIME parameter values.
196 mimeParams ∷ Parser MIMEParams
197 {-# INLINEABLE mimeParams #-}
198 mimeParams = decodeParams =≪ P.many (try paramP)
200 paramP ∷ Parser ExtendedParam
201 paramP = do skipMany lws
208 → do (charset, payload) ← initialEncodedValue
209 return $ InitialEncodedParam name charset payload
211 → do payload ← encodedPayload
212 return $ ContinuedEncodedParam name sect payload
214 → do payload ← token <|> quotedStr
215 return $ AsciiParam name sect payload
217 nameP ∷ Parser (CIAscii, Integer, Bool)
218 nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$>
219 takeWhile1 (\c → isToken c ∧ c ≢ '*')
220 sect ← option 0 $ try (char '*' *> decimal )
221 isEncoded ← option False $ try (char '*' *> pure True)
222 return (name, sect, isEncoded)
224 initialEncodedValue ∷ Parser (CIAscii, BS.ByteString)
226 = do charset ← metadata
228 void $ metadata -- Ignore the language tag
230 payload ← encodedPayload
232 -- NOTE: I'm not sure this is the right thing, but RFC
233 -- 2231 doesn't tell us what we should do when the
234 -- charset is omitted.
235 fail "charset is missing"
237 return (charset, payload)
239 metadata ∷ Parser CIAscii
240 metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$>
241 takeWhile (\c → c ≢ '\'' ∧ isToken c)
243 encodedPayload ∷ Parser BS.ByteString
244 {-# INLINE encodedPayload #-}
245 encodedPayload = BS.concat <$> P.many (hexChar <|> rawChars)
247 hexChar ∷ Parser BS.ByteString
248 {-# INLINEABLE hexChar #-}
249 hexChar = do void $ char '%'
250 h ← satisfy isHexChar
251 l ← satisfy isHexChar
252 return $ BS.singleton $ hexToChar h l
254 isHexChar ∷ Char → Bool
255 isHexChar = inClass "0-9a-fA-F"
257 hexToChar ∷ Char → Char → Char
258 {-# INLINE hexToChar #-}
260 = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l
262 hexToInt ∷ Char → Int
263 {-# INLINEABLE hexToInt #-}
265 | c ≤ '9' = ord c - ord '0'
266 | c ≤ 'F' = ord c - ord 'A' + 10
267 | otherwise = ord c - ord 'a' + 10
269 rawChars ∷ Parser BS.ByteString
270 {-# INLINE rawChars #-}
271 rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%')
273 decodeParams ∷ (Functor m, Monad m) ⇒ [ExtendedParam] → m MIMEParams
274 {-# INLINE decodeParams #-}
275 decodeParams = (MIMEParams <$>)
276 ∘ (mapM (\(k, v) → ((,) k) <$> decodeSections v) =≪)
279 sortBySection ∷ Monad m
281 → m (M.Map CIAscii (M.Map Integer ExtendedParam))
282 sortBySection = flip go (∅)
286 → M.Map CIAscii (M.Map Integer ExtendedParam)
287 → m (M.Map CIAscii (M.Map Integer ExtendedParam))
290 = case lookup (epName x) m of
292 → let s = singleton (section x, x)
293 m' = insert (epName x, s) m
297 → case lookup (section x) s of
299 → let s' = insert (section x, x ) s
300 m' = insert (epName x, s') m
304 → fail (concat [ "Duplicate section "
307 , A.toString $ A.fromCIAscii $ epName x
311 decodeSections ∷ Monad m ⇒ M.Map Integer ExtendedParam → m Text
312 decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅)
315 ⇒ M.Map Integer ExtendedParam
318 → m (Seq ExtendedParam)
319 toSeq m expectedSect sects
324 | sect ≡ expectedSect
325 → toSeq m' (expectedSect + 1) (sects ⊳ p)
327 → fail (concat [ "Missing section "
330 , A.toString $ A.fromCIAscii $ epName p
334 decodeSeq ∷ Monad m ⇒ Seq ExtendedParam → m Text
336 = case front sects of
338 → fail "decodeSeq: internal error: empty seq"
339 Just (InitialEncodedParam {..}, xs)
340 → do d ← getDecoder epCharset
341 t ← decodeStr d epPayload
342 decodeSeq' (Just d) xs $ singleton t
343 Just (ContinuedEncodedParam {..}, _)
344 → fail "decodeSeq: internal error: CEP at section 0"
345 Just (AsciiParam {..}, xs)
346 → let t = A.toText apPayload
348 decodeSeq' Nothing xs $ singleton t
355 decodeSeq' decoder sects chunks
356 = case front sects of
358 → return $ T.concat $ toList chunks
359 Just (InitialEncodedParam {}, _)
360 → fail "decodeSeq': internal error: IEP at section > 0"
361 Just (ContinuedEncodedParam {..}, xs)
364 → do t ← decodeStr d epPayload
365 decodeSeq' decoder xs $ chunks ⊳ t
367 → fail (concat [ "Section "
370 , A.toString $ A.fromCIAscii epName
371 , "' is encoded but its first section is not"
373 Just (AsciiParam {..}, xs)
374 → let t = A.toText apPayload
376 decodeSeq' decoder xs $ chunks ⊳ t
378 type Decoder = BS.ByteString → Either UnicodeException Text
380 decodeStr ∷ Monad m ⇒ Decoder → BS.ByteString → m Text
381 decodeStr decoder str
382 = case decoder str of
384 Left e → fail $ show e
386 getDecoder ∷ Monad m ⇒ CIAscii → m Decoder
388 | charset ≡ "UTF-8" = return decodeUtf8'
389 | charset ≡ "US-ASCII" = return decodeUtf8'
390 | otherwise = fail $ "No decoders found for charset: "
391 ⧺ A.toString (A.fromCIAscii charset)