5 , GeneralizedNewtypeDeriving
6 , MultiParamTypeClasses
10 , TypeSynonymInstances
13 {-# OPTIONS_GHC -ddump-splices #-} -- FIXME
14 -- GHC 7.0.3 gives us a false warning.
15 {-# OPTIONS_GHC -fno-warn-missing-methods #-}
16 -- |Parsing and printing MIME parameter values
17 -- (<http://tools.ietf.org/html/rfc2231>).
18 module Network.HTTP.Lucu.MIMEParams
24 import Control.Applicative hiding (empty)
26 import Control.Monad hiding (mapM)
27 import Control.Monad.Unicode
28 import Data.Ascii (Ascii, CIAscii, AsciiBuilder)
29 import qualified Data.Ascii as A
30 import Data.Attoparsec.Char8 as P
32 import qualified Data.ByteString.Char8 as BS
34 import Data.Collections
35 import Data.Collections.BaseInstances ()
36 import qualified Data.Collections.Newtype.TH as C
37 import qualified Data.Map as M (Map)
39 import Data.Monoid.Unicode
40 import Data.Sequence (Seq)
41 import Data.Text (Text)
42 import qualified Data.Text as T
43 import Data.Text.Encoding
44 import Data.Text.Encoding.Error
47 import Network.HTTP.Lucu.OrphanInstances ()
48 import Network.HTTP.Lucu.Parser.Http
49 import Network.HTTP.Lucu.Utils
50 import Prelude hiding (concat, filter, foldr, lookup, mapM, null, takeWhile)
51 import Prelude.Unicode
53 -- |A 'Map' from MIME parameter attributes to values. Attributes are
54 -- always case-insensitive according to RFC 2045
55 -- (<http://tools.ietf.org/html/rfc2045#section-5.1>).
57 = MIMEParams (M.Map CIAscii Text)
58 deriving (Eq, Show, Read, Monoid, Typeable)
60 C.derive [d| instance Unfoldable MIMEParams (CIAscii, Text)
61 instance Foldable MIMEParams (CIAscii, Text)
65 instance Collection MIMEParams (CIAscii, Text) where
67 filter f (MIMEParams m) = MIMEParams $ filter f m
70 instance Indexed MIMEParams CIAscii Text where
72 index k (MIMEParams m) = index k m
74 adjust f k (MIMEParams m) = MIMEParams $ adjust f k m
75 {-# INLINE inDomain #-}
76 inDomain k (MIMEParams m) = inDomain k m
79 instance Map MIMEParams CIAscii Text where
81 lookup k (MIMEParams m) = lookup k m
82 {-# INLINE mapWithKey #-}
83 mapWithKey f (MIMEParams m)
84 = MIMEParams $ mapWithKey f m
85 {-# INLINE unionWith #-}
86 unionWith f (MIMEParams α) (MIMEParams β)
87 = MIMEParams $ unionWith f α β
88 {-# INLINE intersectionWith #-}
89 intersectionWith f (MIMEParams α) (MIMEParams β)
90 = MIMEParams $ intersectionWith f α β
91 {-# INLINE differenceWith #-}
92 differenceWith f (MIMEParams α) (MIMEParams β)
93 = MIMEParams $ differenceWith f α β
94 {-# INLINE isSubmapBy #-}
95 isSubmapBy f (MIMEParams α) (MIMEParams β)
97 {-# INLINE isProperSubmapBy #-}
98 isProperSubmapBy f (MIMEParams α) (MIMEParams β)
99 = isProperSubmapBy f α β
101 -- FIXME: auto-derive
102 instance SortingCollection MIMEParams (CIAscii, Text) where
103 {-# INLINE minView #-}
104 minView (MIMEParams m) = second MIMEParams <$> minView m
106 -- |Convert MIME parameter values to an 'AsciiBuilder'.
107 printMIMEParams ∷ MIMEParams → AsciiBuilder
108 {-# INLINEABLE printMIMEParams #-}
109 printMIMEParams = foldl' f (∅)
111 f ∷ AsciiBuilder → (CIAscii, Text) → AsciiBuilder
113 f ab (k, v) = ab ⊕ A.toAsciiBuilder "; " ⊕ printPair k v
115 printPair ∷ CIAscii → Text → AsciiBuilder
116 {-# INLINEABLE printPair #-}
118 | T.any (> '\xFF') value
119 = printPairInUTF8 name value
121 = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value)
123 printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder
124 {-# INLINEABLE printPairInUTF8 #-}
125 printPairInUTF8 name value
126 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
127 A.toAsciiBuilder "*=utf-8''" ⊕
128 escapeUnsafeChars (encodeUtf8 value) (∅)
130 printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder
131 {-# INLINEABLE printPairInAscii #-}
132 printPairInAscii name value
133 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
134 A.toAsciiBuilder "=" ⊕
135 if BS.any ((¬) ∘ isToken) (A.toByteString value) then
138 A.toAsciiBuilder value
140 escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder
141 {-# INLINEABLE escapeUnsafeChars #-}
142 escapeUnsafeChars bs b
143 = case BS.uncons bs of
146 | isToken c → escapeUnsafeChars bs' $
147 b ⊕ A.toAsciiBuilder (A.unsafeFromString [c])
148 | otherwise → escapeUnsafeChars bs' $
149 b ⊕ toHex (fromIntegral $ fromEnum c)
151 toHex ∷ Word8 → AsciiBuilder
152 {-# INLINEABLE toHex #-}
153 toHex o = A.toAsciiBuilder "%" ⊕
154 A.toAsciiBuilder (A.unsafeFromString [ toHex' (o `shiftR` 8)
155 , toHex' (o .&. 0x0F) ])
157 toHex' ∷ Word8 → Char
158 {-# INLINEABLE toHex' #-}
160 | h ≤ 0x09 = toEnum $ fromIntegral
161 $ fromEnum '0' + fromIntegral h
162 | otherwise = toEnum $ fromIntegral
163 $ fromEnum 'A' + fromIntegral (h - 0x0A)
166 = InitialEncodedParam {
168 , epCharset ∷ !CIAscii
169 , epPayload ∷ !BS.ByteString
171 | ContinuedEncodedParam {
173 , epSection ∷ !Integer
174 , epPayload ∷ !BS.ByteString
178 , epSection ∷ !Integer
182 section ∷ ExtendedParam → Integer
183 {-# INLINE section #-}
184 section (InitialEncodedParam {..}) = 0
185 section ep = epSection ep
187 -- |'Parser' for MIME parameter values.
188 mimeParams ∷ Parser MIMEParams
189 {-# INLINEABLE mimeParams #-}
190 mimeParams = decodeParams =≪ P.many (try paramP)
192 paramP ∷ Parser ExtendedParam
193 paramP = do skipMany lws
200 → do (charset, payload) ← initialEncodedValue
201 return $ InitialEncodedParam name charset payload
203 → do payload ← encodedPayload
204 return $ ContinuedEncodedParam name sect payload
206 → do payload ← token <|> quotedStr
207 return $ AsciiParam name sect payload
209 nameP ∷ Parser (CIAscii, Integer, Bool)
210 nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$>
211 takeWhile1 (\c → isToken c ∧ c ≢ '*')
212 sect ← option 0 $ try (char '*' *> decimal )
213 isEncoded ← option False $ try (char '*' *> pure True)
214 return (name, sect, isEncoded)
216 initialEncodedValue ∷ Parser (CIAscii, BS.ByteString)
218 = do charset ← metadata
220 void $ metadata -- Ignore the language tag
222 payload ← encodedPayload
224 -- NOTE: I'm not sure this is the right thing, but RFC
225 -- 2231 doesn't tell us what we should do when the
226 -- charset is omitted.
227 fail "charset is missing"
229 return (charset, payload)
231 metadata ∷ Parser CIAscii
232 metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$>
233 takeWhile (\c → c ≢ '\'' ∧ isToken c)
235 encodedPayload ∷ Parser BS.ByteString
236 {-# INLINE encodedPayload #-}
237 encodedPayload = BS.concat <$> P.many (hexChar <|> rawChars)
239 hexChar ∷ Parser BS.ByteString
240 {-# INLINEABLE hexChar #-}
241 hexChar = do void $ char '%'
242 h ← satisfy isHexChar
243 l ← satisfy isHexChar
244 return $ BS.singleton $ hexToChar h l
246 isHexChar ∷ Char → Bool
247 isHexChar = inClass "0-9a-fA-F"
249 hexToChar ∷ Char → Char → Char
250 {-# INLINE hexToChar #-}
252 = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l
254 hexToInt ∷ Char → Int
255 {-# INLINEABLE hexToInt #-}
257 | c ≤ '9' = ord c - ord '0'
258 | c ≤ 'F' = ord c - ord 'A' + 10
259 | otherwise = ord c - ord 'a' + 10
261 rawChars ∷ Parser BS.ByteString
262 {-# INLINE rawChars #-}
263 rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%')
265 decodeParams ∷ (Functor m, Monad m) ⇒ [ExtendedParam] → m MIMEParams
266 {-# INLINE decodeParams #-}
267 decodeParams = (MIMEParams <$>)
268 ∘ (mapM (\(k, v) → ((,) k) <$> decodeSections v) =≪)
271 sortBySection ∷ Monad m
273 → m (M.Map CIAscii (M.Map Integer ExtendedParam))
274 sortBySection = flip go (∅)
278 → M.Map CIAscii (M.Map Integer ExtendedParam)
279 → m (M.Map CIAscii (M.Map Integer ExtendedParam))
282 = case lookup (epName x) m of
284 → let s = singleton (section x, x)
285 m' = insert (epName x, s) m
289 → case lookup (section x) s of
291 → let s' = insert (section x, x ) s
292 m' = insert (epName x, s') m
296 → fail (concat [ "Duplicate section "
299 , A.toString $ A.fromCIAscii $ epName x
303 decodeSections ∷ Monad m ⇒ M.Map Integer ExtendedParam → m Text
304 decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅)
307 ⇒ M.Map Integer ExtendedParam
310 → m (Seq ExtendedParam)
311 toSeq m expectedSect sects
316 | sect ≡ expectedSect
317 → toSeq m' (expectedSect + 1) (sects ⊳ p)
319 → fail (concat [ "Missing section "
322 , A.toString $ A.fromCIAscii $ epName p
326 decodeSeq ∷ Monad m ⇒ Seq ExtendedParam → m Text
328 = case front sects of
330 → fail "decodeSeq: internal error: empty seq"
331 Just (InitialEncodedParam {..}, xs)
332 → do d ← getDecoder epCharset
333 t ← decodeStr d epPayload
334 decodeSeq' (Just d) xs $ singleton t
335 Just (ContinuedEncodedParam {..}, _)
336 → fail "decodeSeq: internal error: CEP at section 0"
337 Just (AsciiParam {..}, xs)
338 → let t = A.toText apPayload
340 decodeSeq' Nothing xs $ singleton t
347 decodeSeq' decoder sects chunks
348 = case front sects of
350 → return $ T.concat $ toList chunks
351 Just (InitialEncodedParam {}, _)
352 → fail "decodeSeq': internal error: IEP at section > 0"
353 Just (ContinuedEncodedParam {..}, xs)
356 → do t ← decodeStr d epPayload
357 decodeSeq' decoder xs $ chunks ⊳ t
359 → fail (concat [ "Section "
362 , A.toString $ A.fromCIAscii epName
363 , "' is encoded but its first section is not"
365 Just (AsciiParam {..}, xs)
366 → let t = A.toText apPayload
368 decodeSeq' decoder xs $ chunks ⊳ t
370 type Decoder = BS.ByteString → Either UnicodeException Text
372 decodeStr ∷ Monad m ⇒ Decoder → BS.ByteString → m Text
373 decodeStr decoder str
374 = case decoder str of
376 Left e → fail $ show e
378 getDecoder ∷ Monad m ⇒ CIAscii → m Decoder
380 | charset ≡ "UTF-8" = return decodeUtf8'
381 | charset ≡ "US-ASCII" = return decodeUtf8'
382 | otherwise = fail $ "No decoders found for charset: "
383 ⧺ A.toString (A.fromCIAscii charset)