5 , GeneralizedNewtypeDeriving
6 , MultiParamTypeClasses
10 , TypeSynonymInstances
13 {-# OPTIONS_GHC -ddump-splices #-} -- FIXME
14 -- THINKME: GHC 7.0.3 gives us a false warning.
15 {-# OPTIONS_GHC -fno-warn-missing-methods #-}
16 -- |Parsing and printing MIME parameter values
17 -- (<http://tools.ietf.org/html/rfc2231>).
18 module Network.HTTP.Lucu.MIMEParams
24 import Control.Applicative hiding (empty)
26 import Control.Monad hiding (mapM)
27 import Control.Monad.Unicode
28 import Data.Ascii (Ascii, CIAscii, AsciiBuilder)
29 import qualified Data.Ascii as A
30 import Data.Attoparsec.Char8 as P
32 import qualified Data.ByteString.Char8 as BS
34 import Data.Collections
35 import Data.Collections.BaseInstances ()
36 import qualified Data.Collections.Newtype.TH as C
37 import qualified Data.Map as M (Map)
39 import Data.Monoid.Unicode
40 import Data.Sequence (Seq)
41 import Data.Text (Text)
42 import qualified Data.Text as T
43 import Data.Text.Encoding
44 import Data.Text.Encoding.Error
47 import Network.HTTP.Lucu.OrphanInstances ()
48 import Network.HTTP.Lucu.Parser.Http
49 import Network.HTTP.Lucu.Utils
50 import Prelude hiding (concat, filter, foldr, lookup, mapM, null, takeWhile)
51 import Prelude.Unicode
53 -- |A 'Map' from MIME parameter attributes to values. Attributes are
54 -- always case-insensitive according to RFC 2045
55 -- (<http://tools.ietf.org/html/rfc2045#section-5.1>).
57 = MIMEParams (M.Map CIAscii Text)
58 deriving (Eq, Show, Read, Monoid, Typeable)
60 C.derive [d| instance Unfoldable MIMEParams (CIAscii, Text)
61 instance Foldable MIMEParams (CIAscii, Text)
62 instance Collection MIMEParams (CIAscii, Text)
63 instance Indexed MIMEParams CIAscii Text
67 instance Map MIMEParams CIAscii Text where
69 lookup k (MIMEParams m) = lookup k m
70 {-# INLINE mapWithKey #-}
71 mapWithKey f (MIMEParams m)
72 = MIMEParams $ mapWithKey f m
73 {-# INLINE unionWith #-}
74 unionWith f (MIMEParams α) (MIMEParams β)
75 = MIMEParams $ unionWith f α β
76 {-# INLINE intersectionWith #-}
77 intersectionWith f (MIMEParams α) (MIMEParams β)
78 = MIMEParams $ intersectionWith f α β
79 {-# INLINE differenceWith #-}
80 differenceWith f (MIMEParams α) (MIMEParams β)
81 = MIMEParams $ differenceWith f α β
82 {-# INLINE isSubmapBy #-}
83 isSubmapBy f (MIMEParams α) (MIMEParams β)
85 {-# INLINE isProperSubmapBy #-}
86 isProperSubmapBy f (MIMEParams α) (MIMEParams β)
87 = isProperSubmapBy f α β
90 instance SortingCollection MIMEParams (CIAscii, Text) where
91 {-# INLINE minView #-}
92 minView (MIMEParams m) = second MIMEParams <$> minView m
94 -- |Convert MIME parameter values to an 'AsciiBuilder'.
95 printMIMEParams ∷ MIMEParams → AsciiBuilder
96 {-# INLINEABLE printMIMEParams #-}
97 printMIMEParams = foldl' f (∅)
99 f ∷ AsciiBuilder → (CIAscii, Text) → AsciiBuilder
101 f ab (k, v) = ab ⊕ A.toAsciiBuilder "; " ⊕ printPair k v
103 printPair ∷ CIAscii → Text → AsciiBuilder
104 {-# INLINEABLE printPair #-}
106 | T.any (> '\xFF') value
107 = printPairInUTF8 name value
109 = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value)
111 printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder
112 {-# INLINEABLE printPairInUTF8 #-}
113 printPairInUTF8 name value
114 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
115 A.toAsciiBuilder "*=utf-8''" ⊕
116 escapeUnsafeChars (encodeUtf8 value) (∅)
118 printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder
119 {-# INLINEABLE printPairInAscii #-}
120 printPairInAscii name value
121 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
122 A.toAsciiBuilder "=" ⊕
123 if BS.any ((¬) ∘ isToken) (A.toByteString value) then
126 A.toAsciiBuilder value
128 escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder
129 {-# INLINEABLE escapeUnsafeChars #-}
130 escapeUnsafeChars bs b
131 = case BS.uncons bs of
134 | isToken c → escapeUnsafeChars bs' $
135 b ⊕ A.toAsciiBuilder (A.unsafeFromString [c])
136 | otherwise → escapeUnsafeChars bs' $
137 b ⊕ toHex (fromIntegral $ fromEnum c)
139 toHex ∷ Word8 → AsciiBuilder
140 {-# INLINEABLE toHex #-}
141 toHex o = A.toAsciiBuilder "%" ⊕
142 A.toAsciiBuilder (A.unsafeFromString [ toHex' (o `shiftR` 8)
143 , toHex' (o .&. 0x0F) ])
145 toHex' ∷ Word8 → Char
146 {-# INLINEABLE toHex' #-}
148 | h ≤ 0x09 = toEnum $ fromIntegral
149 $ fromEnum '0' + fromIntegral h
150 | otherwise = toEnum $ fromIntegral
151 $ fromEnum 'A' + fromIntegral (h - 0x0A)
154 = InitialEncodedParam {
156 , epCharset ∷ !CIAscii
157 , epPayload ∷ !BS.ByteString
159 | ContinuedEncodedParam {
161 , epSection ∷ !Integer
162 , epPayload ∷ !BS.ByteString
166 , epSection ∷ !Integer
170 section ∷ ExtendedParam → Integer
171 {-# INLINE section #-}
172 section (InitialEncodedParam {..}) = 0
173 section ep = epSection ep
175 -- |'Parser' for MIME parameter values.
176 mimeParams ∷ Parser MIMEParams
177 {-# INLINEABLE mimeParams #-}
178 mimeParams = decodeParams =≪ P.many (try paramP)
180 paramP ∷ Parser ExtendedParam
181 paramP = do skipMany lws
188 → do (charset, payload) ← initialEncodedValue
189 return $ InitialEncodedParam name charset payload
191 → do payload ← encodedPayload
192 return $ ContinuedEncodedParam name sect payload
194 → do payload ← token <|> quotedStr
195 return $ AsciiParam name sect payload
197 nameP ∷ Parser (CIAscii, Integer, Bool)
198 nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$>
199 takeWhile1 (\c → isToken c ∧ c ≢ '*')
200 sect ← option 0 $ try (char '*' *> decimal )
201 isEncoded ← option False $ try (char '*' *> pure True)
202 return (name, sect, isEncoded)
204 initialEncodedValue ∷ Parser (CIAscii, BS.ByteString)
206 = do charset ← metadata
208 void $ metadata -- Ignore the language tag
210 payload ← encodedPayload
212 -- NOTE: I'm not sure this is the right thing, but RFC
213 -- 2231 doesn't tell us what we should do when the
214 -- charset is omitted.
215 fail "charset is missing"
217 return (charset, payload)
219 metadata ∷ Parser CIAscii
220 metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$>
221 takeWhile (\c → c ≢ '\'' ∧ isToken c)
223 encodedPayload ∷ Parser BS.ByteString
224 {-# INLINE encodedPayload #-}
225 encodedPayload = BS.concat <$> P.many (hexChar <|> rawChars)
227 hexChar ∷ Parser BS.ByteString
228 {-# INLINEABLE hexChar #-}
229 hexChar = do void $ char '%'
230 h ← satisfy isHexChar
231 l ← satisfy isHexChar
232 return $ BS.singleton $ hexToChar h l
234 isHexChar ∷ Char → Bool
235 isHexChar = inClass "0-9a-fA-F"
237 hexToChar ∷ Char → Char → Char
238 {-# INLINE hexToChar #-}
240 = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l
242 hexToInt ∷ Char → Int
243 {-# INLINEABLE hexToInt #-}
245 | c ≤ '9' = ord c - ord '0'
246 | c ≤ 'F' = ord c - ord 'A' + 10
247 | otherwise = ord c - ord 'a' + 10
249 rawChars ∷ Parser BS.ByteString
250 {-# INLINE rawChars #-}
251 rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%')
253 decodeParams ∷ (Functor m, Monad m) ⇒ [ExtendedParam] → m MIMEParams
254 {-# INLINE decodeParams #-}
255 decodeParams = (MIMEParams <$>)
256 ∘ (mapM (\(k, v) → ((,) k) <$> decodeSections v) =≪)
259 sortBySection ∷ Monad m
261 → m (M.Map CIAscii (M.Map Integer ExtendedParam))
262 sortBySection = flip go (∅)
266 → M.Map CIAscii (M.Map Integer ExtendedParam)
267 → m (M.Map CIAscii (M.Map Integer ExtendedParam))
270 = case lookup (epName x) m of
272 → let s = singleton (section x, x)
273 m' = insert (epName x, s) m
277 → case lookup (section x) s of
279 → let s' = insert (section x, x ) s
280 m' = insert (epName x, s') m
284 → fail (concat [ "Duplicate section "
287 , A.toString $ A.fromCIAscii $ epName x
291 decodeSections ∷ Monad m ⇒ M.Map Integer ExtendedParam → m Text
292 decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅)
295 ⇒ M.Map Integer ExtendedParam
298 → m (Seq ExtendedParam)
299 toSeq m expectedSect sects
304 | sect ≡ expectedSect
305 → toSeq m' (expectedSect + 1) (sects ⊳ p)
307 → fail (concat [ "Missing section "
310 , A.toString $ A.fromCIAscii $ epName p
314 decodeSeq ∷ Monad m ⇒ Seq ExtendedParam → m Text
316 = case front sects of
318 → fail "decodeSeq: internal error: empty seq"
319 Just (InitialEncodedParam {..}, xs)
320 → do d ← getDecoder epCharset
321 t ← decodeStr d epPayload
322 decodeSeq' (Just d) xs $ singleton t
323 Just (ContinuedEncodedParam {..}, _)
324 → fail "decodeSeq: internal error: CEP at section 0"
325 Just (AsciiParam {..}, xs)
326 → let t = A.toText apPayload
328 decodeSeq' Nothing xs $ singleton t
335 decodeSeq' decoder sects chunks
336 = case front sects of
338 → return $ T.concat $ toList chunks
339 Just (InitialEncodedParam {}, _)
340 → fail "decodeSeq': internal error: IEP at section > 0"
341 Just (ContinuedEncodedParam {..}, xs)
344 → do t ← decodeStr d epPayload
345 decodeSeq' decoder xs $ chunks ⊳ t
347 → fail (concat [ "Section "
350 , A.toString $ A.fromCIAscii epName
351 , "' is encoded but its first section is not"
353 Just (AsciiParam {..}, xs)
354 → let t = A.toText apPayload
356 decodeSeq' decoder xs $ chunks ⊳ t
358 type Decoder = BS.ByteString → Either UnicodeException Text
360 decodeStr ∷ Monad m ⇒ Decoder → BS.ByteString → m Text
361 decodeStr decoder str
362 = case decoder str of
364 Left e → fail $ show e
366 getDecoder ∷ Monad m ⇒ CIAscii → m Decoder
368 | charset ≡ "UTF-8" = return decodeUtf8'
369 | charset ≡ "US-ASCII" = return decodeUtf8'
370 | otherwise = fail $ "No decoders found for charset: "
371 ⧺ A.toString (A.fromCIAscii charset)