5 , GeneralizedNewtypeDeriving
6 , MultiParamTypeClasses
10 , TypeSynonymInstances
13 {-# OPTIONS_GHC -ddump-splices #-} -- FIXME
14 -- THINKME: GHC 7.0.3 gives us a false warning.
15 {-# OPTIONS_GHC -fno-warn-missing-methods #-}
16 -- |Parsing and printing MIME parameter values
17 -- (<http://tools.ietf.org/html/rfc2231>).
18 module Network.HTTP.Lucu.MIMEParams
24 import Control.Applicative hiding (empty)
25 import Control.Monad hiding (mapM)
26 import Control.Monad.Unicode
27 import Data.Ascii (Ascii, CIAscii, AsciiBuilder)
28 import qualified Data.Ascii as A
29 import Data.Attoparsec.Char8 as P
31 import qualified Data.ByteString.Char8 as BS
33 import Data.Collections
34 import Data.Collections.BaseInstances ()
35 import qualified Data.Collections.Newtype.TH as C
36 import qualified Data.Map as M (Map)
38 import Data.Monoid.Unicode
39 import Data.Sequence (Seq)
40 import Data.Text (Text)
41 import qualified Data.Text as T
42 import Data.Text.Encoding
43 import Data.Text.Encoding.Error
46 import Network.HTTP.Lucu.OrphanInstances ()
47 import Network.HTTP.Lucu.Parser.Http
48 import Network.HTTP.Lucu.Utils
49 import Prelude hiding (concat, filter, foldr, lookup, mapM, null, takeWhile)
50 import Prelude.Unicode
52 -- |A 'Map' from MIME parameter attributes to values. Attributes are
53 -- always case-insensitive according to RFC 2045
54 -- (<http://tools.ietf.org/html/rfc2045#section-5.1>).
56 = MIMEParams (M.Map CIAscii Text)
57 deriving (Eq, Show, Read, Monoid, Typeable)
59 C.derive [d| instance Unfoldable MIMEParams (CIAscii, Text)
60 instance Foldable MIMEParams (CIAscii, Text)
61 instance Collection MIMEParams (CIAscii, Text)
62 instance Indexed MIMEParams CIAscii Text
63 -- instance Map MIMEParams CIAscii Text
64 instance SortingCollection MIMEParams (CIAscii, Text)
68 instance Map MIMEParams CIAscii Text where
70 lookup k (MIMEParams m) = lookup k m
71 {-# INLINE mapWithKey #-}
72 mapWithKey f (MIMEParams m)
73 = MIMEParams $ mapWithKey f m
74 {-# INLINE unionWith #-}
75 unionWith f (MIMEParams α) (MIMEParams β)
76 = MIMEParams $ unionWith f α β
77 {-# INLINE intersectionWith #-}
78 intersectionWith f (MIMEParams α) (MIMEParams β)
79 = MIMEParams $ intersectionWith f α β
80 {-# INLINE differenceWith #-}
81 differenceWith f (MIMEParams α) (MIMEParams β)
82 = MIMEParams $ differenceWith f α β
83 {-# INLINE isSubmapBy #-}
84 isSubmapBy f (MIMEParams α) (MIMEParams β)
86 {-# INLINE isProperSubmapBy #-}
87 isProperSubmapBy f (MIMEParams α) (MIMEParams β)
88 = isProperSubmapBy f α β
90 -- |Convert MIME parameter values to an 'AsciiBuilder'.
91 printMIMEParams ∷ MIMEParams → AsciiBuilder
92 {-# INLINEABLE printMIMEParams #-}
93 printMIMEParams = foldl' f (∅)
95 f ∷ AsciiBuilder → (CIAscii, Text) → AsciiBuilder
97 f ab (k, v) = ab ⊕ A.toAsciiBuilder "; " ⊕ printPair k v
99 printPair ∷ CIAscii → Text → AsciiBuilder
100 {-# INLINEABLE printPair #-}
102 | T.any (> '\xFF') value
103 = printPairInUTF8 name value
105 = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value)
107 printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder
108 {-# INLINEABLE printPairInUTF8 #-}
109 printPairInUTF8 name value
110 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
111 A.toAsciiBuilder "*=utf-8''" ⊕
112 escapeUnsafeChars (encodeUtf8 value) (∅)
114 printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder
115 {-# INLINEABLE printPairInAscii #-}
116 printPairInAscii name value
117 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
118 A.toAsciiBuilder "=" ⊕
119 if BS.any ((¬) ∘ isToken) (A.toByteString value) then
122 A.toAsciiBuilder value
124 escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder
125 {-# INLINEABLE escapeUnsafeChars #-}
126 escapeUnsafeChars bs b
127 = case BS.uncons bs of
130 | isToken c → escapeUnsafeChars bs' $
131 b ⊕ A.toAsciiBuilder (A.unsafeFromString [c])
132 | otherwise → escapeUnsafeChars bs' $
133 b ⊕ toHex (fromIntegral $ fromEnum c)
135 toHex ∷ Word8 → AsciiBuilder
136 {-# INLINEABLE toHex #-}
137 toHex o = A.toAsciiBuilder "%" ⊕
138 A.toAsciiBuilder (A.unsafeFromString [ toHex' (o `shiftR` 8)
139 , toHex' (o .&. 0x0F) ])
141 toHex' ∷ Word8 → Char
142 {-# INLINEABLE toHex' #-}
144 | h ≤ 0x09 = toEnum $ fromIntegral
145 $ fromEnum '0' + fromIntegral h
146 | otherwise = toEnum $ fromIntegral
147 $ fromEnum 'A' + fromIntegral (h - 0x0A)
150 = InitialEncodedParam {
152 , epCharset ∷ !CIAscii
153 , epPayload ∷ !BS.ByteString
155 | ContinuedEncodedParam {
157 , epSection ∷ !Integer
158 , epPayload ∷ !BS.ByteString
162 , epSection ∷ !Integer
166 section ∷ ExtendedParam → Integer
167 {-# INLINE section #-}
168 section (InitialEncodedParam {..}) = 0
169 section ep = epSection ep
171 -- |'Parser' for MIME parameter values.
172 mimeParams ∷ Parser MIMEParams
173 {-# INLINEABLE mimeParams #-}
174 mimeParams = decodeParams =≪ P.many (try paramP)
176 paramP ∷ Parser ExtendedParam
177 paramP = do skipMany lws
184 → do (charset, payload) ← initialEncodedValue
185 return $ InitialEncodedParam name charset payload
187 → do payload ← encodedPayload
188 return $ ContinuedEncodedParam name sect payload
190 → do payload ← token <|> quotedStr
191 return $ AsciiParam name sect payload
193 nameP ∷ Parser (CIAscii, Integer, Bool)
194 nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$>
195 takeWhile1 (\c → isToken c ∧ c ≢ '*')
196 sect ← option 0 $ try (char '*' *> decimal )
197 isEncoded ← option False $ try (char '*' *> pure True)
198 return (name, sect, isEncoded)
200 initialEncodedValue ∷ Parser (CIAscii, BS.ByteString)
202 = do charset ← metadata
204 void $ metadata -- Ignore the language tag
206 payload ← encodedPayload
208 -- NOTE: I'm not sure this is the right thing, but RFC
209 -- 2231 doesn't tell us what we should do when the
210 -- charset is omitted.
211 fail "charset is missing"
213 return (charset, payload)
215 metadata ∷ Parser CIAscii
216 metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$>
217 takeWhile (\c → c ≢ '\'' ∧ isToken c)
219 encodedPayload ∷ Parser BS.ByteString
220 {-# INLINE encodedPayload #-}
221 encodedPayload = BS.concat <$> P.many (hexChar <|> rawChars)
223 hexChar ∷ Parser BS.ByteString
224 {-# INLINEABLE hexChar #-}
225 hexChar = do void $ char '%'
226 h ← satisfy isHexChar
227 l ← satisfy isHexChar
228 return $ BS.singleton $ hexToChar h l
230 isHexChar ∷ Char → Bool
231 isHexChar = inClass "0-9a-fA-F"
233 hexToChar ∷ Char → Char → Char
234 {-# INLINE hexToChar #-}
236 = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l
238 hexToInt ∷ Char → Int
239 {-# INLINEABLE hexToInt #-}
241 | c ≤ '9' = ord c - ord '0'
242 | c ≤ 'F' = ord c - ord 'A' + 10
243 | otherwise = ord c - ord 'a' + 10
245 rawChars ∷ Parser BS.ByteString
246 {-# INLINE rawChars #-}
247 rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%')
249 decodeParams ∷ (Functor m, Monad m) ⇒ [ExtendedParam] → m MIMEParams
250 {-# INLINE decodeParams #-}
251 decodeParams = (MIMEParams <$>)
252 ∘ (mapM (\(k, v) → ((,) k) <$> decodeSections v) =≪)
255 sortBySection ∷ Monad m
257 → m (M.Map CIAscii (M.Map Integer ExtendedParam))
258 sortBySection = flip go (∅)
262 → M.Map CIAscii (M.Map Integer ExtendedParam)
263 → m (M.Map CIAscii (M.Map Integer ExtendedParam))
266 = case lookup (epName x) m of
268 → let s = singleton (section x, x)
269 m' = insert (epName x, s) m
273 → case lookup (section x) s of
275 → let s' = insert (section x, x ) s
276 m' = insert (epName x, s') m
280 → fail (concat [ "Duplicate section "
283 , A.toString $ A.fromCIAscii $ epName x
287 decodeSections ∷ Monad m ⇒ M.Map Integer ExtendedParam → m Text
288 decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅)
291 ⇒ M.Map Integer ExtendedParam
294 → m (Seq ExtendedParam)
295 toSeq m expectedSect sects
300 | sect ≡ expectedSect
301 → toSeq m' (expectedSect + 1) (sects ⊳ p)
303 → fail (concat [ "Missing section "
306 , A.toString $ A.fromCIAscii $ epName p
310 decodeSeq ∷ Monad m ⇒ Seq ExtendedParam → m Text
312 = case front sects of
314 → fail "decodeSeq: internal error: empty seq"
315 Just (InitialEncodedParam {..}, xs)
316 → do d ← getDecoder epCharset
317 t ← decodeStr d epPayload
318 decodeSeq' (Just d) xs $ singleton t
319 Just (ContinuedEncodedParam {..}, _)
320 → fail "decodeSeq: internal error: CEP at section 0"
321 Just (AsciiParam {..}, xs)
322 → let t = A.toText apPayload
324 decodeSeq' Nothing xs $ singleton t
331 decodeSeq' decoder sects chunks
332 = case front sects of
334 → return $ T.concat $ toList chunks
335 Just (InitialEncodedParam {}, _)
336 → fail "decodeSeq': internal error: IEP at section > 0"
337 Just (ContinuedEncodedParam {..}, xs)
340 → do t ← decodeStr d epPayload
341 decodeSeq' decoder xs $ chunks ⊳ t
343 → fail (concat [ "Section "
346 , A.toString $ A.fromCIAscii epName
347 , "' is encoded but its first section is not"
349 Just (AsciiParam {..}, xs)
350 → let t = A.toText apPayload
352 decodeSeq' decoder xs $ chunks ⊳ t
354 type Decoder = BS.ByteString → Either UnicodeException Text
356 decodeStr ∷ Monad m ⇒ Decoder → BS.ByteString → m Text
357 decodeStr decoder str
358 = case decoder str of
360 Left e → fail $ show e
362 getDecoder ∷ Monad m ⇒ CIAscii → m Decoder
364 | charset ≡ "UTF-8" = return decodeUtf8'
365 | charset ≡ "US-ASCII" = return decodeUtf8'
366 | otherwise = fail $ "No decoders found for charset: "
367 ⧺ A.toString (A.fromCIAscii charset)