5 , GeneralizedNewtypeDeriving
6 , MultiParamTypeClasses
10 , TypeSynonymInstances
13 -- |Parsing and printing MIME parameter values
14 -- (<http://tools.ietf.org/html/rfc2231>).
15 module Network.HTTP.Lucu.MIMEParams
21 import Control.Applicative hiding (empty)
23 import Control.Monad hiding (mapM)
24 import Control.Monad.Unicode
25 import Data.Ascii (Ascii, CIAscii, AsciiBuilder)
26 import qualified Data.Ascii as A
27 import Data.Attoparsec.Char8 as P
29 import qualified Data.ByteString.Char8 as BS
31 import Data.Collections
32 import Data.Collections.BaseInstances ()
33 import qualified Data.Map as M (Map)
35 import Data.Monoid.Unicode
36 import Data.Sequence (Seq)
37 import Data.Text (Text)
38 import qualified Data.Text as T
39 import Data.Text.Encoding
40 import Data.Text.Encoding.Error
43 import Network.HTTP.Lucu.OrphanInstances ()
44 import Network.HTTP.Lucu.Parser.Http
45 import Network.HTTP.Lucu.Utils
46 import Prelude hiding (concat, filter, foldr, lookup, mapM, null, takeWhile)
47 import Prelude.Unicode
49 -- |A 'Map' from MIME parameter attributes to values. Attributes are
50 -- always case-insensitive according to RFC 2045
51 -- (<http://tools.ietf.org/html/rfc2045#section-5.1>).
53 = MIMEParams (M.Map CIAscii Text)
54 deriving (Eq, Show, Read, Monoid, Typeable)
57 instance Unfoldable MIMEParams (CIAscii, Text) where
59 insert p (MIMEParams m)
60 = MIMEParams $ insert p m
64 {-# INLINE singleton #-}
66 = MIMEParams $ singleton p
67 {-# INLINE insertMany #-}
68 insertMany f (MIMEParams m)
69 = MIMEParams $ insertMany f m
70 {-# INLINE insertManySorted #-}
71 insertManySorted f (MIMEParams m)
72 = MIMEParams $ insertManySorted f m
75 instance Foldable MIMEParams (CIAscii, Text) where
77 null (MIMEParams m) = null m
79 size (MIMEParams m) = size m
81 foldr f b (MIMEParams m) = foldr f b m
84 instance Collection MIMEParams (CIAscii, Text) where
86 filter f (MIMEParams m) = MIMEParams $ filter f m
89 instance Indexed MIMEParams CIAscii Text where
91 index k (MIMEParams m) = index k m
93 adjust f k (MIMEParams m) = MIMEParams $ adjust f k m
94 {-# INLINE inDomain #-}
95 inDomain k (MIMEParams m) = inDomain k m
98 instance Map MIMEParams CIAscii Text where
100 lookup k (MIMEParams m) = lookup k m
101 {-# INLINE mapWithKey #-}
102 mapWithKey f (MIMEParams m)
103 = MIMEParams $ mapWithKey f m
104 {-# INLINE unionWith #-}
105 unionWith f (MIMEParams α) (MIMEParams β)
106 = MIMEParams $ unionWith f α β
107 {-# INLINE intersectionWith #-}
108 intersectionWith f (MIMEParams α) (MIMEParams β)
109 = MIMEParams $ intersectionWith f α β
110 {-# INLINE differenceWith #-}
111 differenceWith f (MIMEParams α) (MIMEParams β)
112 = MIMEParams $ differenceWith f α β
113 {-# INLINE isSubmapBy #-}
114 isSubmapBy f (MIMEParams α) (MIMEParams β)
116 {-# INLINE isProperSubmapBy #-}
117 isProperSubmapBy f (MIMEParams α) (MIMEParams β)
118 = isProperSubmapBy f α β
120 -- FIXME: auto-derive
121 instance SortingCollection MIMEParams (CIAscii, Text) where
122 {-# INLINE minView #-}
123 minView (MIMEParams m) = second MIMEParams <$> minView m
125 -- |Convert MIME parameter values to an 'AsciiBuilder'.
126 printMIMEParams ∷ MIMEParams → AsciiBuilder
127 {-# INLINEABLE printMIMEParams #-}
128 printMIMEParams = foldl' f (∅)
130 f ∷ AsciiBuilder → (CIAscii, Text) → AsciiBuilder
132 f ab (k, v) = ab ⊕ A.toAsciiBuilder "; " ⊕ printPair k v
134 printPair ∷ CIAscii → Text → AsciiBuilder
135 {-# INLINEABLE printPair #-}
137 | T.any (> '\xFF') value
138 = printPairInUTF8 name value
140 = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value)
142 printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder
143 {-# INLINEABLE printPairInUTF8 #-}
144 printPairInUTF8 name value
145 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
146 A.toAsciiBuilder "*=utf-8''" ⊕
147 escapeUnsafeChars (encodeUtf8 value) (∅)
149 printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder
150 {-# INLINEABLE printPairInAscii #-}
151 printPairInAscii name value
152 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
153 A.toAsciiBuilder "=" ⊕
154 if BS.any ((¬) ∘ isToken) (A.toByteString value) then
157 A.toAsciiBuilder value
159 escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder
160 {-# INLINEABLE escapeUnsafeChars #-}
161 escapeUnsafeChars bs b
162 = case BS.uncons bs of
165 | isToken c → escapeUnsafeChars bs' $
166 b ⊕ A.toAsciiBuilder (A.unsafeFromString [c])
167 | otherwise → escapeUnsafeChars bs' $
168 b ⊕ toHex (fromIntegral $ fromEnum c)
170 toHex ∷ Word8 → AsciiBuilder
171 {-# INLINEABLE toHex #-}
172 toHex o = A.toAsciiBuilder "%" ⊕
173 A.toAsciiBuilder (A.unsafeFromString [ toHex' (o `shiftR` 8)
174 , toHex' (o .&. 0x0F) ])
176 toHex' ∷ Word8 → Char
177 {-# INLINEABLE toHex' #-}
179 | h ≤ 0x09 = toEnum $ fromIntegral
180 $ fromEnum '0' + fromIntegral h
181 | otherwise = toEnum $ fromIntegral
182 $ fromEnum 'A' + fromIntegral (h - 0x0A)
185 = InitialEncodedParam {
187 , epCharset ∷ !CIAscii
188 , epPayload ∷ !BS.ByteString
190 | ContinuedEncodedParam {
192 , epSection ∷ !Integer
193 , epPayload ∷ !BS.ByteString
197 , epSection ∷ !Integer
201 section ∷ ExtendedParam → Integer
202 {-# INLINE section #-}
203 section (InitialEncodedParam {..}) = 0
204 section ep = epSection ep
206 -- |'Parser' for MIME parameter values.
207 mimeParams ∷ Parser MIMEParams
208 {-# INLINEABLE mimeParams #-}
209 mimeParams = decodeParams =≪ P.many (try paramP)
211 paramP ∷ Parser ExtendedParam
212 paramP = do skipMany lws
219 → do (charset, payload) ← initialEncodedValue
220 return $ InitialEncodedParam name charset payload
222 → do payload ← encodedPayload
223 return $ ContinuedEncodedParam name sect payload
225 → do payload ← token <|> quotedStr
226 return $ AsciiParam name sect payload
228 nameP ∷ Parser (CIAscii, Integer, Bool)
229 nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$>
230 takeWhile1 (\c → isToken c ∧ c ≢ '*')
231 sect ← option 0 $ try (char '*' *> decimal )
232 isEncoded ← option False $ try (char '*' *> pure True)
233 return (name, sect, isEncoded)
235 initialEncodedValue ∷ Parser (CIAscii, BS.ByteString)
237 = do charset ← metadata
239 void $ metadata -- Ignore the language tag
241 payload ← encodedPayload
243 -- NOTE: I'm not sure this is the right thing, but RFC
244 -- 2231 doesn't tell us what we should do when the
245 -- charset is omitted.
246 fail "charset is missing"
248 return (charset, payload)
250 metadata ∷ Parser CIAscii
251 metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$>
252 takeWhile (\c → c ≢ '\'' ∧ isToken c)
254 encodedPayload ∷ Parser BS.ByteString
255 {-# INLINE encodedPayload #-}
256 encodedPayload = BS.concat <$> P.many (hexChar <|> rawChars)
258 hexChar ∷ Parser BS.ByteString
259 {-# INLINEABLE hexChar #-}
260 hexChar = do void $ char '%'
261 h ← satisfy isHexChar
262 l ← satisfy isHexChar
263 return $ BS.singleton $ hexToChar h l
265 isHexChar ∷ Char → Bool
266 isHexChar = inClass "0-9a-fA-F"
268 hexToChar ∷ Char → Char → Char
269 {-# INLINE hexToChar #-}
271 = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l
273 hexToInt ∷ Char → Int
274 {-# INLINEABLE hexToInt #-}
276 | c ≤ '9' = ord c - ord '0'
277 | c ≤ 'F' = ord c - ord 'A' + 10
278 | otherwise = ord c - ord 'a' + 10
280 rawChars ∷ Parser BS.ByteString
281 {-# INLINE rawChars #-}
282 rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%')
284 decodeParams ∷ (Functor m, Monad m) ⇒ [ExtendedParam] → m MIMEParams
285 {-# INLINE decodeParams #-}
286 decodeParams = (MIMEParams <$>)
287 ∘ (mapM (\(k, v) → ((,) k) <$> decodeSections v) =≪)
290 sortBySection ∷ Monad m
292 → m (M.Map CIAscii (M.Map Integer ExtendedParam))
293 sortBySection = flip go (∅)
297 → M.Map CIAscii (M.Map Integer ExtendedParam)
298 → m (M.Map CIAscii (M.Map Integer ExtendedParam))
301 = case lookup (epName x) m of
303 → let s = singleton (section x, x)
304 m' = insert (epName x, s) m
308 → case lookup (section x) s of
310 → let s' = insert (section x, x ) s
311 m' = insert (epName x, s') m
315 → fail (concat [ "Duplicate section "
318 , A.toString $ A.fromCIAscii $ epName x
322 decodeSections ∷ Monad m ⇒ M.Map Integer ExtendedParam → m Text
323 decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅)
326 ⇒ M.Map Integer ExtendedParam
329 → m (Seq ExtendedParam)
330 toSeq m expectedSect sects
335 | sect ≡ expectedSect
336 → toSeq m' (expectedSect + 1) (sects ⊳ p)
338 → fail (concat [ "Missing section "
341 , A.toString $ A.fromCIAscii $ epName p
345 decodeSeq ∷ Monad m ⇒ Seq ExtendedParam → m Text
347 = case front sects of
349 → fail "decodeSeq: internal error: empty seq"
350 Just (InitialEncodedParam {..}, xs)
351 → do d ← getDecoder epCharset
352 t ← decodeStr d epPayload
353 decodeSeq' (Just d) xs $ singleton t
354 Just (ContinuedEncodedParam {..}, _)
355 → fail "decodeSeq: internal error: CEP at section 0"
356 Just (AsciiParam {..}, xs)
357 → let t = A.toText apPayload
359 decodeSeq' Nothing xs $ singleton t
366 decodeSeq' decoder sects chunks
367 = case front sects of
369 → return $ T.concat $ toList chunks
370 Just (InitialEncodedParam {}, _)
371 → fail "decodeSeq': internal error: IEP at section > 0"
372 Just (ContinuedEncodedParam {..}, xs)
375 → do t ← decodeStr d epPayload
376 decodeSeq' decoder xs $ chunks ⊳ t
378 → fail (concat [ "Section "
381 , A.toString $ A.fromCIAscii epName
382 , "' is encoded but its first section is not"
384 Just (AsciiParam {..}, xs)
385 → let t = A.toText apPayload
387 decodeSeq' decoder xs $ chunks ⊳ t
389 type Decoder = BS.ByteString → Either UnicodeException Text
391 decodeStr ∷ Monad m ⇒ Decoder → BS.ByteString → m Text
392 decodeStr decoder str
393 = case decoder str of
395 Left e → fail $ show e
397 getDecoder ∷ Monad m ⇒ CIAscii → m Decoder
399 | charset ≡ "UTF-8" = return decodeUtf8'
400 | charset ≡ "US-ASCII" = return decodeUtf8'
401 | otherwise = fail $ "No decoders found for charset: "
402 ⧺ A.toString (A.fromCIAscii charset)