5 , GeneralizedNewtypeDeriving
6 , MultiParamTypeClasses
10 , TypeSynonymInstances
13 -- |Parsing and printing MIME parameter values
14 -- (<http://tools.ietf.org/html/rfc2231>).
15 module Network.HTTP.Lucu.MIMEParams
21 import Control.Applicative hiding (empty)
23 import Control.Monad hiding (mapM)
24 import Control.Monad.Unicode
25 import Data.Ascii (Ascii, CIAscii, AsciiBuilder)
26 import qualified Data.Ascii as A
27 import Data.Attoparsec.Char8 as P
29 import qualified Data.ByteString.Char8 as BS
31 import Data.Collections
32 import Data.Collections.BaseInstances ()
33 import qualified Data.Map as M (Map)
35 import Data.Monoid.Unicode
36 import Data.Sequence (Seq)
37 import Data.Text (Text)
38 import qualified Data.Text as T
39 import Data.Text.Encoding
40 import Data.Text.Encoding.Error
43 import Language.Haskell.TH.Syntax
44 import Network.HTTP.Lucu.OrphanInstances ()
45 import Network.HTTP.Lucu.Parser.Http
46 import Network.HTTP.Lucu.Utils
47 import Prelude hiding (concat, filter, foldr, lookup, mapM, null, takeWhile)
48 import Prelude.Unicode
50 -- |A 'Map' from MIME parameter attributes to values. Attributes are
51 -- always case-insensitive according to RFC 2045
52 -- (<http://tools.ietf.org/html/rfc2045#section-5.1>).
54 = MIMEParams (M.Map CIAscii Text)
55 deriving (Eq, Show, Read, Monoid, Typeable)
57 instance Lift MIMEParams where
58 lift (MIMEParams m) = [| MIMEParams $(lift m) |]
60 instance Unfoldable MIMEParams (CIAscii, Text) where
62 insert p (MIMEParams m)
63 = MIMEParams $ insert p m
67 {-# INLINE singleton #-}
69 = MIMEParams $ singleton p
70 {-# INLINE insertMany #-}
71 insertMany f (MIMEParams m)
72 = MIMEParams $ insertMany f m
73 {-# INLINE insertManySorted #-}
74 insertManySorted f (MIMEParams m)
75 = MIMEParams $ insertManySorted f m
77 instance Foldable MIMEParams (CIAscii, Text) where
79 null (MIMEParams m) = null m
81 size (MIMEParams m) = size m
83 foldr f b (MIMEParams m) = foldr f b m
85 instance Collection MIMEParams (CIAscii, Text) where
87 filter f (MIMEParams m) = MIMEParams $ filter f m
89 instance Indexed MIMEParams CIAscii Text where
91 index k (MIMEParams m) = index k m
93 adjust f k (MIMEParams m) = MIMEParams $ adjust f k m
94 {-# INLINE inDomain #-}
95 inDomain k (MIMEParams m) = inDomain k m
97 instance Map MIMEParams CIAscii Text where
99 lookup k (MIMEParams m) = lookup k m
100 {-# INLINE mapWithKey #-}
101 mapWithKey f (MIMEParams m)
102 = MIMEParams $ mapWithKey f m
103 {-# INLINE unionWith #-}
104 unionWith f (MIMEParams α) (MIMEParams β)
105 = MIMEParams $ unionWith f α β
106 {-# INLINE intersectionWith #-}
107 intersectionWith f (MIMEParams α) (MIMEParams β)
108 = MIMEParams $ intersectionWith f α β
109 {-# INLINE differenceWith #-}
110 differenceWith f (MIMEParams α) (MIMEParams β)
111 = MIMEParams $ differenceWith f α β
112 {-# INLINE isSubmapBy #-}
113 isSubmapBy f (MIMEParams α) (MIMEParams β)
115 {-# INLINE isProperSubmapBy #-}
116 isProperSubmapBy f (MIMEParams α) (MIMEParams β)
117 = isProperSubmapBy f α β
119 instance SortingCollection MIMEParams (CIAscii, Text) where
120 {-# INLINE minView #-}
121 minView (MIMEParams m) = second MIMEParams <$> minView m
123 -- |Convert MIME parameter values to an 'AsciiBuilder'.
124 printMIMEParams ∷ MIMEParams → AsciiBuilder
125 {-# INLINEABLE printMIMEParams #-}
126 printMIMEParams = foldl' f (∅)
128 f ∷ AsciiBuilder → (CIAscii, Text) → AsciiBuilder
130 f ab (k, v) = ab ⊕ A.toAsciiBuilder "; " ⊕ printPair k v
132 printPair ∷ CIAscii → Text → AsciiBuilder
133 {-# INLINEABLE printPair #-}
135 | T.any (> '\xFF') value
136 = printPairInUTF8 name value
138 = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value)
140 printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder
141 {-# INLINEABLE printPairInUTF8 #-}
142 printPairInUTF8 name value
143 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
144 A.toAsciiBuilder "*=utf-8''" ⊕
145 escapeUnsafeChars (encodeUtf8 value) (∅)
147 printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder
148 {-# INLINEABLE printPairInAscii #-}
149 printPairInAscii name value
150 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
151 A.toAsciiBuilder "=" ⊕
152 if BS.any ((¬) ∘ isToken) (A.toByteString value) then
155 A.toAsciiBuilder value
157 escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder
158 {-# INLINEABLE escapeUnsafeChars #-}
159 escapeUnsafeChars bs b
160 = case BS.uncons bs of
163 | isToken c → escapeUnsafeChars bs' $
164 b ⊕ A.toAsciiBuilder (A.unsafeFromString [c])
165 | otherwise → escapeUnsafeChars bs' $
166 b ⊕ toHex (fromIntegral $ fromEnum c)
168 toHex ∷ Word8 → AsciiBuilder
169 {-# INLINEABLE toHex #-}
170 toHex o = A.toAsciiBuilder "%" ⊕
171 A.toAsciiBuilder (A.unsafeFromString [ toHex' (o `shiftR` 8)
172 , toHex' (o .&. 0x0F) ])
174 toHex' ∷ Word8 → Char
175 {-# INLINEABLE toHex' #-}
177 | h ≤ 0x09 = toEnum $ fromIntegral
178 $ fromEnum '0' + fromIntegral h
179 | otherwise = toEnum $ fromIntegral
180 $ fromEnum 'A' + fromIntegral (h - 0x0A)
183 = InitialEncodedParam {
185 , epCharset ∷ !CIAscii
186 , epPayload ∷ !BS.ByteString
188 | ContinuedEncodedParam {
190 , epSection ∷ !Integer
191 , epPayload ∷ !BS.ByteString
195 , epSection ∷ !Integer
199 section ∷ ExtendedParam → Integer
200 {-# INLINE section #-}
201 section (InitialEncodedParam {..}) = 0
202 section ep = epSection ep
204 -- |'Parser' for MIME parameter values.
205 mimeParams ∷ Parser MIMEParams
206 {-# INLINEABLE mimeParams #-}
207 mimeParams = decodeParams =≪ P.many (try paramP)
209 paramP ∷ Parser ExtendedParam
210 paramP = do skipMany lws
217 → do (charset, payload) ← initialEncodedValue
218 return $ InitialEncodedParam name charset payload
220 → do payload ← encodedPayload
221 return $ ContinuedEncodedParam name sect payload
223 → do payload ← token <|> quotedStr
224 return $ AsciiParam name sect payload
226 nameP ∷ Parser (CIAscii, Integer, Bool)
227 nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$>
228 takeWhile1 (\c → isToken c ∧ c ≢ '*')
229 sect ← option 0 $ try (char '*' *> decimal )
230 isEncoded ← option False $ try (char '*' *> pure True)
231 return (name, sect, isEncoded)
233 initialEncodedValue ∷ Parser (CIAscii, BS.ByteString)
235 = do charset ← metadata
237 void $ metadata -- Ignore the language tag
239 payload ← encodedPayload
241 -- NOTE: I'm not sure this is the right thing, but RFC
242 -- 2231 doesn't tell us what we should do when the
243 -- charset is omitted.
244 fail "charset is missing"
246 return (charset, payload)
248 metadata ∷ Parser CIAscii
249 metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$>
250 takeWhile (\c → c ≢ '\'' ∧ isToken c)
252 encodedPayload ∷ Parser BS.ByteString
253 {-# INLINE encodedPayload #-}
254 encodedPayload = BS.concat <$> P.many (hexChar <|> rawChars)
256 hexChar ∷ Parser BS.ByteString
257 {-# INLINEABLE hexChar #-}
258 hexChar = do void $ char '%'
259 h ← satisfy isHexChar
260 l ← satisfy isHexChar
261 return $ BS.singleton $ hexToChar h l
263 isHexChar ∷ Char → Bool
264 isHexChar = inClass "0-9a-fA-F"
266 hexToChar ∷ Char → Char → Char
267 {-# INLINE hexToChar #-}
269 = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l
271 hexToInt ∷ Char → Int
272 {-# INLINEABLE hexToInt #-}
274 | c ≤ '9' = ord c - ord '0'
275 | c ≤ 'F' = ord c - ord 'A' + 10
276 | otherwise = ord c - ord 'a' + 10
278 rawChars ∷ Parser BS.ByteString
279 {-# INLINE rawChars #-}
280 rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%')
282 decodeParams ∷ (Functor m, Monad m) ⇒ [ExtendedParam] → m MIMEParams
283 {-# INLINE decodeParams #-}
284 decodeParams = (MIMEParams <$>)
285 ∘ (mapM (\(k, v) → ((,) k) <$> decodeSections v) =≪)
288 sortBySection ∷ Monad m
290 → m (M.Map CIAscii (M.Map Integer ExtendedParam))
291 sortBySection = flip go (∅)
295 → M.Map CIAscii (M.Map Integer ExtendedParam)
296 → m (M.Map CIAscii (M.Map Integer ExtendedParam))
299 = case lookup (epName x) m of
301 → let s = singleton (section x, x)
302 m' = insert (epName x, s) m
306 → case lookup (section x) s of
308 → let s' = insert (section x, x ) s
309 m' = insert (epName x, s') m
313 → fail (concat [ "Duplicate section "
316 , A.toString $ A.fromCIAscii $ epName x
320 decodeSections ∷ Monad m ⇒ M.Map Integer ExtendedParam → m Text
321 decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅)
324 ⇒ M.Map Integer ExtendedParam
327 → m (Seq ExtendedParam)
328 toSeq m expectedSect sects
333 | sect ≡ expectedSect
334 → toSeq m' (expectedSect + 1) (sects ⊳ p)
336 → fail (concat [ "Missing section "
339 , A.toString $ A.fromCIAscii $ epName p
343 decodeSeq ∷ Monad m ⇒ Seq ExtendedParam → m Text
345 = case front sects of
347 → fail "decodeSeq: internal error: empty seq"
348 Just (InitialEncodedParam {..}, xs)
349 → do d ← getDecoder epCharset
350 t ← decodeStr d epPayload
351 decodeSeq' (Just d) xs $ singleton t
352 Just (ContinuedEncodedParam {..}, _)
353 → fail "decodeSeq: internal error: CEP at section 0"
354 Just (AsciiParam {..}, xs)
355 → let t = A.toText apPayload
357 decodeSeq' Nothing xs $ singleton t
364 decodeSeq' decoder sects chunks
365 = case front sects of
367 → return $ T.concat $ toList chunks
368 Just (InitialEncodedParam {}, _)
369 → fail "decodeSeq': internal error: IEP at section > 0"
370 Just (ContinuedEncodedParam {..}, xs)
373 → do t ← decodeStr d epPayload
374 decodeSeq' decoder xs $ chunks ⊳ t
376 → fail (concat [ "Section "
379 , A.toString $ A.fromCIAscii epName
380 , "' is encoded but its first section is not"
382 Just (AsciiParam {..}, xs)
383 → let t = A.toText apPayload
385 decodeSeq' decoder xs $ chunks ⊳ t
387 type Decoder = BS.ByteString → Either UnicodeException Text
389 decodeStr ∷ Monad m ⇒ Decoder → BS.ByteString → m Text
390 decodeStr decoder str
391 = case decoder str of
393 Left e → fail $ show e
395 getDecoder ∷ Monad m ⇒ CIAscii → m Decoder
397 | charset ≡ "UTF-8" = return decodeUtf8'
398 | charset ≡ "US-ASCII" = return decodeUtf8'
399 | otherwise = fail $ "No decoders found for charset: "
400 ⧺ A.toString (A.fromCIAscii charset)