5 , GeneralizedNewtypeDeriving
11 -- |Parsing and printing MIME parameter values
12 -- (<http://tools.ietf.org/html/rfc2231>).
13 module Network.HTTP.Lucu.MIMEParams
19 import Control.Applicative
20 import Control.Monad hiding (mapM)
21 import Control.Monad.Unicode
22 import Data.Ascii (Ascii, CIAscii, AsciiBuilder)
23 import qualified Data.Ascii as A
24 import Data.Attoparsec.Char8 as P
26 import qualified Data.ByteString.Char8 as BS
31 import qualified Data.Map as M
33 import Data.Monoid.Unicode
34 import Data.Sequence (Seq, ViewL(..))
35 import qualified Data.Sequence as S
36 import Data.Sequence.Unicode hiding ((∅))
37 import Data.Text (Text)
38 import qualified Data.Text as T
39 import Data.Text.Encoding
40 import Data.Text.Encoding.Error
41 import Data.Traversable
43 import Language.Haskell.TH.Syntax
44 import Network.HTTP.Lucu.Parser.Http
45 import Network.HTTP.Lucu.Utils
46 import Prelude hiding (concat, mapM, takeWhile)
47 import Prelude.Unicode
49 -- |A 'Map' from MIME parameter attributes to values. Attributes are
50 -- always case-insensitive according to RFC 2045
51 -- (<http://tools.ietf.org/html/rfc2045#section-5.1>).
53 = MIMEParams (Map CIAscii Text)
54 deriving (Eq, Show, Read, Monoid, Typeable)
56 instance Lift MIMEParams where
57 lift (MIMEParams m) = [| MIMEParams $(liftParams m) |]
59 liftParams ∷ Map CIAscii Text → Q Exp
60 liftParams = liftMap liftCIAscii liftText
62 -- |Convert MIME parameter values to an 'AsciiBuilder'.
63 printMIMEParams ∷ MIMEParams → AsciiBuilder
64 {-# INLINEABLE printMIMEParams #-}
65 #if MIN_VERSION_containers(0, 4, 1)
66 printMIMEParams (MIMEParams m) = M.foldlWithKey' f (∅) m
68 printMIMEParams (MIMEParams m) = M.foldlWithKey f (∅) m
71 f ∷ AsciiBuilder → CIAscii → Text → AsciiBuilder
73 f ab k v = ab ⊕ A.toAsciiBuilder "; " ⊕ printPair k v
75 printPair ∷ CIAscii → Text → AsciiBuilder
76 {-# INLINEABLE printPair #-}
78 | T.any (> '\xFF') value
79 = printPairInUTF8 name value
81 = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value)
83 printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder
84 {-# INLINEABLE printPairInUTF8 #-}
85 printPairInUTF8 name value
86 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
87 A.toAsciiBuilder "*=utf-8''" ⊕
88 escapeUnsafeChars (encodeUtf8 value) (∅)
90 printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder
91 {-# INLINEABLE printPairInAscii #-}
92 printPairInAscii name value
93 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
94 A.toAsciiBuilder "=" ⊕
95 if BS.any ((¬) ∘ isToken) (A.toByteString value) then
98 A.toAsciiBuilder value
100 escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder
101 {-# INLINEABLE escapeUnsafeChars #-}
102 escapeUnsafeChars bs b
103 = case BS.uncons bs of
106 | isToken c → escapeUnsafeChars bs' $
107 b ⊕ A.toAsciiBuilder (A.unsafeFromString [c])
108 | otherwise → escapeUnsafeChars bs' $
109 b ⊕ toHex (fromIntegral $ fromEnum c)
111 toHex ∷ Word8 → AsciiBuilder
112 {-# INLINEABLE toHex #-}
113 toHex o = A.toAsciiBuilder "%" ⊕
114 A.toAsciiBuilder (A.unsafeFromString [ toHex' (o `shiftR` 8)
115 , toHex' (o .&. 0x0F) ])
117 toHex' ∷ Word8 → Char
118 {-# INLINEABLE toHex' #-}
120 | h ≤ 0x09 = toEnum $ fromIntegral
121 $ fromEnum '0' + fromIntegral h
122 | otherwise = toEnum $ fromIntegral
123 $ fromEnum 'A' + fromIntegral (h - 0x0A)
126 = InitialEncodedParam {
128 , epCharset ∷ !CIAscii
129 , epPayload ∷ !BS.ByteString
131 | ContinuedEncodedParam {
133 , epSection ∷ !Integer
134 , epPayload ∷ !BS.ByteString
138 , epSection ∷ !Integer
142 section ∷ ExtendedParam → Integer
143 {-# INLINE section #-}
144 section (InitialEncodedParam {..}) = 0
145 section ep = epSection ep
147 -- |'Parser' for MIME parameter values.
148 mimeParams ∷ Parser MIMEParams
149 {-# INLINEABLE mimeParams #-}
150 mimeParams = decodeParams =≪ P.many (try paramP)
152 paramP ∷ Parser ExtendedParam
153 paramP = do skipMany lws
160 → do (charset, payload) ← initialEncodedValue
161 return $ InitialEncodedParam name charset payload
163 → do payload ← encodedPayload
164 return $ ContinuedEncodedParam name sect payload
166 → do payload ← token <|> quotedStr
167 return $ AsciiParam name sect payload
169 nameP ∷ Parser (CIAscii, Integer, Bool)
170 nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$>
171 takeWhile1 (\c → isToken c ∧ c ≢ '*')
172 sect ← option 0 $ try (char '*' *> decimal )
173 isEncoded ← option False $ try (char '*' *> pure True)
174 return (name, sect, isEncoded)
176 initialEncodedValue ∷ Parser (CIAscii, BS.ByteString)
178 = do charset ← metadata
180 void $ metadata -- Ignore the language tag
182 payload ← encodedPayload
184 -- NOTE: I'm not sure this is the right thing, but RFC
185 -- 2231 doesn't tell us what we should do when the
186 -- charset is omitted.
187 return ("US-ASCII", payload)
188 -- FIXME: Rethink about this behaviour.
190 return (charset, payload)
192 metadata ∷ Parser CIAscii
193 metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$>
194 takeWhile (\c → c ≢ '\'' ∧ isToken c)
196 encodedPayload ∷ Parser BS.ByteString
197 {-# INLINE encodedPayload #-}
198 encodedPayload = BS.concat <$> P.many (hexChar <|> rawChars)
200 hexChar ∷ Parser BS.ByteString
201 {-# INLINEABLE hexChar #-}
202 hexChar = do void $ char '%'
203 h ← satisfy isHexChar
204 l ← satisfy isHexChar
205 return $ BS.singleton $ hexToChar h l
207 isHexChar ∷ Char → Bool
208 isHexChar = inClass "0-9a-fA-F"
210 hexToChar ∷ Char → Char → Char
211 {-# INLINE hexToChar #-}
213 = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l
215 hexToInt ∷ Char → Int
216 {-# INLINEABLE hexToInt #-}
218 | c ≤ '9' = ord c - ord '0'
219 | c ≤ 'F' = ord c - ord 'A' + 10
220 | otherwise = ord c - ord 'a' + 10
222 rawChars ∷ Parser BS.ByteString
223 {-# INLINE rawChars #-}
224 rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%')
226 decodeParams ∷ (Functor m, Monad m) ⇒ [ExtendedParam] → m MIMEParams
227 {-# INLINE decodeParams #-}
228 decodeParams = (MIMEParams <$>) ∘ (mapM decodeSections =≪) ∘ sortBySection
230 sortBySection ∷ Monad m
232 → m (Map CIAscii (Map Integer ExtendedParam))
233 sortBySection = flip go (∅)
237 → Map CIAscii (Map Integer ExtendedParam)
238 → m (Map CIAscii (Map Integer ExtendedParam))
241 = case M.lookup (epName x) m of
243 → let s = M.singleton (section x) x
244 m' = M.insert (epName x) s m
248 → case M.lookup (section x) s of
250 → let s' = M.insert (section x) x s
251 m' = M.insert (epName x) s' m
255 → fail (concat [ "Duplicate section "
258 , A.toString $ A.fromCIAscii $ epName x
262 decodeSections ∷ Monad m ⇒ Map Integer ExtendedParam → m Text
263 decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅)
266 ⇒ Map Integer ExtendedParam
269 → m (Seq ExtendedParam)
270 toSeq m expectedSect sects
271 = case M.minViewWithKey m of
275 | sect ≡ expectedSect
276 → toSeq m' (expectedSect + 1) (sects ⊳ p)
278 → fail (concat [ "Missing section "
281 , A.toString $ A.fromCIAscii $ epName p
285 decodeSeq ∷ Monad m ⇒ Seq ExtendedParam → m Text
287 = case S.viewl sects of
289 → fail "decodeSeq: internal error: empty seq"
290 InitialEncodedParam {..} :< xs
291 → do d ← getDecoder epCharset
292 t ← decodeStr d epPayload
293 decodeSeq' (Just d) xs $ S.singleton t
294 ContinuedEncodedParam {..} :< _
295 → fail "decodeSeq: internal error: CEP at section 0"
296 AsciiParam {..} :< xs
297 → let t = A.toText apPayload
299 decodeSeq' Nothing xs $ S.singleton t
306 decodeSeq' decoder sects chunks
307 = case S.viewl sects of
309 → return $ T.concat $ toList chunks
310 InitialEncodedParam {..} :< _
311 → fail "decodeSeq': internal error: IEP at section > 0"
312 ContinuedEncodedParam {..} :< xs
315 → do t ← decodeStr d epPayload
316 decodeSeq' decoder xs $ chunks ⊳ t
318 → fail (concat [ "Section "
321 , A.toString $ A.fromCIAscii epName
322 , "' is encoded but its first section is not"
324 AsciiParam {..} :< xs
325 → let t = A.toText apPayload
327 decodeSeq' decoder xs $ chunks ⊳ t
329 type Decoder = BS.ByteString → Either UnicodeException Text
331 decodeStr ∷ Monad m ⇒ Decoder → BS.ByteString → m Text
332 decodeStr decoder str
333 = case decoder str of
335 Left e → fail $ show e
337 getDecoder ∷ Monad m ⇒ CIAscii → m Decoder
339 | charset ≡ "UTF-8" = return decodeUtf8'
340 | charset ≡ "US-ASCII" = return decodeUtf8'
341 | otherwise = fail $ "No decoders found for charset: "
342 ⧺ A.toString (A.fromCIAscii charset)