4 , GeneralizedNewtypeDeriving
10 -- |Parsing and printing MIME parameter values
11 -- (<http://tools.ietf.org/html/rfc2231>).
12 module Network.HTTP.Lucu.MIMEParams
18 import Control.Applicative
19 import Control.Monad hiding (mapM)
20 import Control.Monad.Unicode
21 import Data.Ascii (Ascii, CIAscii, AsciiBuilder)
22 import qualified Data.Ascii as A
23 import Data.Attoparsec.Char8 as P
25 import qualified Data.ByteString.Char8 as BS
30 import qualified Data.Map as M
32 import Data.Monoid.Unicode
33 import Data.Sequence (Seq, ViewL(..))
34 import qualified Data.Sequence as S
35 import Data.Sequence.Unicode hiding ((∅))
36 import Data.Text (Text)
37 import qualified Data.Text as T
38 import Data.Text.Encoding
39 import Data.Text.Encoding.Error
40 import Data.Traversable
42 import Language.Haskell.TH.Syntax
43 import Network.HTTP.Lucu.Parser.Http
44 import Network.HTTP.Lucu.Utils
45 import Prelude hiding (concat, mapM, takeWhile)
46 import Prelude.Unicode
48 -- |A 'Map' from MIME parameter attributes to values. Attributes are
49 -- always case-insensitive according to RFC 2045
50 -- (<http://tools.ietf.org/html/rfc2045#section-5.1>).
52 = MIMEParams (Map CIAscii Text)
53 deriving (Eq, Show, Read, Monoid, Typeable)
55 instance Lift MIMEParams where
56 lift (MIMEParams m) = [| MIMEParams $(liftParams m) |]
58 liftParams ∷ Map CIAscii Text → Q Exp
59 liftParams = liftMap liftCIAscii liftText
61 -- |Convert MIME parameter values to an 'AsciiBuilder'.
62 printMIMEParams ∷ MIMEParams → AsciiBuilder
63 {-# INLINEABLE printMIMEParams #-}
64 printMIMEParams (MIMEParams m) = M.foldlWithKey f (∅) m
65 -- THINKME: Use foldlWithKey' for newer Data.Map
67 f ∷ AsciiBuilder → CIAscii → Text → AsciiBuilder
69 f ab k v = ab ⊕ A.toAsciiBuilder "; " ⊕ printPair k v
71 printPair ∷ CIAscii → Text → AsciiBuilder
72 {-# INLINEABLE printPair #-}
74 | T.any (> '\xFF') value
75 = printPairInUTF8 name value
77 = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value)
79 printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder
80 {-# INLINEABLE printPairInUTF8 #-}
81 printPairInUTF8 name value
82 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
83 A.toAsciiBuilder "*=utf-8''" ⊕
84 escapeUnsafeChars (encodeUtf8 value) (∅)
86 printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder
87 {-# INLINEABLE printPairInAscii #-}
88 printPairInAscii name value
89 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
90 A.toAsciiBuilder "=" ⊕
91 if BS.any ((¬) ∘ isToken) (A.toByteString value) then
94 A.toAsciiBuilder value
96 escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder
97 {-# INLINEABLE escapeUnsafeChars #-}
98 escapeUnsafeChars bs b
99 = case BS.uncons bs of
102 | isToken c → escapeUnsafeChars bs' $
103 b ⊕ A.toAsciiBuilder (A.unsafeFromString [c])
104 | otherwise → escapeUnsafeChars bs' $
105 b ⊕ toHex (fromIntegral $ fromEnum c)
107 toHex ∷ Word8 → AsciiBuilder
108 {-# INLINEABLE toHex #-}
109 toHex o = A.toAsciiBuilder "%" ⊕
110 A.toAsciiBuilder (A.unsafeFromString [ toHex' (o `shiftR` 8)
111 , toHex' (o .&. 0x0F) ])
113 toHex' ∷ Word8 → Char
114 {-# INLINEABLE toHex' #-}
116 | h ≤ 0x09 = toEnum $ fromIntegral
117 $ fromEnum '0' + fromIntegral h
118 | otherwise = toEnum $ fromIntegral
119 $ fromEnum 'A' + fromIntegral (h - 0x0A)
122 = InitialEncodedParam {
124 , epCharset ∷ !CIAscii
125 , epPayload ∷ !BS.ByteString
127 | ContinuedEncodedParam {
129 , epSection ∷ !Integer
130 , epPayload ∷ !BS.ByteString
134 , epSection ∷ !Integer
138 section ∷ ExtendedParam → Integer
139 {-# INLINE section #-}
140 section (InitialEncodedParam {..}) = 0
141 section ep = epSection ep
143 -- |'Parser' for MIME parameter values.
144 mimeParams ∷ Parser MIMEParams
145 {-# INLINEABLE mimeParams #-}
146 mimeParams = decodeParams =≪ P.many (try paramP)
148 paramP ∷ Parser ExtendedParam
149 paramP = do skipMany lws
156 → do (charset, payload) ← initialEncodedValue
157 return $ InitialEncodedParam name charset payload
159 → do payload ← encodedPayload
160 return $ ContinuedEncodedParam name sect payload
162 → do payload ← token <|> quotedStr
163 return $ AsciiParam name sect payload
165 nameP ∷ Parser (CIAscii, Integer, Bool)
166 nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$>
167 takeWhile1 (\c → isToken c ∧ c ≢ '*')
168 sect ← option 0 $ try (char '*' *> decimal )
169 isEncoded ← option False $ try (char '*' *> pure True)
170 return (name, sect, isEncoded)
172 initialEncodedValue ∷ Parser (CIAscii, BS.ByteString)
174 = do charset ← metadata
176 void $ metadata -- Ignore the language tag
178 payload ← encodedPayload
180 -- NOTE: I'm not sure this is the right thing, but RFC
181 -- 2231 doesn't tell us what we should do when the
182 -- charset is omitted.
183 return ("US-ASCII", payload)
184 -- FIXME: Rethink about this behaviour.
186 return (charset, payload)
188 metadata ∷ Parser CIAscii
189 metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$>
190 takeWhile (\c → c ≢ '\'' ∧ isToken c)
192 encodedPayload ∷ Parser BS.ByteString
193 {-# INLINE encodedPayload #-}
194 encodedPayload = BS.concat <$> P.many (hexChar <|> rawChars)
196 hexChar ∷ Parser BS.ByteString
197 {-# INLINEABLE hexChar #-}
198 hexChar = do void $ char '%'
199 h ← satisfy isHexChar
200 l ← satisfy isHexChar
201 return $ BS.singleton $ hexToChar h l
203 isHexChar ∷ Char → Bool
204 isHexChar = inClass "0-9a-fA-F"
206 hexToChar ∷ Char → Char → Char
207 {-# INLINE hexToChar #-}
209 = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l
211 hexToInt ∷ Char → Int
212 {-# INLINEABLE hexToInt #-}
214 | c ≤ '9' = ord c - ord '0'
215 | c ≤ 'F' = ord c - ord 'A' + 10
216 | otherwise = ord c - ord 'a' + 10
218 rawChars ∷ Parser BS.ByteString
219 {-# INLINE rawChars #-}
220 rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%')
222 decodeParams ∷ (Functor m, Monad m) ⇒ [ExtendedParam] → m MIMEParams
223 {-# INLINE decodeParams #-}
224 decodeParams = (MIMEParams <$>) ∘ (mapM decodeSections =≪) ∘ sortBySection
226 sortBySection ∷ Monad m
228 → m (Map CIAscii (Map Integer ExtendedParam))
229 sortBySection = flip go (∅)
233 → Map CIAscii (Map Integer ExtendedParam)
234 → m (Map CIAscii (Map Integer ExtendedParam))
237 = case M.lookup (epName x) m of
239 → let s = M.singleton (section x) x
240 m' = M.insert (epName x) s m
244 → case M.lookup (section x) s of
246 → let s' = M.insert (section x) x s
247 m' = M.insert (epName x) s' m
251 → fail (concat [ "Duplicate section "
254 , A.toString $ A.fromCIAscii $ epName x
258 decodeSections ∷ Monad m ⇒ Map Integer ExtendedParam → m Text
259 decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅)
262 ⇒ Map Integer ExtendedParam
265 → m (Seq ExtendedParam)
266 toSeq m expectedSect sects
267 = case M.minViewWithKey m of
271 | sect ≡ expectedSect
272 → toSeq m' (expectedSect + 1) (sects ⊳ p)
274 → fail (concat [ "Missing section "
277 , A.toString $ A.fromCIAscii $ epName p
281 decodeSeq ∷ Monad m ⇒ Seq ExtendedParam → m Text
283 = case S.viewl sects of
285 → fail "decodeSeq: internal error: empty seq"
286 InitialEncodedParam {..} :< xs
287 → do d ← getDecoder epCharset
288 t ← decodeStr d epPayload
289 decodeSeq' (Just d) xs $ S.singleton t
290 ContinuedEncodedParam {..} :< _
291 → fail "decodeSeq: internal error: CEP at section 0"
292 AsciiParam {..} :< xs
293 → let t = A.toText apPayload
295 decodeSeq' Nothing xs $ S.singleton t
302 decodeSeq' decoder sects chunks
303 = case S.viewl sects of
305 → return $ T.concat $ toList chunks
306 InitialEncodedParam {..} :< _
307 → fail "decodeSeq': internal error: IEP at section > 0"
308 ContinuedEncodedParam {..} :< xs
311 → do t ← decodeStr d epPayload
312 decodeSeq' decoder xs $ chunks ⊳ t
314 → fail (concat [ "Section "
317 , A.toString $ A.fromCIAscii epName
318 , "' is encoded but its first section is not"
320 AsciiParam {..} :< xs
321 → let t = A.toText apPayload
323 decodeSeq' decoder xs $ chunks ⊳ t
325 type Decoder = BS.ByteString → Either UnicodeException Text
327 decodeStr ∷ Monad m ⇒ Decoder → BS.ByteString → m Text
328 decodeStr decoder str
329 = case decoder str of
331 Left e → fail $ show e
333 getDecoder ∷ Monad m ⇒ CIAscii → m Decoder
335 | charset ≡ "UTF-8" = return decodeUtf8'
336 | charset ≡ "US-ASCII" = return decodeUtf8'
337 | otherwise = fail $ "No decoders found for charset: "
338 ⧺ A.toString (A.fromCIAscii charset)