5 , GeneralizedNewtypeDeriving
11 -- |Parsing and printing MIME parameter values
12 -- (<http://tools.ietf.org/html/rfc2231>).
13 module Network.HTTP.Lucu.MIMEParams
19 import Control.Applicative
20 import Control.Monad hiding (mapM)
21 import Control.Monad.Unicode
22 import Data.Ascii (Ascii, CIAscii, AsciiBuilder)
23 import qualified Data.Ascii as A
24 import Data.Attoparsec.Char8 as P
26 import qualified Data.ByteString.Char8 as BS
31 import qualified Data.Map as M
33 import Data.Monoid.Unicode
34 import Data.Sequence (Seq, ViewL(..))
35 import qualified Data.Sequence as S
36 import Data.Sequence.Unicode hiding ((∅))
37 import Data.Text (Text)
38 import qualified Data.Text as T
39 import Data.Text.Encoding
40 import Data.Text.Encoding.Error
41 import Data.Traversable
43 import Language.Haskell.TH.Syntax
44 import Network.HTTP.Lucu.Parser.Http
45 import Network.HTTP.Lucu.Utils
46 import Prelude hiding (concat, mapM, takeWhile)
47 import Prelude.Unicode
49 -- |A 'Map' from MIME parameter attributes to values. Attributes are
50 -- always case-insensitive according to RFC 2045
51 -- (<http://tools.ietf.org/html/rfc2045#section-5.1>).
53 = MIMEParams (Map CIAscii Text)
54 deriving (Eq, Show, Read, Monoid, Typeable)
56 instance Lift MIMEParams where
57 lift (MIMEParams m) = [| MIMEParams $(liftParams m) |]
59 liftParams ∷ Map CIAscii Text → Q Exp
60 liftParams = liftMap liftCIAscii liftText
62 -- |Convert MIME parameter values to an 'AsciiBuilder'.
63 printMIMEParams ∷ MIMEParams → AsciiBuilder
64 {-# INLINEABLE printMIMEParams #-}
65 #if MIN_VERSION_containers(0, 4, 1)
66 printMIMEParams (MIMEParams m) = M.foldlWithKey' f (∅) m
68 printMIMEParams (MIMEParams m) = M.foldlWithKey f (∅) m
71 f ∷ AsciiBuilder → CIAscii → Text → AsciiBuilder
73 f ab k v = ab ⊕ A.toAsciiBuilder "; " ⊕ printPair k v
75 printPair ∷ CIAscii → Text → AsciiBuilder
76 {-# INLINEABLE printPair #-}
78 | T.any (> '\xFF') value
79 = printPairInUTF8 name value
81 = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value)
83 printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder
84 {-# INLINEABLE printPairInUTF8 #-}
85 printPairInUTF8 name value
86 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
87 A.toAsciiBuilder "*=utf-8''" ⊕
88 escapeUnsafeChars (encodeUtf8 value) (∅)
90 printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder
91 {-# INLINEABLE printPairInAscii #-}
92 printPairInAscii name value
93 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
94 A.toAsciiBuilder "=" ⊕
95 if BS.any ((¬) ∘ isToken) (A.toByteString value) then
98 A.toAsciiBuilder value
100 escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder
101 {-# INLINEABLE escapeUnsafeChars #-}
102 escapeUnsafeChars bs b
103 = case BS.uncons bs of
106 | isToken c → escapeUnsafeChars bs' $
107 b ⊕ A.toAsciiBuilder (A.unsafeFromString [c])
108 | otherwise → escapeUnsafeChars bs' $
109 b ⊕ toHex (fromIntegral $ fromEnum c)
111 toHex ∷ Word8 → AsciiBuilder
112 {-# INLINEABLE toHex #-}
113 toHex o = A.toAsciiBuilder "%" ⊕
114 A.toAsciiBuilder (A.unsafeFromString [ toHex' (o `shiftR` 8)
115 , toHex' (o .&. 0x0F) ])
117 toHex' ∷ Word8 → Char
118 {-# INLINEABLE toHex' #-}
120 | h ≤ 0x09 = toEnum $ fromIntegral
121 $ fromEnum '0' + fromIntegral h
122 | otherwise = toEnum $ fromIntegral
123 $ fromEnum 'A' + fromIntegral (h - 0x0A)
126 = InitialEncodedParam {
128 , epCharset ∷ !CIAscii
129 , epPayload ∷ !BS.ByteString
131 | ContinuedEncodedParam {
133 , epSection ∷ !Integer
134 , epPayload ∷ !BS.ByteString
138 , epSection ∷ !Integer
142 section ∷ ExtendedParam → Integer
143 {-# INLINE section #-}
144 section (InitialEncodedParam {..}) = 0
145 section ep = epSection ep
147 -- |'Parser' for MIME parameter values.
148 mimeParams ∷ Parser MIMEParams
149 {-# INLINEABLE mimeParams #-}
150 mimeParams = decodeParams =≪ P.many (try paramP)
152 paramP ∷ Parser ExtendedParam
153 paramP = do skipMany lws
160 → do (charset, payload) ← initialEncodedValue
161 return $ InitialEncodedParam name charset payload
163 → do payload ← encodedPayload
164 return $ ContinuedEncodedParam name sect payload
166 → do payload ← token <|> quotedStr
167 return $ AsciiParam name sect payload
169 nameP ∷ Parser (CIAscii, Integer, Bool)
170 nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$>
171 takeWhile1 (\c → isToken c ∧ c ≢ '*')
172 sect ← option 0 $ try (char '*' *> decimal )
173 isEncoded ← option False $ try (char '*' *> pure True)
174 return (name, sect, isEncoded)
176 initialEncodedValue ∷ Parser (CIAscii, BS.ByteString)
178 = do charset ← metadata
180 void $ metadata -- Ignore the language tag
182 payload ← encodedPayload
184 -- NOTE: I'm not sure this is the right thing, but RFC
185 -- 2231 doesn't tell us what we should do when the
186 -- charset is omitted.
187 fail "charset is missing"
189 return (charset, payload)
191 metadata ∷ Parser CIAscii
192 metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$>
193 takeWhile (\c → c ≢ '\'' ∧ isToken c)
195 encodedPayload ∷ Parser BS.ByteString
196 {-# INLINE encodedPayload #-}
197 encodedPayload = BS.concat <$> P.many (hexChar <|> rawChars)
199 hexChar ∷ Parser BS.ByteString
200 {-# INLINEABLE hexChar #-}
201 hexChar = do void $ char '%'
202 h ← satisfy isHexChar
203 l ← satisfy isHexChar
204 return $ BS.singleton $ hexToChar h l
206 isHexChar ∷ Char → Bool
207 isHexChar = inClass "0-9a-fA-F"
209 hexToChar ∷ Char → Char → Char
210 {-# INLINE hexToChar #-}
212 = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l
214 hexToInt ∷ Char → Int
215 {-# INLINEABLE hexToInt #-}
217 | c ≤ '9' = ord c - ord '0'
218 | c ≤ 'F' = ord c - ord 'A' + 10
219 | otherwise = ord c - ord 'a' + 10
221 rawChars ∷ Parser BS.ByteString
222 {-# INLINE rawChars #-}
223 rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%')
225 decodeParams ∷ (Functor m, Monad m) ⇒ [ExtendedParam] → m MIMEParams
226 {-# INLINE decodeParams #-}
227 decodeParams = (MIMEParams <$>) ∘ (mapM decodeSections =≪) ∘ sortBySection
229 sortBySection ∷ Monad m
231 → m (Map CIAscii (Map Integer ExtendedParam))
232 sortBySection = flip go (∅)
236 → Map CIAscii (Map Integer ExtendedParam)
237 → m (Map CIAscii (Map Integer ExtendedParam))
240 = case M.lookup (epName x) m of
242 → let s = M.singleton (section x) x
243 m' = M.insert (epName x) s m
247 → case M.lookup (section x) s of
249 → let s' = M.insert (section x) x s
250 m' = M.insert (epName x) s' m
254 → fail (concat [ "Duplicate section "
257 , A.toString $ A.fromCIAscii $ epName x
261 decodeSections ∷ Monad m ⇒ Map Integer ExtendedParam → m Text
262 decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅)
265 ⇒ Map Integer ExtendedParam
268 → m (Seq ExtendedParam)
269 toSeq m expectedSect sects
270 = case M.minViewWithKey m of
274 | sect ≡ expectedSect
275 → toSeq m' (expectedSect + 1) (sects ⊳ p)
277 → fail (concat [ "Missing section "
280 , A.toString $ A.fromCIAscii $ epName p
284 decodeSeq ∷ Monad m ⇒ Seq ExtendedParam → m Text
286 = case S.viewl sects of
288 → fail "decodeSeq: internal error: empty seq"
289 InitialEncodedParam {..} :< xs
290 → do d ← getDecoder epCharset
291 t ← decodeStr d epPayload
292 decodeSeq' (Just d) xs $ S.singleton t
293 ContinuedEncodedParam {..} :< _
294 → fail "decodeSeq: internal error: CEP at section 0"
295 AsciiParam {..} :< xs
296 → let t = A.toText apPayload
298 decodeSeq' Nothing xs $ S.singleton t
305 decodeSeq' decoder sects chunks
306 = case S.viewl sects of
308 → return $ T.concat $ toList chunks
309 InitialEncodedParam {..} :< _
310 → fail "decodeSeq': internal error: IEP at section > 0"
311 ContinuedEncodedParam {..} :< xs
314 → do t ← decodeStr d epPayload
315 decodeSeq' decoder xs $ chunks ⊳ t
317 → fail (concat [ "Section "
320 , A.toString $ A.fromCIAscii epName
321 , "' is encoded but its first section is not"
323 AsciiParam {..} :< xs
324 → let t = A.toText apPayload
326 decodeSeq' decoder xs $ chunks ⊳ t
328 type Decoder = BS.ByteString → Either UnicodeException Text
330 decodeStr ∷ Monad m ⇒ Decoder → BS.ByteString → m Text
331 decodeStr decoder str
332 = case decoder str of
334 Left e → fail $ show e
336 getDecoder ∷ Monad m ⇒ CIAscii → m Decoder
338 | charset ≡ "UTF-8" = return decodeUtf8'
339 | charset ≡ "US-ASCII" = return decodeUtf8'
340 | otherwise = fail $ "No decoders found for charset: "
341 ⧺ A.toString (A.fromCIAscii charset)