5 , GeneralizedNewtypeDeriving
6 , MultiParamTypeClasses
10 , TypeSynonymInstances
13 {-# OPTIONS_GHC -fno-warn-orphans #-}
14 {-# OPTIONS_GHC -fno-warn-missing-methods #-}
15 -- |Parsing and printing MIME parameter values
16 -- (<http://tools.ietf.org/html/rfc2231>).
17 module Network.HTTP.Lucu.MIMEParams
22 import Control.Applicative hiding (empty)
23 import Control.Monad hiding (mapM)
24 import Control.Monad.Unicode
25 import Data.Ascii (Ascii, CIAscii, AsciiBuilder)
26 import qualified Data.Ascii as A
27 import Data.Attoparsec.Char8
29 import qualified Data.ByteString.Char8 as BS
31 import Data.Collections
32 import Data.Collections.BaseInstances ()
33 import qualified Data.Collections.Newtype.TH as C
34 import Data.Convertible.Base
35 import Data.Convertible.Instances.Ascii ()
36 import Data.Convertible.Utils
37 import qualified Data.Map as M (Map)
38 import Data.Monoid.Unicode
39 import Data.Sequence (Seq)
40 import Data.Text (Text)
41 import qualified Data.Text as T
42 import Data.Text.Encoding
43 import Data.Text.Encoding.Error
45 import Network.HTTP.Lucu.MIMEParams.Internal
46 import Network.HTTP.Lucu.OrphanInstances ()
47 import Network.HTTP.Lucu.Parser.Http
48 import Network.HTTP.Lucu.Utils
49 import Prelude hiding (concat, lookup, mapM, takeWhile)
50 import Prelude.Unicode
52 C.derive [d| instance Unfoldable MIMEParams (CIAscii, Text)
53 instance Foldable MIMEParams (CIAscii, Text)
54 instance Collection MIMEParams (CIAscii, Text)
55 instance Indexed MIMEParams CIAscii Text
56 instance Map MIMEParams CIAscii Text
57 instance SortingCollection MIMEParams (CIAscii, Text)
60 instance ConvertSuccess MIMEParams Ascii where
61 {-# INLINE convertSuccess #-}
62 convertSuccess = convertSuccessVia ((⊥) ∷ AsciiBuilder)
64 instance ConvertSuccess MIMEParams AsciiBuilder where
65 {-# INLINE convertSuccess #-}
66 convertSuccess = foldl' f (∅)
68 f ∷ AsciiBuilder → (CIAscii, Text) → AsciiBuilder
70 f ab (k, v) = ab ⊕ cs ("; " ∷ Ascii) ⊕ printPair k v
72 printPair ∷ CIAscii → Text → AsciiBuilder
73 {-# INLINEABLE printPair #-}
75 | T.any (> '\xFF') value
76 = printPairInUTF8 name value
78 = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value)
80 printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder
81 {-# INLINEABLE printPairInUTF8 #-}
82 printPairInUTF8 name value
84 cs ("*=utf-8''" ∷ Ascii) ⊕
85 escapeUnsafeChars (encodeUtf8 value) (∅)
87 printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder
88 {-# INLINEABLE printPairInAscii #-}
89 printPairInAscii name value
92 if BS.any ((¬) ∘ isToken) (cs value) then
97 escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder
98 {-# INLINEABLE escapeUnsafeChars #-}
99 escapeUnsafeChars bs b
100 = case BS.uncons bs of
103 | isToken c → escapeUnsafeChars bs' $
104 b ⊕ cs (A.unsafeFromString [c])
105 | otherwise → escapeUnsafeChars bs' $
106 b ⊕ toHex (fromIntegral $ fromEnum c)
108 toHex ∷ Word8 → AsciiBuilder
109 {-# INLINEABLE toHex #-}
110 toHex o = cs ("%" ∷ Ascii) ⊕
111 cs (A.unsafeFromString [ toHex' (o `shiftR` 8)
112 , toHex' (o .&. 0x0F) ])
114 toHex' ∷ Word8 → Char
115 {-# INLINEABLE toHex' #-}
117 | h ≤ 0x09 = toEnum $ fromIntegral
118 $ fromEnum '0' + fromIntegral h
119 | otherwise = toEnum $ fromIntegral
120 $ fromEnum 'A' + fromIntegral (h - 0x0A)
123 = InitialEncodedParam {
125 , epCharset ∷ !CIAscii
126 , epPayload ∷ !BS.ByteString
128 | ContinuedEncodedParam {
130 , epSection ∷ !Integer
131 , epPayload ∷ !BS.ByteString
135 , epSection ∷ !Integer
139 section ∷ ExtendedParam → Integer
140 {-# INLINE section #-}
141 section (InitialEncodedParam {..}) = 0
142 section ep = epSection ep
144 -- |'Parser' for MIME parameter values.
145 mimeParams ∷ Parser MIMEParams
146 {-# INLINEABLE mimeParams #-}
147 mimeParams = decodeParams =≪ many (try paramP)
149 paramP ∷ Parser ExtendedParam
150 paramP = do skipMany lws
157 → do (charset, payload) ← initialEncodedValue
158 return $ InitialEncodedParam name charset payload
160 → do payload ← encodedPayload
161 return $ ContinuedEncodedParam name sect payload
163 → do payload ← token <|> quotedStr
164 return $ AsciiParam name sect payload
166 nameP ∷ Parser (CIAscii, Integer, Bool)
167 nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$>
168 takeWhile1 (\c → isToken c ∧ c ≢ '*')
169 sect ← option 0 $ try (char '*' *> decimal )
170 isEncoded ← option False $ try (char '*' *> pure True)
171 return (name, sect, isEncoded)
173 initialEncodedValue ∷ Parser (CIAscii, BS.ByteString)
175 = do charset ← metadata
177 void $ metadata -- Ignore the language tag
179 payload ← encodedPayload
181 -- NOTE: I'm not sure this is the right thing, but RFC
182 -- 2231 doesn't tell us what we should do when the
183 -- charset is omitted.
184 fail "charset is missing"
186 return (charset, payload)
188 metadata ∷ Parser CIAscii
189 metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$>
190 takeWhile (\c → c ≢ '\'' ∧ isToken c)
192 encodedPayload ∷ Parser BS.ByteString
193 {-# INLINE encodedPayload #-}
194 encodedPayload = BS.concat <$> many (hexChar <|> rawChars)
196 hexChar ∷ Parser BS.ByteString
197 {-# INLINEABLE hexChar #-}
198 hexChar = do void $ char '%'
199 h ← satisfy isHexChar
200 l ← satisfy isHexChar
201 return $ BS.singleton $ hexToChar h l
203 isHexChar ∷ Char → Bool
204 isHexChar = inClass "0-9a-fA-F"
206 hexToChar ∷ Char → Char → Char
207 {-# INLINE hexToChar #-}
209 = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l
211 hexToInt ∷ Char → Int
212 {-# INLINEABLE hexToInt #-}
214 | c ≤ '9' = ord c - ord '0'
215 | c ≤ 'F' = ord c - ord 'A' + 10
216 | otherwise = ord c - ord 'a' + 10
218 rawChars ∷ Parser BS.ByteString
219 {-# INLINE rawChars #-}
220 rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%')
222 decodeParams ∷ (Functor m, Monad m) ⇒ [ExtendedParam] → m MIMEParams
223 {-# INLINE decodeParams #-}
224 decodeParams = (MIMEParams <$>)
225 ∘ (mapM (\(k, v) → ((,) k) <$> decodeSections v) =≪)
228 sortBySection ∷ Monad m
230 → m (M.Map CIAscii (M.Map Integer ExtendedParam))
231 sortBySection = flip go (∅)
235 → M.Map CIAscii (M.Map Integer ExtendedParam)
236 → m (M.Map CIAscii (M.Map Integer ExtendedParam))
239 = case lookup (epName x) m of
241 → let s = singleton (section x, x)
242 m' = insert (epName x, s) m
246 → case lookup (section x) s of
248 → let s' = insert (section x, x ) s
249 m' = insert (epName x, s') m
253 → fail (concat [ "Duplicate section "
256 , A.toString $ A.fromCIAscii $ epName x
260 decodeSections ∷ Monad m ⇒ M.Map Integer ExtendedParam → m Text
261 decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅)
264 ⇒ M.Map Integer ExtendedParam
267 → m (Seq ExtendedParam)
268 toSeq m expectedSect sects
273 | sect ≡ expectedSect
274 → toSeq m' (expectedSect + 1) (sects ⊳ p)
276 → fail (concat [ "Missing section "
279 , A.toString $ A.fromCIAscii $ epName p
283 decodeSeq ∷ Monad m ⇒ Seq ExtendedParam → m Text
285 = case front sects of
287 → fail "decodeSeq: internal error: empty seq"
288 Just (InitialEncodedParam {..}, xs)
289 → do d ← getDecoder epCharset
290 t ← decodeStr d epPayload
291 decodeSeq' (Just d) xs $ singleton t
292 Just (ContinuedEncodedParam {..}, _)
293 → fail "decodeSeq: internal error: CEP at section 0"
294 Just (AsciiParam {..}, xs)
295 → let t = A.toText apPayload
297 decodeSeq' Nothing xs $ singleton t
304 decodeSeq' decoder sects chunks
305 = case front sects of
307 → return $ T.concat $ toList chunks
308 Just (InitialEncodedParam {}, _)
309 → fail "decodeSeq': internal error: IEP at section > 0"
310 Just (ContinuedEncodedParam {..}, xs)
313 → do t ← decodeStr d epPayload
314 decodeSeq' decoder xs $ chunks ⊳ t
316 → fail (concat [ "Section "
319 , A.toString $ A.fromCIAscii epName
320 , "' is encoded but its first section is not"
322 Just (AsciiParam {..}, xs)
323 → let t = A.toText apPayload
325 decodeSeq' decoder xs $ chunks ⊳ t
327 type Decoder = BS.ByteString → Either UnicodeException Text
329 decodeStr ∷ Monad m ⇒ Decoder → BS.ByteString → m Text
330 decodeStr decoder str
331 = case decoder str of
333 Left e → fail $ show e
335 getDecoder ∷ Monad m ⇒ CIAscii → m Decoder
337 | charset ≡ "UTF-8" = return decodeUtf8'
338 | charset ≡ "US-ASCII" = return decodeUtf8'
339 | otherwise = fail $ "No decoders found for charset: "
340 ⧺ A.toString (A.fromCIAscii charset)