5 , GeneralizedNewtypeDeriving
6 , MultiParamTypeClasses
10 , TypeSynonymInstances
13 {-# OPTIONS_GHC -fno-warn-orphans #-}
14 {-# OPTIONS_GHC -fno-warn-missing-methods #-}
15 -- |Parsing and printing MIME parameter values
16 -- (<http://tools.ietf.org/html/rfc2231>).
17 module Network.HTTP.Lucu.MIMEParams
22 import Control.Applicative hiding (empty)
23 import Control.Monad hiding (mapM)
24 import Control.Monad.Unicode
25 import Data.Ascii (Ascii, CIAscii, AsciiBuilder)
26 import qualified Data.Ascii as A
27 import Data.Attoparsec.Char8
29 import qualified Data.ByteString.Char8 as BS
31 import Data.Collections
32 import Data.Collections.BaseInstances ()
33 import qualified Data.Collections.Newtype.TH as C
34 import Data.Convertible.Base
35 import Data.Convertible.Instances.Ascii ()
36 import Data.Convertible.Utils
37 import qualified Data.Map as M (Map)
38 import Data.Monoid.Unicode
39 import Data.Sequence (Seq)
40 import Data.Text (Text)
41 import qualified Data.Text as T
42 import Data.Text.Encoding
43 import Data.Text.Encoding.Error
45 import Network.HTTP.Lucu.MIMEParams.Internal
46 import Network.HTTP.Lucu.OrphanInstances ()
47 import Network.HTTP.Lucu.Parser.Http
48 import Network.HTTP.Lucu.Utils
49 import Prelude hiding (concat, lookup, mapM, takeWhile)
50 import Prelude.Unicode
52 C.derive [d| instance Unfoldable MIMEParams (CIAscii, Text)
53 instance Foldable MIMEParams (CIAscii, Text)
54 instance Collection MIMEParams (CIAscii, Text)
55 instance Indexed MIMEParams CIAscii Text
56 instance Map MIMEParams CIAscii Text
57 instance SortingCollection MIMEParams (CIAscii, Text)
60 instance ConvertSuccess MIMEParams Ascii where
61 {-# INLINE convertSuccess #-}
62 convertSuccess = convertSuccessVia ((⊥) ∷ AsciiBuilder)
64 instance ConvertSuccess MIMEParams AsciiBuilder where
65 {-# INLINEABLE convertSuccess #-}
66 convertSuccess = foldl' f (∅)
68 f ∷ AsciiBuilder → (CIAscii, Text) → AsciiBuilder
70 f ab (k, v) = ab ⊕ cs ("; " ∷ Ascii) ⊕ printPair k v
72 printPair ∷ CIAscii → Text → AsciiBuilder
73 {-# INLINEABLE printPair #-}
75 | T.any (> '\xFF') value
76 = printPairInUTF8 name value
78 = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value)
80 printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder
81 {-# INLINEABLE printPairInUTF8 #-}
82 printPairInUTF8 name value
84 cs ("*=utf-8''" ∷ Ascii) ⊕
85 escapeUnsafeChars (encodeUtf8 value) (∅)
87 printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder
88 {-# INLINEABLE printPairInAscii #-}
89 printPairInAscii name value
92 if BS.any ((¬) ∘ isToken) (cs value) then
97 escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder
98 {-# INLINEABLE escapeUnsafeChars #-}
99 escapeUnsafeChars bs b
100 = case BS.uncons bs of
103 | isToken c → escapeUnsafeChars bs' $
104 b ⊕ cs (A.unsafeFromString [c])
105 | otherwise → escapeUnsafeChars bs' $
106 b ⊕ toHex (fromIntegral $ fromEnum c)
108 toHex ∷ Word8 → AsciiBuilder
109 {-# INLINEABLE toHex #-}
110 toHex o = cs ("%" ∷ Ascii) ⊕
111 cs (A.unsafeFromString [ toHex' (o `shiftR` 8)
112 , toHex' (o .&. 0x0F) ])
114 toHex' ∷ Word8 → Char
115 {-# INLINEABLE toHex' #-}
117 | h ≤ 0x09 = toEnum $ fromIntegral
118 $ fromEnum '0' + fromIntegral h
119 | otherwise = toEnum $ fromIntegral
120 $ fromEnum 'A' + fromIntegral (h - 0x0A)
122 deriveAttempts [ ([t| MIMEParams |], [t| Ascii |])
123 , ([t| MIMEParams |], [t| AsciiBuilder |])
127 = InitialEncodedParam {
129 , epCharset ∷ !CIAscii
130 , epPayload ∷ !BS.ByteString
132 | ContinuedEncodedParam {
134 , epSection ∷ !Integer
135 , epPayload ∷ !BS.ByteString
139 , epSection ∷ !Integer
143 section ∷ ExtendedParam → Integer
144 {-# INLINE section #-}
145 section (InitialEncodedParam {..}) = 0
146 section ep = epSection ep
148 -- |'Parser' for MIME parameter values.
149 mimeParams ∷ Parser MIMEParams
150 {-# INLINEABLE mimeParams #-}
151 mimeParams = decodeParams =≪ many (try paramP)
153 paramP ∷ Parser ExtendedParam
154 paramP = do skipMany lws
161 → do (charset, payload) ← initialEncodedValue
162 return $ InitialEncodedParam name charset payload
164 → do payload ← encodedPayload
165 return $ ContinuedEncodedParam name sect payload
167 → do payload ← token <|> quotedStr
168 return $ AsciiParam name sect payload
170 nameP ∷ Parser (CIAscii, Integer, Bool)
171 nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$>
172 takeWhile1 (\c → isToken c ∧ c ≢ '*')
173 sect ← option 0 $ try (char '*' *> decimal )
174 isEncoded ← option False $ try (char '*' *> pure True)
175 return (name, sect, isEncoded)
177 initialEncodedValue ∷ Parser (CIAscii, BS.ByteString)
179 = do charset ← metadata
181 void $ metadata -- Ignore the language tag
183 payload ← encodedPayload
185 -- NOTE: I'm not sure this is the right thing, but RFC
186 -- 2231 doesn't tell us what we should do when the
187 -- charset is omitted.
188 fail "charset is missing"
190 return (charset, payload)
192 metadata ∷ Parser CIAscii
193 metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$>
194 takeWhile (\c → c ≢ '\'' ∧ isToken c)
196 encodedPayload ∷ Parser BS.ByteString
197 {-# INLINE encodedPayload #-}
198 encodedPayload = BS.concat <$> many (hexChar <|> rawChars)
200 hexChar ∷ Parser BS.ByteString
201 {-# INLINEABLE hexChar #-}
202 hexChar = do void $ char '%'
203 h ← satisfy isHexChar
204 l ← satisfy isHexChar
205 return $ BS.singleton $ hexToChar h l
207 isHexChar ∷ Char → Bool
208 isHexChar = inClass "0-9a-fA-F"
210 hexToChar ∷ Char → Char → Char
211 {-# INLINE hexToChar #-}
213 = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l
215 hexToInt ∷ Char → Int
216 {-# INLINEABLE hexToInt #-}
218 | c ≤ '9' = ord c - ord '0'
219 | c ≤ 'F' = ord c - ord 'A' + 10
220 | otherwise = ord c - ord 'a' + 10
222 rawChars ∷ Parser BS.ByteString
223 {-# INLINE rawChars #-}
224 rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%')
226 decodeParams ∷ (Functor m, Monad m) ⇒ [ExtendedParam] → m MIMEParams
227 {-# INLINE decodeParams #-}
228 decodeParams = (MIMEParams <$>)
229 ∘ (mapM (\(k, v) → ((,) k) <$> decodeSections v) =≪)
232 sortBySection ∷ Monad m
234 → m (M.Map CIAscii (M.Map Integer ExtendedParam))
235 sortBySection = flip go (∅)
239 → M.Map CIAscii (M.Map Integer ExtendedParam)
240 → m (M.Map CIAscii (M.Map Integer ExtendedParam))
243 = case lookup (epName x) m of
245 → let s = singleton (section x, x)
246 m' = insert (epName x, s) m
250 → case lookup (section x) s of
252 → let s' = insert (section x, x ) s
253 m' = insert (epName x, s') m
257 → fail (concat [ "Duplicate section "
260 , A.toString $ A.fromCIAscii $ epName x
264 decodeSections ∷ Monad m ⇒ M.Map Integer ExtendedParam → m Text
265 decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅)
268 ⇒ M.Map Integer ExtendedParam
271 → m (Seq ExtendedParam)
272 toSeq m expectedSect sects
277 | sect ≡ expectedSect
278 → toSeq m' (expectedSect + 1) (sects ⊳ p)
280 → fail (concat [ "Missing section "
283 , A.toString $ A.fromCIAscii $ epName p
287 decodeSeq ∷ Monad m ⇒ Seq ExtendedParam → m Text
289 = case front sects of
291 → fail "decodeSeq: internal error: empty seq"
292 Just (InitialEncodedParam {..}, xs)
293 → do d ← getDecoder epCharset
294 t ← decodeStr d epPayload
295 decodeSeq' (Just d) xs $ singleton t
296 Just (ContinuedEncodedParam {..}, _)
297 → fail "decodeSeq: internal error: CEP at section 0"
298 Just (AsciiParam {..}, xs)
299 → let t = A.toText apPayload
301 decodeSeq' Nothing xs $ singleton t
308 decodeSeq' decoder sects chunks
309 = case front sects of
311 → return $ T.concat $ toList chunks
312 Just (InitialEncodedParam {}, _)
313 → fail "decodeSeq': internal error: IEP at section > 0"
314 Just (ContinuedEncodedParam {..}, xs)
317 → do t ← decodeStr d epPayload
318 decodeSeq' decoder xs $ chunks ⊳ t
320 → fail (concat [ "Section "
323 , A.toString $ A.fromCIAscii epName
324 , "' is encoded but its first section is not"
326 Just (AsciiParam {..}, xs)
327 → let t = A.toText apPayload
329 decodeSeq' decoder xs $ chunks ⊳ t
331 type Decoder = BS.ByteString → Either UnicodeException Text
333 decodeStr ∷ Monad m ⇒ Decoder → BS.ByteString → m Text
334 decodeStr decoder str
335 = case decoder str of
337 Left e → fail $ show e
339 getDecoder ∷ Monad m ⇒ CIAscii → m Decoder
341 | charset ≡ "UTF-8" = return decodeUtf8'
342 | charset ≡ "US-ASCII" = return decodeUtf8'
343 | otherwise = fail $ "No decoders found for charset: "
344 ⧺ A.toString (A.fromCIAscii charset)