5 , GeneralizedNewtypeDeriving
6 , MultiParamTypeClasses
10 , TypeSynonymInstances
13 {-# OPTIONS_GHC -fno-warn-orphans #-}
14 {-# OPTIONS_GHC -fno-warn-missing-methods #-}
15 -- |Parsing and printing MIME parameter values
16 -- (<http://tools.ietf.org/html/rfc2231>).
17 module Network.HTTP.Lucu.MIMEParams
23 import Control.Applicative hiding (empty)
24 import Control.Monad hiding (mapM)
25 import Control.Monad.Unicode
26 import Data.Ascii (Ascii, CIAscii, AsciiBuilder)
27 import qualified Data.Ascii as A
28 import Data.Attoparsec.Char8 as P
30 import qualified Data.ByteString.Char8 as BS
32 import Data.Collections
33 import Data.Collections.BaseInstances ()
34 import qualified Data.Collections.Newtype.TH as C
35 import qualified Data.Map as M (Map)
36 import Data.Monoid.Unicode
37 import Data.Sequence (Seq)
38 import Data.Text (Text)
39 import qualified Data.Text as T
40 import Data.Text.Encoding
41 import Data.Text.Encoding.Error
43 import Network.HTTP.Lucu.MIMEParams.Internal
44 import Network.HTTP.Lucu.OrphanInstances ()
45 import Network.HTTP.Lucu.Parser.Http
46 import Network.HTTP.Lucu.Utils
47 import Prelude hiding (concat, lookup, mapM, takeWhile)
48 import Prelude.Unicode
50 C.derive [d| instance Unfoldable MIMEParams (CIAscii, Text)
51 instance Foldable MIMEParams (CIAscii, Text)
52 instance Collection MIMEParams (CIAscii, Text)
53 instance Indexed MIMEParams CIAscii Text
54 instance Map MIMEParams CIAscii Text
55 instance SortingCollection MIMEParams (CIAscii, Text)
58 -- |Convert MIME parameter values to an 'AsciiBuilder'.
59 printMIMEParams ∷ MIMEParams → AsciiBuilder
60 {-# INLINEABLE printMIMEParams #-}
61 printMIMEParams = foldl' f (∅)
63 f ∷ AsciiBuilder → (CIAscii, Text) → AsciiBuilder
65 f ab (k, v) = ab ⊕ A.toAsciiBuilder "; " ⊕ printPair k v
67 printPair ∷ CIAscii → Text → AsciiBuilder
68 {-# INLINEABLE printPair #-}
70 | T.any (> '\xFF') value
71 = printPairInUTF8 name value
73 = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value)
75 printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder
76 {-# INLINEABLE printPairInUTF8 #-}
77 printPairInUTF8 name value
78 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
79 A.toAsciiBuilder "*=utf-8''" ⊕
80 escapeUnsafeChars (encodeUtf8 value) (∅)
82 printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder
83 {-# INLINEABLE printPairInAscii #-}
84 printPairInAscii name value
85 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
86 A.toAsciiBuilder "=" ⊕
87 if BS.any ((¬) ∘ isToken) (A.toByteString value) then
90 A.toAsciiBuilder value
92 escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder
93 {-# INLINEABLE escapeUnsafeChars #-}
94 escapeUnsafeChars bs b
95 = case BS.uncons bs of
98 | isToken c → escapeUnsafeChars bs' $
99 b ⊕ A.toAsciiBuilder (A.unsafeFromString [c])
100 | otherwise → escapeUnsafeChars bs' $
101 b ⊕ toHex (fromIntegral $ fromEnum c)
103 toHex ∷ Word8 → AsciiBuilder
104 {-# INLINEABLE toHex #-}
105 toHex o = A.toAsciiBuilder "%" ⊕
106 A.toAsciiBuilder (A.unsafeFromString [ toHex' (o `shiftR` 8)
107 , toHex' (o .&. 0x0F) ])
109 toHex' ∷ Word8 → Char
110 {-# INLINEABLE toHex' #-}
112 | h ≤ 0x09 = toEnum $ fromIntegral
113 $ fromEnum '0' + fromIntegral h
114 | otherwise = toEnum $ fromIntegral
115 $ fromEnum 'A' + fromIntegral (h - 0x0A)
118 = InitialEncodedParam {
120 , epCharset ∷ !CIAscii
121 , epPayload ∷ !BS.ByteString
123 | ContinuedEncodedParam {
125 , epSection ∷ !Integer
126 , epPayload ∷ !BS.ByteString
130 , epSection ∷ !Integer
134 section ∷ ExtendedParam → Integer
135 {-# INLINE section #-}
136 section (InitialEncodedParam {..}) = 0
137 section ep = epSection ep
139 -- |'Parser' for MIME parameter values.
140 mimeParams ∷ Parser MIMEParams
141 {-# INLINEABLE mimeParams #-}
142 mimeParams = decodeParams =≪ P.many (try paramP)
144 paramP ∷ Parser ExtendedParam
145 paramP = do skipMany lws
152 → do (charset, payload) ← initialEncodedValue
153 return $ InitialEncodedParam name charset payload
155 → do payload ← encodedPayload
156 return $ ContinuedEncodedParam name sect payload
158 → do payload ← token <|> quotedStr
159 return $ AsciiParam name sect payload
161 nameP ∷ Parser (CIAscii, Integer, Bool)
162 nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$>
163 takeWhile1 (\c → isToken c ∧ c ≢ '*')
164 sect ← option 0 $ try (char '*' *> decimal )
165 isEncoded ← option False $ try (char '*' *> pure True)
166 return (name, sect, isEncoded)
168 initialEncodedValue ∷ Parser (CIAscii, BS.ByteString)
170 = do charset ← metadata
172 void $ metadata -- Ignore the language tag
174 payload ← encodedPayload
176 -- NOTE: I'm not sure this is the right thing, but RFC
177 -- 2231 doesn't tell us what we should do when the
178 -- charset is omitted.
179 fail "charset is missing"
181 return (charset, payload)
183 metadata ∷ Parser CIAscii
184 metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$>
185 takeWhile (\c → c ≢ '\'' ∧ isToken c)
187 encodedPayload ∷ Parser BS.ByteString
188 {-# INLINE encodedPayload #-}
189 encodedPayload = BS.concat <$> P.many (hexChar <|> rawChars)
191 hexChar ∷ Parser BS.ByteString
192 {-# INLINEABLE hexChar #-}
193 hexChar = do void $ char '%'
194 h ← satisfy isHexChar
195 l ← satisfy isHexChar
196 return $ BS.singleton $ hexToChar h l
198 isHexChar ∷ Char → Bool
199 isHexChar = inClass "0-9a-fA-F"
201 hexToChar ∷ Char → Char → Char
202 {-# INLINE hexToChar #-}
204 = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l
206 hexToInt ∷ Char → Int
207 {-# INLINEABLE hexToInt #-}
209 | c ≤ '9' = ord c - ord '0'
210 | c ≤ 'F' = ord c - ord 'A' + 10
211 | otherwise = ord c - ord 'a' + 10
213 rawChars ∷ Parser BS.ByteString
214 {-# INLINE rawChars #-}
215 rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%')
217 decodeParams ∷ (Functor m, Monad m) ⇒ [ExtendedParam] → m MIMEParams
218 {-# INLINE decodeParams #-}
219 decodeParams = (MIMEParams <$>)
220 ∘ (mapM (\(k, v) → ((,) k) <$> decodeSections v) =≪)
223 sortBySection ∷ Monad m
225 → m (M.Map CIAscii (M.Map Integer ExtendedParam))
226 sortBySection = flip go (∅)
230 → M.Map CIAscii (M.Map Integer ExtendedParam)
231 → m (M.Map CIAscii (M.Map Integer ExtendedParam))
234 = case lookup (epName x) m of
236 → let s = singleton (section x, x)
237 m' = insert (epName x, s) m
241 → case lookup (section x) s of
243 → let s' = insert (section x, x ) s
244 m' = insert (epName x, s') m
248 → fail (concat [ "Duplicate section "
251 , A.toString $ A.fromCIAscii $ epName x
255 decodeSections ∷ Monad m ⇒ M.Map Integer ExtendedParam → m Text
256 decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅)
259 ⇒ M.Map Integer ExtendedParam
262 → m (Seq ExtendedParam)
263 toSeq m expectedSect sects
268 | sect ≡ expectedSect
269 → toSeq m' (expectedSect + 1) (sects ⊳ p)
271 → fail (concat [ "Missing section "
274 , A.toString $ A.fromCIAscii $ epName p
278 decodeSeq ∷ Monad m ⇒ Seq ExtendedParam → m Text
280 = case front sects of
282 → fail "decodeSeq: internal error: empty seq"
283 Just (InitialEncodedParam {..}, xs)
284 → do d ← getDecoder epCharset
285 t ← decodeStr d epPayload
286 decodeSeq' (Just d) xs $ singleton t
287 Just (ContinuedEncodedParam {..}, _)
288 → fail "decodeSeq: internal error: CEP at section 0"
289 Just (AsciiParam {..}, xs)
290 → let t = A.toText apPayload
292 decodeSeq' Nothing xs $ singleton t
299 decodeSeq' decoder sects chunks
300 = case front sects of
302 → return $ T.concat $ toList chunks
303 Just (InitialEncodedParam {}, _)
304 → fail "decodeSeq': internal error: IEP at section > 0"
305 Just (ContinuedEncodedParam {..}, xs)
308 → do t ← decodeStr d epPayload
309 decodeSeq' decoder xs $ chunks ⊳ t
311 → fail (concat [ "Section "
314 , A.toString $ A.fromCIAscii epName
315 , "' is encoded but its first section is not"
317 Just (AsciiParam {..}, xs)
318 → let t = A.toText apPayload
320 decodeSeq' decoder xs $ chunks ⊳ t
322 type Decoder = BS.ByteString → Either UnicodeException Text
324 decodeStr ∷ Monad m ⇒ Decoder → BS.ByteString → m Text
325 decodeStr decoder str
326 = case decoder str of
328 Left e → fail $ show e
330 getDecoder ∷ Monad m ⇒ CIAscii → m Decoder
332 | charset ≡ "UTF-8" = return decodeUtf8'
333 | charset ≡ "US-ASCII" = return decodeUtf8'
334 | otherwise = fail $ "No decoders found for charset: "
335 ⧺ A.toString (A.fromCIAscii charset)