5 , GeneralizedNewtypeDeriving
6 , MultiParamTypeClasses
10 , TypeSynonymInstances
13 {-# OPTIONS_GHC -fno-warn-orphans #-}
14 {-# OPTIONS_GHC -fno-warn-missing-methods #-}
15 -- |Parsing and printing MIME parameter values
16 -- (<http://tools.ietf.org/html/rfc2231>).
17 module Network.HTTP.Lucu.MIMEParams
21 import Control.Applicative hiding (empty)
22 import Control.Monad hiding (mapM)
23 import Control.Monad.Unicode
24 import Data.Ascii (Ascii, CIAscii, AsciiBuilder)
25 import qualified Data.Ascii as A
26 import Data.Attoparsec.Char8
27 import Data.Attoparsec.Parsable
29 import Data.ByteString (ByteString)
30 import qualified Data.ByteString.Char8 as BS
32 import Data.Collections
33 import Data.Collections.BaseInstances ()
34 import qualified Data.Collections.Newtype.TH as C
35 import Data.Convertible.Base
36 import Data.Convertible.Instances.Ascii ()
37 import Data.Convertible.Utils
38 import qualified Data.Map as M (Map)
39 import Data.Monoid.Unicode
40 import Data.Sequence (Seq)
41 import Data.Text (Text)
42 import qualified Data.Text as T
43 import Data.Text.Encoding
44 import Data.Text.Encoding.Error
46 import Network.HTTP.Lucu.MIMEParams.Internal
47 import Network.HTTP.Lucu.OrphanInstances ()
48 import Network.HTTP.Lucu.Parser.Http
49 import Network.HTTP.Lucu.Utils
50 import Prelude hiding (concat, lookup, mapM, takeWhile)
51 import Prelude.Unicode
53 C.derive [d| instance Unfoldable MIMEParams (CIAscii, Text)
54 instance Foldable MIMEParams (CIAscii, Text)
55 instance Collection MIMEParams (CIAscii, Text)
56 instance Indexed MIMEParams CIAscii Text
57 instance Map MIMEParams CIAscii Text
58 instance SortingCollection MIMEParams (CIAscii, Text)
61 instance ConvertSuccess MIMEParams Ascii where
62 {-# INLINE convertSuccess #-}
63 convertSuccess = convertSuccessVia ((⊥) ∷ AsciiBuilder)
65 instance ConvertSuccess MIMEParams AsciiBuilder where
66 {-# INLINEABLE convertSuccess #-}
67 convertSuccess = foldl' f (∅)
69 f ∷ AsciiBuilder → (CIAscii, Text) → AsciiBuilder
71 f ab (k, v) = ab ⊕ cs ("; " ∷ Ascii) ⊕ printPair k v
73 printPair ∷ CIAscii → Text → AsciiBuilder
74 {-# INLINEABLE printPair #-}
76 | T.any (> '\xFF') value
77 = printPairInUTF8 name value
79 = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value)
81 printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder
82 {-# INLINEABLE printPairInUTF8 #-}
83 printPairInUTF8 name value
85 cs ("*=utf-8''" ∷ Ascii) ⊕
86 escapeUnsafeChars (encodeUtf8 value) (∅)
88 printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder
89 {-# INLINEABLE printPairInAscii #-}
90 printPairInAscii name value
93 if BS.any ((¬) ∘ isToken) (cs value) then
98 escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder
99 {-# INLINEABLE escapeUnsafeChars #-}
100 escapeUnsafeChars bs b
101 = case BS.uncons bs of
104 | isToken c → escapeUnsafeChars bs' $
105 b ⊕ cs (A.unsafeFromString [c])
106 | otherwise → escapeUnsafeChars bs' $
107 b ⊕ toHex (fromIntegral $ fromEnum c)
109 toHex ∷ Word8 → AsciiBuilder
110 {-# INLINEABLE toHex #-}
111 toHex o = cs ("%" ∷ Ascii) ⊕
112 cs (A.unsafeFromString [ toHex' (o `shiftR` 8)
113 , toHex' (o .&. 0x0F) ])
115 toHex' ∷ Word8 → Char
116 {-# INLINEABLE toHex' #-}
118 | h ≤ 0x09 = toEnum $ fromIntegral
119 $ fromEnum '0' + fromIntegral h
120 | otherwise = toEnum $ fromIntegral
121 $ fromEnum 'A' + fromIntegral (h - 0x0A)
123 deriveAttempts [ ([t| MIMEParams |], [t| Ascii |])
124 , ([t| MIMEParams |], [t| AsciiBuilder |])
128 = InitialEncodedParam {
130 , epCharset ∷ !CIAscii
131 , epPayload ∷ !BS.ByteString
133 | ContinuedEncodedParam {
135 , epSection ∷ !Integer
136 , epPayload ∷ !BS.ByteString
140 , epSection ∷ !Integer
144 section ∷ ExtendedParam → Integer
145 {-# INLINE section #-}
146 section (InitialEncodedParam {..}) = 0
147 section ep = epSection ep
149 instance Parsable ByteString MIMEParams where
150 {-# INLINEABLE parser #-}
151 parser = decodeParams =≪ many (try parser)
153 instance Parsable ByteString ExtendedParam where
154 parser = do skipMany lws
161 → do (charset, payload) ← initialEncodedValue
162 return $ InitialEncodedParam nm charset payload
164 → do payload ← encodedPayload
165 return $ ContinuedEncodedParam nm sect payload
167 → do payload ← token <|> quotedStr
168 return $ AsciiParam nm sect payload
170 name ∷ Parser (CIAscii, Integer, Bool)
171 name = do nm ← (cs ∘ A.unsafeFromByteString) <$>
172 takeWhile1 (\c → isToken c ∧ c ≢ '*')
173 sect ← option 0 $ try (char '*' *> decimal )
174 isEncoded ← option False $ try (char '*' *> pure True)
175 return (nm, sect, isEncoded)
177 initialEncodedValue ∷ Parser (CIAscii, BS.ByteString)
179 = do charset ← metadata
181 void $ metadata -- Ignore the language tag
183 payload ← encodedPayload
185 -- NOTE: I'm not sure this is the right thing, but RFC
186 -- 2231 doesn't tell us what we should do when the
187 -- charset is omitted.
188 fail "charset is missing"
190 return (charset, payload)
192 metadata ∷ Parser CIAscii
193 metadata = (cs ∘ A.unsafeFromByteString) <$>
194 takeWhile (\c → c ≢ '\'' ∧ isToken c)
196 encodedPayload ∷ Parser BS.ByteString
197 {-# INLINE encodedPayload #-}
198 encodedPayload = BS.concat <$> many (hexChar <|> rawChars)
200 hexChar ∷ Parser BS.ByteString
201 {-# INLINEABLE hexChar #-}
202 hexChar = do void $ char '%'
203 h ← satisfy isHexChar
204 l ← satisfy isHexChar
205 return $ BS.singleton $ hexToChar h l
207 isHexChar ∷ Char → Bool
208 isHexChar = inClass "0-9a-fA-F"
210 hexToChar ∷ Char → Char → Char
211 {-# INLINE hexToChar #-}
213 = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l
215 hexToInt ∷ Char → Int
216 {-# INLINEABLE hexToInt #-}
218 | c ≤ '9' = ord c - ord '0'
219 | c ≤ 'F' = ord c - ord 'A' + 10
220 | otherwise = ord c - ord 'a' + 10
222 rawChars ∷ Parser BS.ByteString
223 {-# INLINE rawChars #-}
224 rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%')
226 decodeParams ∷ (Functor m, Monad m) ⇒ [ExtendedParam] → m MIMEParams
227 {-# INLINE decodeParams #-}
228 decodeParams = (MIMEParams <$>)
229 ∘ (mapM (\(k, v) → ((,) k) <$> decodeSections v) =≪)
232 sortBySection ∷ Monad m
234 → m (M.Map CIAscii (M.Map Integer ExtendedParam))
235 sortBySection = flip go (∅)
239 → M.Map CIAscii (M.Map Integer ExtendedParam)
240 → m (M.Map CIAscii (M.Map Integer ExtendedParam))
243 = case lookup (epName x) m of
245 → let s = singleton (section x, x)
246 m' = insert (epName x, s) m
250 → case lookup (section x) s of
252 → let s' = insert (section x, x ) s
253 m' = insert (epName x, s') m
257 → fail (concat [ "Duplicate section "
264 decodeSections ∷ Monad m ⇒ M.Map Integer ExtendedParam → m Text
265 decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅)
268 ⇒ M.Map Integer ExtendedParam
271 → m (Seq ExtendedParam)
272 toSeq m expectedSect sects
277 | sect ≡ expectedSect
278 → toSeq m' (expectedSect + 1) (sects ⊳ p)
280 → fail (concat [ "Missing section "
287 decodeSeq ∷ Monad m ⇒ Seq ExtendedParam → m Text
289 = case front sects of
291 → fail "decodeSeq: internal error: empty seq"
292 Just (InitialEncodedParam {..}, xs)
293 → do d ← getDecoder epCharset
294 t ← decodeStr d epPayload
295 decodeSeq' (Just d) xs $ singleton t
296 Just (ContinuedEncodedParam {..}, _)
297 → fail "decodeSeq: internal error: CEP at section 0"
298 Just (AsciiParam {..}, xs)
299 → decodeSeq' Nothing xs $ singleton $ cs apPayload
306 decodeSeq' decoder sects chunks
307 = case front sects of
309 → return $ T.concat $ toList chunks
310 Just (InitialEncodedParam {}, _)
311 → fail "decodeSeq': internal error: IEP at section > 0"
312 Just (ContinuedEncodedParam {..}, xs)
315 → do t ← decodeStr d epPayload
316 decodeSeq' decoder xs $ chunks ⊳ t
318 → fail (concat [ "Section "
322 , "' is encoded but its first section is not"
324 Just (AsciiParam {..}, xs)
325 → decodeSeq' decoder xs $ chunks ⊳ cs apPayload
327 type Decoder = BS.ByteString → Either UnicodeException Text
329 decodeStr ∷ Monad m ⇒ Decoder → BS.ByteString → m Text
330 decodeStr decoder str
331 = case decoder str of
333 Left e → fail $ show e
335 getDecoder ∷ Monad m ⇒ CIAscii → m Decoder
337 | charset ≡ "UTF-8" = return decodeUtf8'
338 | charset ≡ "US-ASCII" = return decodeUtf8'
339 | otherwise = fail $ "No decoders found for charset: " ⊕ cs charset