5 , GeneralizedNewtypeDeriving
6 , MultiParamTypeClasses
10 , TypeSynonymInstances
13 {-# OPTIONS_GHC -fno-warn-orphans #-}
14 {-# OPTIONS_GHC -fno-warn-missing-methods #-}
15 -- |Parsing and printing MIME parameter values
16 -- (<http://tools.ietf.org/html/rfc2231>).
17 module Network.HTTP.Lucu.MIMEParams
21 import Control.Applicative hiding (empty)
22 import Control.Monad hiding (mapM)
23 import Control.Monad.Unicode
24 import Data.Ascii (Ascii, CIAscii, AsciiBuilder)
25 import qualified Data.Ascii as A
26 import Data.Attoparsec.Char8
28 import qualified Data.ByteString.Char8 as BS
30 import Data.Collections
31 import Data.Collections.BaseInstances ()
32 import qualified Data.Collections.Newtype.TH as C
33 import Data.Convertible.Base
34 import Data.Convertible.Instances.Ascii ()
35 import Data.Convertible.Utils
37 import qualified Data.Map as M (Map)
38 import Data.Monoid.Unicode
39 import Data.Sequence (Seq)
40 import Data.Text (Text)
41 import qualified Data.Text as T
42 import Data.Text.Encoding
43 import Data.Text.Encoding.Error
45 import Network.HTTP.Lucu.MIMEParams.Internal
46 import Network.HTTP.Lucu.OrphanInstances ()
47 import Network.HTTP.Lucu.Parser.Http
48 import Network.HTTP.Lucu.Utils
49 import Prelude hiding (concat, lookup, mapM, takeWhile)
50 import Prelude.Unicode
52 C.derive [d| instance Unfoldable MIMEParams (CIAscii, Text)
53 instance Foldable MIMEParams (CIAscii, Text)
54 instance Collection MIMEParams (CIAscii, Text)
55 instance Indexed MIMEParams CIAscii Text
56 instance Map MIMEParams CIAscii Text
57 instance SortingCollection MIMEParams (CIAscii, Text)
60 instance ConvertSuccess MIMEParams Ascii where
61 {-# INLINE convertSuccess #-}
62 convertSuccess = convertSuccessVia ((⊥) ∷ AsciiBuilder)
64 instance ConvertSuccess MIMEParams AsciiBuilder where
65 {-# INLINEABLE convertSuccess #-}
66 convertSuccess = foldl' f (∅)
68 f ∷ AsciiBuilder → (CIAscii, Text) → AsciiBuilder
70 f ab (k, v) = ab ⊕ cs ("; " ∷ Ascii) ⊕ printPair k v
72 printPair ∷ CIAscii → Text → AsciiBuilder
73 {-# INLINEABLE printPair #-}
75 | T.any (> '\xFF') value
76 = printPairInUTF8 name value
78 = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value)
80 printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder
81 {-# INLINEABLE printPairInUTF8 #-}
82 printPairInUTF8 name value
84 cs ("*=utf-8''" ∷ Ascii) ⊕
85 escapeUnsafeChars (encodeUtf8 value) (∅)
87 printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder
88 {-# INLINEABLE printPairInAscii #-}
89 printPairInAscii name value
92 if BS.any ((¬) ∘ isToken) (cs value) then
97 escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder
98 {-# INLINEABLE escapeUnsafeChars #-}
99 escapeUnsafeChars bs b
100 = case BS.uncons bs of
103 | isToken c → escapeUnsafeChars bs' $
104 b ⊕ cs (A.unsafeFromString [c])
105 | otherwise → escapeUnsafeChars bs' $
106 b ⊕ toHex (fromIntegral $ fromEnum c)
108 toHex ∷ Word8 → AsciiBuilder
109 {-# INLINEABLE toHex #-}
110 toHex o = cs ("%" ∷ Ascii) ⊕
111 cs (A.unsafeFromString [ toHex' (o `shiftR` 8)
112 , toHex' (o .&. 0x0F) ])
114 toHex' ∷ Word8 → Char
115 {-# INLINEABLE toHex' #-}
117 | h ≤ 0x09 = toEnum $ fromIntegral
118 $ fromEnum '0' + fromIntegral h
119 | otherwise = toEnum $ fromIntegral
120 $ fromEnum 'A' + fromIntegral (h - 0x0A)
122 deriveAttempts [ ([t| MIMEParams |], [t| Ascii |])
123 , ([t| MIMEParams |], [t| AsciiBuilder |])
127 = InitialEncodedParam {
129 , epCharset ∷ !CIAscii
130 , epPayload ∷ !BS.ByteString
132 | ContinuedEncodedParam {
134 , epSection ∷ !Integer
135 , epPayload ∷ !BS.ByteString
139 , epSection ∷ !Integer
143 section ∷ ExtendedParam → Integer
144 {-# INLINE section #-}
145 section (InitialEncodedParam {..}) = 0
146 section ep = epSection ep
148 instance Default (Parser MIMEParams) where
150 def = decodeParams =≪ many (try def)
152 instance Default (Parser ExtendedParam) where
153 def = do skipMany lws
160 → do (charset, payload) ← initialEncodedValue
161 return $ InitialEncodedParam nm charset payload
163 → do payload ← encodedPayload
164 return $ ContinuedEncodedParam nm sect payload
166 → do payload ← token <|> quotedStr
167 return $ AsciiParam nm sect payload
169 name ∷ Parser (CIAscii, Integer, Bool)
170 name = do nm ← (cs ∘ A.unsafeFromByteString) <$>
171 takeWhile1 (\c → isToken c ∧ c ≢ '*')
172 sect ← option 0 $ try (char '*' *> decimal )
173 isEncoded ← option False $ try (char '*' *> pure True)
174 return (nm, sect, isEncoded)
176 initialEncodedValue ∷ Parser (CIAscii, BS.ByteString)
178 = do charset ← metadata
180 void $ metadata -- Ignore the language tag
182 payload ← encodedPayload
184 -- NOTE: I'm not sure this is the right thing, but RFC
185 -- 2231 doesn't tell us what we should do when the
186 -- charset is omitted.
187 fail "charset is missing"
189 return (charset, payload)
191 metadata ∷ Parser CIAscii
192 metadata = (cs ∘ A.unsafeFromByteString) <$>
193 takeWhile (\c → c ≢ '\'' ∧ isToken c)
195 encodedPayload ∷ Parser BS.ByteString
196 {-# INLINE encodedPayload #-}
197 encodedPayload = BS.concat <$> many (hexChar <|> rawChars)
199 hexChar ∷ Parser BS.ByteString
200 {-# INLINEABLE hexChar #-}
201 hexChar = do void $ char '%'
202 h ← satisfy isHexChar
203 l ← satisfy isHexChar
204 return $ BS.singleton $ hexToChar h l
206 isHexChar ∷ Char → Bool
207 isHexChar = inClass "0-9a-fA-F"
209 hexToChar ∷ Char → Char → Char
210 {-# INLINE hexToChar #-}
212 = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l
214 hexToInt ∷ Char → Int
215 {-# INLINEABLE hexToInt #-}
217 | c ≤ '9' = ord c - ord '0'
218 | c ≤ 'F' = ord c - ord 'A' + 10
219 | otherwise = ord c - ord 'a' + 10
221 rawChars ∷ Parser BS.ByteString
222 {-# INLINE rawChars #-}
223 rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%')
225 decodeParams ∷ (Functor m, Monad m) ⇒ [ExtendedParam] → m MIMEParams
226 {-# INLINE decodeParams #-}
227 decodeParams = (MIMEParams <$>)
228 ∘ (mapM (\(k, v) → ((,) k) <$> decodeSections v) =≪)
231 sortBySection ∷ Monad m
233 → m (M.Map CIAscii (M.Map Integer ExtendedParam))
234 sortBySection = flip go (∅)
238 → M.Map CIAscii (M.Map Integer ExtendedParam)
239 → m (M.Map CIAscii (M.Map Integer ExtendedParam))
242 = case lookup (epName x) m of
244 → let s = singleton (section x, x)
245 m' = insert (epName x, s) m
249 → case lookup (section x) s of
251 → let s' = insert (section x, x ) s
252 m' = insert (epName x, s') m
256 → fail (concat [ "Duplicate section "
263 decodeSections ∷ Monad m ⇒ M.Map Integer ExtendedParam → m Text
264 decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅)
267 ⇒ M.Map Integer ExtendedParam
270 → m (Seq ExtendedParam)
271 toSeq m expectedSect sects
276 | sect ≡ expectedSect
277 → toSeq m' (expectedSect + 1) (sects ⊳ p)
279 → fail (concat [ "Missing section "
286 decodeSeq ∷ Monad m ⇒ Seq ExtendedParam → m Text
288 = case front sects of
290 → fail "decodeSeq: internal error: empty seq"
291 Just (InitialEncodedParam {..}, xs)
292 → do d ← getDecoder epCharset
293 t ← decodeStr d epPayload
294 decodeSeq' (Just d) xs $ singleton t
295 Just (ContinuedEncodedParam {..}, _)
296 → fail "decodeSeq: internal error: CEP at section 0"
297 Just (AsciiParam {..}, xs)
298 → decodeSeq' Nothing xs $ singleton $ cs apPayload
305 decodeSeq' decoder sects chunks
306 = case front sects of
308 → return $ T.concat $ toList chunks
309 Just (InitialEncodedParam {}, _)
310 → fail "decodeSeq': internal error: IEP at section > 0"
311 Just (ContinuedEncodedParam {..}, xs)
314 → do t ← decodeStr d epPayload
315 decodeSeq' decoder xs $ chunks ⊳ t
317 → fail (concat [ "Section "
321 , "' is encoded but its first section is not"
323 Just (AsciiParam {..}, xs)
324 → decodeSeq' decoder xs $ chunks ⊳ cs apPayload
326 type Decoder = BS.ByteString → Either UnicodeException Text
328 decodeStr ∷ Monad m ⇒ Decoder → BS.ByteString → m Text
329 decodeStr decoder str
330 = case decoder str of
332 Left e → fail $ show e
334 getDecoder ∷ Monad m ⇒ CIAscii → m Decoder
336 | charset ≡ "UTF-8" = return decodeUtf8'
337 | charset ≡ "US-ASCII" = return decodeUtf8'
338 | otherwise = fail $ "No decoders found for charset: " ⊕ cs charset