8 -- |Provide functionalities to encode/decode MIME parameter values in
9 -- character sets other than US-ASCII. See:
10 -- <http://www.faqs.org/rfcs/rfc2231.html>
12 -- You usually don't have to use this module directly.
13 module Network.HTTP.Lucu.RFC2231
18 import Control.Applicative
19 import qualified Control.Exception as E
20 import Control.Monad hiding (mapM)
21 import Control.Monad.Unicode
22 import Data.Ascii (Ascii, CIAscii, AsciiBuilder)
23 import qualified Data.Ascii as A
24 import Data.Attoparsec.Char8 as P
26 import qualified Data.ByteString.Char8 as BS
30 import qualified Data.Map as M
31 import Data.Monoid.Unicode
32 import Data.Sequence (Seq, ViewL(..))
33 import qualified Data.Sequence as S
34 import Data.Sequence.Unicode hiding ((∅))
35 import Data.Text (Text)
36 import qualified Data.Text as T
37 import qualified Data.Text.ICU.Convert as TC
38 import Data.Text.Encoding
39 import Data.Text.ICU.Error
40 import Data.Traversable
42 import Network.HTTP.Lucu.Parser.Http
43 import Network.HTTP.Lucu.Utils
44 import Prelude hiding (concat, mapM, takeWhile)
45 import Prelude.Unicode
46 import System.IO.Unsafe
48 -- |Convert parameter values to an 'AsciiBuilder'.
49 printParams ∷ Map CIAscii Text → AsciiBuilder
50 {-# INLINEABLE printParams #-}
51 printParams m = M.foldlWithKey f (∅) m
52 -- THINKME: Use foldlWithKey' for newer Data.Map
54 f ∷ AsciiBuilder → CIAscii → Text → AsciiBuilder
56 f ab k v = ab ⊕ A.toAsciiBuilder "; " ⊕ printPair k v
58 printPair ∷ CIAscii → Text → AsciiBuilder
59 {-# INLINEABLE printPair #-}
61 | T.any (> '\xFF') value
62 = printPairInUTF8 name value
64 = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value)
66 printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder
67 {-# INLINEABLE printPairInUTF8 #-}
68 printPairInUTF8 name value
69 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
70 A.toAsciiBuilder "*=utf-8''" ⊕
71 escapeUnsafeChars (encodeUtf8 value) (∅)
73 printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder
74 {-# INLINEABLE printPairInAscii #-}
75 printPairInAscii name value
76 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
77 A.toAsciiBuilder "=" ⊕
78 if BS.any ((¬) ∘ isToken) (A.toByteString value) then
81 A.toAsciiBuilder value
83 escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder
84 {-# INLINEABLE escapeUnsafeChars #-}
85 escapeUnsafeChars bs b
86 = case BS.uncons bs of
89 | isToken c → escapeUnsafeChars bs' $
90 b ⊕ A.toAsciiBuilder (A.unsafeFromString [c])
91 | otherwise → escapeUnsafeChars bs' $
92 b ⊕ toHex (fromIntegral $ fromEnum c)
94 toHex ∷ Word8 → AsciiBuilder
95 {-# INLINEABLE toHex #-}
96 toHex o = A.toAsciiBuilder "%" ⊕
97 A.toAsciiBuilder (A.unsafeFromString [ toHex' (o `shiftR` 8)
98 , toHex' (o .&. 0x0F) ])
100 toHex' ∷ Word8 → Char
101 {-# INLINEABLE toHex' #-}
103 | h ≤ 0x09 = toEnum $ fromIntegral
104 $ fromEnum '0' + fromIntegral h
105 | otherwise = toEnum $ fromIntegral
106 $ fromEnum 'A' + fromIntegral (h - 0x0A)
109 = InitialEncodedParam {
111 , epCharset ∷ !CIAscii
112 , epPayload ∷ !BS.ByteString
114 | ContinuedEncodedParam {
116 , epSection ∷ !Integer
117 , epPayload ∷ !BS.ByteString
121 , epSection ∷ !Integer
125 section ∷ ExtendedParam → Integer
126 {-# INLINE section #-}
127 section (InitialEncodedParam {..}) = 0
128 section ep = epSection ep
130 -- |'Parser' for parameter values.
131 paramsP ∷ Parser (Map CIAscii Text)
132 {-# INLINEABLE paramsP #-}
133 paramsP = decodeParams =≪ P.many (try paramP)
135 paramP ∷ Parser ExtendedParam
136 paramP = do skipMany lws
143 → do (charset, payload) ← initialEncodedValue
144 return $ InitialEncodedParam name charset payload
146 → do payload ← encodedPayload
147 return $ ContinuedEncodedParam name sect payload
149 → do payload ← token <|> quotedStr
150 return $ AsciiParam name sect payload
152 nameP ∷ Parser (CIAscii, Integer, Bool)
153 nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$>
154 takeWhile1 (\c → isToken c ∧ c ≢ '*')
155 sect ← option 0 $ try (char '*' *> decimal )
156 isEncoded ← option False $ try (char '*' *> pure True)
157 return (name, sect, isEncoded)
159 initialEncodedValue ∷ Parser (CIAscii, BS.ByteString)
161 = do charset ← metadata
163 void $ metadata -- Ignore the language tag
165 payload ← encodedPayload
167 -- NOTE: I'm not sure this is the right thing, but RFC
168 -- 2231 doesn't tell us what we should do when the
169 -- charset is omitted.
170 return ("US-ASCII", payload)
172 return (charset, payload)
174 metadata ∷ Parser CIAscii
175 metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$>
176 takeWhile (\c → c ≢ '\'' ∧ isToken c)
178 encodedPayload ∷ Parser BS.ByteString
179 {-# INLINE encodedPayload #-}
180 encodedPayload = BS.concat <$> P.many (hexChar <|> rawChars)
182 hexChar ∷ Parser BS.ByteString
183 {-# INLINEABLE hexChar #-}
184 hexChar = do void $ char '%'
185 h ← satisfy isHexChar
186 l ← satisfy isHexChar
187 return $ BS.singleton $ hexToChar h l
189 isHexChar ∷ Char → Bool
190 isHexChar = inClass "0-9a-fA-F"
192 hexToChar ∷ Char → Char → Char
193 {-# INLINE hexToChar #-}
195 = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l
197 hexToInt ∷ Char → Int
198 {-# INLINEABLE hexToInt #-}
200 | c ≤ '9' = ord c - ord '0'
201 | c ≤ 'F' = ord c - ord 'A' + 10
202 | otherwise = ord c - ord 'a' + 10
204 rawChars ∷ Parser BS.ByteString
205 {-# INLINE rawChars #-}
206 rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%')
208 decodeParams ∷ Monad m ⇒ [ExtendedParam] → m (Map CIAscii Text)
209 {-# INLINE decodeParams #-}
210 decodeParams = (mapM decodeSections =≪) ∘ sortBySection
212 sortBySection ∷ ∀m. Monad m
214 → m (Map CIAscii (Map Integer ExtendedParam))
215 sortBySection = flip go (∅)
218 → Map CIAscii (Map Integer ExtendedParam)
219 → m (Map CIAscii (Map Integer ExtendedParam))
222 = case M.lookup (epName x) m of
224 → let s = M.singleton (section x) x
225 m' = M.insert (epName x) s m
229 → case M.lookup (section x) s of
231 → let s' = M.insert (section x) x s
232 m' = M.insert (epName x) s' m
236 → fail (concat [ "Duplicate section "
239 , A.toString $ A.fromCIAscii $ epName x
243 decodeSections ∷ ∀m. Monad m ⇒ Map Integer ExtendedParam → m Text
244 decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅)
246 toSeq ∷ Map Integer ExtendedParam
249 → m (Seq ExtendedParam)
250 toSeq m expectedSect sects
251 = case M.minViewWithKey m of
255 | sect ≡ expectedSect
256 → toSeq m' (expectedSect + 1) (sects ⊳ p)
258 → fail (concat [ "Missing section "
261 , A.toString $ A.fromCIAscii $ epName p
265 decodeSeq ∷ Seq ExtendedParam → m Text
267 = case S.viewl sects of
269 → fail "decodeSeq: internal error: empty seq"
270 InitialEncodedParam {..} :< xs
271 → do conv ← openConv epCharset
272 let t = TC.toUnicode conv epPayload
273 decodeSeq' (Just conv) xs $ S.singleton t
274 ContinuedEncodedParam {..} :< _
275 → fail "decodeSeq: internal error: CEP at section 0"
276 AsciiParam {..} :< xs
277 → let t = A.toText apPayload
279 decodeSeq' Nothing xs $ S.singleton t
281 decodeSeq' ∷ Maybe (TC.Converter)
285 decodeSeq' convM sects chunks
286 = case S.viewl sects of
288 → return $ T.concat $ toList chunks
289 InitialEncodedParam {..} :< _
290 → fail "decodeSeq': internal error: IEP at section > 0"
291 ContinuedEncodedParam {..} :< xs
294 → let t = TC.toUnicode conv epPayload
296 decodeSeq' convM xs $ chunks ⊳ t
298 → fail (concat [ "Section "
301 , A.toString $ A.fromCIAscii epName
302 , "' is encoded but its first section is not"
304 AsciiParam {..} :< xs
305 → let t = A.toText apPayload
307 decodeSeq' convM xs $ chunks ⊳ t
309 openConv ∷ CIAscii → m TC.Converter
311 = let cs = A.toString $ A.fromCIAscii charset
312 open' = TC.open cs (Just True)
314 case unsafePerformIO $ E.try open' of
315 Right conv → return conv
316 Left err → fail $ show (err ∷ ICUError)