8 -- |Provide functionalities to encode/decode MIME parameter values in
9 -- character sets other than US-ASCII. See:
10 -- <http://www.faqs.org/rfcs/rfc2231.html>
12 -- You usually don't have to use this module directly.
13 module Network.HTTP.Lucu.RFC2231
18 import Control.Applicative
19 import qualified Control.Exception as E
20 import Control.Monad.Unicode
21 import Data.Ascii (Ascii, CIAscii, AsciiBuilder)
22 import qualified Data.Ascii as A
23 import Data.Attoparsec.Char8 as P
25 import qualified Data.ByteString.Char8 as BS
29 import qualified Data.Map as M
30 import Data.Monoid.Unicode
31 import Data.Sequence (Seq, ViewL(..))
32 import qualified Data.Sequence as S
33 import Data.Sequence.Unicode hiding ((∅))
34 import Data.Text (Text)
35 import qualified Data.Text as T
36 import qualified Data.Text.ICU.Convert as TC
37 import Data.Text.Encoding
38 import Data.Text.ICU.Error
39 import Data.Traversable
41 import Network.HTTP.Lucu.Parser.Http
42 import Network.HTTP.Lucu.Utils
43 import Prelude hiding (concat, mapM, takeWhile)
44 import Prelude.Unicode
45 import System.IO.Unsafe
47 -- |Convert parameter values to an 'AsciiBuilder'.
48 printParams ∷ Map CIAscii Text → AsciiBuilder
51 | otherwise = A.toAsciiBuilder "; " ⊕
52 joinWith "; " (map printPair $ M.toList params)
54 printPair ∷ (CIAscii, Text) → AsciiBuilder
55 printPair (name, value)
56 | T.any (> '\xFF') value
57 = printPairInUTF8 name value
59 = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value)
61 printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder
62 printPairInUTF8 name value
63 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
64 A.toAsciiBuilder "*=utf-8''" ⊕
65 escapeUnsafeChars (encodeUtf8 value) (∅)
67 printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder
68 printPairInAscii name value
69 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
70 A.toAsciiBuilder "=" ⊕
71 if BS.any ((¬) ∘ isToken) (A.toByteString value) then
74 A.toAsciiBuilder value
76 escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder
77 escapeUnsafeChars bs b
78 = case BS.uncons bs of
81 | isToken c → escapeUnsafeChars bs' $
82 b ⊕ A.toAsciiBuilder (A.unsafeFromString [c])
83 | otherwise → escapeUnsafeChars bs' $
84 b ⊕ toHex (fromIntegral $ fromEnum c)
86 toHex ∷ Word8 → AsciiBuilder
87 toHex o = A.toAsciiBuilder "%" ⊕
88 A.toAsciiBuilder (A.unsafeFromString [ toHex' (o `shiftR` 8)
89 , toHex' (o .&. 0x0F) ])
93 | o ≤ 0x09 = toEnum $ fromIntegral $ fromEnum '0' + fromIntegral o
94 | otherwise = toEnum $ fromIntegral $ fromEnum 'A' + fromIntegral (o - 0x0A)
98 = InitialEncodedParam {
100 , epCharset ∷ !CIAscii
101 , epPayload ∷ !BS.ByteString
103 | ContinuedEncodedParam {
105 , epSection ∷ !Integer
106 , epPayload ∷ !BS.ByteString
110 , epSection ∷ !Integer
114 section ∷ ExtendedParam → Integer
115 section (InitialEncodedParam {..}) = 0
116 section ep = epSection ep
118 -- |'Parser' for parameter values.
119 paramsP ∷ Parser (Map CIAscii Text)
120 paramsP = decodeParams =≪ P.many (try paramP)
122 paramP ∷ Parser ExtendedParam
123 paramP = do skipMany lws
130 → do (charset, payload) ← initialEncodedValue
131 return $ InitialEncodedParam name charset payload
133 → do payload ← encodedPayload
134 return $ ContinuedEncodedParam name sect payload
136 → do payload ← token <|> quotedStr
137 return $ AsciiParam name sect payload
139 nameP ∷ Parser (CIAscii, Integer, Bool)
140 nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$>
141 takeWhile1 (\c → isToken c ∧ c ≢ '*')
147 isEncoded ← option False $
150 return (name, sect, isEncoded)
152 initialEncodedValue ∷ Parser (CIAscii, BS.ByteString)
154 = do charset ← metadata
156 _ ← metadata -- Ignore the language tag
158 payload ← encodedPayload
160 -- NOTE: I'm not sure this is the right thing, but RFC
161 -- 2231 doesn't tell us what we should do when the
162 -- charset is omitted.
163 return ("US-ASCII", payload)
165 return (charset, payload)
167 metadata ∷ Parser CIAscii
168 metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$>
169 takeWhile (\c → isToken c ∧ c ≢ '\'')
171 encodedPayload ∷ Parser BS.ByteString
172 encodedPayload = BS.concat <$> P.many (hexChar <|> rawChars)
174 hexChar ∷ Parser BS.ByteString
175 hexChar = do _ ← char '%'
176 h ← satisfy isHexChar
177 l ← satisfy isHexChar
178 return $ BS.singleton $ hexToChar h l
180 isHexChar ∷ Char → Bool
181 isHexChar = inClass "0-9a-fA-F"
183 hexToChar ∷ Char → Char → Char
185 = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l
187 hexToInt ∷ Char → Int
189 | c ≤ '9' = ord c - ord '0'
190 | c ≤ 'F' = ord c - ord 'A' + 10
191 | otherwise = ord c - ord 'a' + 10
193 rawChars ∷ Parser BS.ByteString
194 rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%')
196 decodeParams ∷ Monad m ⇒ [ExtendedParam] → m (Map CIAscii Text)
197 decodeParams = (mapM decodeSections =≪) ∘ sortBySection
199 sortBySection ∷ ∀m. Monad m
201 → m (Map CIAscii (Map Integer ExtendedParam))
202 sortBySection = flip go (∅)
205 → Map CIAscii (Map Integer ExtendedParam)
206 → m (Map CIAscii (Map Integer ExtendedParam))
209 = case M.lookup (epName x) m of
211 → let s = M.singleton (section x) x
212 m' = M.insert (epName x) s m
216 → case M.insertLookupWithKey (\_ s' _ → s') (section x) x s of
218 → let m' = M.insert (epName x) s' m
222 → fail (concat [ "Duplicate section "
225 , A.toString $ A.fromCIAscii $ epName x
229 decodeSections ∷ ∀m. Monad m ⇒ Map Integer ExtendedParam → m Text
230 decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅)
232 toSeq ∷ Map Integer ExtendedParam
235 → m (Seq ExtendedParam)
236 toSeq m expectedSect sects
237 = case M.minViewWithKey m of
241 | sect ≡ expectedSect
242 → toSeq m' (expectedSect + 1) (sects ⊳ p)
244 → fail (concat [ "Missing section "
247 , A.toString $ A.fromCIAscii $ epName p
251 decodeSeq ∷ Seq ExtendedParam → m Text
253 = case S.viewl sects of
255 → fail "decodeSeq: internal error: empty seq"
256 InitialEncodedParam {..} :< xs
257 → do conv ← openConv epCharset
258 let t = TC.toUnicode conv epPayload
259 decodeSeq' (Just conv) xs $ S.singleton t
260 ContinuedEncodedParam {..} :< _
261 → fail "decodeSeq: internal error: CEP at section 0"
262 AsciiParam {..} :< xs
263 → let t = A.toText apPayload
265 decodeSeq' Nothing xs $ S.singleton t
267 decodeSeq' ∷ Maybe (TC.Converter)
271 decodeSeq' convM sects chunks
272 = case S.viewl sects of
274 → return $ T.concat $ toList chunks
275 InitialEncodedParam {..} :< _
276 → fail "decodeSeq': internal error: IEP at section > 0"
277 ContinuedEncodedParam {..} :< xs
280 → let t = TC.toUnicode conv epPayload
282 decodeSeq' convM xs $ chunks ⊳ t
284 → fail (concat [ "Section "
287 , A.toString $ A.fromCIAscii epName
288 , "' is encoded but its first section is not"
290 AsciiParam {..} :< xs
291 → let t = A.toText apPayload
293 decodeSeq' convM xs $ chunks ⊳ t
295 openConv ∷ CIAscii → m TC.Converter
297 = let cs = A.toString $ A.fromCIAscii charset
298 open' = TC.open cs (Just True)
300 case unsafePerformIO $ E.try open' of
301 Right conv → return conv
302 Left err → fail $ show (err ∷ ICUError)