8 -- |Provide facilities to encode/decode MIME parameter values in
9 -- character sets other than US-ASCII. See:
10 -- http://www.faqs.org/rfcs/rfc2231.html
11 module Network.HTTP.Lucu.RFC2231
16 import Control.Applicative
17 import qualified Control.Exception as E
18 import Control.Monad.Unicode
19 import Data.Ascii (Ascii, CIAscii, AsciiBuilder)
20 import qualified Data.Ascii as A
21 import Data.Attoparsec.Char8 as P
23 import qualified Data.ByteString.Char8 as BS
27 import qualified Data.Map as M
28 import Data.Monoid.Unicode
29 import Data.Sequence (Seq, ViewL(..))
30 import qualified Data.Sequence as S
31 import Data.Sequence.Unicode hiding ((∅))
32 import Data.Text (Text)
33 import qualified Data.Text as T
34 import qualified Data.Text.ICU.Convert as TC
35 import Data.Text.ICU.Error
36 import Data.Text.Encoding
37 import Data.Traversable
39 import Network.HTTP.Lucu.Parser.Http
40 import Network.HTTP.Lucu.Utils
41 import Prelude hiding (concat, mapM, takeWhile)
42 import Prelude.Unicode
43 import System.IO.Unsafe
45 printParams ∷ Map CIAscii Text → AsciiBuilder
48 | otherwise = A.toAsciiBuilder "; " ⊕
49 joinWith "; " (map printPair $ M.toList params)
51 printPair ∷ (CIAscii, Text) → AsciiBuilder
52 printPair (name, value)
53 | T.any (> '\xFF') value
54 = printPairInUTF8 name value
56 = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value)
58 printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder
59 printPairInUTF8 name value
60 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
61 A.toAsciiBuilder "*=utf-8''" ⊕
62 escapeUnsafeChars (encodeUtf8 value) (∅)
64 printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder
65 printPairInAscii name value
66 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
67 A.toAsciiBuilder "=" ⊕
68 if BS.any ((¬) ∘ isToken) (A.toByteString value) then
71 A.toAsciiBuilder value
73 escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder
74 escapeUnsafeChars bs b
75 = case BS.uncons bs of
78 | isToken c → escapeUnsafeChars bs' $
79 b ⊕ A.toAsciiBuilder (A.unsafeFromString [c])
80 | otherwise → escapeUnsafeChars bs' $
81 b ⊕ toHex (fromIntegral $ fromEnum c)
83 toHex ∷ Word8 → AsciiBuilder
84 toHex o = A.toAsciiBuilder "%" ⊕
85 A.toAsciiBuilder (A.unsafeFromString [ toHex' (o `shiftR` 8)
86 , toHex' (o .&. 0x0F) ])
90 | o ≤ 0x09 = toEnum $ fromIntegral $ fromEnum '0' + fromIntegral o
91 | otherwise = toEnum $ fromIntegral $ fromEnum 'A' + fromIntegral (o - 0x0A)
95 = InitialEncodedParam {
97 , epCharset ∷ !CIAscii
98 , epPayload ∷ !BS.ByteString
100 | ContinuedEncodedParam {
102 , epSection ∷ !Integer
103 , epPayload ∷ !BS.ByteString
107 , epSection ∷ !Integer
111 section ∷ ExtendedParam → Integer
112 section (InitialEncodedParam {..}) = 0
113 section ep = epSection ep
115 paramsP ∷ Parser (Map CIAscii Text)
116 paramsP = decodeParams =≪ P.many (try paramP)
118 paramP ∷ Parser ExtendedParam
119 paramP = do skipMany lws
126 → do (charset, payload) ← initialEncodedValue
127 return $ InitialEncodedParam name charset payload
129 → do payload ← encodedPayload
130 return $ ContinuedEncodedParam name sect payload
132 → do payload ← token <|> quotedStr
133 return $ AsciiParam name sect payload
135 nameP ∷ Parser (CIAscii, Integer, Bool)
136 nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$>
137 takeWhile1 (\c → isToken c ∧ c ≢ '*')
143 isEncoded ← option False $
146 return (name, sect, isEncoded)
148 initialEncodedValue ∷ Parser (CIAscii, BS.ByteString)
150 = do charset ← metadata
152 _ ← metadata -- Ignore the language tag
154 payload ← encodedPayload
156 -- NOTE: I'm not sure this is the right thing, but RFC
157 -- 2231 doesn't tell us what we should do when the
158 -- charset is omitted.
159 return ("US-ASCII", payload)
161 return (charset, payload)
163 metadata ∷ Parser CIAscii
164 metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$>
165 takeWhile (\c → isToken c ∧ c ≢ '\'')
167 encodedPayload ∷ Parser BS.ByteString
168 encodedPayload = BS.concat <$> P.many (hexChar <|> rawChars)
170 hexChar ∷ Parser BS.ByteString
171 hexChar = do _ ← char '%'
172 h ← satisfy isHexChar
173 l ← satisfy isHexChar
174 return $ BS.singleton $ hexToChar h l
176 isHexChar ∷ Char → Bool
177 isHexChar = inClass "0-9a-fA-F"
179 hexToChar ∷ Char → Char → Char
181 = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l
183 hexToInt ∷ Char → Int
185 | c ≤ '9' = ord c - ord '0'
186 | c ≤ 'F' = ord c - ord 'A' + 10
187 | otherwise = ord c - ord 'a' + 10
189 rawChars ∷ Parser BS.ByteString
190 rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%')
192 decodeParams ∷ Monad m ⇒ [ExtendedParam] → m (Map CIAscii Text)
193 decodeParams = (mapM decodeSections =≪) ∘ sortBySection
195 sortBySection ∷ ∀m. Monad m
197 → m (Map CIAscii (Map Integer ExtendedParam))
198 sortBySection = flip go (∅)
201 → Map CIAscii (Map Integer ExtendedParam)
202 → m (Map CIAscii (Map Integer ExtendedParam))
205 = case M.lookup (epName x) m of
207 → let s = M.singleton (section x) x
208 m' = M.insert (epName x) s m
212 → case M.insertLookupWithKey (\_ s' _ → s') (section x) x s of
214 → let m' = M.insert (epName x) s' m
218 → fail (concat [ "Duplicate section "
221 , A.toString $ A.fromCIAscii $ epName x
225 decodeSections ∷ ∀m. Monad m ⇒ Map Integer ExtendedParam → m Text
226 decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅)
228 toSeq ∷ Map Integer ExtendedParam
231 → m (Seq ExtendedParam)
232 toSeq m expectedSect sects
233 = case M.minViewWithKey m of
237 | sect ≡ expectedSect
238 → toSeq m' (expectedSect + 1) (sects ⊳ p)
240 → fail (concat [ "Missing section "
243 , A.toString $ A.fromCIAscii $ epName p
247 decodeSeq ∷ Seq ExtendedParam → m Text
249 = case S.viewl sects of
251 → fail "decodeSeq: internal error: empty seq"
252 InitialEncodedParam {..} :< xs
253 → do conv ← openConv epCharset
254 let t = TC.toUnicode conv epPayload
255 decodeSeq' (Just conv) xs $ S.singleton t
256 ContinuedEncodedParam {..} :< _
257 → fail "decodeSeq: internal error: CEP at section 0"
258 AsciiParam {..} :< xs
259 → let t = A.toText apPayload
261 decodeSeq' Nothing xs $ S.singleton t
263 decodeSeq' ∷ Maybe (TC.Converter)
267 decodeSeq' convM sects chunks
268 = case S.viewl sects of
270 → return $ T.concat $ toList chunks
271 InitialEncodedParam {..} :< _
272 → fail "decodeSeq': internal error: IEP at section > 0"
273 ContinuedEncodedParam {..} :< xs
276 → let t = TC.toUnicode conv epPayload
278 decodeSeq' convM xs $ chunks ⊳ t
280 → fail (concat [ "Section "
283 , A.toString $ A.fromCIAscii epName
284 , "' is encoded but its first section is not"
286 AsciiParam {..} :< xs
287 → let t = A.toText apPayload
289 decodeSeq' convM xs $ chunks ⊳ t
291 openConv ∷ CIAscii → m TC.Converter
293 = let cs = A.toString $ A.fromCIAscii charset
294 open' = TC.open cs (Just True)
296 case unsafePerformIO $ E.try open' of
297 Right conv → return conv
298 Left err → fail $ show (err ∷ ICUError)