8 -- |Provide facilities to encode/decode MIME parameter values in
9 -- character sets other than US-ASCII. See:
10 -- http://www.faqs.org/rfcs/rfc2231.html
11 module Network.HTTP.Lucu.RFC2231
16 import Control.Applicative
17 import Control.Monad.Unicode
18 import Data.Ascii (Ascii, CIAscii, AsciiBuilder)
19 import qualified Data.Ascii as A
20 import Data.Attoparsec.Char8 as P
22 import qualified Data.ByteString.Char8 as BS
26 import qualified Data.Map as M
27 import Data.Monoid.Unicode
28 import qualified Data.Sequence as S
29 import Data.Sequence.Unicode hiding ((∅))
30 import Data.Text (Text)
31 import qualified Data.Text as T
32 import Data.Text.Encoding
33 import Data.Traversable
35 import Network.HTTP.Lucu.Parser.Http
36 import Network.HTTP.Lucu.Utils
37 import Prelude hiding (concat, mapM, takeWhile)
38 import Prelude.Unicode
40 printParams ∷ Map CIAscii Text → AsciiBuilder
43 | otherwise = A.toAsciiBuilder "; " ⊕
44 joinWith "; " (map printPair $ M.toList params)
46 printPair ∷ (CIAscii, Text) → AsciiBuilder
47 printPair (name, value)
48 | T.any (> '\xFF') value
49 = printPairInUTF8 name value
51 = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value)
53 printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder
54 printPairInUTF8 name value
55 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
56 A.toAsciiBuilder "*=utf-8''" ⊕
57 escapeUnsafeChars (encodeUtf8 value) (∅)
59 printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder
60 printPairInAscii name value
61 = A.toAsciiBuilder (A.fromCIAscii name) ⊕
62 A.toAsciiBuilder "=" ⊕
63 if BS.any ((¬) ∘ isToken) (A.toByteString value) then
66 A.toAsciiBuilder value
68 escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder
69 escapeUnsafeChars bs b
70 = case BS.uncons bs of
73 | isToken c → escapeUnsafeChars bs' $
74 b ⊕ A.toAsciiBuilder (A.unsafeFromString [c])
75 | otherwise → escapeUnsafeChars bs' $
76 b ⊕ toHex (fromIntegral $ fromEnum c)
78 toHex ∷ Word8 → AsciiBuilder
79 toHex o = A.toAsciiBuilder "%" ⊕
80 A.toAsciiBuilder (A.unsafeFromString [ toHex' (o `shiftR` 8)
81 , toHex' (o .&. 0x0F) ])
85 | o ≤ 0x09 = toEnum $ fromIntegral $ fromEnum '0' + fromIntegral o
86 | otherwise = toEnum $ fromIntegral $ fromEnum 'A' + fromIntegral (o - 0x0A)
90 = InitialEncodedParam {
92 , epCharset ∷ !CIAscii
93 , epPayload ∷ !BS.ByteString
95 | ContinuedEncodedParam {
97 , epSection ∷ !Integer
98 , epPayload ∷ !BS.ByteString
102 , epSection ∷ !Integer
106 section ∷ ExtendedParam → Integer
107 section (InitialEncodedParam {..}) = 0
108 section ep = epSection ep
110 paramsP ∷ Parser (Map CIAscii Text)
111 paramsP = decodeParams =≪ P.many (try paramP)
113 paramP ∷ Parser ExtendedParam
114 paramP = do skipMany lws
121 → do (charset, payload) ← initialEncodedValue
122 return $ InitialEncodedParam name charset payload
124 → do payload ← encodedPayload
125 return $ ContinuedEncodedParam name sect payload
127 → do payload ← token <|> quotedStr
128 return $ AsciiParam name sect payload
130 nameP ∷ Parser (CIAscii, Integer, Bool)
131 nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$>
132 takeWhile1 (\c → isToken c ∧ c ≢ '*')
138 isEncoded ← option False $
141 return (name, sect, isEncoded)
143 initialEncodedValue ∷ Parser (CIAscii, BS.ByteString)
144 initialEncodedValue = do charset ← metadata
146 _ ← metadata -- Ignore the language tag
148 payload ← encodedPayload
149 return (charset, payload)
151 metadata ∷ Parser CIAscii
152 metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$>
153 takeWhile (\c → isToken c ∧ c ≢ '\'')
155 encodedPayload ∷ Parser BS.ByteString
156 encodedPayload = BS.concat <$> P.many (hexChar <|> rawChars)
158 hexChar ∷ Parser BS.ByteString
159 hexChar = do _ ← char '%'
160 h ← satisfy isHexChar
161 l ← satisfy isHexChar
162 return $ BS.singleton $ hexToChar h l
164 isHexChar ∷ Char → Bool
165 isHexChar = inClass "0-9a-fA-F"
167 hexToChar ∷ Char → Char → Char
169 = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l
171 hexToInt ∷ Char → Int
173 | c ≤ '9' = ord c - ord '0'
174 | c ≤ 'F' = ord c - ord 'A' + 10
175 | otherwise = ord c - ord 'a' + 10
177 rawChars ∷ Parser BS.ByteString
178 rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%')
180 decodeParams ∷ Monad m ⇒ [ExtendedParam] → m (Map CIAscii Text)
181 decodeParams = (mapM decodeSections =≪) ∘ sortBySection
183 sortBySection ∷ ∀m. Monad m
185 → m (Map CIAscii (Map Integer ExtendedParam))
186 sortBySection = flip go (∅)
189 → Map CIAscii (Map Integer ExtendedParam)
190 → m (Map CIAscii (Map Integer ExtendedParam))
193 = case M.lookup (epName x) m of
195 → let s = M.singleton (section x) x
196 m' = M.insert (epName x) s m
200 → case M.insertLookupWithKey (\_ s' _ → s') (section x) x s of
202 → let m' = M.insert (epName x) s' m
206 → fail (concat [ "Duplicate section "
209 , A.toString $ A.fromCIAscii $ epName x
213 decodeSections ∷ ∀m. Monad m ⇒ Map Integer ExtendedParam → m Text
214 decodeSections = flip (flip go 0) (∅)
216 go ∷ Map Integer ExtendedParam → Integer → S.Seq Text → m Text
217 go m expectedSect chunks
218 = case M.minViewWithKey m of
220 → return $ T.concat $ toList chunks
222 | sect ≡ expectedSect
225 → fail (concat [ "Missing section "
228 , A.toString $ A.fromCIAscii $ epName p