, ScopedTypeVariables
, UnicodeSyntax
#-}
--- |Provide facilities to encode/decode MIME parameter values in
+-- |Provide functionalities to encode/decode MIME parameter values in
-- character sets other than US-ASCII. See:
--- http://www.faqs.org/rfcs/rfc2231.html
+-- <http://www.faqs.org/rfcs/rfc2231.html>
+--
+-- You usually don't have to use this module directly.
module Network.HTTP.Lucu.RFC2231
( printParams
, paramsP
)
where
import Control.Applicative
+import qualified Control.Exception as E
+import Control.Monad hiding (mapM)
import Control.Monad.Unicode
import Data.Ascii (Ascii, CIAscii, AsciiBuilder)
import qualified Data.Ascii as A
import Data.Map (Map)
import qualified Data.Map as M
import Data.Monoid.Unicode
+import Data.Sequence (Seq, ViewL(..))
import qualified Data.Sequence as S
import Data.Sequence.Unicode hiding ((∅))
import Data.Text (Text)
import qualified Data.Text as T
+import qualified Data.Text.ICU.Convert as TC
import Data.Text.Encoding
+import Data.Text.ICU.Error
import Data.Traversable
import Data.Word
import Network.HTTP.Lucu.Parser.Http
import Network.HTTP.Lucu.Utils
import Prelude hiding (concat, mapM, takeWhile)
import Prelude.Unicode
+import System.IO.Unsafe
+-- |Convert parameter values to an 'AsciiBuilder'.
printParams ∷ Map CIAscii Text → AsciiBuilder
-printParams params
- | M.null params = (∅)
- | otherwise = A.toAsciiBuilder "; " ⊕
- joinWith "; " (map printPair $ M.toList params)
+{-# INLINEABLE printParams #-}
+printParams m = M.foldlWithKey f (∅) m
+ -- THINKME: Use foldlWithKey' for newer Data.Map
+ where
+ f ∷ AsciiBuilder → CIAscii → Text → AsciiBuilder
+ {-# INLINE f #-}
+ f ab k v = ab ⊕ A.toAsciiBuilder "; " ⊕ printPair k v
-printPair ∷ (CIAscii, Text) → AsciiBuilder
-printPair (name, value)
+printPair ∷ CIAscii → Text → AsciiBuilder
+{-# INLINEABLE printPair #-}
+printPair name value
| T.any (> '\xFF') value
= printPairInUTF8 name value
| otherwise
= printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value)
printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder
+{-# INLINEABLE printPairInUTF8 #-}
printPairInUTF8 name value
= A.toAsciiBuilder (A.fromCIAscii name) ⊕
A.toAsciiBuilder "*=utf-8''" ⊕
escapeUnsafeChars (encodeUtf8 value) (∅)
printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder
+{-# INLINEABLE printPairInAscii #-}
printPairInAscii name value
= A.toAsciiBuilder (A.fromCIAscii name) ⊕
A.toAsciiBuilder "=" ⊕
A.toAsciiBuilder value
escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder
+{-# INLINEABLE escapeUnsafeChars #-}
escapeUnsafeChars bs b
= case BS.uncons bs of
Nothing → b
b ⊕ toHex (fromIntegral $ fromEnum c)
toHex ∷ Word8 → AsciiBuilder
+{-# INLINEABLE toHex #-}
toHex o = A.toAsciiBuilder "%" ⊕
A.toAsciiBuilder (A.unsafeFromString [ toHex' (o `shiftR` 8)
, toHex' (o .&. 0x0F) ])
-
-toHex' ∷ Word8 → Char
-toHex' o
- | o ≤ 0x09 = toEnum $ fromIntegral $ fromEnum '0' + fromIntegral o
- | otherwise = toEnum $ fromIntegral $ fromEnum 'A' + fromIntegral (o - 0x0A)
-
+ where
+ toHex' ∷ Word8 → Char
+ {-# INLINEABLE toHex' #-}
+ toHex' h
+ | h ≤ 0x09 = toEnum $ fromIntegral
+ $ fromEnum '0' + fromIntegral h
+ | otherwise = toEnum $ fromIntegral
+ $ fromEnum 'A' + fromIntegral (h - 0x0A)
data ExtendedParam
= InitialEncodedParam {
}
section ∷ ExtendedParam → Integer
+{-# INLINE section #-}
section (InitialEncodedParam {..}) = 0
section ep = epSection ep
+-- |'Parser' for parameter values.
paramsP ∷ Parser (Map CIAscii Text)
+{-# INLINEABLE paramsP #-}
paramsP = decodeParams =≪ P.many (try paramP)
paramP ∷ Parser ExtendedParam
paramP = do skipMany lws
- _ ← char ';'
+ void $ char ';'
skipMany lws
epm ← nameP
- _ ← char '='
+ void $ char '='
case epm of
(name, 0, True)
→ do (charset, payload) ← initialEncodedValue
nameP ∷ Parser (CIAscii, Integer, Bool)
nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$>
takeWhile1 (\c → isToken c ∧ c ≢ '*')
- sect ← option 0 $
- try $
- do _ ← char '*'
- n ← decimal
- return n
- isEncoded ← option False $
- do _ ← char '*'
- return True
+ sect ← option 0 $ try (char '*' *> decimal )
+ isEncoded ← option False $ try (char '*' *> pure True)
return (name, sect, isEncoded)
initialEncodedValue ∷ Parser (CIAscii, BS.ByteString)
-initialEncodedValue = do charset ← metadata
- _ ← char '\''
- _ ← metadata -- Ignore the language tag
- _ ← char '\''
- payload ← encodedPayload
- return (charset, payload)
+initialEncodedValue
+ = do charset ← metadata
+ void $ char '\''
+ void $ metadata -- Ignore the language tag
+ void $ char '\''
+ payload ← encodedPayload
+ if charset ≡ "" then
+ -- NOTE: I'm not sure this is the right thing, but RFC
+ -- 2231 doesn't tell us what we should do when the
+ -- charset is omitted.
+ return ("US-ASCII", payload)
+ else
+ return (charset, payload)
where
metadata ∷ Parser CIAscii
metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$>
- takeWhile (\c → isToken c ∧ c ≢ '\'')
+ takeWhile (\c → c ≢ '\'' ∧ isToken c)
encodedPayload ∷ Parser BS.ByteString
+{-# INLINE encodedPayload #-}
encodedPayload = BS.concat <$> P.many (hexChar <|> rawChars)
hexChar ∷ Parser BS.ByteString
-hexChar = do _ ← char '%'
+{-# INLINEABLE hexChar #-}
+hexChar = do void $ char '%'
h ← satisfy isHexChar
l ← satisfy isHexChar
return $ BS.singleton $ hexToChar h l
isHexChar = inClass "0-9a-fA-F"
hexToChar ∷ Char → Char → Char
+{-# INLINE hexToChar #-}
hexToChar h l
= chr $ (hexToInt h `shiftL` 8) .&. hexToInt l
hexToInt ∷ Char → Int
+{-# INLINEABLE hexToInt #-}
hexToInt c
| c ≤ '9' = ord c - ord '0'
| c ≤ 'F' = ord c - ord 'A' + 10
| otherwise = ord c - ord 'a' + 10
rawChars ∷ Parser BS.ByteString
+{-# INLINE rawChars #-}
rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%')
decodeParams ∷ Monad m ⇒ [ExtendedParam] → m (Map CIAscii Text)
+{-# INLINE decodeParams #-}
decodeParams = (mapM decodeSections =≪) ∘ sortBySection
sortBySection ∷ ∀m. Monad m
in
go xs m'
Just s
- → case M.insertLookupWithKey (\_ s' _ → s') (section x) x s of
- (Nothing, s')
- → let m' = M.insert (epName x) s' m
+ → case M.lookup (section x) s of
+ Nothing
+ → let s' = M.insert (section x) x s
+ m' = M.insert (epName x) s' m
in
go xs m'
- (Just _, _)
+ Just _
→ fail (concat [ "Duplicate section "
, show $ section x
, " for parameter '"
])
decodeSections ∷ ∀m. Monad m ⇒ Map Integer ExtendedParam → m Text
-decodeSections = flip (flip go 0) (∅)
+decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅)
where
- go ∷ Map Integer ExtendedParam → Integer → S.Seq Text → m Text
- go m expectedSect chunks
+ toSeq ∷ Map Integer ExtendedParam
+ → Integer
+ → Seq ExtendedParam
+ → m (Seq ExtendedParam)
+ toSeq m expectedSect sects
= case M.minViewWithKey m of
Nothing
- → return $ T.concat $ toList chunks
+ → return sects
Just ((sect, p), m')
| sect ≡ expectedSect
- → error "FIXME"
+ → toSeq m' (expectedSect + 1) (sects ⊳ p)
| otherwise
→ fail (concat [ "Missing section "
, show $ section p
, A.toString $ A.fromCIAscii $ epName p
, "'"
])
+
+ decodeSeq ∷ Seq ExtendedParam → m Text
+ decodeSeq sects
+ = case S.viewl sects of
+ EmptyL
+ → fail "decodeSeq: internal error: empty seq"
+ InitialEncodedParam {..} :< xs
+ → do conv ← openConv epCharset
+ let t = TC.toUnicode conv epPayload
+ decodeSeq' (Just conv) xs $ S.singleton t
+ ContinuedEncodedParam {..} :< _
+ → fail "decodeSeq: internal error: CEP at section 0"
+ AsciiParam {..} :< xs
+ → let t = A.toText apPayload
+ in
+ decodeSeq' Nothing xs $ S.singleton t
+
+ decodeSeq' ∷ Maybe (TC.Converter)
+ → Seq ExtendedParam
+ → Seq Text
+ → m Text
+ decodeSeq' convM sects chunks
+ = case S.viewl sects of
+ EmptyL
+ → return $ T.concat $ toList chunks
+ InitialEncodedParam {..} :< _
+ → fail "decodeSeq': internal error: IEP at section > 0"
+ ContinuedEncodedParam {..} :< xs
+ → case convM of
+ Just conv
+ → let t = TC.toUnicode conv epPayload
+ in
+ decodeSeq' convM xs $ chunks ⊳ t
+ Nothing
+ → fail (concat [ "Section "
+ , show epSection
+ , " for parameter '"
+ , A.toString $ A.fromCIAscii epName
+ , "' is encoded but its first section is not"
+ ])
+ AsciiParam {..} :< xs
+ → let t = A.toText apPayload
+ in
+ decodeSeq' convM xs $ chunks ⊳ t
+
+ openConv ∷ CIAscii → m TC.Converter
+ openConv charset
+ = let cs = A.toString $ A.fromCIAscii charset
+ open' = TC.open cs (Just True)
+ in
+ case unsafePerformIO $ E.try open' of
+ Right conv → return conv
+ Left err → fail $ show (err ∷ ICUError)