From 7a7fc1ababca9cfc667870b1c7da78378072bb6b Mon Sep 17 00:00:00 2001 From: PHO Date: Wed, 17 Aug 2011 11:31:00 +0900 Subject: [PATCH] Still working on RFC2231 Ditz-issue: 8959dadc07db1bd363283dee401073f6e48dc7fa --- Network/HTTP/Lucu/Parser/Http.hs | 6 +- Network/HTTP/Lucu/RFC2231.hs | 286 +++++++++++++++++++------------ 2 files changed, 176 insertions(+), 116 deletions(-) diff --git a/Network/HTTP/Lucu/Parser/Http.hs b/Network/HTTP/Lucu/Parser/Http.hs index 4ac11a4..4138db2 100644 --- a/Network/HTTP/Lucu/Parser/Http.hs +++ b/Network/HTTP/Lucu/Parser/Http.hs @@ -41,7 +41,7 @@ import qualified Data.Attoparsec.FastSet as FS import qualified Data.ByteString.Char8 as BS import qualified Data.ByteString.Lazy.Char8 as LS import qualified Data.ByteString.Lazy.Internal as LS -import qualified Data.Foldable as F +import Data.Foldable import Data.Monoid import Data.Monoid.Unicode import qualified Data.Sequence as S @@ -192,7 +192,7 @@ instance Monoid CharAccumState where lastChunk ∷ CharAccumState → BS.ByteString {-# INLINE lastChunk #-} -lastChunk = BS.pack ∘ F.toList ∘ casLastChunk +lastChunk = BS.pack ∘ toList ∘ casLastChunk snoc ∷ CharAccumState → Char → CharAccumState {-# INLINEABLE snoc #-} @@ -210,7 +210,7 @@ snoc cas c finish ∷ CharAccumState → LS.ByteString {-# INLINEABLE finish #-} finish cas - = let chunks = F.toList $ casChunks cas ⊳ lastChunk cas + = let chunks = toList $ casChunks cas ⊳ lastChunk cas str = LS.fromChunks chunks in str diff --git a/Network/HTTP/Lucu/RFC2231.hs b/Network/HTTP/Lucu/RFC2231.hs index e0f6e42..a8e29cb 100644 --- a/Network/HTTP/Lucu/RFC2231.hs +++ b/Network/HTTP/Lucu/RFC2231.hs @@ -1,6 +1,8 @@ {-# LANGUAGE DoAndIfThenElse , OverloadedStrings + , RecordWildCards + , ScopedTypeVariables , UnicodeSyntax #-} -- |Provide facilities to encode/decode MIME parameter values in @@ -12,22 +14,27 @@ module Network.HTTP.Lucu.RFC2231 ) where import Control.Applicative +import Control.Monad.Unicode import Data.Ascii (Ascii, CIAscii, AsciiBuilder) import qualified Data.Ascii as A import Data.Attoparsec.Char8 as P import Data.Bits import qualified Data.ByteString.Char8 as BS import Data.Char +import Data.Foldable import Data.Map (Map) import qualified Data.Map as M import Data.Monoid.Unicode +import qualified Data.Sequence as S +import Data.Sequence.Unicode hiding ((∅)) import Data.Text (Text) import qualified Data.Text as T import Data.Text.Encoding +import Data.Traversable import Data.Word import Network.HTTP.Lucu.Parser.Http import Network.HTTP.Lucu.Utils -import Prelude hiding (takeWhile) +import Prelude hiding (concat, mapM, takeWhile) import Prelude.Unicode printParams ∷ Map CIAscii Text → AsciiBuilder @@ -35,48 +42,48 @@ printParams params | M.null params = (∅) | otherwise = A.toAsciiBuilder "; " ⊕ joinWith "; " (map printPair $ M.toList params) - where - printPair ∷ (CIAscii, Text) → AsciiBuilder - printPair (name, value) - | T.any (> '\xFF') value - = printPairInUTF8 name value - | otherwise - = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value) - - printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder - printPairInUTF8 name value - = A.toAsciiBuilder (A.fromCIAscii name) ⊕ - A.toAsciiBuilder "*=utf-8''" ⊕ - escapeUnsafeChars (encodeUtf8 value) (∅) - - printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder - printPairInAscii name value - = A.toAsciiBuilder (A.fromCIAscii name) ⊕ - A.toAsciiBuilder "=" ⊕ - if BS.any ((¬) ∘ isToken) (A.toByteString value) then - quoteStr value - else - A.toAsciiBuilder value - - escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder - escapeUnsafeChars bs b - = case BS.uncons bs of - Nothing → b - Just (c, bs') - | isToken c → escapeUnsafeChars bs' $ - b ⊕ A.toAsciiBuilder (A.unsafeFromString [c]) - | otherwise → escapeUnsafeChars bs' $ - b ⊕ toHex (fromIntegral $ fromEnum c) - - toHex ∷ Word8 → AsciiBuilder - toHex o = A.toAsciiBuilder "%" ⊕ - A.toAsciiBuilder (A.unsafeFromString [ toHex' (o `shiftR` 8) - , toHex' (o .&. 0x0F) ]) - - toHex' ∷ Word8 → Char - toHex' o - | o ≤ 0x09 = toEnum $ fromIntegral $ fromEnum '0' + fromIntegral o - | otherwise = toEnum $ fromIntegral $ fromEnum 'A' + fromIntegral (o - 0x0A) + +printPair ∷ (CIAscii, Text) → AsciiBuilder +printPair (name, value) + | T.any (> '\xFF') value + = printPairInUTF8 name value + | otherwise + = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value) + +printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder +printPairInUTF8 name value + = A.toAsciiBuilder (A.fromCIAscii name) ⊕ + A.toAsciiBuilder "*=utf-8''" ⊕ + escapeUnsafeChars (encodeUtf8 value) (∅) + +printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder +printPairInAscii name value + = A.toAsciiBuilder (A.fromCIAscii name) ⊕ + A.toAsciiBuilder "=" ⊕ + if BS.any ((¬) ∘ isToken) (A.toByteString value) then + quoteStr value + else + A.toAsciiBuilder value + +escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder +escapeUnsafeChars bs b + = case BS.uncons bs of + Nothing → b + Just (c, bs') + | isToken c → escapeUnsafeChars bs' $ + b ⊕ A.toAsciiBuilder (A.unsafeFromString [c]) + | otherwise → escapeUnsafeChars bs' $ + b ⊕ toHex (fromIntegral $ fromEnum c) + +toHex ∷ Word8 → AsciiBuilder +toHex o = A.toAsciiBuilder "%" ⊕ + A.toAsciiBuilder (A.unsafeFromString [ toHex' (o `shiftR` 8) + , toHex' (o .&. 0x0F) ]) + +toHex' ∷ Word8 → Char +toHex' o + | o ≤ 0x09 = toEnum $ fromIntegral $ fromEnum '0' + fromIntegral o + | otherwise = toEnum $ fromIntegral $ fromEnum 'A' + fromIntegral (o - 0x0A) data ExtendedParam @@ -96,75 +103,128 @@ data ExtendedParam , apPayload ∷ !Ascii } +section ∷ ExtendedParam → Integer +section (InitialEncodedParam {..}) = 0 +section ep = epSection ep + paramsP ∷ Parser (Map CIAscii Text) -paramsP = decodeParams <$> P.many (try paramP) +paramsP = decodeParams =≪ P.many (try paramP) + +paramP ∷ Parser ExtendedParam +paramP = do skipMany lws + _ ← char ';' + skipMany lws + epm ← nameP + _ ← char '=' + case epm of + (name, 0, True) + → do (charset, payload) ← initialEncodedValue + return $ InitialEncodedParam name charset payload + (name, sect, True) + → do payload ← encodedPayload + return $ ContinuedEncodedParam name sect payload + (name, sect, False) + → do payload ← token <|> quotedStr + return $ AsciiParam name sect payload + +nameP ∷ Parser (CIAscii, Integer, Bool) +nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$> + takeWhile1 (\c → isToken c ∧ c ≢ '*') + sect ← option 0 $ + try $ + do _ ← char '*' + n ← decimal + return n + isEncoded ← option False $ + do _ ← char '*' + return True + return (name, sect, isEncoded) + +initialEncodedValue ∷ Parser (CIAscii, BS.ByteString) +initialEncodedValue = do charset ← metadata + _ ← char '\'' + _ ← metadata -- Ignore the language tag + _ ← char '\'' + payload ← encodedPayload + return (charset, payload) + where + metadata ∷ Parser CIAscii + metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$> + takeWhile (\c → isToken c ∧ c ≢ '\'') + +encodedPayload ∷ Parser BS.ByteString +encodedPayload = BS.concat <$> P.many (hexChar <|> rawChars) + +hexChar ∷ Parser BS.ByteString +hexChar = do _ ← char '%' + h ← satisfy isHexChar + l ← satisfy isHexChar + return $ BS.singleton $ hexToChar h l + +isHexChar ∷ Char → Bool +isHexChar = inClass "0-9a-fA-F" + +hexToChar ∷ Char → Char → Char +hexToChar h l + = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l + +hexToInt ∷ Char → Int +hexToInt c + | c ≤ '9' = ord c - ord '0' + | c ≤ 'F' = ord c - ord 'A' + 10 + | otherwise = ord c - ord 'a' + 10 + +rawChars ∷ Parser BS.ByteString +rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%') + +decodeParams ∷ Monad m ⇒ [ExtendedParam] → m (Map CIAscii Text) +decodeParams = (mapM decodeSections =≪) ∘ sortBySection + +sortBySection ∷ ∀m. Monad m + ⇒ [ExtendedParam] + → m (Map CIAscii (Map Integer ExtendedParam)) +sortBySection = flip go (∅) + where + go ∷ [ExtendedParam] + → Map CIAscii (Map Integer ExtendedParam) + → m (Map CIAscii (Map Integer ExtendedParam)) + go [] m = return m + go (x:xs) m + = case M.lookup (epName x) m of + Nothing + → let s = M.singleton (section x) x + m' = M.insert (epName x) s m + in + go xs m' + Just s + → case M.insertLookupWithKey (\_ s' _ → s') (section x) x s of + (Nothing, s') + → let m' = M.insert (epName x) s' m + in + go xs m' + (Just _, _) + → fail (concat [ "Duplicate section " + , show $ section x + , " for parameter '" + , A.toString $ A.fromCIAscii $ epName x + , "'" + ]) + +decodeSections ∷ ∀m. Monad m ⇒ Map Integer ExtendedParam → m Text +decodeSections = flip (flip go 0) (∅) where - paramP ∷ Parser ExtendedParam - paramP = do skipMany lws - _ ← char ';' - skipMany lws - epm ← nameP - _ ← char '=' - case epm of - (name, 0, True) - → do (charset, payload) ← initialEncodedValue - return $ InitialEncodedParam name charset payload - (name, section, True) - → do payload ← encodedPayload - return $ ContinuedEncodedParam name section payload - (name, section, False) - → do payload ← token <|> quotedStr - return $ AsciiParam name section payload - - nameP ∷ Parser (CIAscii, Integer, Bool) - nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$> - takeWhile1 (\c → isToken c ∧ c ≢ '*') - section ← option 0 $ - try $ - do _ ← char '*' - n ← decimal - return n - isEncoded ← option False $ - do _ ← char '*' - return True - return (name, section, isEncoded) - - initialEncodedValue ∷ Parser (CIAscii, BS.ByteString) - initialEncodedValue = do charset ← metadata - _ ← char '\'' - _ ← metadata -- Ignore the language tag - _ ← char '\'' - payload ← encodedPayload - return (charset, payload) - where - metadata ∷ Parser CIAscii - metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$> - takeWhile (\c → isToken c ∧ c ≢ '\'') - - encodedPayload ∷ Parser BS.ByteString - encodedPayload = BS.concat <$> P.many (hexChar <|> literal) - where - hexChar ∷ Parser BS.ByteString - hexChar = do _ ← char '%' - h ← satisfy isHexChar - l ← satisfy isHexChar - return $ BS.singleton $ hexToChar h l - - isHexChar ∷ Char → Bool - isHexChar = inClass "0-9a-fA-F" - - hexToChar ∷ Char → Char → Char - hexToChar h l - = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l - - hexToInt ∷ Char → Int - hexToInt c - | c ≤ '9' = ord c - ord '0' - | c ≤ 'F' = ord c - ord 'A' + 10 - | otherwise = ord c - ord 'a' + 10 - - literal ∷ Parser BS.ByteString - literal = takeWhile1 (\c → isToken c ∧ c ≢ '%') - - decodeParams ∷ [ExtendedParam] → Map CIAscii Text - decodeParams = error "FIXME" + go ∷ Map Integer ExtendedParam → Integer → S.Seq Text → m Text + go m expectedSect chunks + = case M.minViewWithKey m of + Nothing + → return $ T.concat $ toList chunks + Just ((sect, p), m') + | sect ≡ expectedSect + → error "FIXME" + | otherwise + → fail (concat [ "Missing section " + , show $ section p + , " for parameter '" + , A.toString $ A.fromCIAscii $ epName p + , "'" + ]) -- 2.40.0