X-Git-Url: http://git.cielonegro.org/gitweb.cgi?p=Lucu.git;a=blobdiff_plain;f=Network%2FHTTP%2FLucu%2FMIMEParams.hs;h=e4e4271a6fcdd405b25804c113778f4158ae344a;hp=b3edeb5836745779d8f2820eae92b90d19f98fcf;hb=90fca0675b1694e69b8e431c989343855cbd125d;hpb=5e561403ba8ad9c440cc2bf2bacb61ebc3c7a111 diff --git a/Network/HTTP/Lucu/MIMEParams.hs b/Network/HTTP/Lucu/MIMEParams.hs index b3edeb5..e4e4271 100644 --- a/Network/HTTP/Lucu/MIMEParams.hs +++ b/Network/HTTP/Lucu/MIMEParams.hs @@ -1,72 +1,73 @@ {-# LANGUAGE DeriveDataTypeable , DoAndIfThenElse + , FlexibleInstances , GeneralizedNewtypeDeriving + , MultiParamTypeClasses , OverloadedStrings , RecordWildCards , TemplateHaskell + , TypeSynonymInstances , UnicodeSyntax #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} +{-# OPTIONS_GHC -fno-warn-missing-methods #-} -- |Parsing and printing MIME parameter values -- (). module Network.HTTP.Lucu.MIMEParams - ( MIMEParams(..) - , printMIMEParams - , mimeParams + ( MIMEParams ) where -import Control.Applicative +import Control.Applicative hiding (empty) import Control.Monad hiding (mapM) import Control.Monad.Unicode import Data.Ascii (Ascii, CIAscii, AsciiBuilder) import qualified Data.Ascii as A -import Data.Attoparsec.Char8 as P +import Data.Attoparsec.Char8 import Data.Bits import qualified Data.ByteString.Char8 as BS import Data.Char -import Data.Data -import Data.Foldable -import Data.Map (Map) -import qualified Data.Map as M -import Data.Monoid +import Data.Collections +import Data.Collections.BaseInstances () +import qualified Data.Collections.Newtype.TH as C +import Data.Convertible.Base +import Data.Convertible.Instances.Ascii () +import Data.Convertible.Utils +import Data.Default +import qualified Data.Map as M (Map) import Data.Monoid.Unicode -import Data.Sequence (Seq, ViewL(..)) -import qualified Data.Sequence as S -import Data.Sequence.Unicode hiding ((∅)) +import Data.Sequence (Seq) import Data.Text (Text) import qualified Data.Text as T import Data.Text.Encoding import Data.Text.Encoding.Error -import Data.Traversable import Data.Word -import Language.Haskell.TH.Syntax +import Network.HTTP.Lucu.MIMEParams.Internal +import Network.HTTP.Lucu.OrphanInstances () import Network.HTTP.Lucu.Parser.Http import Network.HTTP.Lucu.Utils -import Prelude hiding (concat, mapM, takeWhile) +import Prelude hiding (concat, lookup, mapM, takeWhile) import Prelude.Unicode --- |A map from MIME parameter attributes to values. Attributes are --- always case-insensitive according to RFC 2045 --- (). -newtype MIMEParams - = MIMEParams (Map CIAscii Text) - deriving (Eq, Show, Read, Monoid, Typeable) +C.derive [d| instance Unfoldable MIMEParams (CIAscii, Text) + instance Foldable MIMEParams (CIAscii, Text) + instance Collection MIMEParams (CIAscii, Text) + instance Indexed MIMEParams CIAscii Text + instance Map MIMEParams CIAscii Text + instance SortingCollection MIMEParams (CIAscii, Text) + |] -instance Lift MIMEParams where - lift (MIMEParams m) = [| MIMEParams $(liftParams m) |] - where - liftParams ∷ Map CIAscii Text → Q Exp - liftParams = liftMap liftCIAscii liftText +instance ConvertSuccess MIMEParams Ascii where + {-# INLINE convertSuccess #-} + convertSuccess = convertSuccessVia ((⊥) ∷ AsciiBuilder) --- |Convert MIME parameter values to an 'AsciiBuilder'. -printMIMEParams ∷ MIMEParams → AsciiBuilder -{-# INLINEABLE printMIMEParams #-} -printMIMEParams (MIMEParams m) = M.foldlWithKey f (∅) m - -- THINKME: Use foldlWithKey' for newer Data.Map - where - f ∷ AsciiBuilder → CIAscii → Text → AsciiBuilder - {-# INLINE f #-} - f ab k v = ab ⊕ A.toAsciiBuilder "; " ⊕ printPair k v +instance ConvertSuccess MIMEParams AsciiBuilder where + {-# INLINEABLE convertSuccess #-} + convertSuccess = foldl' f (∅) + where + f ∷ AsciiBuilder → (CIAscii, Text) → AsciiBuilder + {-# INLINE f #-} + f ab (k, v) = ab ⊕ cs ("; " ∷ Ascii) ⊕ printPair k v printPair ∷ CIAscii → Text → AsciiBuilder {-# INLINEABLE printPair #-} @@ -79,19 +80,19 @@ printPair name value printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder {-# INLINEABLE printPairInUTF8 #-} printPairInUTF8 name value - = A.toAsciiBuilder (A.fromCIAscii name) ⊕ - A.toAsciiBuilder "*=utf-8''" ⊕ + = cs name ⊕ + cs ("*=utf-8''" ∷ Ascii) ⊕ escapeUnsafeChars (encodeUtf8 value) (∅) printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder {-# INLINEABLE printPairInAscii #-} printPairInAscii name value - = A.toAsciiBuilder (A.fromCIAscii name) ⊕ - A.toAsciiBuilder "=" ⊕ - if BS.any ((¬) ∘ isToken) (A.toByteString value) then + = cs name ⊕ + cs ("=" ∷ Ascii) ⊕ + if BS.any ((¬) ∘ isToken) (cs value) then quoteStr value else - A.toAsciiBuilder value + cs value escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder {-# INLINEABLE escapeUnsafeChars #-} @@ -100,15 +101,15 @@ escapeUnsafeChars bs b Nothing → b Just (c, bs') | isToken c → escapeUnsafeChars bs' $ - b ⊕ A.toAsciiBuilder (A.unsafeFromString [c]) + b ⊕ cs (A.unsafeFromString [c]) | otherwise → escapeUnsafeChars bs' $ b ⊕ toHex (fromIntegral $ fromEnum c) toHex ∷ Word8 → AsciiBuilder {-# INLINEABLE toHex #-} -toHex o = A.toAsciiBuilder "%" ⊕ - A.toAsciiBuilder (A.unsafeFromString [ toHex' (o `shiftR` 8) - , toHex' (o .&. 0x0F) ]) +toHex o = cs ("%" ∷ Ascii) ⊕ + cs (A.unsafeFromString [ toHex' (o `shiftR` 8) + , toHex' (o .&. 0x0F) ]) where toHex' ∷ Word8 → Char {-# INLINEABLE toHex' #-} @@ -118,6 +119,10 @@ toHex o = A.toAsciiBuilder "%" ⊕ | otherwise = toEnum $ fromIntegral $ fromEnum 'A' + fromIntegral (h - 0x0A) +deriveAttempts [ ([t| MIMEParams |], [t| Ascii |]) + , ([t| MIMEParams |], [t| AsciiBuilder |]) + ] + data ExtendedParam = InitialEncodedParam { epName ∷ !CIAscii @@ -140,34 +145,33 @@ section ∷ ExtendedParam → Integer section (InitialEncodedParam {..}) = 0 section ep = epSection ep --- |'Parser' for MIME parameter values. -mimeParams ∷ Parser MIMEParams -{-# INLINEABLE mimeParams #-} -mimeParams = decodeParams =≪ P.many (try paramP) +instance Default (Parser MIMEParams) where + {-# INLINE def #-} + def = decodeParams =≪ many (try def) -paramP ∷ Parser ExtendedParam -paramP = do skipMany lws - void $ char ';' - skipMany lws - epm ← nameP - void $ char '=' - case epm of - (name, 0, True) - → do (charset, payload) ← initialEncodedValue - return $ InitialEncodedParam name charset payload - (name, sect, True) - → do payload ← encodedPayload - return $ ContinuedEncodedParam name sect payload - (name, sect, False) - → do payload ← token <|> quotedStr - return $ AsciiParam name sect payload +instance Default (Parser ExtendedParam) where + def = do skipMany lws + void $ char ';' + skipMany lws + epm ← name + void $ char '=' + case epm of + (nm, 0, True) + → do (charset, payload) ← initialEncodedValue + return $ InitialEncodedParam nm charset payload + (nm, sect, True) + → do payload ← encodedPayload + return $ ContinuedEncodedParam nm sect payload + (nm, sect, False) + → do payload ← token <|> quotedStr + return $ AsciiParam nm sect payload -nameP ∷ Parser (CIAscii, Integer, Bool) -nameP = do name ← (A.toCIAscii ∘ A.unsafeFromByteString) <$> - takeWhile1 (\c → isToken c ∧ c ≢ '*') - sect ← option 0 $ try (char '*' *> decimal ) - isEncoded ← option False $ try (char '*' *> pure True) - return (name, sect, isEncoded) +name ∷ Parser (CIAscii, Integer, Bool) +name = do nm ← (cs ∘ A.unsafeFromByteString) <$> + takeWhile1 (\c → isToken c ∧ c ≢ '*') + sect ← option 0 $ try (char '*' *> decimal ) + isEncoded ← option False $ try (char '*' *> pure True) + return (nm, sect, isEncoded) initialEncodedValue ∷ Parser (CIAscii, BS.ByteString) initialEncodedValue @@ -180,18 +184,17 @@ initialEncodedValue -- NOTE: I'm not sure this is the right thing, but RFC -- 2231 doesn't tell us what we should do when the -- charset is omitted. - return ("US-ASCII", payload) - -- FIXME: Rethink about this behaviour. + fail "charset is missing" else return (charset, payload) where metadata ∷ Parser CIAscii - metadata = (A.toCIAscii ∘ A.unsafeFromByteString) <$> + metadata = (cs ∘ A.unsafeFromByteString) <$> takeWhile (\c → c ≢ '\'' ∧ isToken c) encodedPayload ∷ Parser BS.ByteString {-# INLINE encodedPayload #-} -encodedPayload = BS.concat <$> P.many (hexChar <|> rawChars) +encodedPayload = BS.concat <$> many (hexChar <|> rawChars) hexChar ∷ Parser BS.ByteString {-# INLINEABLE hexChar #-} @@ -221,50 +224,52 @@ rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%') decodeParams ∷ (Functor m, Monad m) ⇒ [ExtendedParam] → m MIMEParams {-# INLINE decodeParams #-} -decodeParams = (MIMEParams <$>) ∘ (mapM decodeSections =≪) ∘ sortBySection +decodeParams = (MIMEParams <$>) + ∘ (mapM (\(k, v) → ((,) k) <$> decodeSections v) =≪) + ∘ sortBySection sortBySection ∷ Monad m ⇒ [ExtendedParam] - → m (Map CIAscii (Map Integer ExtendedParam)) + → m (M.Map CIAscii (M.Map Integer ExtendedParam)) sortBySection = flip go (∅) where go ∷ Monad m ⇒ [ExtendedParam] - → Map CIAscii (Map Integer ExtendedParam) - → m (Map CIAscii (Map Integer ExtendedParam)) + → M.Map CIAscii (M.Map Integer ExtendedParam) + → m (M.Map CIAscii (M.Map Integer ExtendedParam)) go [] m = return m go (x:xs) m - = case M.lookup (epName x) m of + = case lookup (epName x) m of Nothing - → let s = M.singleton (section x) x - m' = M.insert (epName x) s m + → let s = singleton (section x, x) + m' = insert (epName x, s) m in go xs m' Just s - → case M.lookup (section x) s of + → case lookup (section x) s of Nothing - → let s' = M.insert (section x) x s - m' = M.insert (epName x) s' m + → let s' = insert (section x, x ) s + m' = insert (epName x, s') m in go xs m' Just _ → fail (concat [ "Duplicate section " , show $ section x , " for parameter '" - , A.toString $ A.fromCIAscii $ epName x + , cs $ epName x , "'" ]) -decodeSections ∷ Monad m ⇒ Map Integer ExtendedParam → m Text +decodeSections ∷ Monad m ⇒ M.Map Integer ExtendedParam → m Text decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅) where toSeq ∷ Monad m - ⇒ Map Integer ExtendedParam + ⇒ M.Map Integer ExtendedParam → Integer → Seq ExtendedParam → m (Seq ExtendedParam) toSeq m expectedSect sects - = case M.minViewWithKey m of + = case minView m of Nothing → return sects Just ((sect, p), m') @@ -274,25 +279,23 @@ decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅) → fail (concat [ "Missing section " , show $ section p , " for parameter '" - , A.toString $ A.fromCIAscii $ epName p + , cs $ epName p , "'" ]) decodeSeq ∷ Monad m ⇒ Seq ExtendedParam → m Text decodeSeq sects - = case S.viewl sects of - EmptyL + = case front sects of + Nothing → fail "decodeSeq: internal error: empty seq" - InitialEncodedParam {..} :< xs + Just (InitialEncodedParam {..}, xs) → do d ← getDecoder epCharset t ← decodeStr d epPayload - decodeSeq' (Just d) xs $ S.singleton t - ContinuedEncodedParam {..} :< _ + decodeSeq' (Just d) xs $ singleton t + Just (ContinuedEncodedParam {..}, _) → fail "decodeSeq: internal error: CEP at section 0" - AsciiParam {..} :< xs - → let t = A.toText apPayload - in - decodeSeq' Nothing xs $ S.singleton t + Just (AsciiParam {..}, xs) + → decodeSeq' Nothing xs $ singleton $ cs apPayload decodeSeq' ∷ Monad m ⇒ Maybe Decoder @@ -300,12 +303,12 @@ decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅) → Seq Text → m Text decodeSeq' decoder sects chunks - = case S.viewl sects of - EmptyL + = case front sects of + Nothing → return $ T.concat $ toList chunks - InitialEncodedParam {..} :< _ + Just (InitialEncodedParam {}, _) → fail "decodeSeq': internal error: IEP at section > 0" - ContinuedEncodedParam {..} :< xs + Just (ContinuedEncodedParam {..}, xs) → case decoder of Just d → do t ← decodeStr d epPayload @@ -314,13 +317,11 @@ decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅) → fail (concat [ "Section " , show epSection , " for parameter '" - , A.toString $ A.fromCIAscii epName + , cs epName , "' is encoded but its first section is not" ]) - AsciiParam {..} :< xs - → let t = A.toText apPayload - in - decodeSeq' decoder xs $ chunks ⊳ t + Just (AsciiParam {..}, xs) + → decodeSeq' decoder xs $ chunks ⊳ cs apPayload type Decoder = BS.ByteString → Either UnicodeException Text @@ -334,5 +335,4 @@ getDecoder ∷ Monad m ⇒ CIAscii → m Decoder getDecoder charset | charset ≡ "UTF-8" = return decodeUtf8' | charset ≡ "US-ASCII" = return decodeUtf8' - | otherwise = fail $ "No decoders found for charset: " - ⧺ A.toString (A.fromCIAscii charset) + | otherwise = fail $ "No decoders found for charset: " ⊕ cs charset