From 02d702c138d918386135245021d5778676ee6d0e Mon Sep 17 00:00:00 2001 From: PHO Date: Sat, 6 Aug 2011 14:21:02 +0900 Subject: [PATCH] MultipartForm Ditz-issue: 8959dadc07db1bd363283dee401073f6e48dc7fa --- Lucu.cabal | 41 ++--- Network/HTTP/Lucu/Headers.hs | 4 + Network/HTTP/Lucu/MultipartForm.hs | 257 +++++++++++++++-------------- Network/HTTP/Lucu/Parser/Http.hs | 70 +++++++- 4 files changed, 230 insertions(+), 142 deletions(-) diff --git a/Lucu.cabal b/Lucu.cabal index ec67718..de76987 100644 --- a/Lucu.cabal +++ b/Lucu.cabal @@ -45,26 +45,27 @@ Flag build-lucu-implant-file Library Build-Depends: - HsOpenSSL == 0.10.*, - ascii == 0.0.*, - attoparsec == 0.9.*, - base == 4.3.*, - base-unicode-symbols == 0.2.*, - base64-bytestring == 0.1.*, - bytestring == 0.9.*, - containers == 0.4.*, - filepath == 1.2.*, - directory == 1.1.*, - haskell-src == 1.0.*, - hxt == 9.1.*, - mtl == 2.0.*, - network == 2.3.*, - stm == 2.2.*, - text == 0.11.*, - time == 1.2.*, - time-http == 0.1.*, - unix == 2.4.*, - zlib == 0.5.* + HsOpenSSL == 0.10.*, + ascii == 0.0.*, + attoparsec == 0.9.*, + base == 4.3.*, + base-unicode-symbols == 0.2.*, + base64-bytestring == 0.1.*, + bytestring == 0.9.*, + containers == 0.4.*, + containers-unicode-symbols == 0.3.*, + filepath == 1.2.*, + directory == 1.1.*, + haskell-src == 1.0.*, + hxt == 9.1.*, + mtl == 2.0.*, + network == 2.3.*, + stm == 2.2.*, + text == 0.11.*, + time == 1.2.*, + time-http == 0.1.*, + unix == 2.4.*, + zlib == 0.5.* Exposed-Modules: Network.HTTP.Lucu diff --git a/Network/HTTP/Lucu/Headers.hs b/Network/HTTP/Lucu/Headers.hs index cfb3fb2..4241455 100644 --- a/Network/HTTP/Lucu/Headers.hs +++ b/Network/HTTP/Lucu/Headers.hs @@ -57,6 +57,10 @@ class HasHeaders a where Headers m → setHeaders a $ Headers $ M.insert key val m +instance HasHeaders Headers where + getHeaders = id + setHeaders _ = id + toHeaders ∷ [(CIAscii, Ascii)] → Headers {-# INLINE toHeaders #-} toHeaders = flip mkHeaders (∅) diff --git a/Network/HTTP/Lucu/MultipartForm.hs b/Network/HTTP/Lucu/MultipartForm.hs index 741427f..4dcf076 100644 --- a/Network/HTTP/Lucu/MultipartForm.hs +++ b/Network/HTTP/Lucu/MultipartForm.hs @@ -1,5 +1,8 @@ {-# LANGUAGE - UnboxedTuples + DoAndIfThenElse + , OverloadedStrings + , ScopedTypeVariables + , UnboxedTuples , UnicodeSyntax #-} module Network.HTTP.Lucu.MultipartForm @@ -7,147 +10,161 @@ module Network.HTTP.Lucu.MultipartForm , multipartFormP ) where -import qualified Data.ByteString.Char8 as C8 -import qualified Data.ByteString.Lazy.Char8 as L8 +import Control.Applicative hiding (many) +import Data.Ascii (Ascii, CIAscii, AsciiBuilder) +import qualified Data.Ascii as A +import Data.Attoparsec.Char8 +import qualified Data.ByteString.Char8 as BS +import qualified Data.ByteString.Lazy.Char8 as LS import Data.Char import Data.List -import Network.HTTP.Lucu.Abortion +import Data.Maybe +import Data.Monoid.Unicode +import Data.Text (Text) import Network.HTTP.Lucu.Headers import Network.HTTP.Lucu.Parser.Http import Network.HTTP.Lucu.Response import Network.HTTP.Lucu.Utils - -data Part = Part Headers L8.ByteString +import Prelude.Unicode -- |This data type represents a form value and possibly an uploaded -- file name. data FormData = FormData { - fdFileName :: Maybe String - , fdContent :: L8.ByteString + fdFileName ∷ Maybe Text + , fdContent ∷ LS.ByteString } -instance HasHeaders Part where - getHeaders (Part hs _) = hs - setHeaders (Part _ b) hs = Part hs b +data Part + = Part { + ptHeaders ∷ Headers + , ptContDispo ∷ ContDispo + , ptBody ∷ LS.ByteString + } +instance HasHeaders Part where + getHeaders = ptHeaders + setHeaders pt hs = pt { ptHeaders = hs } -data ContDispo = ContDispo String [(String, String)] +data ContDispo + = ContDispo { + dType ∷ !CIAscii + , dParams ∷ ![(CIAscii, Ascii)] + } -instance Show ContDispo where - show (ContDispo dType dParams) - = dType ++ - if null dParams then - "" +printContDispo ∷ ContDispo → Ascii +printContDispo d + = A.fromAsciiBuilder $ + ( A.toAsciiBuilder (A.fromCIAscii $ dType d) + ⊕ + ( if null $ dParams d then + (∅) else - "; " ++ joinWith "; " (map showPair dParams) - where - showPair :: (String, String) -> String - showPair (name, value) - = name ++ "=" ++ if any (not . isToken) value then - quoteStr value - else - value - - -multipartFormP :: String -> Parser [(String, FormData)] + A.toAsciiBuilder "; " ⊕ + joinWith "; " (map printPair $ dParams d) ) ) + where + printPair ∷ (CIAscii, Ascii) → AsciiBuilder + printPair (name, value) + = A.toAsciiBuilder (A.fromCIAscii name) ⊕ + A.toAsciiBuilder "=" ⊕ + ( if BS.any ((¬) ∘ isToken) $ A.toByteString value then + quoteStr value + else + A.toAsciiBuilder value ) + +multipartFormP ∷ Ascii → Parser [(Text, FormData)] multipartFormP boundary - = do parts <- many (partP boundary) - _ <- string "--" - _ <- string boundary - _ <- string "--" - _ <- crlf - eof - return $ map partToFormPair parts - - -partP :: String -> Parser Part + = try $ + do parts ← many (partP boundary) + _ ← string "--" + _ ← string $ A.toByteString boundary + _ ← string "--" + crlf + catMaybes <$> mapM partToFormPair parts + +partP ∷ Ascii → Parser Part partP boundary - = do _ <- string "--" - _ <- string boundary - _ <- crlf -- バウンダリの末尾に -- が付いてゐたらここで fail する。 - hs <- headersP - body <- bodyP boundary - return $ Part hs body - - -bodyP :: String -> Parser L8.ByteString + = try $ + do _ ← string "--" + _ ← string $ A.toByteString boundary + crlf + hs ← headersP + d ← getContDispo hs + body ← bodyP boundary + return $ Part hs d body + +bodyP ∷ Ascii → Parser LS.ByteString bodyP boundary - = do body <- manyChar $ - do notFollowedBy $ ( crlf >> - string "--" >> - string boundary ) - anyChar - _ <- crlf + = try $ + do body ← manyCharsTill anyChar $ + try $ + do crlf + _ ← string "--" + _ ← string $ A.toByteString boundary + return () + crlf return body - -partToFormPair :: Part -> (String, FormData) -partToFormPair part@(Part _ body) - = let name = partName part - fname = partFileName part - fd = FormData { - fdFileName = fname - , fdContent = body - } - in (name, fd) - -partName :: Part -> String -partName = getName' . getContDispoFormData - where - getName' :: ContDispo -> String - getName' dispo@(ContDispo _ dParams) - = case find ((== "name") . map toLower . fst) dParams of - Just (_, name) -> name - Nothing - -> abortPurely BadRequest [] - (Just $ "form-data without name: " ++ show dispo) - - -partFileName :: Part -> Maybe String -partFileName = getFileName' . getContDispoFormData - where - getFileName' :: ContDispo -> Maybe String - getFileName' (ContDispo _ dParams) - = do (_, fileName) <- find ((== "filename") . map toLower . fst) dParams - return fileName - -getContDispoFormData :: Part -> ContDispo -getContDispoFormData part - = let dispo@(ContDispo dType _) = getContDispo part - in - if map toLower dType == "form-data" then - dispo - else - abortPurely BadRequest [] - (Just $ "Content-Disposition type is not form-data: " ++ dType) - - -getContDispo :: Part -> ContDispo -getContDispo part - = case getHeader (C8.pack "Content-Disposition") part of - Nothing - -> abortPurely BadRequest [] - (Just "There is a part without Content-Disposition in the multipart/form-data.") - Just dispoStr - -> case parse contDispoP (L8.fromChunks [dispoStr]) of - (# Success dispo, _ #) - -> dispo - (# _, _ #) - -> abortPurely BadRequest [] - (Just $ "Unparsable Content-Disposition: " ++ C8.unpack dispoStr) - - -contDispoP :: Parser ContDispo -contDispoP = do dispoType <- token - params <- allowEOF $ many paramP +partToFormPair ∷ Monad m ⇒ Part → m (Maybe (Text, FormData)) +{-# INLINEABLE partToFormPair #-} +partToFormPair pt + | dType (ptContDispo pt) ≡ "form-data" + = do name ← partName pt + let fname = partFileName pt + let fd = FormData { + fdFileName = fname + , fdContent = ptBody pt + } + return $ Just (name, fd) + | otherwise + = return Nothing + +partName ∷ Monad m ⇒ Part → m Text +{-# INLINEABLE partName #-} +partName pt + = case find ((≡ "name") ∘ fst) $ dParams $ ptContDispo pt of + Just (_, name) + → return name + Nothing + → fail ("form-data without name: " ⧺ + A.toString (printContDispo $ ptContDispo pt)) + +partFileName ∷ Part → Maybe Text +{-# INLINEABLE partFileName #-} +partFileName pt + = snd <$> (find ((== "filename") ∘ fst) $ dParams $ ptContDispo pt) + +getContDispo ∷ Monad m ⇒ Headers → m ContDispo +{-# INLINEABLE getContDispo #-} +getContDispo hdr + = case getHeader "Content-Disposition" hdr of + Nothing + → fail ("There is a part without Content-Disposition in the multipart/form-data.") + Just str + → let p = do d ← contDispoP + endOfInput + return d + bs = A.toByteString str + in + case parseOnly p bs of + Right d → return d + Left err → fail (concat [ "Unparsable Content-Disposition: " + , BS.unpack bs + , ": " + , err + ]) + +contDispoP ∷ Parser ContDispo +contDispoP = try $ + do dispoType ← A.toCIAscii <$> token + params ← many paramP return $ ContDispo dispoType params where - paramP :: Parser (String, String) - paramP = do _ <- many lws - _ <- char ';' - _ <- many lws - name <- token - _ <- char '=' - value <- token <|> quotedStr + paramP ∷ Parser (CIAscii, Ascii) + paramP = do skipMany lws + _ ← char ';' + skipMany lws + name ← A.toCIAscii <$> token + _ ← char '=' + value ← token <|> quotedStr return (name, value) diff --git a/Network/HTTP/Lucu/Parser/Http.hs b/Network/HTTP/Lucu/Parser/Http.hs index 65ba8b2..4ac11a4 100644 --- a/Network/HTTP/Lucu/Parser/Http.hs +++ b/Network/HTTP/Lucu/Parser/Http.hs @@ -1,6 +1,7 @@ {-# LANGUAGE BangPatterns , OverloadedStrings + , ScopedTypeVariables , UnicodeSyntax #-} -- |This is an auxiliary parser utilities for parsing things related @@ -27,16 +28,24 @@ module Network.HTTP.Lucu.Parser.Http , qvalue , atMost + , manyCharsTill ) where import Control.Applicative -import Control.Applicative.Unicode +import Control.Applicative.Unicode hiding ((∅)) import Control.Monad.Unicode import Data.Ascii (Ascii) import qualified Data.Ascii as A -import Data.Attoparsec.Char8 as P +import Data.Attoparsec.Char8 as P hiding (scan) import qualified Data.Attoparsec.FastSet as FS import qualified Data.ByteString.Char8 as BS +import qualified Data.ByteString.Lazy.Char8 as LS +import qualified Data.ByteString.Lazy.Internal as LS +import qualified Data.Foldable as F +import Data.Monoid +import Data.Monoid.Unicode +import qualified Data.Sequence as S +import Data.Sequence.Unicode hiding ((∅)) import Prelude.Unicode -- |@'isCtl' c@ is 'Prelude.False' iff @0x20 <= c < 0x7F@. @@ -162,3 +171,60 @@ atMost 0 _ = pure [] atMost n v = ( (:) <$> v ⊛ atMost (n-1) v ) <|> pure [] + + +data CharAccumState + = CharAccumState { + casChunks ∷ !(S.Seq BS.ByteString) + , casLastChunk ∷ !(S.Seq Char) + } + +instance Monoid CharAccumState where + mempty + = CharAccumState { + casChunks = (∅) + , casLastChunk = (∅) + } + mappend a b + = b { + casChunks = (casChunks a ⊳ lastChunk a) ⋈ casChunks b + } + +lastChunk ∷ CharAccumState → BS.ByteString +{-# INLINE lastChunk #-} +lastChunk = BS.pack ∘ F.toList ∘ casLastChunk + +snoc ∷ CharAccumState → Char → CharAccumState +{-# INLINEABLE snoc #-} +snoc cas c + | S.length (casLastChunk cas) ≥ LS.defaultChunkSize + = cas { + casChunks = casChunks cas ⊳ lastChunk cas + , casLastChunk = S.singleton c + } + | otherwise + = cas { + casLastChunk = casLastChunk cas ⊳ c + } + +finish ∷ CharAccumState → LS.ByteString +{-# INLINEABLE finish #-} +finish cas + = let chunks = F.toList $ casChunks cas ⊳ lastChunk cas + str = LS.fromChunks chunks + in + str + +manyCharsTill ∷ ∀m b. (Monad m, Alternative m) + ⇒ m Char + → m b + → m LS.ByteString +{-# INLINEABLE manyCharsTill #-} +manyCharsTill p end = scan (∅) + where + scan ∷ CharAccumState → m LS.ByteString + {-# INLINE scan #-} + scan s + = (end *> pure (finish s)) + <|> + (scan =≪ (snoc s <$> p)) -- 2.40.0