X-Git-Url: http://git.cielonegro.org/gitweb.cgi?p=Lucu.git;a=blobdiff_plain;f=Network%2FHTTP%2FLucu%2FMultipartForm.hs;h=98699e43ca37d2e2a2978236130f50b6a705c04a;hp=c4631300e9efae3b3d14ac57917597ef685032fd;hb=90fca0675b1694e69b8e431c989343855cbd125d;hpb=70bf5bd248aa426ca4e410b3fb9a0529354aedaf diff --git a/Network/HTTP/Lucu/MultipartForm.hs b/Network/HTTP/Lucu/MultipartForm.hs index c463130..98699e4 100644 --- a/Network/HTTP/Lucu/MultipartForm.hs +++ b/Network/HTTP/Lucu/MultipartForm.hs @@ -1,156 +1,255 @@ {-# LANGUAGE - UnboxedTuples + DoAndIfThenElse + , FlexibleInstances + , FlexibleContexts + , MultiParamTypeClasses + , OverloadedStrings + , QuasiQuotes + , RecordWildCards + , ScopedTypeVariables + , TemplateHaskell , UnicodeSyntax + , ViewPatterns #-} +-- |Parse \"multipart/form-data\" based on RFC 2388: +-- module Network.HTTP.Lucu.MultipartForm ( FormData(..) - , multipartFormP + , parseMultipartFormData ) where - -import qualified Data.ByteString.Char8 as C8 -import qualified Data.ByteString.Lazy.Char8 as L8 -import Data.Char -import Data.List -import Network.HTTP.Lucu.Abortion -import Network.HTTP.Lucu.Headers -import Network.HTTP.Lucu.Parser -import Network.HTTP.Lucu.Parser.Http -import Network.HTTP.Lucu.Response -import Network.HTTP.Lucu.Utils - - -data Part = Part Headers L8.ByteString - --- |This data type represents a form value and possibly an uploaded --- file name. +import Control.Applicative hiding (many) +import Control.Applicative.Unicode hiding ((∅)) +import Control.Monad.Error (MonadError, throwError) +import Control.Monad.Unicode +import Data.Ascii (Ascii, CIAscii, AsciiBuilder) +import Data.Attempt +import Data.Attoparsec +import qualified Data.Attoparsec.Lazy as LP +import qualified Data.ByteString as BS +import qualified Data.ByteString.Lazy as LS +import Data.ByteString.Lazy.Search +import Data.Collections +import Data.Convertible.Base +import Data.Convertible.Instances.Ascii () +import Data.Convertible.Utils +import Data.Default +import Data.List (intercalate) +import Data.Maybe +import Data.Monoid.Unicode +import Data.Sequence (Seq) +import Data.Text (Text) +import Network.HTTP.Lucu.Headers +import Network.HTTP.Lucu.MIMEParams +import Network.HTTP.Lucu.MIMEType +import Network.HTTP.Lucu.Parser +import Network.HTTP.Lucu.Parser.Http +import Network.HTTP.Lucu.Utils +import Prelude hiding (lookup, mapM) +import Prelude.Unicode + +-- |'FormData' represents a form value and possibly an uploaded file +-- name. data FormData = FormData { - fdFileName :: Maybe String - , fdContent :: L8.ByteString + -- | @'Nothing'@ for non-file values. + fdFileName ∷ !(Maybe Text) + -- | MIME Type of this value, defaulted to \"text/plain\". + , fdMIMEType ∷ !MIMEType + -- | The form value. + , fdContent ∷ !(LS.ByteString) } -instance HasHeaders Part where - getHeaders (Part hs _) = hs - setHeaders (Part _ b) hs = Part hs b - - -data ContDispo = ContDispo String [(String, String)] - -instance Show ContDispo where - show (ContDispo dType dParams) - = dType ++ - if null dParams then - "" - else - "; " ++ joinWith "; " (map showPair dParams) - where - showPair :: (String, String) -> String - showPair (name, value) - = name ++ "=" ++ if any (not . isToken) value then - quoteStr value - else - value - - -multipartFormP :: String -> Parser [(String, FormData)] -multipartFormP boundary - = do parts <- many (partP boundary) - _ <- string "--" - _ <- string boundary - _ <- string "--" - _ <- crlf - eof - return $ map partToFormPair parts - - -partP :: String -> Parser Part -partP boundary - = do _ <- string "--" - _ <- string boundary - _ <- crlf -- バウンダリの末尾に -- が付いてゐたらここで fail する。 - hs <- headersP - body <- bodyP boundary - return $ Part hs body - - -bodyP :: String -> Parser L8.ByteString -bodyP boundary - = do body <- manyChar $ - do notFollowedBy $ ( crlf >> - string "--" >> - string boundary ) - anyChar - _ <- crlf - return body - - -partToFormPair :: Part -> (String, FormData) -partToFormPair part@(Part _ body) - = let name = partName part - fname = partFileName part - fd = FormData { - fdFileName = fname - , fdContent = body - } - in (name, fd) - -partName :: Part -> String -partName = getName' . getContDispoFormData - where - getName' :: ContDispo -> String - getName' dispo@(ContDispo _ dParams) - = case find ((== "name") . map toLower . fst) dParams of - Just (_, name) -> name - Nothing - -> abortPurely BadRequest [] - (Just $ "form-data without name: " ++ show dispo) +data Part + = Part { + ptContDispo ∷ !ContDispo + , ptContType ∷ !MIMEType + , ptBody ∷ !LS.ByteString + } +data ContDispo + = ContDispo { + dType ∷ !CIAscii + , dParams ∷ !MIMEParams + } -partFileName :: Part -> Maybe String -partFileName = getFileName' . getContDispoFormData - where - getFileName' :: ContDispo -> Maybe String - getFileName' (ContDispo _ dParams) - = do (_, fileName) <- find ((== "filename") . map toLower . fst) dParams - return fileName - -getContDispoFormData :: Part -> ContDispo -getContDispoFormData part - = let dispo@(ContDispo dType _) = getContDispo part - in - if map toLower dType == "form-data" then - dispo - else - abortPurely BadRequest [] - (Just $ "Content-Disposition type is not form-data: " ++ dType) - - -getContDispo :: Part -> ContDispo -getContDispo part - = case getHeader (C8.pack "Content-Disposition") part of - Nothing - -> abortPurely BadRequest [] - (Just "There is a part without Content-Disposition in the multipart/form-data.") - Just dispoStr - -> case parse contDispoP (L8.fromChunks [dispoStr]) of - (# Success dispo, _ #) - -> dispo - (# _, _ #) - -> abortPurely BadRequest [] - (Just $ "Unparsable Content-Disposition: " ++ C8.unpack dispoStr) - - -contDispoP :: Parser ContDispo -contDispoP = do dispoType <- token - params <- allowEOF $ many paramP - return $ ContDispo dispoType params +instance ConvertSuccess ContDispo Ascii where + {-# INLINE convertSuccess #-} + convertSuccess = convertSuccessVia ((⊥) ∷ AsciiBuilder) + +instance ConvertSuccess ContDispo AsciiBuilder where + {-# INLINE convertSuccess #-} + convertSuccess (ContDispo {..}) + = cs dType ⊕ cs dParams + +deriveAttempts [ ([t| ContDispo |], [t| Ascii |]) + , ([t| ContDispo |], [t| AsciiBuilder |]) + ] + +-- |Parse \"multipart/form-data\" to a list of @(name, +-- formData)@. Note that there are currently the following +-- limitations: +-- +-- * Multiple files embedded as \"multipart/mixed\" within the +-- \"multipart/form-data\" won't be decomposed. +-- +-- * \"Content-Transfer-Encoding\" is always ignored. +-- +-- * RFC 2388 () says +-- that non-ASCII field names are encoded according to the method +-- in RFC 2047 (), but this +-- function currently doesn't decode them. +parseMultipartFormData ∷ Ascii -- ^boundary + → LS.ByteString -- ^input + → Either String [(Ascii, FormData)] +parseMultipartFormData boundary = (mapM partToFormPair =≪) ∘ go where - paramP :: Parser (String, String) - paramP = do _ <- many lws - _ <- char ';' - _ <- many lws - name <- token - _ <- char '=' - value <- token <|> quotedStr - return (name, value) + go ∷ (Functor m, MonadError String m) + ⇒ LS.ByteString + → m [Part] + {-# INLINEABLE go #-} + go src + = case LP.parse (prologue boundary) src of + LP.Done src' _ + → go' src' (∅) + LP.Fail _ eCtx e + → throwError $ "Unparsable multipart/form-data: " + ⧺ intercalate ", " eCtx + ⧺ ": " + ⧺ e + go' ∷ (Functor m, MonadError String m) + ⇒ LS.ByteString + → Seq Part + → m [Part] + {-# INLINEABLE go' #-} + go' src xs + = case LP.parse epilogue src of + LP.Done _ _ + → return $ toList xs + LP.Fail _ _ _ + → do (src', x) ← parsePart boundary src + go' src' $ xs ⊳ x + +prologue ∷ Ascii → Parser () +prologue boundary + = ( (string "--" "prefix") + *> + (string (cs boundary) "boundary") + *> + pure () + ) + + "prologue" + +epilogue ∷ Parser () +epilogue = finishOff ((string "--" "suffix") *> crlf) + + "epilogue" + +parsePart ∷ (Functor m, MonadError String m) + ⇒ Ascii + → LS.ByteString + → m (LS.ByteString, Part) +{-# INLINEABLE parsePart #-} +parsePart boundary src + = case LP.parse partHeader src of + LP.Done src' hdrs + → do dispo ← getContDispo hdrs + cType ← fromMaybe defaultCType <$> getContType hdrs + (body, src'') + ← getBody boundary src' + return (src'', Part dispo cType body) + LP.Fail _ eCtx e + → throwError $ "unparsable part: " + ⧺ intercalate ", " eCtx + ⧺ ": " + ⧺ e + where + defaultCType ∷ MIMEType + defaultCType = [mimeType| text/plain |] + +partHeader ∷ Parser Headers +{-# INLINE partHeader #-} +partHeader = crlf *> def + +getContDispo ∷ MonadError String m ⇒ Headers → m ContDispo +{-# INLINEABLE getContDispo #-} +getContDispo hdrs + = case getHeader "Content-Disposition" hdrs of + Nothing + → throwError "Content-Disposition is missing" + Just str + → case parseOnly (finishOff contentDisposition) $ cs str of + Right d → return d + Left err → throwError $ "malformed Content-Disposition: " + ⊕ cs str + ⊕ ": " + ⊕ err + +contentDisposition ∷ Parser ContDispo +{-# INLINEABLE contentDisposition #-} +contentDisposition + = (ContDispo <$> (cs <$> token) ⊛ def) + + "contentDisposition" + +getContType ∷ MonadError String m ⇒ Headers → m (Maybe MIMEType) +{-# INLINEABLE getContType #-} +getContType hdrs + = case getHeader "Content-Type" hdrs of + Nothing + → return Nothing + Just str + → case parseOnly (finishOff def) $ cs str of + Right d → return $ Just d + Left err → throwError $ "malformed Content-Type: " + ⊕ cs str + ⊕ ": " + ⊕ err + +getBody ∷ MonadError String m + ⇒ Ascii + → LS.ByteString + → m (LS.ByteString, LS.ByteString) +{-# INLINEABLE getBody #-} +getBody (("\r\n--" ⊕) ∘ cs → boundary) src + = case breakOn boundary src of + (before, after) + | LS.null after + → throwError "missing boundary" + | otherwise + → let len = fromIntegral $ BS.length boundary + after' = LS.drop len after + in + return (before, after') + +partToFormPair ∷ MonadError String m ⇒ Part → m (Ascii, FormData) +{-# INLINEABLE partToFormPair #-} +partToFormPair pt@(Part {..}) + | dType ptContDispo ≡ "form-data" + = do name ← partName pt + let fd = FormData { + fdFileName = partFileName pt + , fdMIMEType = ptContType + , fdContent = ptBody + } + return (name, fd) + | otherwise + = throwError $ "disposition type is not \"form-data\": " + ⊕ cs (dType ptContDispo) + +partName ∷ MonadError String m ⇒ Part → m Ascii +{-# INLINEABLE partName #-} +partName (Part {..}) + = case lookup "name" $ dParams ptContDispo of + Just name + → case ca name of + Success a → return a + Failure e → throwError $ show e + Nothing + → throwError $ "form-data without name: " + ⊕ convertSuccessVia ((⊥) ∷ Ascii) ptContDispo + +partFileName ∷ Part → Maybe Text +partFileName (ptContDispo → ContDispo {..}) + = lookup "filename" dParams