X-Git-Url: http://git.cielonegro.org/gitweb.cgi?p=Lucu.git;a=blobdiff_plain;f=Network%2FHTTP%2FLucu%2FMultipartForm.hs;h=98699e43ca37d2e2a2978236130f50b6a705c04a;hp=72eef21c1ec9e99be27857e48169cd0b068d6c3e;hb=90fca0675b1694e69b8e431c989343855cbd125d;hpb=ece223c516e66223ef1d5d8e6bbe4054a235d983 diff --git a/Network/HTTP/Lucu/MultipartForm.hs b/Network/HTTP/Lucu/MultipartForm.hs index 72eef21..98699e4 100644 --- a/Network/HTTP/Lucu/MultipartForm.hs +++ b/Network/HTTP/Lucu/MultipartForm.hs @@ -1,150 +1,255 @@ {-# LANGUAGE DoAndIfThenElse + , FlexibleInstances + , FlexibleContexts + , MultiParamTypeClasses , OverloadedStrings + , QuasiQuotes , RecordWildCards , ScopedTypeVariables + , TemplateHaskell , UnicodeSyntax + , ViewPatterns #-} +-- |Parse \"multipart/form-data\" based on RFC 2388: +-- module Network.HTTP.Lucu.MultipartForm ( FormData(..) - , multipartFormP + , parseMultipartFormData ) where import Control.Applicative hiding (many) -import Control.Monad -import Data.Ascii (Ascii, CIAscii) -import qualified Data.Ascii as A +import Control.Applicative.Unicode hiding ((∅)) +import Control.Monad.Error (MonadError, throwError) +import Control.Monad.Unicode +import Data.Ascii (Ascii, CIAscii, AsciiBuilder) +import Data.Attempt import Data.Attoparsec -import qualified Data.ByteString.Char8 as BS -import qualified Data.ByteString.Lazy.Char8 as LS -import Data.Map (Map) -import qualified Data.Map as M +import qualified Data.Attoparsec.Lazy as LP +import qualified Data.ByteString as BS +import qualified Data.ByteString.Lazy as LS +import Data.ByteString.Lazy.Search +import Data.Collections +import Data.Convertible.Base +import Data.Convertible.Instances.Ascii () +import Data.Convertible.Utils +import Data.Default +import Data.List (intercalate) import Data.Maybe import Data.Monoid.Unicode +import Data.Sequence (Seq) import Data.Text (Text) import Network.HTTP.Lucu.Headers +import Network.HTTP.Lucu.MIMEParams +import Network.HTTP.Lucu.MIMEType import Network.HTTP.Lucu.Parser import Network.HTTP.Lucu.Parser.Http -import Network.HTTP.Lucu.RFC2231 +import Network.HTTP.Lucu.Utils +import Prelude hiding (lookup, mapM) import Prelude.Unicode --- |This data type represents a form value and possibly an uploaded --- file name. +-- |'FormData' represents a form value and possibly an uploaded file +-- name. data FormData = FormData { - fdFileName ∷ Maybe Text - , fdContent ∷ LS.ByteString + -- | @'Nothing'@ for non-file values. + fdFileName ∷ !(Maybe Text) + -- | MIME Type of this value, defaulted to \"text/plain\". + , fdMIMEType ∷ !MIMEType + -- | The form value. + , fdContent ∷ !(LS.ByteString) } data Part = Part { - ptHeaders ∷ Headers - , ptContDispo ∷ ContDispo - , ptBody ∷ LS.ByteString + ptContDispo ∷ !ContDispo + , ptContType ∷ !MIMEType + , ptBody ∷ !LS.ByteString } -instance HasHeaders Part where - getHeaders = ptHeaders - setHeaders pt hs = pt { ptHeaders = hs } - data ContDispo = ContDispo { dType ∷ !CIAscii - , dParams ∷ !(Map CIAscii Text) + , dParams ∷ !MIMEParams } -printContDispo ∷ ContDispo → Ascii -printContDispo d - = A.fromAsciiBuilder - ( A.toAsciiBuilder (A.fromCIAscii $ dType d) - ⊕ - printParams (dParams d) ) - -multipartFormP ∷ Ascii → Parser [(Text, FormData)] -multipartFormP boundary - = do void boundaryP - parts ← many $ partP boundaryP - void (string "--" "suffix") - crlf - catMaybes <$> mapM partToFormPair parts - - "multipartFormP" +instance ConvertSuccess ContDispo Ascii where + {-# INLINE convertSuccess #-} + convertSuccess = convertSuccessVia ((⊥) ∷ AsciiBuilder) + +instance ConvertSuccess ContDispo AsciiBuilder where + {-# INLINE convertSuccess #-} + convertSuccess (ContDispo {..}) + = cs dType ⊕ cs dParams + +deriveAttempts [ ([t| ContDispo |], [t| Ascii |]) + , ([t| ContDispo |], [t| AsciiBuilder |]) + ] + +-- |Parse \"multipart/form-data\" to a list of @(name, +-- formData)@. Note that there are currently the following +-- limitations: +-- +-- * Multiple files embedded as \"multipart/mixed\" within the +-- \"multipart/form-data\" won't be decomposed. +-- +-- * \"Content-Transfer-Encoding\" is always ignored. +-- +-- * RFC 2388 () says +-- that non-ASCII field names are encoded according to the method +-- in RFC 2047 (), but this +-- function currently doesn't decode them. +parseMultipartFormData ∷ Ascii -- ^boundary + → LS.ByteString -- ^input + → Either String [(Ascii, FormData)] +parseMultipartFormData boundary = (mapM partToFormPair =≪) ∘ go where - boundaryP ∷ Parser BS.ByteString - boundaryP = string ("--" ⊕ A.toByteString boundary) - - "boundaryP" - -partP ∷ Parser α → Parser Part -partP boundaryP - = do crlf - hs ← headersP - d ← getContDispo hs - body ← bodyP boundaryP - return $ Part hs d body + go ∷ (Functor m, MonadError String m) + ⇒ LS.ByteString + → m [Part] + {-# INLINEABLE go #-} + go src + = case LP.parse (prologue boundary) src of + LP.Done src' _ + → go' src' (∅) + LP.Fail _ eCtx e + → throwError $ "Unparsable multipart/form-data: " + ⧺ intercalate ", " eCtx + ⧺ ": " + ⧺ e + go' ∷ (Functor m, MonadError String m) + ⇒ LS.ByteString + → Seq Part + → m [Part] + {-# INLINEABLE go' #-} + go' src xs + = case LP.parse epilogue src of + LP.Done _ _ + → return $ toList xs + LP.Fail _ _ _ + → do (src', x) ← parsePart boundary src + go' src' $ xs ⊳ x + +prologue ∷ Ascii → Parser () +prologue boundary + = ( (string "--" "prefix") + *> + (string (cs boundary) "boundary") + *> + pure () + ) - "partP" + "prologue" + +epilogue ∷ Parser () +epilogue = finishOff ((string "--" "suffix") *> crlf) + + "epilogue" + +parsePart ∷ (Functor m, MonadError String m) + ⇒ Ascii + → LS.ByteString + → m (LS.ByteString, Part) +{-# INLINEABLE parsePart #-} +parsePart boundary src + = case LP.parse partHeader src of + LP.Done src' hdrs + → do dispo ← getContDispo hdrs + cType ← fromMaybe defaultCType <$> getContType hdrs + (body, src'') + ← getBody boundary src' + return (src'', Part dispo cType body) + LP.Fail _ eCtx e + → throwError $ "unparsable part: " + ⧺ intercalate ", " eCtx + ⧺ ": " + ⧺ e + where + defaultCType ∷ MIMEType + defaultCType = [mimeType| text/plain |] + +partHeader ∷ Parser Headers +{-# INLINE partHeader #-} +partHeader = crlf *> def + +getContDispo ∷ MonadError String m ⇒ Headers → m ContDispo +{-# INLINEABLE getContDispo #-} +getContDispo hdrs + = case getHeader "Content-Disposition" hdrs of + Nothing + → throwError "Content-Disposition is missing" + Just str + → case parseOnly (finishOff contentDisposition) $ cs str of + Right d → return d + Left err → throwError $ "malformed Content-Disposition: " + ⊕ cs str + ⊕ ": " + ⊕ err -bodyP ∷ Parser α → Parser LS.ByteString -bodyP boundaryP - = manyOctetsTill anyWord8 (try $ crlf *> boundaryP) +contentDisposition ∷ Parser ContDispo +{-# INLINEABLE contentDisposition #-} +contentDisposition + = (ContDispo <$> (cs <$> token) ⊛ def) - "bodyP" + "contentDisposition" + +getContType ∷ MonadError String m ⇒ Headers → m (Maybe MIMEType) +{-# INLINEABLE getContType #-} +getContType hdrs + = case getHeader "Content-Type" hdrs of + Nothing + → return Nothing + Just str + → case parseOnly (finishOff def) $ cs str of + Right d → return $ Just d + Left err → throwError $ "malformed Content-Type: " + ⊕ cs str + ⊕ ": " + ⊕ err + +getBody ∷ MonadError String m + ⇒ Ascii + → LS.ByteString + → m (LS.ByteString, LS.ByteString) +{-# INLINEABLE getBody #-} +getBody (("\r\n--" ⊕) ∘ cs → boundary) src + = case breakOn boundary src of + (before, after) + | LS.null after + → throwError "missing boundary" + | otherwise + → let len = fromIntegral $ BS.length boundary + after' = LS.drop len after + in + return (before, after') -partToFormPair ∷ Monad m ⇒ Part → m (Maybe (Text, FormData)) +partToFormPair ∷ MonadError String m ⇒ Part → m (Ascii, FormData) {-# INLINEABLE partToFormPair #-} -partToFormPair pt - | dType (ptContDispo pt) ≡ "form-data" +partToFormPair pt@(Part {..}) + | dType ptContDispo ≡ "form-data" = do name ← partName pt - let fname = partFileName pt - let fd = FormData { - fdFileName = fname - , fdContent = ptBody pt - } - return $ Just (name, fd) + let fd = FormData { + fdFileName = partFileName pt + , fdMIMEType = ptContType + , fdContent = ptBody + } + return (name, fd) | otherwise - = return Nothing + = throwError $ "disposition type is not \"form-data\": " + ⊕ cs (dType ptContDispo) -partName ∷ Monad m ⇒ Part → m Text +partName ∷ MonadError String m ⇒ Part → m Ascii {-# INLINEABLE partName #-} partName (Part {..}) - = case M.lookup "name" $ dParams ptContDispo of + = case lookup "name" $ dParams ptContDispo of Just name - → return name + → case ca name of + Success a → return a + Failure e → throwError $ show e Nothing - → fail ("form-data without name: " ⧺ - A.toString (printContDispo ptContDispo)) + → throwError $ "form-data without name: " + ⊕ convertSuccessVia ((⊥) ∷ Ascii) ptContDispo partFileName ∷ Part → Maybe Text -{-# INLINEABLE partFileName #-} -partFileName (Part {..}) - = M.lookup "filename" $ dParams ptContDispo - -getContDispo ∷ Monad m ⇒ Headers → m ContDispo -{-# INLINEABLE getContDispo #-} -getContDispo hdr - = case getHeader "Content-Disposition" hdr of - Nothing - → fail "There is a part without Content-Disposition in the multipart/form-data." - Just str - → let p = do d ← contDispoP - endOfInput - return d - bs = A.toByteString str - in - case parseOnly p bs of - Right d → return d - Left err → fail (concat [ "Unparsable Content-Disposition: " - , BS.unpack bs - , ": " - , err - ]) - -contDispoP ∷ Parser ContDispo -{-# INLINEABLE contDispoP #-} -contDispoP - = do dispoType ← A.toCIAscii <$> token - params ← paramsP - return $ ContDispo dispoType params - - "contDispoP" +partFileName (ptContDispo → ContDispo {..}) + = lookup "filename" dParams