X-Git-Url: http://git.cielonegro.org/gitweb.cgi?p=Lucu.git;a=blobdiff_plain;f=Network%2FHTTP%2FLucu%2FMultipartForm.hs;h=53174fa94d0a2f0ac448e4a53182800e5da64c43;hp=72eef21c1ec9e99be27857e48169cd0b068d6c3e;hb=a362be1c8664306b970c32e1df9b62081498feb1;hpb=45e3770f440c9fa8668f7e33063d630d73bcbe55 diff --git a/Network/HTTP/Lucu/MultipartForm.hs b/Network/HTTP/Lucu/MultipartForm.hs index 72eef21..53174fa 100644 --- a/Network/HTTP/Lucu/MultipartForm.hs +++ b/Network/HTTP/Lucu/MultipartForm.hs @@ -1,52 +1,62 @@ {-# LANGUAGE DoAndIfThenElse + , FlexibleContexts , OverloadedStrings , RecordWildCards , ScopedTypeVariables , UnicodeSyntax #-} +-- |Parse \"multipart/form-data\" based on RFC 2388: +-- +-- +-- You usually don't have to use this module directly. module Network.HTTP.Lucu.MultipartForm ( FormData(..) - , multipartFormP + , parseMultipartFormData ) where import Control.Applicative hiding (many) -import Control.Monad +import Control.Applicative.Unicode hiding ((∅)) +import Control.Monad.Error +import Control.Monad.Unicode import Data.Ascii (Ascii, CIAscii) import qualified Data.Ascii as A import Data.Attoparsec -import qualified Data.ByteString.Char8 as BS +import qualified Data.Attoparsec.Lazy as LP import qualified Data.ByteString.Lazy.Char8 as LS +import Data.ByteString.Lazy.Search +import Data.Foldable +import Data.List import Data.Map (Map) import qualified Data.Map as M import Data.Maybe import Data.Monoid.Unicode +import Data.Sequence (Seq) +import Data.Sequence.Unicode hiding ((∅)) import Data.Text (Text) +import qualified Data.Text as T import Network.HTTP.Lucu.Headers -import Network.HTTP.Lucu.Parser +import Network.HTTP.Lucu.MIMEType import Network.HTTP.Lucu.Parser.Http import Network.HTTP.Lucu.RFC2231 import Prelude.Unicode --- |This data type represents a form value and possibly an uploaded --- file name. +-- |'FormData' represents a form value and possibly an uploaded file +-- name. data FormData = FormData { - fdFileName ∷ Maybe Text - , fdContent ∷ LS.ByteString + fdFileName ∷ !(Maybe Text) + , fdMIMEType ∷ !MIMEType + , fdContent ∷ !(LS.ByteString) } data Part = Part { - ptHeaders ∷ Headers - , ptContDispo ∷ ContDispo - , ptBody ∷ LS.ByteString + ptContDispo ∷ !ContDispo + , ptContType ∷ !MIMEType + , ptBody ∷ !LS.ByteString } -instance HasHeaders Part where - getHeaders = ptHeaders - setHeaders pt hs = pt { ptHeaders = hs } - data ContDispo = ContDispo { dType ∷ !CIAscii @@ -58,93 +68,179 @@ printContDispo d = A.fromAsciiBuilder ( A.toAsciiBuilder (A.fromCIAscii $ dType d) ⊕ - printParams (dParams d) ) - -multipartFormP ∷ Ascii → Parser [(Text, FormData)] -multipartFormP boundary - = do void boundaryP - parts ← many $ partP boundaryP - void (string "--" "suffix") - crlf - catMaybes <$> mapM partToFormPair parts - - "multipartFormP" + printMIMEParams (dParams d) ) + +-- |Parse \"multipart/form-data\" and return either @'Left' err@ or +-- @'Right' result@. Note that there are currently the following +-- limitations: +-- +-- * Multiple files embedded as \"multipart/mixed\" within the +-- \"multipart/form-data\" aren't decomposed. +-- +-- * \"Content-Transfer-Encoding\"s are always ignored. +-- +-- * RFC 2388 says that non-ASCII field names are encoded according +-- to the method in RFC 2047 +-- , but they aren't +-- decoded. +parseMultipartFormData ∷ Ascii -- ^boundary + → LS.ByteString -- ^input + → Either String [(Ascii, FormData)] +parseMultipartFormData boundary = (mapM partToFormPair =≪) ∘ go where - boundaryP ∷ Parser BS.ByteString - boundaryP = string ("--" ⊕ A.toByteString boundary) - - "boundaryP" - -partP ∷ Parser α → Parser Part -partP boundaryP - = do crlf - hs ← headersP - d ← getContDispo hs - body ← bodyP boundaryP - return $ Part hs d body + go ∷ (Functor m, MonadError String m) + ⇒ LS.ByteString + → m [Part] + {-# INLINEABLE go #-} + go src + = case LP.parse (prologue boundary) src of + LP.Done src' _ + → go' src' (∅) + LP.Fail _ eCtx e + → throwError $ "Unparsable multipart/form-data: " + ⧺ intercalate ", " eCtx + ⧺ ": " + ⧺ e + go' ∷ (Functor m, MonadError String m) + ⇒ LS.ByteString + → Seq Part + → m [Part] + {-# INLINEABLE go' #-} + go' src xs + = case LP.parse epilogue src of + LP.Done _ _ + → return $ toList xs + LP.Fail _ _ _ + → do (src', x) ← parsePart boundary src + go' src' $ xs ⊳ x + +prologue ∷ Ascii → Parser () +prologue boundary + = ( (string "--" "prefix") + *> + (string (A.toByteString boundary) "boundary") + *> + pure () + ) - "partP" + "prologue" + +epilogue ∷ Parser () +epilogue = ( (string "--" "suffix") + *> + crlf + *> + endOfInput + ) + + "epilogue" + +parsePart ∷ (Functor m, MonadError String m) + ⇒ Ascii + → LS.ByteString + → m (LS.ByteString, Part) +{-# INLINEABLE parsePart #-} +parsePart boundary src + = case LP.parse partHeader src of + LP.Done src' hdrs + → do dispo ← getContDispo hdrs + cType ← fromMaybe defaultCType <$> getContType hdrs + (body, src'') + ← getBody boundary src' + return (src'', Part dispo cType body) + LP.Fail _ eCtx e + → throwError $ "unparsable part: " + ⧺ intercalate ", " eCtx + ⧺ ": " + ⧺ e + where + defaultCType ∷ MIMEType + defaultCType = parseMIMEType "text/plain" + +partHeader ∷ Parser Headers +partHeader = crlf *> headers + +getContDispo ∷ MonadError String m ⇒ Headers → m ContDispo +{-# INLINEABLE getContDispo #-} +getContDispo hdrs + = case getHeader "Content-Disposition" hdrs of + Nothing + → throwError "Content-Disposition is missing" + Just str + → case parseOnly p $ A.toByteString str of + Right d → return d + Left err → throwError $ "malformed Content-Disposition: " + ⧺ A.toString str + ⧺ ": " + ⧺ err + where + p = do dispo ← contentDisposition + endOfInput + return dispo -bodyP ∷ Parser α → Parser LS.ByteString -bodyP boundaryP - = manyOctetsTill anyWord8 (try $ crlf *> boundaryP) +contentDisposition ∷ Parser ContDispo +contentDisposition + = (ContDispo <$> (A.toCIAscii <$> token) ⊛ mimeParams) - "bodyP" + "contentDisposition" + +getContType ∷ MonadError String m ⇒ Headers → m (Maybe MIMEType) +{-# INLINEABLE getContType #-} +getContType hdrs + = case getHeader "Content-Type" hdrs of + Nothing + → return Nothing + Just str + → case parseOnly p $ A.toByteString str of + Right d → return $ Just d + Left err → throwError $ "malformed Content-Type: " + ⧺ A.toString str + ⧺ ": " + ⧺ err + where + p = do t ← mimeType + endOfInput + return t + +getBody ∷ MonadError String m + ⇒ Ascii + → LS.ByteString + → m (LS.ByteString, LS.ByteString) +{-# INLINEABLE getBody #-} +getBody boundary src + = case breakFindAfter (A.toByteString boundary) src of + ((before, after), True) + → return (before, after) + _ → throwError "missing boundary" -partToFormPair ∷ Monad m ⇒ Part → m (Maybe (Text, FormData)) +partToFormPair ∷ MonadError String m ⇒ Part → m (Ascii, FormData) {-# INLINEABLE partToFormPair #-} -partToFormPair pt - | dType (ptContDispo pt) ≡ "form-data" +partToFormPair pt@(Part {..}) + | dType ptContDispo ≡ "form-data" = do name ← partName pt - let fname = partFileName pt - let fd = FormData { - fdFileName = fname - , fdContent = ptBody pt - } - return $ Just (name, fd) + let fd = FormData { + fdFileName = partFileName pt + , fdMIMEType = ptContType + , fdContent = ptBody + } + return (name, fd) | otherwise - = return Nothing + = throwError $ "disposition type is not \"form-data\": " + ⧺ A.toString (A.fromCIAscii $ dType ptContDispo) -partName ∷ Monad m ⇒ Part → m Text +partName ∷ MonadError String m ⇒ Part → m Ascii {-# INLINEABLE partName #-} partName (Part {..}) = case M.lookup "name" $ dParams ptContDispo of Just name - → return name + → case A.fromText name of + Just a → return a + Nothing → throwError $ "Non-ascii part name: " + ⧺ T.unpack name Nothing - → fail ("form-data without name: " ⧺ - A.toString (printContDispo ptContDispo)) + → throwError $ "form-data without name: " + ⧺ A.toString (printContDispo ptContDispo) partFileName ∷ Part → Maybe Text -{-# INLINEABLE partFileName #-} partFileName (Part {..}) = M.lookup "filename" $ dParams ptContDispo - -getContDispo ∷ Monad m ⇒ Headers → m ContDispo -{-# INLINEABLE getContDispo #-} -getContDispo hdr - = case getHeader "Content-Disposition" hdr of - Nothing - → fail "There is a part without Content-Disposition in the multipart/form-data." - Just str - → let p = do d ← contDispoP - endOfInput - return d - bs = A.toByteString str - in - case parseOnly p bs of - Right d → return d - Left err → fail (concat [ "Unparsable Content-Disposition: " - , BS.unpack bs - , ": " - , err - ]) - -contDispoP ∷ Parser ContDispo -{-# INLINEABLE contDispoP #-} -contDispoP - = do dispoType ← A.toCIAscii <$> token - params ← paramsP - return $ ContDispo dispoType params - - "contDispoP"