X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=Network%2FHTTP%2FLucu%2FMultipartForm.hs;h=53174fa94d0a2f0ac448e4a53182800e5da64c43;hb=a362be1c8664306b970c32e1df9b62081498feb1;hp=c36b81905cb147ee37eb28c1463bcf8e51069dee;hpb=db4b61223e0d8b34079d3b190fb3e3644b0b4866;p=Lucu.git diff --git a/Network/HTTP/Lucu/MultipartForm.hs b/Network/HTTP/Lucu/MultipartForm.hs index c36b819..53174fa 100644 --- a/Network/HTTP/Lucu/MultipartForm.hs +++ b/Network/HTTP/Lucu/MultipartForm.hs @@ -1,50 +1,62 @@ {-# LANGUAGE DoAndIfThenElse + , FlexibleContexts , OverloadedStrings , RecordWildCards , ScopedTypeVariables , UnicodeSyntax #-} +-- |Parse \"multipart/form-data\" based on RFC 2388: +-- +-- +-- You usually don't have to use this module directly. module Network.HTTP.Lucu.MultipartForm ( FormData(..) - , multipartFormP + , parseMultipartFormData ) where import Control.Applicative hiding (many) +import Control.Applicative.Unicode hiding ((∅)) +import Control.Monad.Error +import Control.Monad.Unicode import Data.Ascii (Ascii, CIAscii) import qualified Data.Ascii as A -import Data.Attoparsec.Char8 -import qualified Data.ByteString.Char8 as BS +import Data.Attoparsec +import qualified Data.Attoparsec.Lazy as LP import qualified Data.ByteString.Lazy.Char8 as LS +import Data.ByteString.Lazy.Search +import Data.Foldable +import Data.List import Data.Map (Map) import qualified Data.Map as M import Data.Maybe import Data.Monoid.Unicode +import Data.Sequence (Seq) +import Data.Sequence.Unicode hiding ((∅)) import Data.Text (Text) +import qualified Data.Text as T import Network.HTTP.Lucu.Headers +import Network.HTTP.Lucu.MIMEType import Network.HTTP.Lucu.Parser.Http import Network.HTTP.Lucu.RFC2231 import Prelude.Unicode --- |This data type represents a form value and possibly an uploaded --- file name. +-- |'FormData' represents a form value and possibly an uploaded file +-- name. data FormData = FormData { - fdFileName ∷ Maybe Text - , fdContent ∷ LS.ByteString + fdFileName ∷ !(Maybe Text) + , fdMIMEType ∷ !MIMEType + , fdContent ∷ !(LS.ByteString) } data Part = Part { - ptHeaders ∷ Headers - , ptContDispo ∷ ContDispo - , ptBody ∷ LS.ByteString + ptContDispo ∷ !ContDispo + , ptContType ∷ !MIMEType + , ptBody ∷ !LS.ByteString } -instance HasHeaders Part where - getHeaders = ptHeaders - setHeaders pt hs = pt { ptHeaders = hs } - data ContDispo = ContDispo { dType ∷ !CIAscii @@ -53,91 +65,182 @@ data ContDispo printContDispo ∷ ContDispo → Ascii printContDispo d - = A.fromAsciiBuilder $ + = A.fromAsciiBuilder ( A.toAsciiBuilder (A.fromCIAscii $ dType d) ⊕ - printParams (dParams d) ) - -multipartFormP ∷ Ascii → Parser [(Text, FormData)] -multipartFormP boundary - = do parts ← many $ try $ partP boundary - _ ← string "--" - _ ← string $ A.toByteString boundary - _ ← string "--" - crlf - catMaybes <$> mapM partToFormPair parts - -partP ∷ Ascii → Parser Part -partP boundary - = do _ ← string "--" - _ ← string $ A.toByteString boundary - crlf - hs ← headersP - d ← getContDispo hs - body ← bodyP boundary - return $ Part hs d body - -bodyP ∷ Ascii → Parser LS.ByteString -bodyP boundary - = do body ← manyCharsTill anyChar $ - try $ - do crlf - _ ← string "--" - _ ← string $ A.toByteString boundary - return () - crlf - return body - -partToFormPair ∷ Monad m ⇒ Part → m (Maybe (Text, FormData)) + printMIMEParams (dParams d) ) + +-- |Parse \"multipart/form-data\" and return either @'Left' err@ or +-- @'Right' result@. Note that there are currently the following +-- limitations: +-- +-- * Multiple files embedded as \"multipart/mixed\" within the +-- \"multipart/form-data\" aren't decomposed. +-- +-- * \"Content-Transfer-Encoding\"s are always ignored. +-- +-- * RFC 2388 says that non-ASCII field names are encoded according +-- to the method in RFC 2047 +-- , but they aren't +-- decoded. +parseMultipartFormData ∷ Ascii -- ^boundary + → LS.ByteString -- ^input + → Either String [(Ascii, FormData)] +parseMultipartFormData boundary = (mapM partToFormPair =≪) ∘ go + where + go ∷ (Functor m, MonadError String m) + ⇒ LS.ByteString + → m [Part] + {-# INLINEABLE go #-} + go src + = case LP.parse (prologue boundary) src of + LP.Done src' _ + → go' src' (∅) + LP.Fail _ eCtx e + → throwError $ "Unparsable multipart/form-data: " + ⧺ intercalate ", " eCtx + ⧺ ": " + ⧺ e + go' ∷ (Functor m, MonadError String m) + ⇒ LS.ByteString + → Seq Part + → m [Part] + {-# INLINEABLE go' #-} + go' src xs + = case LP.parse epilogue src of + LP.Done _ _ + → return $ toList xs + LP.Fail _ _ _ + → do (src', x) ← parsePart boundary src + go' src' $ xs ⊳ x + +prologue ∷ Ascii → Parser () +prologue boundary + = ( (string "--" "prefix") + *> + (string (A.toByteString boundary) "boundary") + *> + pure () + ) + + "prologue" + +epilogue ∷ Parser () +epilogue = ( (string "--" "suffix") + *> + crlf + *> + endOfInput + ) + + "epilogue" + +parsePart ∷ (Functor m, MonadError String m) + ⇒ Ascii + → LS.ByteString + → m (LS.ByteString, Part) +{-# INLINEABLE parsePart #-} +parsePart boundary src + = case LP.parse partHeader src of + LP.Done src' hdrs + → do dispo ← getContDispo hdrs + cType ← fromMaybe defaultCType <$> getContType hdrs + (body, src'') + ← getBody boundary src' + return (src'', Part dispo cType body) + LP.Fail _ eCtx e + → throwError $ "unparsable part: " + ⧺ intercalate ", " eCtx + ⧺ ": " + ⧺ e + where + defaultCType ∷ MIMEType + defaultCType = parseMIMEType "text/plain" + +partHeader ∷ Parser Headers +partHeader = crlf *> headers + +getContDispo ∷ MonadError String m ⇒ Headers → m ContDispo +{-# INLINEABLE getContDispo #-} +getContDispo hdrs + = case getHeader "Content-Disposition" hdrs of + Nothing + → throwError "Content-Disposition is missing" + Just str + → case parseOnly p $ A.toByteString str of + Right d → return d + Left err → throwError $ "malformed Content-Disposition: " + ⧺ A.toString str + ⧺ ": " + ⧺ err + where + p = do dispo ← contentDisposition + endOfInput + return dispo + +contentDisposition ∷ Parser ContDispo +contentDisposition + = (ContDispo <$> (A.toCIAscii <$> token) ⊛ mimeParams) + + "contentDisposition" + +getContType ∷ MonadError String m ⇒ Headers → m (Maybe MIMEType) +{-# INLINEABLE getContType #-} +getContType hdrs + = case getHeader "Content-Type" hdrs of + Nothing + → return Nothing + Just str + → case parseOnly p $ A.toByteString str of + Right d → return $ Just d + Left err → throwError $ "malformed Content-Type: " + ⧺ A.toString str + ⧺ ": " + ⧺ err + where + p = do t ← mimeType + endOfInput + return t + +getBody ∷ MonadError String m + ⇒ Ascii + → LS.ByteString + → m (LS.ByteString, LS.ByteString) +{-# INLINEABLE getBody #-} +getBody boundary src + = case breakFindAfter (A.toByteString boundary) src of + ((before, after), True) + → return (before, after) + _ → throwError "missing boundary" + +partToFormPair ∷ MonadError String m ⇒ Part → m (Ascii, FormData) {-# INLINEABLE partToFormPair #-} -partToFormPair pt - | dType (ptContDispo pt) ≡ "form-data" +partToFormPair pt@(Part {..}) + | dType ptContDispo ≡ "form-data" = do name ← partName pt - let fname = partFileName pt - let fd = FormData { - fdFileName = fname - , fdContent = ptBody pt - } - return $ Just (name, fd) + let fd = FormData { + fdFileName = partFileName pt + , fdMIMEType = ptContType + , fdContent = ptBody + } + return (name, fd) | otherwise - = return Nothing + = throwError $ "disposition type is not \"form-data\": " + ⧺ A.toString (A.fromCIAscii $ dType ptContDispo) -partName ∷ Monad m ⇒ Part → m Text +partName ∷ MonadError String m ⇒ Part → m Ascii {-# INLINEABLE partName #-} partName (Part {..}) = case M.lookup "name" $ dParams ptContDispo of Just name - → return name + → case A.fromText name of + Just a → return a + Nothing → throwError $ "Non-ascii part name: " + ⧺ T.unpack name Nothing - → fail ("form-data without name: " ⧺ - A.toString (printContDispo ptContDispo)) + → throwError $ "form-data without name: " + ⧺ A.toString (printContDispo ptContDispo) partFileName ∷ Part → Maybe Text -{-# INLINEABLE partFileName #-} partFileName (Part {..}) = M.lookup "filename" $ dParams ptContDispo - -getContDispo ∷ Monad m ⇒ Headers → m ContDispo -{-# INLINEABLE getContDispo #-} -getContDispo hdr - = case getHeader "Content-Disposition" hdr of - Nothing - → fail ("There is a part without Content-Disposition in the multipart/form-data.") - Just str - → let p = do d ← contDispoP - endOfInput - return d - bs = A.toByteString str - in - case parseOnly p bs of - Right d → return d - Left err → fail (concat [ "Unparsable Content-Disposition: " - , BS.unpack bs - , ": " - , err - ]) - -contDispoP ∷ Parser ContDispo -contDispoP = do dispoType ← A.toCIAscii <$> token - params ← paramsP - return $ ContDispo dispoType params