X-Git-Url: http://git.cielonegro.org/gitweb.cgi?p=Lucu.git;a=blobdiff_plain;f=Network%2FHTTP%2FLucu%2FMultipartForm.hs;h=98699e43ca37d2e2a2978236130f50b6a705c04a;hp=10d1f64276f07307c581e66d463492c3ef7358d6;hb=90fca0675b1694e69b8e431c989343855cbd125d;hpb=05f49fae07dfcac0c039f25c8a51123603918a93 diff --git a/Network/HTTP/Lucu/MultipartForm.hs b/Network/HTTP/Lucu/MultipartForm.hs index 10d1f64..98699e4 100644 --- a/Network/HTTP/Lucu/MultipartForm.hs +++ b/Network/HTTP/Lucu/MultipartForm.hs @@ -1,158 +1,255 @@ {-# LANGUAGE DoAndIfThenElse + , FlexibleInstances + , FlexibleContexts + , MultiParamTypeClasses , OverloadedStrings + , QuasiQuotes + , RecordWildCards , ScopedTypeVariables + , TemplateHaskell , UnicodeSyntax + , ViewPatterns #-} +-- |Parse \"multipart/form-data\" based on RFC 2388: +-- module Network.HTTP.Lucu.MultipartForm ( FormData(..) - , multipartFormP + , parseMultipartFormData ) where import Control.Applicative hiding (many) +import Control.Applicative.Unicode hiding ((∅)) +import Control.Monad.Error (MonadError, throwError) +import Control.Monad.Unicode import Data.Ascii (Ascii, CIAscii, AsciiBuilder) -import qualified Data.Ascii as A -import Data.Attoparsec.Char8 -import qualified Data.ByteString.Char8 as BS -import qualified Data.ByteString.Lazy.Char8 as LS -import Data.Char -import Data.List -import Data.Map (Map) +import Data.Attempt +import Data.Attoparsec +import qualified Data.Attoparsec.Lazy as LP +import qualified Data.ByteString as BS +import qualified Data.ByteString.Lazy as LS +import Data.ByteString.Lazy.Search +import Data.Collections +import Data.Convertible.Base +import Data.Convertible.Instances.Ascii () +import Data.Convertible.Utils +import Data.Default +import Data.List (intercalate) import Data.Maybe import Data.Monoid.Unicode +import Data.Sequence (Seq) import Data.Text (Text) import Network.HTTP.Lucu.Headers +import Network.HTTP.Lucu.MIMEParams +import Network.HTTP.Lucu.MIMEType +import Network.HTTP.Lucu.Parser import Network.HTTP.Lucu.Parser.Http -import Network.HTTP.Lucu.RFC2231 -import Network.HTTP.Lucu.Response import Network.HTTP.Lucu.Utils +import Prelude hiding (lookup, mapM) import Prelude.Unicode --- |This data type represents a form value and possibly an uploaded --- file name. +-- |'FormData' represents a form value and possibly an uploaded file +-- name. data FormData = FormData { - fdFileName ∷ Maybe Text - , fdContent ∷ LS.ByteString + -- | @'Nothing'@ for non-file values. + fdFileName ∷ !(Maybe Text) + -- | MIME Type of this value, defaulted to \"text/plain\". + , fdMIMEType ∷ !MIMEType + -- | The form value. + , fdContent ∷ !(LS.ByteString) } data Part = Part { - ptHeaders ∷ Headers - , ptContDispo ∷ ContDispo - , ptBody ∷ LS.ByteString + ptContDispo ∷ !ContDispo + , ptContType ∷ !MIMEType + , ptBody ∷ !LS.ByteString } -instance HasHeaders Part where - getHeaders = ptHeaders - setHeaders pt hs = pt { ptHeaders = hs } - data ContDispo = ContDispo { dType ∷ !CIAscii - , dParams ∷ !(Map CIAscii Text) + , dParams ∷ !MIMEParams } -printContDispo ∷ ContDispo → Ascii -printContDispo d - = A.fromAsciiBuilder $ - ( A.toAsciiBuilder (A.fromCIAscii $ dType d) - ⊕ - printParams (dParams d) ) - -multipartFormP ∷ Ascii → Parser [(Text, FormData)] -multipartFormP boundary - = try $ - do parts ← many (partP boundary) - _ ← string "--" - _ ← string $ A.toByteString boundary - _ ← string "--" - crlf - catMaybes <$> mapM partToFormPair parts - -partP ∷ Ascii → Parser Part -partP boundary - = try $ - do _ ← string "--" - _ ← string $ A.toByteString boundary - crlf - hs ← headersP - d ← getContDispo hs - body ← bodyP boundary - return $ Part hs d body - -bodyP ∷ Ascii → Parser LS.ByteString -bodyP boundary - = try $ - do body ← manyCharsTill anyChar $ - try $ - do crlf - _ ← string "--" - _ ← string $ A.toByteString boundary - return () - crlf - return body - -partToFormPair ∷ Monad m ⇒ Part → m (Maybe (Text, FormData)) +instance ConvertSuccess ContDispo Ascii where + {-# INLINE convertSuccess #-} + convertSuccess = convertSuccessVia ((⊥) ∷ AsciiBuilder) + +instance ConvertSuccess ContDispo AsciiBuilder where + {-# INLINE convertSuccess #-} + convertSuccess (ContDispo {..}) + = cs dType ⊕ cs dParams + +deriveAttempts [ ([t| ContDispo |], [t| Ascii |]) + , ([t| ContDispo |], [t| AsciiBuilder |]) + ] + +-- |Parse \"multipart/form-data\" to a list of @(name, +-- formData)@. Note that there are currently the following +-- limitations: +-- +-- * Multiple files embedded as \"multipart/mixed\" within the +-- \"multipart/form-data\" won't be decomposed. +-- +-- * \"Content-Transfer-Encoding\" is always ignored. +-- +-- * RFC 2388 () says +-- that non-ASCII field names are encoded according to the method +-- in RFC 2047 (), but this +-- function currently doesn't decode them. +parseMultipartFormData ∷ Ascii -- ^boundary + → LS.ByteString -- ^input + → Either String [(Ascii, FormData)] +parseMultipartFormData boundary = (mapM partToFormPair =≪) ∘ go + where + go ∷ (Functor m, MonadError String m) + ⇒ LS.ByteString + → m [Part] + {-# INLINEABLE go #-} + go src + = case LP.parse (prologue boundary) src of + LP.Done src' _ + → go' src' (∅) + LP.Fail _ eCtx e + → throwError $ "Unparsable multipart/form-data: " + ⧺ intercalate ", " eCtx + ⧺ ": " + ⧺ e + go' ∷ (Functor m, MonadError String m) + ⇒ LS.ByteString + → Seq Part + → m [Part] + {-# INLINEABLE go' #-} + go' src xs + = case LP.parse epilogue src of + LP.Done _ _ + → return $ toList xs + LP.Fail _ _ _ + → do (src', x) ← parsePart boundary src + go' src' $ xs ⊳ x + +prologue ∷ Ascii → Parser () +prologue boundary + = ( (string "--" "prefix") + *> + (string (cs boundary) "boundary") + *> + pure () + ) + + "prologue" + +epilogue ∷ Parser () +epilogue = finishOff ((string "--" "suffix") *> crlf) + + "epilogue" + +parsePart ∷ (Functor m, MonadError String m) + ⇒ Ascii + → LS.ByteString + → m (LS.ByteString, Part) +{-# INLINEABLE parsePart #-} +parsePart boundary src + = case LP.parse partHeader src of + LP.Done src' hdrs + → do dispo ← getContDispo hdrs + cType ← fromMaybe defaultCType <$> getContType hdrs + (body, src'') + ← getBody boundary src' + return (src'', Part dispo cType body) + LP.Fail _ eCtx e + → throwError $ "unparsable part: " + ⧺ intercalate ", " eCtx + ⧺ ": " + ⧺ e + where + defaultCType ∷ MIMEType + defaultCType = [mimeType| text/plain |] + +partHeader ∷ Parser Headers +{-# INLINE partHeader #-} +partHeader = crlf *> def + +getContDispo ∷ MonadError String m ⇒ Headers → m ContDispo +{-# INLINEABLE getContDispo #-} +getContDispo hdrs + = case getHeader "Content-Disposition" hdrs of + Nothing + → throwError "Content-Disposition is missing" + Just str + → case parseOnly (finishOff contentDisposition) $ cs str of + Right d → return d + Left err → throwError $ "malformed Content-Disposition: " + ⊕ cs str + ⊕ ": " + ⊕ err + +contentDisposition ∷ Parser ContDispo +{-# INLINEABLE contentDisposition #-} +contentDisposition + = (ContDispo <$> (cs <$> token) ⊛ def) + + "contentDisposition" + +getContType ∷ MonadError String m ⇒ Headers → m (Maybe MIMEType) +{-# INLINEABLE getContType #-} +getContType hdrs + = case getHeader "Content-Type" hdrs of + Nothing + → return Nothing + Just str + → case parseOnly (finishOff def) $ cs str of + Right d → return $ Just d + Left err → throwError $ "malformed Content-Type: " + ⊕ cs str + ⊕ ": " + ⊕ err + +getBody ∷ MonadError String m + ⇒ Ascii + → LS.ByteString + → m (LS.ByteString, LS.ByteString) +{-# INLINEABLE getBody #-} +getBody (("\r\n--" ⊕) ∘ cs → boundary) src + = case breakOn boundary src of + (before, after) + | LS.null after + → throwError "missing boundary" + | otherwise + → let len = fromIntegral $ BS.length boundary + after' = LS.drop len after + in + return (before, after') + +partToFormPair ∷ MonadError String m ⇒ Part → m (Ascii, FormData) {-# INLINEABLE partToFormPair #-} -partToFormPair pt - | dType (ptContDispo pt) ≡ "form-data" - = do name ← partName pt - let fname = partFileName pt - let fd = FormData { - fdFileName = fname - , fdContent = ptBody pt - } - return $ Just (name, fd) +partToFormPair pt@(Part {..}) + | dType ptContDispo ≡ "form-data" + = do name ← partName pt + let fd = FormData { + fdFileName = partFileName pt + , fdMIMEType = ptContType + , fdContent = ptBody + } + return (name, fd) | otherwise - = return Nothing + = throwError $ "disposition type is not \"form-data\": " + ⊕ cs (dType ptContDispo) -partName ∷ Monad m ⇒ Part → m Text +partName ∷ MonadError String m ⇒ Part → m Ascii {-# INLINEABLE partName #-} -partName pt - = case find ((≡ "name") ∘ fst) $ dParams $ ptContDispo pt of - Just (_, name) - → return name +partName (Part {..}) + = case lookup "name" $ dParams ptContDispo of + Just name + → case ca name of + Success a → return a + Failure e → throwError $ show e Nothing - → fail ("form-data without name: " ⧺ - A.toString (printContDispo $ ptContDispo pt)) + → throwError $ "form-data without name: " + ⊕ convertSuccessVia ((⊥) ∷ Ascii) ptContDispo partFileName ∷ Part → Maybe Text -{-# INLINEABLE partFileName #-} -partFileName pt - = snd <$> (find ((== "filename") ∘ fst) $ dParams $ ptContDispo pt) - -getContDispo ∷ Monad m ⇒ Headers → m ContDispo -{-# INLINEABLE getContDispo #-} -getContDispo hdr - = case getHeader "Content-Disposition" hdr of - Nothing - → fail ("There is a part without Content-Disposition in the multipart/form-data.") - Just str - → let p = do d ← contDispoP - endOfInput - return d - bs = A.toByteString str - in - case parseOnly p bs of - Right d → return d - Left err → fail (concat [ "Unparsable Content-Disposition: " - , BS.unpack bs - , ": " - , err - ]) - -contDispoP ∷ Parser ContDispo -contDispoP = try $ - do dispoType ← A.toCIAscii <$> token - params ← many paramP - return $ ContDispo dispoType params - where - paramP ∷ Parser (CIAscii, Ascii) - paramP = do skipMany lws - _ ← char ';' - skipMany lws - name ← A.toCIAscii <$> token - _ ← char '=' - value ← token <|> quotedStr - return (name, value) +partFileName (ptContDispo → ContDispo {..}) + = lookup "filename" dParams