{-# LANGUAGE DoAndIfThenElse , FlexibleContexts , OverloadedStrings , QuasiQuotes , RecordWildCards , ScopedTypeVariables , UnicodeSyntax , ViewPatterns #-} -- |Parse \"multipart/form-data\" based on RFC 2388: -- -- -- You usually don't have to use this module directly. module Network.HTTP.Lucu.MultipartForm ( FormData(..) , parseMultipartFormData ) where import Control.Applicative hiding (many) import Control.Applicative.Unicode hiding ((∅)) import Control.Monad.Error import Control.Monad.Unicode import Data.Ascii (Ascii, CIAscii) import qualified Data.Ascii as A import Data.Attoparsec import qualified Data.Attoparsec.Lazy as LP import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as LS import Data.ByteString.Lazy.Search import Data.Foldable import Data.List import qualified Data.Map as M import Data.Maybe import Data.Monoid.Unicode import Data.Sequence (Seq) import Data.Sequence.Unicode hiding ((∅)) import Data.Text (Text) import qualified Data.Text as T import Network.HTTP.Lucu.Headers import Network.HTTP.Lucu.MIMEParams import Network.HTTP.Lucu.MIMEType (MIMEType) import qualified Network.HTTP.Lucu.MIMEType as MT import Network.HTTP.Lucu.MIMEType.TH import Network.HTTP.Lucu.Parser import Network.HTTP.Lucu.Parser.Http import Prelude.Unicode -- |'FormData' represents a form value and possibly an uploaded file -- name. data FormData = FormData { -- | @'Nothing'@ for non-file values. fdFileName ∷ !(Maybe Text) -- | MIME Type of this value, defaulted to \"text/plain\". , fdMIMEType ∷ !MIMEType -- | The form value. , fdContent ∷ !(LS.ByteString) } data Part = Part { ptContDispo ∷ !ContDispo , ptContType ∷ !MIMEType , ptBody ∷ !LS.ByteString } data ContDispo = ContDispo { dType ∷ !CIAscii , dParams ∷ !MIMEParams } printContDispo ∷ ContDispo → Ascii printContDispo d = A.fromAsciiBuilder ( A.toAsciiBuilder (A.fromCIAscii $ dType d) ⊕ printMIMEParams (dParams d) ) -- |Parse \"multipart/form-data\" and return either @'Left' err@ or -- @'Right' result@. Note that there are currently the following -- limitations: -- -- * Multiple files embedded as \"multipart/mixed\" within the -- \"multipart/form-data\" won't be decomposed. -- -- * \"Content-Transfer-Encoding\" is always ignored. -- -- * RFC 2388 () says -- that non-ASCII field names are encoded according to the method in -- RFC 2047 (), but they won't -- be decoded. parseMultipartFormData ∷ Ascii -- ^boundary → LS.ByteString -- ^input → Either String [(Ascii, FormData)] parseMultipartFormData boundary = (mapM partToFormPair =≪) ∘ go where go ∷ (Functor m, MonadError String m) ⇒ LS.ByteString → m [Part] {-# INLINEABLE go #-} go src = case LP.parse (prologue boundary) src of LP.Done src' _ → go' src' (∅) LP.Fail _ eCtx e → throwError $ "Unparsable multipart/form-data: " ⧺ intercalate ", " eCtx ⧺ ": " ⧺ e go' ∷ (Functor m, MonadError String m) ⇒ LS.ByteString → Seq Part → m [Part] {-# INLINEABLE go' #-} go' src xs = case LP.parse epilogue src of LP.Done _ _ → return $ toList xs LP.Fail _ _ _ → do (src', x) ← parsePart boundary src go' src' $ xs ⊳ x prologue ∷ Ascii → Parser () prologue boundary = ( (string "--" "prefix") *> (string (A.toByteString boundary) "boundary") *> pure () ) "prologue" epilogue ∷ Parser () epilogue = finishOff ((string "--" "suffix") *> crlf) "epilogue" parsePart ∷ (Functor m, MonadError String m) ⇒ Ascii → LS.ByteString → m (LS.ByteString, Part) {-# INLINEABLE parsePart #-} parsePart boundary src = case LP.parse partHeader src of LP.Done src' hdrs → do dispo ← getContDispo hdrs cType ← fromMaybe defaultCType <$> getContType hdrs (body, src'') ← getBody boundary src' return (src'', Part dispo cType body) LP.Fail _ eCtx e → throwError $ "unparsable part: " ⧺ intercalate ", " eCtx ⧺ ": " ⧺ e where defaultCType ∷ MIMEType defaultCType = [mimeType| text/plain |] partHeader ∷ Parser Headers partHeader = crlf *> headers getContDispo ∷ MonadError String m ⇒ Headers → m ContDispo {-# INLINEABLE getContDispo #-} getContDispo hdrs = case getHeader "Content-Disposition" hdrs of Nothing → throwError "Content-Disposition is missing" Just str → case parseOnly (finishOff contentDisposition) $ A.toByteString str of Right d → return d Left err → throwError $ "malformed Content-Disposition: " ⧺ A.toString str ⧺ ": " ⧺ err contentDisposition ∷ Parser ContDispo contentDisposition = (ContDispo <$> (A.toCIAscii <$> token) ⊛ mimeParams) "contentDisposition" getContType ∷ MonadError String m ⇒ Headers → m (Maybe MIMEType) {-# INLINEABLE getContType #-} getContType hdrs = case getHeader "Content-Type" hdrs of Nothing → return Nothing Just str → case parseOnly (finishOff MT.mimeType) $ A.toByteString str of Right d → return $ Just d Left err → throwError $ "malformed Content-Type: " ⧺ A.toString str ⧺ ": " ⧺ err getBody ∷ MonadError String m ⇒ Ascii → LS.ByteString → m (LS.ByteString, LS.ByteString) {-# INLINEABLE getBody #-} getBody (("\r\n--" ⊕) ∘ A.toByteString → boundary) src = case breakOn boundary src of (before, after) | LS.null after → throwError "missing boundary" | otherwise → let len = fromIntegral $ BS.length boundary after' = LS.drop len after in return (before, after') partToFormPair ∷ MonadError String m ⇒ Part → m (Ascii, FormData) {-# INLINEABLE partToFormPair #-} partToFormPair pt@(Part {..}) | dType ptContDispo ≡ "form-data" = do name ← partName pt let fd = FormData { fdFileName = partFileName pt , fdMIMEType = ptContType , fdContent = ptBody } return (name, fd) | otherwise = throwError $ "disposition type is not \"form-data\": " ⧺ A.toString (A.fromCIAscii $ dType ptContDispo) partName ∷ MonadError String m ⇒ Part → m Ascii {-# INLINEABLE partName #-} partName (Part {..}) = case M.lookup "name" params of Just name → case A.fromText name of Just a → return a Nothing → throwError $ "Non-ascii part name: " ⧺ T.unpack name Nothing → throwError $ "form-data without name: " ⧺ A.toString (printContDispo ptContDispo) where params = case dParams ptContDispo of MIMEParams m → m partFileName ∷ Part → Maybe Text partFileName (dParams ∘ ptContDispo → MIMEParams m) = M.lookup "filename" m