+{-# LANGUAGE
+ DoAndIfThenElse
+ , FlexibleInstances
+ , FlexibleContexts
+ , MultiParamTypeClasses
+ , OverloadedStrings
+ , QuasiQuotes
+ , RecordWildCards
+ , ScopedTypeVariables
+ , TemplateHaskell
+ , UnicodeSyntax
+ , ViewPatterns
+ #-}
+-- |Parse \"multipart/form-data\" based on RFC 2388:
+-- <http://tools.ietf.org/html/rfc2388>
module Network.HTTP.Lucu.MultipartForm
( FormData(..)
- , multipartFormP
+ , parseMultipartFormData
)
where
-
-import qualified Data.ByteString.Char8 as C8
-import qualified Data.ByteString.Lazy.Char8 as L8
-import Data.Char
-import Data.List
-import Network.HTTP.Lucu.Abortion
-import Network.HTTP.Lucu.Headers
-import Network.HTTP.Lucu.Parser
-import Network.HTTP.Lucu.Parser.Http
-import Network.HTTP.Lucu.Response
-import Network.HTTP.Lucu.Utils
-
-
-data Part = Part Headers L8.ByteString
-
--- |This data type represents a form value and possibly an uploaded
--- file name.
+import Control.Applicative hiding (many)
+import Control.Applicative.Unicode hiding ((∅))
+import Control.Monad.Error (MonadError, throwError)
+import Control.Monad.Unicode
+import Data.Ascii (Ascii, CIAscii, AsciiBuilder)
+import Data.Attempt
+import Data.Attoparsec
+import qualified Data.Attoparsec.Lazy as LP
+import Data.Attoparsec.Parsable
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Lazy as LS
+import Data.ByteString.Lazy.Search
+import Data.Collections
+import Data.Convertible.Base
+import Data.Convertible.Instances.Ascii ()
+import Data.Convertible.Utils
+import Data.List (intercalate)
+import Data.Maybe
+import Data.Monoid.Unicode
+import Data.Sequence (Seq)
+import Data.Text (Text)
+import Network.HTTP.Lucu.Headers
+import Network.HTTP.Lucu.MIMEParams
+import Network.HTTP.Lucu.MIMEType
+import Network.HTTP.Lucu.Parser
+import Network.HTTP.Lucu.Parser.Http
+import Network.HTTP.Lucu.Utils
+import Prelude hiding (lookup, mapM)
+import Prelude.Unicode
+
+-- |'FormData' represents a form value and possibly an uploaded file
+-- name.
data FormData
= FormData {
- fdFileName :: Maybe String
- , fdContent :: L8.ByteString
+ -- | @'Nothing'@ for non-file values.
+ fdFileName ∷ !(Maybe Text)
+ -- | MIME Type of this value, defaulted to \"text/plain\".
+ , fdMIMEType ∷ !MIMEType
+ -- | The form value.
+ , fdContent ∷ !(LS.ByteString)
}
-instance HasHeaders Part where
- getHeaders (Part hs _) = hs
- setHeaders (Part _ b) hs = Part hs b
-
-
-data ContDispo = ContDispo String [(String, String)]
-
-instance Show ContDispo where
- show (ContDispo dType dParams)
- = dType ++
- if null dParams then
- ""
- else
- "; " ++ joinWith "; " (map showPair dParams)
- where
- showPair :: (String, String) -> String
- showPair (name, value)
- = name ++ "=" ++ if any (not . isToken) value then
- quoteStr value
- else
- value
-
-
-multipartFormP :: String -> Parser [(String, FormData)]
-multipartFormP boundary
- = do parts <- many (partP boundary)
- _ <- string "--"
- _ <- string boundary
- _ <- string "--"
- _ <- crlf
- eof
- return $ map partToFormPair parts
-
-
-partP :: String -> Parser Part
-partP boundary
- = do _ <- string "--"
- _ <- string boundary
- _ <- crlf -- バウンダリの末尾に -- が付いてゐたらここで fail する。
- hs <- headersP
- body <- bodyP boundary
- return $ Part hs body
-
-
-bodyP :: String -> Parser L8.ByteString
-bodyP boundary
- = do body <- manyChar $
- do notFollowedBy $ ( crlf >>
- string "--" >>
- string boundary )
- anyChar
- _ <- crlf
- return body
-
-
-partToFormPair :: Part -> (String, FormData)
-partToFormPair part@(Part _ body)
- = let name = partName part
- fname = partFileName part
- fd = FormData {
- fdFileName = fname
- , fdContent = body
- }
- in (name, fd)
-
-partName :: Part -> String
-partName = getName' . getContDispoFormData
- where
- getName' :: ContDispo -> String
- getName' dispo@(ContDispo _ dParams)
- = case find ((== "name") . map toLower . fst) dParams of
- Just (_, name) -> name
- Nothing
- -> abortPurely BadRequest []
- (Just $ "form-data without name: " ++ show dispo)
+data Part
+ = Part {
+ ptContDispo ∷ !ContDispo
+ , ptContType ∷ !MIMEType
+ , ptBody ∷ !LS.ByteString
+ }
+data ContDispo
+ = ContDispo {
+ dType ∷ !CIAscii
+ , dParams ∷ !MIMEParams
+ }
-partFileName :: Part -> Maybe String
-partFileName = getFileName' . getContDispoFormData
- where
- getFileName' :: ContDispo -> Maybe String
- getFileName' (ContDispo _ dParams)
- = do (_, fileName) <- find ((== "filename") . map toLower . fst) dParams
- return fileName
-
-getContDispoFormData :: Part -> ContDispo
-getContDispoFormData part
- = let dispo@(ContDispo dType _) = getContDispo part
- in
- if map toLower dType == "form-data" then
- dispo
- else
- abortPurely BadRequest []
- (Just $ "Content-Disposition type is not form-data: " ++ dType)
-
-
-getContDispo :: Part -> ContDispo
-getContDispo part
- = case getHeader (C8.pack "Content-Disposition") part of
- Nothing
- -> abortPurely BadRequest []
- (Just "There is a part without Content-Disposition in the multipart/form-data.")
- Just dispoStr
- -> case parse contDispoP (L8.fromChunks [dispoStr]) of
- (# Success dispo, _ #)
- -> dispo
- (# _, _ #)
- -> abortPurely BadRequest []
- (Just $ "Unparsable Content-Disposition: " ++ C8.unpack dispoStr)
-
-
-contDispoP :: Parser ContDispo
-contDispoP = do dispoType <- token
- params <- allowEOF $ many paramP
- return $ ContDispo dispoType params
+instance ConvertSuccess ContDispo Ascii where
+ {-# INLINE convertSuccess #-}
+ convertSuccess = convertSuccessVia ((⊥) ∷ AsciiBuilder)
+
+instance ConvertSuccess ContDispo AsciiBuilder where
+ {-# INLINE convertSuccess #-}
+ convertSuccess (ContDispo {..})
+ = cs dType ⊕ cs dParams
+
+deriveAttempts [ ([t| ContDispo |], [t| Ascii |])
+ , ([t| ContDispo |], [t| AsciiBuilder |])
+ ]
+
+-- |Parse \"multipart/form-data\" to a list of @(name,
+-- formData)@. Note that there are currently the following
+-- limitations:
+--
+-- * Multiple files embedded as \"multipart/mixed\" within the
+-- \"multipart/form-data\" won't be decomposed.
+--
+-- * \"Content-Transfer-Encoding\" is always ignored.
+--
+-- * RFC 2388 (<http://tools.ietf.org/html/rfc2388#section-3>) says
+-- that non-ASCII field names are encoded according to the method
+-- in RFC 2047 (<http://tools.ietf.org/html/rfc2047>), but this
+-- function currently doesn't decode them.
+parseMultipartFormData ∷ Ascii -- ^boundary
+ → LS.ByteString -- ^input
+ → Either String [(Ascii, FormData)]
+parseMultipartFormData boundary = (mapM partToFormPair =≪) ∘ go
where
- paramP :: Parser (String, String)
- paramP = do _ <- many lws
- _ <- char ';'
- _ <- many lws
- name <- token
- _ <- char '='
- value <- token <|> quotedStr
- return (name, value)
+ go ∷ (Functor m, MonadError String m)
+ ⇒ LS.ByteString
+ → m [Part]
+ {-# INLINEABLE go #-}
+ go src
+ = case LP.parse (prologue boundary) src of
+ LP.Done src' _
+ → go' src' (∅)
+ LP.Fail _ eCtx e
+ → throwError $ "Unparsable multipart/form-data: "
+ ⧺ intercalate ", " eCtx
+ ⧺ ": "
+ ⧺ e
+ go' ∷ (Functor m, MonadError String m)
+ ⇒ LS.ByteString
+ → Seq Part
+ → m [Part]
+ {-# INLINEABLE go' #-}
+ go' src xs
+ = case LP.parse epilogue src of
+ LP.Done _ _
+ → return $ toList xs
+ LP.Fail _ _ _
+ → do (src', x) ← parsePart boundary src
+ go' src' $ xs ⊳ x
+
+prologue ∷ Ascii → Parser ()
+prologue boundary
+ = ( (string "--" <?> "prefix")
+ *>
+ (string (cs boundary) <?> "boundary")
+ *>
+ pure ()
+ )
+ <?>
+ "prologue"
+
+epilogue ∷ Parser ()
+epilogue = finishOff ((string "--" <?> "suffix") *> crlf)
+ <?>
+ "epilogue"
+
+parsePart ∷ (Functor m, MonadError String m)
+ ⇒ Ascii
+ → LS.ByteString
+ → m (LS.ByteString, Part)
+{-# INLINEABLE parsePart #-}
+parsePart boundary src
+ = case LP.parse partHeader src of
+ LP.Done src' hdrs
+ → do dispo ← getContDispo hdrs
+ cType ← fromMaybe defaultCType <$> getContType hdrs
+ (body, src'')
+ ← getBody boundary src'
+ return (src'', Part dispo cType body)
+ LP.Fail _ eCtx e
+ → throwError $ "unparsable part: "
+ ⧺ intercalate ", " eCtx
+ ⧺ ": "
+ ⧺ e
+ where
+ defaultCType ∷ MIMEType
+ defaultCType = [mimeType| text/plain |]
+
+partHeader ∷ Parser Headers
+partHeader = crlf *> parser
+
+getContDispo ∷ MonadError String m ⇒ Headers → m ContDispo
+{-# INLINEABLE getContDispo #-}
+getContDispo hdrs
+ = case getHeader "Content-Disposition" hdrs of
+ Nothing
+ → throwError "Content-Disposition is missing"
+ Just str
+ → case parseOnly (finishOff contentDisposition) $ cs str of
+ Right d → return d
+ Left err → throwError $ "malformed Content-Disposition: "
+ ⊕ cs str
+ ⊕ ": "
+ ⊕ err
+
+contentDisposition ∷ Parser ContDispo
+contentDisposition
+ = (ContDispo <$> (cs <$> token) ⊛ parser)
+ <?>
+ "contentDisposition"
+
+getContType ∷ MonadError String m ⇒ Headers → m (Maybe MIMEType)
+{-# INLINEABLE getContType #-}
+getContType hdrs
+ = case getHeader "Content-Type" hdrs of
+ Nothing
+ → return Nothing
+ Just str
+ → case parseOnly (finishOff parser) $ cs str of
+ Right d → return $ Just d
+ Left err → throwError $ "malformed Content-Type: "
+ ⊕ cs str
+ ⊕ ": "
+ ⊕ err
+
+getBody ∷ MonadError String m
+ ⇒ Ascii
+ → LS.ByteString
+ → m (LS.ByteString, LS.ByteString)
+{-# INLINEABLE getBody #-}
+getBody (("\r\n--" ⊕) ∘ cs → boundary) src
+ = case breakOn boundary src of
+ (before, after)
+ | LS.null after
+ → throwError "missing boundary"
+ | otherwise
+ → let len = fromIntegral $ BS.length boundary
+ after' = LS.drop len after
+ in
+ return (before, after')
+
+partToFormPair ∷ MonadError String m ⇒ Part → m (Ascii, FormData)
+{-# INLINEABLE partToFormPair #-}
+partToFormPair pt@(Part {..})
+ | dType ptContDispo ≡ "form-data"
+ = do name ← partName pt
+ let fd = FormData {
+ fdFileName = partFileName pt
+ , fdMIMEType = ptContType
+ , fdContent = ptBody
+ }
+ return (name, fd)
+ | otherwise
+ = throwError $ "disposition type is not \"form-data\": "
+ ⊕ cs (dType ptContDispo)
+
+partName ∷ MonadError String m ⇒ Part → m Ascii
+{-# INLINEABLE partName #-}
+partName (Part {..})
+ = case lookup "name" $ dParams ptContDispo of
+ Just name
+ → case ca name of
+ Success a → return a
+ Failure e → throwError $ show e
+ Nothing
+ → throwError $ "form-data without name: "
+ ⊕ convertSuccessVia ((⊥) ∷ Ascii) ptContDispo
+
+partFileName ∷ Part → Maybe Text
+partFileName (ptContDispo → ContDispo {..})
+ = lookup "filename" dParams