11 -- |Parse \"multipart/form-data\" based on RFC 2388:
12 -- <http://tools.ietf.org/html/rfc2388>
13 module Network.HTTP.Lucu.MultipartForm
15 , parseMultipartFormData
18 import Control.Applicative hiding (many)
19 import Control.Applicative.Unicode hiding ((∅))
20 import Control.Monad.Error (MonadError, throwError)
21 import Control.Monad.Unicode
22 import Data.Ascii (Ascii, CIAscii)
23 import qualified Data.Ascii as A
24 import Data.Attoparsec
25 import qualified Data.Attoparsec.Lazy as LP
26 import qualified Data.ByteString as BS
27 import qualified Data.ByteString.Lazy as LS
28 import Data.ByteString.Lazy.Search
29 import Data.Collections
30 import Data.List (intercalate)
32 import Data.Monoid.Unicode
33 import Data.Sequence (Seq)
34 import Data.Text (Text)
35 import qualified Data.Text as T
36 import Network.HTTP.Lucu.Headers
37 import Network.HTTP.Lucu.MIMEParams
38 import Network.HTTP.Lucu.MIMEType (MIMEType)
39 import qualified Network.HTTP.Lucu.MIMEType as MT
40 import Network.HTTP.Lucu.MIMEType.TH
41 import Network.HTTP.Lucu.Parser
42 import Network.HTTP.Lucu.Parser.Http
43 import Network.HTTP.Lucu.Utils
44 import Prelude hiding (lookup, mapM)
45 import Prelude.Unicode
47 -- |'FormData' represents a form value and possibly an uploaded file
51 -- | @'Nothing'@ for non-file values.
52 fdFileName ∷ !(Maybe Text)
53 -- | MIME Type of this value, defaulted to \"text/plain\".
54 , fdMIMEType ∷ !MIMEType
56 , fdContent ∷ !(LS.ByteString)
61 ptContDispo ∷ !ContDispo
62 , ptContType ∷ !MIMEType
63 , ptBody ∷ !LS.ByteString
69 , dParams ∷ !MIMEParams
72 printContDispo ∷ ContDispo → Ascii
75 ( A.toAsciiBuilder (A.fromCIAscii $ dType d)
77 printMIMEParams (dParams d) )
79 -- |Parse \"multipart/form-data\" and return either @'Left' err@ or
80 -- @'Right' result@. Note that there are currently the following
83 -- * Multiple files embedded as \"multipart/mixed\" within the
84 -- \"multipart/form-data\" won't be decomposed.
86 -- * \"Content-Transfer-Encoding\" is always ignored.
88 -- * RFC 2388 (<http://tools.ietf.org/html/rfc2388#section-3>) says
89 -- that non-ASCII field names are encoded according to the method in
90 -- RFC 2047 (<http://tools.ietf.org/html/rfc2047>), but they won't
92 parseMultipartFormData ∷ Ascii -- ^boundary
93 → LS.ByteString -- ^input
94 → Either String [(Ascii, FormData)]
95 parseMultipartFormData boundary = (mapM partToFormPair =≪) ∘ go
97 go ∷ (Functor m, MonadError String m)
100 {-# INLINEABLE go #-}
102 = case LP.parse (prologue boundary) src of
106 → throwError $ "Unparsable multipart/form-data: "
107 ⧺ intercalate ", " eCtx
110 go' ∷ (Functor m, MonadError String m)
114 {-# INLINEABLE go' #-}
116 = case LP.parse epilogue src of
120 → do (src', x) ← parsePart boundary src
123 prologue ∷ Ascii → Parser ()
125 = ( (string "--" <?> "prefix")
127 (string (A.toByteString boundary) <?> "boundary")
135 epilogue = finishOff ((string "--" <?> "suffix") *> crlf)
139 parsePart ∷ (Functor m, MonadError String m)
142 → m (LS.ByteString, Part)
143 {-# INLINEABLE parsePart #-}
144 parsePart boundary src
145 = case LP.parse partHeader src of
147 → do dispo ← getContDispo hdrs
148 cType ← fromMaybe defaultCType <$> getContType hdrs
150 ← getBody boundary src'
151 return (src'', Part dispo cType body)
153 → throwError $ "unparsable part: "
154 ⧺ intercalate ", " eCtx
158 defaultCType ∷ MIMEType
159 defaultCType = [mimeType| text/plain |]
161 partHeader ∷ Parser Headers
162 partHeader = crlf *> headers
164 getContDispo ∷ MonadError String m ⇒ Headers → m ContDispo
165 {-# INLINEABLE getContDispo #-}
167 = case getHeader "Content-Disposition" hdrs of
169 → throwError "Content-Disposition is missing"
171 → case parseOnly (finishOff contentDisposition) $ A.toByteString str of
173 Left err → throwError $ "malformed Content-Disposition: "
178 contentDisposition ∷ Parser ContDispo
180 = (ContDispo <$> (A.toCIAscii <$> token) ⊛ mimeParams)
184 getContType ∷ MonadError String m ⇒ Headers → m (Maybe MIMEType)
185 {-# INLINEABLE getContType #-}
187 = case getHeader "Content-Type" hdrs of
191 → case parseOnly (finishOff MT.mimeType) $ A.toByteString str of
192 Right d → return $ Just d
193 Left err → throwError $ "malformed Content-Type: "
198 getBody ∷ MonadError String m
201 → m (LS.ByteString, LS.ByteString)
202 {-# INLINEABLE getBody #-}
203 getBody (("\r\n--" ⊕) ∘ A.toByteString → boundary) src
204 = case breakOn boundary src of
207 → throwError "missing boundary"
209 → let len = fromIntegral $ BS.length boundary
210 after' = LS.drop len after
212 return (before, after')
214 partToFormPair ∷ MonadError String m ⇒ Part → m (Ascii, FormData)
215 {-# INLINEABLE partToFormPair #-}
216 partToFormPair pt@(Part {..})
217 | dType ptContDispo ≡ "form-data"
218 = do name ← partName pt
220 fdFileName = partFileName pt
221 , fdMIMEType = ptContType
226 = throwError $ "disposition type is not \"form-data\": "
227 ⧺ A.toString (A.fromCIAscii $ dType ptContDispo)
229 partName ∷ MonadError String m ⇒ Part → m Ascii
230 {-# INLINEABLE partName #-}
232 = case lookup "name" $ dParams ptContDispo of
234 → case A.fromText name of
236 Nothing → throwError $ "Non-ascii part name: "
239 → throwError $ "form-data without name: "
240 ⧺ A.toString (printContDispo ptContDispo)
242 partFileName ∷ Part → Maybe Text
243 partFileName (ptContDispo → ContDispo {..})
244 = lookup "filename" dParams