From 7bc27fc4e86df6cb4d269b42252de735247f8c57 Mon Sep 17 00:00:00 2001 From: PHO Date: Fri, 21 Oct 2011 13:30:17 +0900 Subject: [PATCH] Resource paths should not be assumed to be encoded in UTF-8. HTTP/1.1 says nothing about that. Ditz-issue: 8959dadc07db1bd363283dee401073f6e48dc7fa --- Network/HTTP/Lucu/Interaction.hs | 3 +- Network/HTTP/Lucu/RequestReader.hs | 3 +- Network/HTTP/Lucu/Resource.hs | 16 +++---- Network/HTTP/Lucu/Resource/Internal.hs | 3 +- Network/HTTP/Lucu/Resource/Tree.hs | 65 +++++++++++++------------- Network/HTTP/Lucu/StaticFile.hs | 27 +++++++---- Network/HTTP/Lucu/Utils.hs | 6 +-- 7 files changed, 62 insertions(+), 61 deletions(-) diff --git a/Network/HTTP/Lucu/Interaction.hs b/Network/HTTP/Lucu/Interaction.hs index f57a474..86b6dbd 100644 --- a/Network/HTTP/Lucu/Interaction.hs +++ b/Network/HTTP/Lucu/Interaction.hs @@ -20,7 +20,6 @@ import qualified Data.ByteString as Strict import Data.Monoid.Unicode import Data.Sequence (Seq) import qualified Data.Sequence as S -import Data.Text (Text) import Network.Socket import Network.HTTP.Lucu.Config import Network.HTTP.Lucu.HttpVersion @@ -34,7 +33,7 @@ data Interaction = Interaction { , itrLocalPort ∷ !PortNumber , itrRemoteAddr ∷ !SockAddr , itrRemoteCert ∷ !(Maybe X509) - , itrResourcePath ∷ !(Maybe [Text]) + , itrResourcePath ∷ !(Maybe [Strict.ByteString]) , itrRequest ∷ !(Maybe Request) , itrExpectedContinue ∷ !(Maybe Bool) diff --git a/Network/HTTP/Lucu/RequestReader.hs b/Network/HTTP/Lucu/RequestReader.hs index a80ecae..ecaaadb 100644 --- a/Network/HTTP/Lucu/RequestReader.hs +++ b/Network/HTTP/Lucu/RequestReader.hs @@ -26,6 +26,7 @@ import Network.HTTP.Lucu.Postprocess import Network.HTTP.Lucu.Preprocess import Network.HTTP.Lucu.Request import Network.HTTP.Lucu.Response +import Network.HTTP.Lucu.Resource.Internal import Network.HTTP.Lucu.Resource.Tree import Network.Socket import Network.URI @@ -162,7 +163,7 @@ acceptRequestForExistentResource ∷ HandleLike h acceptRequestForExistentResource ctx oldItr input rsrcPath rsrcDef = do let itr = oldItr { itrResourcePath = Just rsrcPath } atomically $ enqueue ctx itr - do _ ← runResource rsrcDef itr + do _ ← spawnResource rsrcDef itr if reqMustHaveBody $ fromJust $ itrRequest itr then observeRequest ctx itr input else diff --git a/Network/HTTP/Lucu/Resource.hs b/Network/HTTP/Lucu/Resource.hs index eed224f..72b7517 100644 --- a/Network/HTTP/Lucu/Resource.hs +++ b/Network/HTTP/Lucu/Resource.hs @@ -205,21 +205,17 @@ getRequestVersion = reqVersion <$> getRequest -- 'Network.HTTP.Lucu.Resource.Tree.ResourceDef' is not greedy. See -- 'getResourcePath'. -- --- Note that the returned path is URI-decoded and then UTF-8 decoded. -getPathInfo ∷ Resource [Text] +-- Note that the returned path components are URI-decoded. +getPathInfo ∷ Resource [Strict.ByteString] getPathInfo = do rsrcPath ← getResourcePath reqPath ← splitPathInfo <$> getRequestURI - -- rsrcPath と reqPath の共通する先頭部分を reqPath か - -- ら全部取り除くと、それは PATH_INFO のやうなものにな - -- る。rsrcPath は全部一致してゐるに決まってゐる(でな - -- ければこの Resource が撰ばれた筈が無い)ので、 - -- rsrcPath の長さの分だけ削除すれば良い。 return $ drop (length rsrcPath) reqPath -- |Assume the query part of request URI as -- application\/x-www-form-urlencoded, and parse it into pairs of -- @(name, formData)@. This function doesn't read the request --- body. Field names are decoded in UTF-8. See 'getForm'. +-- body. Field names are decoded in UTF-8 for an hardly avoidable +-- reason. See 'getForm'. getQueryForm ∷ Resource [(Text, FormData)] getQueryForm = parse' <$> getRequestURI where @@ -563,8 +559,8 @@ getChunks' limit = go limit (∅) -- -- Field names in @multipart\/form-data@ will be precisely decoded in -- accordance with RFC 2231. On the other hand, --- @application\/x-www-form-urlencoded@ says nothing about the --- encoding of field names, so they'll always be decoded in +-- @application\/x-www-form-urlencoded@ says nothing about character +-- encodings for field names, so they'll always be decoded in -- UTF-8. (This could be a bad design, but I can't think of any better -- idea.) getForm ∷ Maybe Int → Resource [(Text, FormData)] diff --git a/Network/HTTP/Lucu/Resource/Internal.hs b/Network/HTTP/Lucu/Resource/Internal.hs index 418a330..1d01a82 100644 --- a/Network/HTTP/Lucu/Resource/Internal.hs +++ b/Network/HTTP/Lucu/Resource/Internal.hs @@ -41,7 +41,6 @@ import qualified Data.ByteString as Strict import Data.List import Data.Maybe import Data.Monoid.Unicode -import Data.Text (Text) import qualified Data.Text as T import Network.HTTP.Lucu.Abortion import Network.HTTP.Lucu.Config @@ -267,7 +266,7 @@ getRequest = (fromJust ∘ itrRequest) <$> getInteraction -- > ... -- > , ... -- > } -getResourcePath ∷ Resource [Text] +getResourcePath ∷ Resource [Strict.ByteString] getResourcePath = (fromJust ∘ itrResourcePath) <$> getInteraction -- |@'getChunk' n@ attempts to read a chunk of request body up to @n@ diff --git a/Network/HTTP/Lucu/Resource/Tree.hs b/Network/HTTP/Lucu/Resource/Tree.hs index 9ab6f66..f3fca16 100644 --- a/Network/HTTP/Lucu/Resource/Tree.hs +++ b/Network/HTTP/Lucu/Resource/Tree.hs @@ -15,28 +15,18 @@ module Network.HTTP.Lucu.Resource.Tree ) where import Control.Arrow -import Control.Applicative -import Data.Ascii (Ascii) -import qualified Data.Ascii as A -import Control.Concurrent -import Control.Concurrent.STM -import Control.Exception +import Data.ByteString (ByteString) +import qualified Data.ByteString as BS import Control.Monad -import Data.Text (Text) -import qualified Data.Text as T +import Data.Foldable import Data.List import qualified Data.Map as M import Data.Map (Map) import Data.Maybe import Data.Monoid.Unicode -import Network.HTTP.Lucu.Abortion -import Network.HTTP.Lucu.Config -import Network.HTTP.Lucu.DefaultPage -import Network.HTTP.Lucu.Headers (fromHeaders) -import Network.HTTP.Lucu.Request -import Network.HTTP.Lucu.Resource -import Network.HTTP.Lucu.Response -import Network.HTTP.Lucu.Interaction +import Data.Sequence (Seq) +import Data.Sequence.Unicode hiding ((∅)) +import Network.HTTP.Lucu.Resource.Internal import Network.HTTP.Lucu.Utils import Network.URI hiding (path) import System.IO @@ -48,12 +38,12 @@ import Prelude.Unicode -- httpd first searches for a resource in the tree, and then calls -- fallback handlers to ask them for a resource. If all of the -- handlers returned 'Nothing', the httpd responds with 404 Not Found. -type FallbackHandler = [Text] → IO (Maybe ResourceDef) +type FallbackHandler = [ByteString] → IO (Maybe ResourceDef) -- |'ResTree' is an opaque structure which is a map from resource path -- to 'ResourceDef'. newtype ResTree = ResTree ResNode -- root だから Map ではない -type ResSubtree = Map Text ResNode +type ResSubtree = Map ByteString ResNode data ResNode = ResNode (Maybe ResourceDef) ResSubtree -- |'mkResTree' converts a list of @(path, def)@ to a 'ResTree' e.g. @@ -64,15 +54,18 @@ data ResNode = ResNode (Maybe ResourceDef) ResSubtree -- ] -- @ -- --- Note that the request path in an incoming HTTP request is always --- treated as an URI-encoded UTF-8 string. -mkResTree ∷ [ ([Text], ResourceDef) ] → ResTree +-- Note that path components are always represented as octet streams +-- in this system. Lucu automatically decodes percent-encoded URIs but +-- has no involvement in character encodings such as UTF-8, since RFC +-- 2616 (HTTP/1.1) says nothing about character encodings to be used +-- in \"http\" and \"https\" URI schemas. +mkResTree ∷ [ ([ByteString], ResourceDef) ] → ResTree mkResTree = processRoot ∘ map (first canonicalisePath) where - canonicalisePath ∷ [Text] → [Text] - canonicalisePath = filter (≢ "") + canonicalisePath ∷ [ByteString] → [ByteString] + canonicalisePath = filter ((¬) ∘ BS.null) - processRoot ∷ [ ([Text], ResourceDef) ] → ResTree + processRoot ∷ [ ([ByteString], ResourceDef) ] → ResTree processRoot list = let (roots, nonRoots) = partition (\(path, _) → null path) list children = processNonRoot nonRoots @@ -87,7 +80,7 @@ mkResTree = processRoot ∘ map (first canonicalisePath) in ResTree (ResNode (Just def) children) - processNonRoot ∷ [ ([Text], ResourceDef) ] → ResSubtree + processNonRoot ∷ [ ([ByteString], ResourceDef) ] → ResSubtree processNonRoot list = let subtree = M.fromList [(name, node name) | name ← childNames] @@ -107,7 +100,10 @@ mkResTree = processRoot ∘ map (first canonicalisePath) in subtree -findResource ∷ ResTree → [FallbackHandler] → URI → IO (Maybe ([Text], ResourceDef)) +findResource ∷ ResTree + → [FallbackHandler] + → URI + → IO (Maybe ([ByteString], ResourceDef)) findResource (ResTree (ResNode rootDefM subtree)) fbs uri = do let path = splitPathInfo uri hasGreedyRoot = maybe False resIsGreedy rootDefM @@ -115,13 +111,16 @@ findResource (ResTree (ResNode rootDefM subtree)) fbs uri do def ← rootDefM return ([], def) else - walkTree subtree path [] + walkTree subtree path (∅) if isJust foundInTree then return foundInTree else fallback path fbs where - walkTree ∷ ResSubtree → [Text] → [Text] → Maybe ([Text], ResourceDef) + walkTree ∷ ResSubtree + → [ByteString] + → Seq ByteString + → Maybe ([ByteString], ResourceDef) walkTree _ [] _ = error "Internal error: should not reach here." @@ -129,17 +128,19 @@ findResource (ResTree (ResNode rootDefM subtree)) fbs uri walkTree tree (name:[]) soFar = do ResNode defM _ ← M.lookup name tree def ← defM - return (soFar ⧺ [name], def) + return (toList $ soFar ⊳ name, def) walkTree tree (x:xs) soFar = do ResNode defM sub ← M.lookup x tree case defM of Just (ResourceDef { resIsGreedy = True }) → do def ← defM - return (soFar ⧺ [x], def) - _ → walkTree sub xs (soFar ⧺ [x]) + return (toList $ soFar ⊳ x, def) + _ → walkTree sub xs (soFar ⊳ x) - fallback ∷ [Text] → [FallbackHandler] → IO (Maybe ([Text], ResourceDef)) + fallback ∷ [ByteString] + → [FallbackHandler] + → IO (Maybe ([ByteString], ResourceDef)) fallback _ [] = return Nothing fallback path (x:xs) = do m ← x path case m of diff --git a/Network/HTTP/Lucu/StaticFile.hs b/Network/HTTP/Lucu/StaticFile.hs index c227205..82bc59b 100644 --- a/Network/HTTP/Lucu/StaticFile.hs +++ b/Network/HTTP/Lucu/StaticFile.hs @@ -17,9 +17,12 @@ import Control.Monad import Control.Monad.Unicode import Control.Monad.Trans import qualified Data.Ascii as A -import qualified Data.ByteString.Lazy.Char8 as B +import Data.ByteString (ByteString) +import qualified Data.ByteString.Lazy.Char8 as LBS import Data.Monoid.Unicode +import Data.String import qualified Data.Text as T +import qualified Data.Text.Encoding as T import Data.Time.Clock.POSIX import Network.HTTP.Lucu.Abortion import Network.HTTP.Lucu.Config @@ -27,7 +30,7 @@ import Network.HTTP.Lucu.ETag import Network.HTTP.Lucu.MIMEType import Network.HTTP.Lucu.MIMEType.Guess import Network.HTTP.Lucu.Resource -import Network.HTTP.Lucu.Resource.Tree +import Network.HTTP.Lucu.Resource.Internal import Network.HTTP.Lucu.Response import Prelude.Unicode import System.FilePath @@ -72,7 +75,7 @@ handleStaticFile sendContent path Just mime → setContentType mime when sendContent - $ liftIO (B.readFile path) ≫= putChunk + $ liftIO (LBS.readFile path) ≫= putChunks -- |@'generateETagFromFile' fpath@ generates a strong entity tag from -- a file. The file doesn't necessarily have to be a regular file; it @@ -102,7 +105,8 @@ generateETagFromFile path return $ strongETag tag -- | @'staticDir' dir@ is a 'ResourceDef' which maps all files in --- @dir@ and its subdirectories on the filesystem to the 'ResTree'. +-- @dir@ and its subdirectories on the filesystem to the +-- 'Network.HTTP.Lucu.Resource.Tree.ResTree'. -- -- Note that 'staticDir' currently doesn't have a directory-listing -- capability. Requesting the content of a directory will end up being @@ -115,15 +119,18 @@ staticDir path , resHead = Just $ handleStaticDir False path } +-- TODO: implement directory listing. handleStaticDir ∷ Bool → FilePath → Resource () handleStaticDir sendContent basePath = do extraPath ← getPathInfo securityCheck extraPath - let path = basePath joinPath (map T.unpack extraPath) - + let path = basePath joinPath (map dec8 extraPath) handleStaticFile sendContent path where - securityCheck pathElems - = when (any (≡ "..") pathElems) - $ fail ("security error: " ⧺ show pathElems) --- TODO: implement directory listing. + dec8 ∷ ByteString → String + dec8 = T.unpack ∘ T.decodeUtf8 + +securityCheck ∷ (Eq s, Show s, IsString s, Monad m) ⇒ [s] → m () +securityCheck pathElems + = when (any (≡ "..") pathElems) + $ fail ("security error: " ⧺ show pathElems) diff --git a/Network/HTTP/Lucu/Utils.hs b/Network/HTTP/Lucu/Utils.hs index 5102524..4db7c05 100644 --- a/Network/HTTP/Lucu/Utils.hs +++ b/Network/HTTP/Lucu/Utils.hs @@ -22,8 +22,6 @@ import Data.ByteString (ByteString) import qualified Data.ByteString.Char8 as BS import Data.List hiding (last) import Data.Monoid.Unicode -import Data.Text (Text) -import Data.Text.Encoding as T import Network.URI import Prelude hiding (last) import Prelude.Unicode @@ -94,12 +92,12 @@ parseWWWFormURLEncoded src -- |> splitPathInfo "http://example.com/foo/bar" -- > ==> ["foo", "bar"] -splitPathInfo ∷ URI → [Text] +splitPathInfo ∷ URI → [ByteString] splitPathInfo uri = let reqPathStr = uriPath uri reqPath = [unEscapeString x | x ← splitBy (≡ '/') reqPathStr, (¬) (null x)] in - map (T.decodeUtf8 ∘ BS.pack) reqPath + map BS.pack reqPath -- |> show3 5 -- > ==> "005" -- 2.40.0