X-Git-Url: http://git.cielonegro.org/gitweb.cgi?p=Lucu.git;a=blobdiff_plain;f=Network%2FHTTP%2FLucu%2FUtils.hs;h=297ea3a762ef64992466469a3845053ffb86d7af;hp=6b749a80c8aefb448be43b06908961713ba428c1;hb=6680828c79aff38431704075c339e043b577589e;hpb=3d017dd65ddede9a11c5b7a34a91e04340e67bc4 diff --git a/Network/HTTP/Lucu/Utils.hs b/Network/HTTP/Lucu/Utils.hs index 6b749a8..297ea3a 100644 --- a/Network/HTTP/Lucu/Utils.hs +++ b/Network/HTTP/Lucu/Utils.hs @@ -1,75 +1,168 @@ --- |Utility functions used internally in the Lucu httpd. These --- functions may be useful too for something else. +{-# LANGUAGE + FlexibleContexts + , OverloadedStrings + , UnicodeSyntax + #-} +-- |Utility functions used internally in this package. module Network.HTTP.Lucu.Utils - ( splitBy - , joinWith - , trim - , isWhiteSpace + ( Host + , PathSegment + , Path + + , splitBy , quoteStr , parseWWWFormURLEncoded + , uriHost + , uriPathSegments + , trim + + , (⊲) + , (⊳) + , (⋈) + , mapM + + , getLastModified ) where - -import Data.List hiding (last) +import Control.Applicative hiding (empty) +import Control.Monad hiding (mapM) +import Data.Ascii (Ascii, AsciiBuilder) +import qualified Data.Ascii as A +import Data.ByteString (ByteString) +import qualified Data.ByteString.Char8 as BS +import Data.CaseInsensitive (CI) +import qualified Data.CaseInsensitive as CI +import Data.Char +import Data.Collections +import Data.Collections.BaseInstances () +import Data.Convertible.Base +import Data.Convertible.Instances.Time () +import Data.Maybe +import Data.Monoid.Unicode +import Data.Text (Text) +import qualified Data.Text as T +import Data.Time import Network.URI -import Prelude hiding (last) +import Prelude hiding (last, mapM, null, reverse) +import Prelude.Unicode +import System.Directory + +-- |'Host' represents an IP address or a host name in an URI +-- authority. +type Host = CI Text --- |> splitBy (== ':') "ab:c:def" --- > ==> ["ab", "c", "def"] -splitBy :: (a -> Bool) -> [a] -> [[a]] +-- |'PathSegment' represents an URI path segment, split by slashes and +-- percent-decoded. +type PathSegment = ByteString + +-- |'Path' is a list of URI path segments. +type Path = [PathSegment] + +-- |>>> splitBy (== ':') "ab:c:def" +-- ["ab", "c", "def"] +splitBy ∷ (a → Bool) → [a] → [[a]] +{-# INLINEABLE splitBy #-} splitBy isSep src - = case break isSep src - of (last , [] ) -> last : [] - (first, _sep:rest) -> first : splitBy isSep rest - --- |> joinWith ":" ["ab", "c", "def"] --- > ==> "ab:c:def" -joinWith :: [a] -> [[a]] -> [a] -joinWith separator xs - = separator `seq` xs `seq` - foldr (++) [] $! intersperse separator xs - --- |> trim (== '_') "__ab_c__def___" --- > ==> "ab_c__def" -trim :: (a -> Bool) -> [a] -> [a] -trim p = p `seq` trimTail . trimHead - where - trimHead = dropWhile p - trimTail = reverse . trimHead . reverse - --- |@'isWhiteSpace' c@ is 'Prelude.True' iff c is one of SP, HT, CR --- and LF. -isWhiteSpace :: Char -> Bool -isWhiteSpace ' ' = True -isWhiteSpace '\t' = True -isWhiteSpace '\r' = True -isWhiteSpace '\n' = True -isWhiteSpace _ = False -{-# INLINE isWhiteSpace #-} - --- |> quoteStr "abc" --- > ==> "\"abc\"" + = case break isSep src of + (last , [] ) → [last] + (first, _sep:rest) → first : splitBy isSep rest + +-- |>>> quoteStr "abc" +-- "\"abc\"" -- --- > quoteStr "ab\"c" --- > ==> "\"ab\\\"c\"" -quoteStr :: String -> String -quoteStr str = str `seq` - foldr (++) "" (["\""] ++ map quote str ++ ["\""]) +-- >>> quoteStr "ab\"c" +-- "\"ab\\\"c\"" +quoteStr ∷ Ascii → AsciiBuilder +quoteStr str = A.toAsciiBuilder "\"" ⊕ + go (A.toByteString str) (∅) ⊕ + A.toAsciiBuilder "\"" where - quote :: Char -> String - quote '"' = "\\\"" - quote c = [c] + go ∷ ByteString → AsciiBuilder → AsciiBuilder + go bs ab + = case BS.break (≡ '"') bs of + (x, y) + | BS.null y + → ab ⊕ b2ab x + | otherwise + → go (BS.tail y) + (ab ⊕ b2ab x ⊕ A.toAsciiBuilder "\\\"") + b2ab ∷ ByteString → AsciiBuilder + b2ab = A.toAsciiBuilder ∘ A.unsafeFromByteString --- |> parseWWWFormURLEncoded "aaa=bbb&ccc=ddd" --- > ==> [("aaa", "bbb"), ("ccc", "ddd")] -parseWWWFormURLEncoded :: String -> [(String, String)] +-- |>>> parseWWWFormURLEncoded "aaa=bbb&ccc=ddd" +-- [("aaa", "bbb"), ("ccc", "ddd")] +parseWWWFormURLEncoded ∷ Ascii → [(ByteString, ByteString)] parseWWWFormURLEncoded src - | src == "" = [] - | otherwise = do pairStr <- splitBy (\ c -> c == ';' || c == '&') src - let (key, value) = break (== '=') pairStr - return ( unEscapeString key - , unEscapeString $ case value of - ('=':val) -> val - val -> val + -- THINKME: We could gain some performance by using attoparsec + -- here. + | src ≡ "" = [] + | otherwise = do pairStr ← splitBy (\ c → c ≡ ';' ∨ c ≡ '&') (A.toString src) + let (key, value) = break (≡ '=') pairStr + return ( unescape key + , unescape $ case value of + ('=':val) → val + val → val ) + where + unescape ∷ String → ByteString + unescape = BS.pack ∘ unEscapeString ∘ (plusToSpace <$>) + + plusToSpace ∷ Char → Char + plusToSpace '+' = ' ' + plusToSpace c = c + +-- |>>> uriHost "http://example.com/foo/bar" +-- "example.com" +uriHost ∷ URI → Host +{-# INLINE uriHost #-} +uriHost = CI.mk ∘ T.pack ∘ uriRegName ∘ fromJust ∘ uriAuthority + +-- |>>> uriPathSegments "http://example.com/foo/bar" +-- ["foo", "bar"] +uriPathSegments ∷ URI → Path +uriPathSegments uri + = let reqPathStr = uriPath uri + reqPath = [ unEscapeString x + | x ← splitBy (≡ '/') reqPathStr, (¬) (null x) ] + in + BS.pack <$> reqPath + +-- |>>> trim " ab c d " +-- "ab c d" +trim ∷ String → String +trim = reverse ∘ f ∘ reverse ∘ f + where + f = dropWhile isSpace + +infixr 5 ⊲ +-- | (⊲) = ('<|') +-- +-- U+22B2, NORMAL SUBGROUP OF +(⊲) ∷ Sequence α a ⇒ a → α → α +(⊲) = (<|) + +infixl 5 ⊳ +-- | (⊳) = ('|>') +-- +-- U+22B3, CONTAINS AS NORMAL SUBGROUP +(⊳) ∷ Sequence α a ⇒ α → a → α +(⊳) = (|>) + +infixr 5 ⋈ +-- | (⋈) = ('><') +-- +-- U+22C8, BOWTIE +(⋈) ∷ Sequence α a ⇒ α → α → α +(⋈) = (><) + +-- |Generalised @mapM@ from any 'Foldable' to 'Unfoldable'. Why isn't +-- this in the @collections-api@? +mapM ∷ (Foldable α a, Unfoldable β b, Functor m, Monad m) + ⇒ (a → m b) → α → m β +{-# INLINE mapM #-} +mapM = flip foldrM empty ∘ (flip ((<$>) ∘ flip insert) ∘) + +-- |Get the modification time of a given file. +getLastModified ∷ FilePath → IO UTCTime +getLastModified = (cs <$>) ∘ getModificationTime