StatusCode: modified again

[Lucu.git] / Network / HTTP / Lucu / Utils.hs
diff --git a/Network/HTTP/Lucu/Utils.hs b/Network/HTTP/Lucu/Utils.hs

index d7ace3f8309aab60c21f2c8c6c4362189a05bc73..7537eafcffacf7d8edf755e2bb8fea30cd636547 100644 (file)
--- a/Network/HTTP/Lucu/Utils.hs
+++ b/Network/HTTP/Lucu/Utils.hs
@@ -1,75 +1,169 @@
--- |Utility functions used internally in the Lucu httpd. These
--- functions may be useful too for something else.
+{-# LANGUAGE
+    FlexibleContexts
+  , OverloadedStrings
+  , UnicodeSyntax
+  #-}
+-- |Utility functions used internally in this package.
  module Network.HTTP.Lucu.Utils
-    ( splitBy
-    , joinWith
-    , trim
-    , noCaseEq
-    , isWhiteSpace
+    ( Host
+    , PathSegment
+    , Path
+
+    , splitBy
      , quoteStr
      , parseWWWFormURLEncoded
+    , uriHost
+    , uriPathSegments
+    , trim
+
+    , (⊲)
+    , (⊳)
+    , (⋈)
+    , mapM
+
+    , getLastModified
      )
      where
-
-import Control.Monad.Trans
+import Control.Applicative hiding (empty)
+import Control.Monad hiding (mapM)
+import Data.Ascii (Ascii, AsciiBuilder)
+import qualified Data.Ascii as A
+import Data.ByteString (ByteString)
+import qualified Data.ByteString.Char8 as BS
+import Data.CaseInsensitive (CI)
+import qualified Data.CaseInsensitive as CI
  import Data.Char
-import Data.List
-import Foreign
-import Foreign.C
+import Data.Collections
+import Data.Collections.BaseInstances ()
+import Data.Convertible.Base
+import Data.Convertible.Instances.Ascii ()
+import Data.Convertible.Instances.Text ()
+import Data.Convertible.Instances.Time ()
+import Data.Maybe
+import Data.Monoid.Unicode
+import Data.Text (Text)
+import Data.Time
  import Network.URI
+import Prelude hiding (last, mapM, null, reverse)
+import Prelude.Unicode
+import System.Directory
  
--- |> splitBy (== ':') "ab:c:def"
---  > ==> ["ab", "c", "def"]
-splitBy :: (a -> Bool) -> [a] -> [[a]]
-splitBy isSeparator src
-    = case break isSeparator src
-      of (last , []      ) -> last  : []
-         (first, sep:rest) -> first : splitBy isSeparator rest
-
--- |> joinWith ':' ["ab", "c", "def"]
---  > ==> "ab:c:def"
-joinWith :: [a] -> [[a]] -> [a]
-joinWith separator xs
-    = foldr (++) [] $ intersperse separator xs
-
--- |> trim (== '_') "__ab_c__def___"
---  > ==> "ab_c__def"
-trim :: (a -> Bool) -> [a] -> [a]
-trim p = trimTail . trimHead
-    where
-      trimHead = dropWhile p
-      trimTail = reverse . trimHead . reverse
+-- |'Host' represents an IP address or a host name in a URI
+-- authority. It is a case-insensitive ('CI') 'Text'.
+type Host = CI Text
+
+-- |'PathSegment' represents a single URI path segment, i.e. one of
+-- the pieces obtained by splitting a path at slashes, after
+-- percent-decoding.
+type PathSegment = ByteString
  
--- |@'noCaseEq' a b@ is equivalent to @(map toLower a) == (map toLower
--- b)@
-noCaseEq :: String -> String -> Bool
-noCaseEq a b
-    = (map toLower a) == (map toLower b)
+-- |'Path' is a list of URI path segments (see 'PathSegment').
+type Path = [PathSegment]
  
--- |@'isWhiteSpace' c@ is True iff c is one of SP, HT, CR and LF.
-isWhiteSpace :: Char -> Bool
-isWhiteSpace = flip elem " \t\r\n"
+-- |Split a list into the runs lying between the elements that
+-- satisfy the separator predicate. The separators themselves are
+-- dropped and empty runs are kept, so the result is never empty.
+--
+-- >>> splitBy (== ':') "ab:c:def"
+-- ["ab","c","def"]
+splitBy ∷ (a → Bool) → [a] → [[a]]
+{-# INLINEABLE splitBy #-}
+splitBy isSep = go
+    where
+      go xs = case break isSep xs of
+                (chunk, _ : remainder) → chunk : go remainder
+                (chunk, []           ) → [chunk]
  
--- |> quoteStr "abc"
---  > ==> "\"abc\""
+-- |Wrap an 'Ascii' string in double quotes, putting a backslash in
+-- front of every double quote found inside it. No other character
+-- (a backslash included) is altered.
+--
+-- >>> quoteStr "abc"
+-- "\"abc\""
  --
---  > quoteStr "ab\"c"
---  > ==> "\"ab\\\"c\""
-quoteStr :: String -> String
-quoteStr str = foldr (++) "" (["\""] ++ map quote str ++ ["\""])
+-- >>> quoteStr "ab\"c"
+-- "\"ab\\\"c\""
+quoteStr ∷ Ascii → AsciiBuilder
+quoteStr str = cs ("\"" ∷ Ascii) ⊕
+               go (cs str) (∅)   ⊕
+               cs ("\"" ∷ Ascii)
      where
-      quote :: Char -> String
-      quote '"' = "\\\""
-      quote c   = [c]
+      -- Copy @bs@ onto the accumulator @ab@ one quote-free run at a
+      -- time, emitting an escaped quote between consecutive runs.
+      go ∷ ByteString → AsciiBuilder → AsciiBuilder
+      go bs ab
+          = case BS.break (≡ '"') bs of
+              (x, y)
+                  -- No double quote left: append the rest and stop.
+                  | BS.null y
+                      → ab ⊕ b2ab x
+                  -- @y@ begins with the quote: drop it with
+                  -- 'BS.tail' and append the escaped form instead.
+                  | otherwise
+                      → go (BS.tail y)
+                           (ab ⊕ b2ab x ⊕ cs ("\\\"" ∷ Ascii))
  
+      -- Every 'ByteString' passed here is a slice of the bytes of
+      -- @str@, which is already 'Ascii'. NOTE(review): this is
+      -- presumably why the unchecked conversion is used - confirm.
+      b2ab ∷ ByteString → AsciiBuilder
+      b2ab = cs ∘ A.unsafeFromByteString
  
--- |> parseWWWFormURLEncoded "aaa=bbb&ccc=ddd"
---  > ==> [("aaa", "bbb"), ("ccc", "ddd")]
-parseWWWFormURLEncoded :: String -> [(String, String)]
+-- |Parse an @application\/x-www-form-urlencoded@ string into a list
+-- of key\/value pairs. Pairs are separated by @;@ or @&@, and the key
+-- ends at the first @=@ of a pair; a pair without @=@ gets an empty
+-- value. In both keys and values @+@ is replaced by a space before
+-- percent-escapes are decoded. An empty input yields no pairs.
+--
+-- >>> parseWWWFormURLEncoded "aaa=bbb&ccc=ddd"
+-- [("aaa","bbb"),("ccc","ddd")]
+parseWWWFormURLEncoded ∷ Ascii → [(ByteString, ByteString)]
  parseWWWFormURLEncoded src
-    | src == "" = []
-    | otherwise = do pairStr <- splitBy (\ c -> c == ';' || c == '&') src
-                     let pair = break (== '=') pairStr
-                     return ( unEscapeString $ fst pair
-                            , unEscapeString $ snd pair
+    -- THINKME: We could gain some performance by using attoparsec
+    -- here.
+    | src ≡ ""  = []
+    | otherwise = do pairStr ← splitBy (\ c → c ≡ ';' ∨ c ≡ '&') (cs src)
+                     let (key, value) = break (≡ '=') pairStr
+                     return ( unescape key
+                            -- 'break' leaves the @=@ at the head of
+                            -- the value; strip it when present.
+                            , unescape $ case value of
+                                           ('=':val) → val
+                                           val       → val
                              )
+    where
+      -- Decode one key or value: @+@ becomes a space first, then
+      -- 'unEscapeString' is applied and the result is packed into a
+      -- 'ByteString'.
+      unescape ∷ String → ByteString
+      unescape = BS.pack ∘ unEscapeString ∘ (plusToSpace <$>)
+
+      -- Map @+@ to a space and leave every other character alone.
+      plusToSpace ∷ Char → Char
+      plusToSpace '+' = ' '
+      plusToSpace c   = c
+
+-- |Extract the registered name (host) of a 'URI' as a
+-- case-insensitive 'Host'.
+--
+-- The URI must carry an authority component. Applying this function
+-- to a URI without one (e.g. a relative reference) is a programming
+-- error and raises an 'error' that names this function, rather than
+-- an anonymous @fromJust@ failure.
+--
+-- >>> uriHost "http://example.com/foo/bar"
+-- "example.com"
+uriHost ∷ URI → Host
+{-# INLINE uriHost #-}
+uriHost uri
+    = case uriAuthority uri of
+        Just auth → CI.mk (cs (uriRegName auth))
+        Nothing   → error "Network.HTTP.Lucu.Utils.uriHost: URI has no authority"
+
+-- |Break the path of a 'URI' at its slashes and return the
+-- percent-decoded segments. Empty segments are dropped before
+-- decoding.
+--
+-- >>> uriPathSegments "http://example.com/foo/bar"
+-- ["foo","bar"]
+uriPathSegments ∷ URI → Path
+uriPathSegments uri
+    = [ BS.pack (unEscapeString seg)
+      | seg ← splitBy (≡ '/') (uriPath uri)
+      , (¬) (null seg)
+      ]
+
+-- |Strip leading and trailing white space (as judged by 'isSpace')
+-- from a string; white space in the interior is left alone.
+--
+-- >>> trim "  ab c d "
+-- "ab c d"
+trim ∷ String → String
+trim str = reverse (dropLeading (reverse (dropLeading str)))
+    where
+      dropLeading ∷ String → String
+      dropLeading = dropWhile isSpace
+
+infixr 5 ⊲
+-- |Unicode alias for ('<|'): (&#x22B2;) = ('<|'). The single
+-- element is the left operand and the sequence the right one.
+--
+-- U+22B2, NORMAL SUBGROUP OF
+(⊲) ∷ Sequence α a ⇒ a → α → α
+x ⊲ xs = x <| xs
+
+infixl 5 ⊳
+-- |Unicode alias for ('|>'): (&#x22B3;) = ('|>'). The sequence is
+-- the left operand and the single element the right one.
+--
+-- U+22B3, CONTAINS AS NORMAL SUBGROUP
+(⊳) ∷ Sequence α a ⇒ α → a → α
+xs ⊳ x = xs |> x
+
+infixr 5 ⋈
+-- |Unicode alias for ('><'): (&#x22C8;) = ('><'). Both operands
+-- are sequences of the same type.
+--
+-- U+22C8, BOWTIE
+(⋈) ∷ Sequence α a ⇒ α → α → α
+xs ⋈ ys = xs >< ys
+
+-- |Generalised @mapM@ from any 'Foldable' to 'Unfoldable': the
+-- source is right-folded monadically, running the action on each
+-- element and 'insert'ing its result into a collection that starts
+-- out as 'empty'. Why isn't this in the @collections-api@?
+mapM ∷ (Foldable α a, Unfoldable β b, Functor m, Monad m)
+     ⇒ (a → m b) → α → m β
+{-# INLINE mapM #-}
+mapM f = foldrM step empty
+    where
+      -- Run the action on one element and add its result to the
+      -- collection built so far.
+      step a acc = (\ b → insert b acc) <$> f a
+
+-- |Look up the modification time of the file at the given path and
+-- convert it to a 'UTCTime'.
+getLastModified ∷ FilePath → IO UTCTime
+getLastModified path = cs <$> getModificationTime path