ditz: close lucu-10

[Lucu.git] / Network / HTTP / Lucu / Utils.hs
diff --git a/Network/HTTP/Lucu/Utils.hs b/Network/HTTP/Lucu/Utils.hs

index 7d6eeeb69531b73bede3d9e573c749c162c5e640..9abaf1e9c511d3e0d2d61bf1008640471f2baa51 100644 (file)
--- a/Network/HTTP/Lucu/Utils.hs
+++ b/Network/HTTP/Lucu/Utils.hs
@@ -1,33 +1,174 @@
+{-# LANGUAGE
+    FlexibleContexts
+  , OverloadedStrings
+  , UnicodeSyntax
+  #-}
+-- |Utility functions used internally in this package.
  module Network.HTTP.Lucu.Utils
-    ( splitBy      -- (a -> Bool) -> [a] -> [[a]]
-    , trim         -- (a -> Bool) -> [a] -> [a]
-    , noCaseEq     -- String -> String -> Bool
-    , isWhiteSpace -- Char -> Bool
+    ( Host
+    , PathSegment
+    , Path
+
+    , splitBy
+    , quoteStr
+    , parseWWWFormURLEncoded
+    , uriHost
+    , uriPathSegments
+    , trim
+
+    , (⊲)
+    , (⊳)
+    , (⋈)
+    , mapM
+
+    , getLastModified
      )
      where
-
+import Control.Applicative hiding (empty)
+import Control.Monad hiding (mapM)
+import Data.Ascii (Ascii, AsciiBuilder)
+import qualified Data.Ascii as A
+import Data.ByteString (ByteString)
+import qualified Data.ByteString.Char8 as BS
+import Data.CaseInsensitive (CI)
+import qualified Data.CaseInsensitive as CI
  import Data.Char
-import Data.List
+import Data.Collections
+import Data.Collections.BaseInstances ()
+import Data.Maybe
+import Data.Monoid.Unicode
+import Data.Ratio
+import Data.Text (Text)
+import qualified Data.Text as T
+import Data.Time
+import Data.Time.Clock.POSIX
+import Network.URI
+import Prelude hiding (last, mapM, null, reverse)
+import Prelude.Unicode
+import System.Directory
+import System.Time (ClockTime(..))
  
+-- |'Host' represents an IP address or a host name in an URI
+-- authority.
+type Host = CI Text
  
-splitBy :: (a -> Bool) -> [a] -> [[a]]
-splitBy isSeparator src
-    = case break isSeparator src
-      of (last , []      ) -> last  : []
-         (first, sep:rest) -> first : splitBy isSeparator rest
+-- |'PathSegment' represents an URI path segment, split by slashes and
+-- percent-decoded.
+type PathSegment = ByteString
+
+-- |'Path' is a list of URI path segments.
+type Path = [PathSegment]
+
+-- |>>> splitBy (== ':') "ab:c:def"
+-- ["ab", "c", "def"]
+splitBy ∷ (a → Bool) → [a] → [[a]]
+{-# INLINEABLE splitBy #-}
+splitBy isSep src
+    = case break isSep src of
+        (last , []       ) → [last]
+        (first, _sep:rest) → first : splitBy isSep rest
+
+-- |>>> quoteStr "abc"
+-- "\"abc\""
+--
+-- >>> quoteStr "ab\"c"
+-- "\"ab\\\"c\""
+quoteStr ∷ Ascii → AsciiBuilder
+quoteStr str = A.toAsciiBuilder "\"" ⊕
+               go (A.toByteString str) (∅) ⊕
+               A.toAsciiBuilder "\""
+    where
+      go ∷ ByteString → AsciiBuilder → AsciiBuilder
+      go bs ab
+          = case BS.break (≡ '"') bs of
+              (x, y)
+                  | BS.null y
+                      → ab ⊕ b2ab x
+                  | otherwise
+                      → go (BS.tail y)
+                           (ab ⊕ b2ab x ⊕ A.toAsciiBuilder "\\\"")
  
+      b2ab ∷ ByteString → AsciiBuilder
+      b2ab = A.toAsciiBuilder ∘ A.unsafeFromByteString
  
-trim :: (a -> Bool) -> [a] -> [a]
-trim p = trimTail . trimHead
+-- |>>> parseWWWFormURLEncoded "aaa=bbb&ccc=ddd"
+-- [("aaa", "bbb"), ("ccc", "ddd")]
+parseWWWFormURLEncoded ∷ Ascii → [(ByteString, ByteString)]
+parseWWWFormURLEncoded src
+    -- THINKME: We could gain some performance by using attoparsec
+    -- here.
+    | src ≡ ""  = []
+    | otherwise = do pairStr ← splitBy (\ c → c ≡ ';' ∨ c ≡ '&') (A.toString src)
+                     let (key, value) = break (≡ '=') pairStr
+                     return ( unescape key
+                            , unescape $ case value of
+                                           ('=':val) → val
+                                           val       → val
+                            )
      where
-      trimHead = dropWhile p
-      trimTail = reverse . trimHead . reverse
+      unescape ∷ String → ByteString
+      unescape = BS.pack ∘ unEscapeString ∘ (plusToSpace <$>)
  
+      plusToSpace ∷ Char → Char
+      plusToSpace '+' = ' '
+      plusToSpace c   = c
  
-noCaseEq :: String -> String -> Bool
-noCaseEq a b
-    = (map toLower a) == (map toLower b)
+-- |>>> uriHost "http://example.com/foo/bar"
+-- "example.com"
+uriHost ∷ URI → Host
+{-# INLINE uriHost #-}
+uriHost = CI.mk ∘ T.pack ∘ uriRegName ∘ fromJust ∘ uriAuthority
  
+-- |>>> uriPathSegments "http://example.com/foo/bar"
+-- ["foo", "bar"]
+uriPathSegments ∷ URI → Path
+uriPathSegments uri
+    = let reqPathStr = uriPath uri
+          reqPath    = [ unEscapeString x
+                         | x ← splitBy (≡ '/') reqPathStr, (¬) (null x) ]
+      in
+        BS.pack <$> reqPath
  
-isWhiteSpace :: Char -> Bool
-isWhiteSpace = flip elem " \t\r\n"
+-- |>>> trim "  ab c d "
+-- "ab c d"
+trim ∷ String → String
+trim = reverse ∘ f ∘ reverse ∘ f
+    where
+      f = dropWhile isSpace
+
+infixr 5 ⊲
+-- | (&#x22B2;) = ('<|')
+--
+-- U+22B2, NORMAL SUBGROUP OF
+(⊲) ∷ Sequence α a ⇒ a → α → α
+(⊲) = (<|)
+
+infixl 5 ⊳
+-- | (&#x22B3;) = ('|>')
+--
+-- U+22B3, CONTAINS AS NORMAL SUBGROUP
+(⊳) ∷ Sequence α a ⇒ α → a → α
+(⊳) = (|>)
+
+infixr 5 ⋈
+-- | (&#x22C8;) = ('><')
+--
+-- U+22C8, BOWTIE
+(⋈) ∷ Sequence α a ⇒ α → α → α
+(⋈) = (><)
+
+-- |Generalised @mapM@ from any 'Foldable' to 'Unfoldable'. Why isn't
+-- this in the @collections-api@?
+mapM ∷ (Foldable α a, Unfoldable β b, Functor m, Monad m)
+     ⇒ (a → m b) → α → m β
+{-# INLINE mapM #-}
+mapM = flip foldrM empty ∘ (flip ((<$>) ∘ flip insert) ∘)
+
+-- |Get the modification time of a given file.
+getLastModified ∷ FilePath → IO UTCTime
+getLastModified = (clockTimeToUTC <$>) ∘ getModificationTime
+    where
+      clockTimeToUTC ∷ ClockTime → UTCTime
+      clockTimeToUTC (TOD sec picoSec)
+          = posixSecondsToUTCTime ∘ fromRational
+            $ sec % 1 + picoSec % (1000 ⋅ 1000 ⋅ 1000 ⋅ 1000)