X-Git-Url: https://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=Rakka%2FPage.hs;h=b4c88fcc5f2fef07de28d67825e62b68f6c03112;hb=HEAD;hp=b293b1fb0258445edfec5261687c3996c1893e9a;hpb=b4a3d2cf3854b10d923cb4c546bf1fe32b021a68;p=Rakka.git
diff --git a/Rakka/Page.hs b/Rakka/Page.hs
index b293b1f..b4c88fc 100644
--- a/Rakka/Page.hs
+++ b/Rakka/Page.hs
@@ -1,3 +1,8 @@
+{-# LANGUAGE
+ Arrows
+ , TypeOperators
+ , UnicodeSyntax
+ #-}
module Rakka.Page
( PageName
, Page(..)
@@ -10,58 +15,63 @@ module Rakka.Page
, pageName
, pageUpdateInfo
+ , pageRevision
, encodePageName
, decodePageName
- , entityFileName'
- , defaultFileName
-
, mkPageURI
, mkPageFragmentURI
, mkObjectURI
, mkFragmentURI
, mkAuxiliaryURI
+ , mkFeedURI
, mkRakkaURI
, xmlizePage
, parseXmlizedPage
)
where
-
-import qualified Codec.Binary.Base64 as B64
-import Codec.Binary.UTF8.String
-import Control.Arrow
-import Control.Arrow.ArrowIO
-import Control.Arrow.ArrowList
+import Control.Applicative
+import Control.Arrow
+import Control.Arrow.ArrowIO
+import Control.Arrow.ArrowList
+import Control.Arrow.Unicode
+import qualified Codec.Binary.UTF8.String as UTF8
+import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Lazy as Lazy (ByteString)
import qualified Data.ByteString.Lazy as L hiding (ByteString)
+import qualified Data.ByteString.Lazy.Char8 as L8 hiding (ByteString)
+import Data.CaseInsensitive (CI)
+import qualified Data.CaseInsensitive as CI
import Data.Char
import Data.Map (Map)
import qualified Data.Map as M
-import Data.Maybe
+import Data.Text (Text)
+import qualified Data.Text as T
+import Data.Text.Encoding
import Data.Time
+import qualified Data.Time.W3C as W3C
import Network.HTTP.Lucu hiding (redirect)
import Network.URI hiding (fragment)
+import OpenSSL.EVP.Base64
+import Prelude.Unicode
import Rakka.Utils
-import Rakka.W3CDateTime
import Subversion.Types
import System.FilePath.Posix
-import Text.XML.HXT.Arrow.XmlArrow
-import Text.XML.HXT.Arrow.XmlNodeSet
-import Text.XML.HXT.DOM.TypeDefs
-
-
-type PageName = String
-
-type LanguageTag = String -- See RFC 3066: http://www.ietf.org/rfc/rfc3066.txt
-type LanguageName = String -- i.e. "日本語"
+import Text.XML.HXT.Arrow.XmlArrow
+import Text.XML.HXT.DOM.TypeDefs
+import Text.XML.HXT.XPath
+-- | Name of a page, held as 'Text'.
+type PageName = Text
+-- | Language tag, wrapped in 'CI' so that comparison ignores case.
+type LanguageTag = CI Text -- See RFC 3066: http://www.ietf.org/rfc/rfc3066.txt
+-- | Human-readable name of a language.
+type LanguageName = Text -- i.e. "日本語" ("Japanese")
data Page
= Redirection {
redirName :: !PageName
, redirDest :: !PageName
+ , redirIsLocked :: !Bool
, redirRevision :: RevNum
, redirLastMod :: UTCTime
, redirUpdateInfo :: Maybe UpdateInfo
@@ -70,11 +80,9 @@ data Page
entityName :: !PageName
, entityType :: !MIMEType
, entityLanguage :: !(Maybe LanguageTag)
- , entityFileName :: !(Maybe String)
, entityIsTheme :: !Bool -- text/css 以外では無意味
, entityIsFeed :: !Bool -- text/x-rakka 以外では無意味
, entityIsLocked :: !Bool
- , entityIsBoring :: !Bool
, entityIsBinary :: !Bool
, entityRevision :: RevNum
, entityLastMod :: UTCTime
@@ -85,7 +93,6 @@ data Page
}
deriving (Show, Eq)
-
data UpdateInfo
= UpdateInfo {
uiOldRevision :: !RevNum
@@ -95,85 +102,68 @@ data UpdateInfo
+-- | 'True' exactly when the page was built with the 'Redirection'
+-- constructor (six fields after this change).
isRedirect :: Page -> Bool
-isRedirect (Redirection _ _ _ _ _) = True
-isRedirect _ = False
+isRedirect (Redirection _ _ _ _ _ _) = True
+isRedirect _ = False
+-- | 'True' exactly when the page was built with the 'Entity' constructor
+-- (thirteen fields after this change).
isEntity :: Page -> Bool
-isEntity (Entity _ _ _ _ _ _ _ _ _ _ _ _ _ _ _) = True
-isEntity _ = False
+isEntity (Entity _ _ _ _ _ _ _ _ _ _ _ _ _) = True
+isEntity _ = False
+-- | Name of a page: 'redirName' for a redirection, 'entityName' for an
+-- entity. Any other value raises an 'error'.
pageName :: Page -> PageName
pageName p
| isRedirect p = redirName p
| isEntity p = entityName p
- | otherwise = fail "neither redirection nor entity"
+ | otherwise = error "neither redirection nor entity"
pageUpdateInfo :: Page -> Maybe UpdateInfo
pageUpdateInfo p
| isRedirect p = redirUpdateInfo p
| isEntity p = entityUpdateInfo p
- | otherwise = fail "neither redirection nor entity"
-
-
--- UTF-8 に encode してから 0x20 - 0x7E の範圍を除いて URI escape する。
-encodePageName :: PageName -> FilePath
-encodePageName = escapeURIString isSafeChar . encodeString . fixPageName
- where
- fixPageName :: PageName -> PageName
- fixPageName = (\ (x:xs) -> toUpper x : xs) . map (\ c -> if c == ' ' then '_' else c)
-
-
-isSafeChar :: Char -> Bool
-isSafeChar c
- | c == '/' = True
- | isReserved c = False
- | c > ' ' && c <= '~' = True
- | otherwise = False
-
-
--- URI unescape して UTF-8 から decode する。
-decodePageName :: FilePath -> PageName
-decodePageName = decodeString . unEscapeString
+ | otherwise = error "neither redirection nor entity"
-encodeFragment :: String -> String
-encodeFragment = escapeURIString isSafeChar . encodeString
+-- | Revision number of a page: 'redirRevision' for a redirection,
+-- 'entityRevision' for an entity. Any other value raises an 'error'.
+pageRevision :: Page -> RevNum
+pageRevision page
+    | isRedirect page = redirRevision page
+    | isEntity page   = entityRevision page
+    | otherwise       = error "neither redirection nor entity"
-entityFileName' :: Page -> String
-entityFileName' page
- = fromMaybe (defaultFileName (entityType page) (entityName page)) (entityFileName page)
+-- | Turn a page name into a URI path. Spaces become underscores and the
+-- first character is upper-cased; the result is then encoded as UTF-8 and
+-- every character rejected by 'isSafeChar' is URI-escaped (the original
+-- note describes this as escaping everything outside 0x20 - 0x7E).
+--
+-- An empty page name has no first character to capitalise; forcing the
+-- result for one raises an 'error'.
+encodePageName ∷ PageName → FilePath
+encodePageName = escapeURIString isSafeChar ∘ UTF8.encodeString ∘ fixPageName ∘ T.unpack
+    where
+      -- Normalise the spelling of a name before it is escaped.
+      fixPageName ∷ String → String
+      fixPageName = capitalizeHead ∘ map (\c → if c ≡ ' ' then '_' else c)
+
+      -- Upper-case the first character; the empty name stays a bottom, but
+      -- one that says where it came from.
+      capitalizeHead ∷ String → String
+      capitalizeHead []     = error "encodePageName: empty page name"
+      capitalizeHead (x:xs) = toUpper x : xs
-defaultFileName :: MIMEType -> PageName -> String
-defaultFileName pType pName
- = let baseName = takeFileName pName
- in
- case pType of
- MIMEType "text" "x-rakka" _ -> baseName <.> "rakka"
- MIMEType "text" "css" _ -> baseName <.> "css"
- _ -> baseName
+-- | Undo the URI escaping of a path and decode the resulting UTF-8 octets
+-- into a page name. The underscore and capitalisation changes made by
+-- 'encodePageName' are not reversed.
+--
+-- FIXME: use system-filepath
+decodePageName ∷ FilePath → PageName
+decodePageName = T.pack ∘ UTF8.decodeString ∘ unEscapeString
+-- | Encode a fragment identifier as UTF-8 and URI-escape every character
+-- rejected by 'isSafeChar'. The leading @#@ is added by the callers.
+encodeFragment ∷ Text → String
+encodeFragment = escapeURIString isSafeChar ∘ B8.unpack ∘ encodeUtf8
-mkPageURI :: URI -> PageName -> URI
+-- | URI of a page's HTML rendering: the encoded page name with an @.html@
+-- extension, joined onto the path of the base URI.
+mkPageURI ∷ URI → PageName → URI
 mkPageURI baseURI name
     = baseURI {
-        uriPath = foldl (</>) "/" [uriPath baseURI, encodePageName name ++ ".html"]
+        uriPath = uriPath baseURI </> encodePageName name <.> "html"
       }
-
-mkPageFragmentURI :: URI -> PageName -> String -> URI
+-- | Like 'mkPageURI', but additionally sets the URI fragment to @#@
+-- followed by the escaped fragment identifier.
+mkPageFragmentURI ∷ URI → PageName → Text → URI
 mkPageFragmentURI baseURI name fragment
     = baseURI {
-        uriPath = foldl (</>) "/" [uriPath baseURI, encodePageName name ++ ".html"]
+        uriPath = uriPath baseURI </> encodePageName name <.> "html"
       , uriFragment = ('#' : encodeFragment fragment)
       }
-
-mkFragmentURI :: String -> URI
+mkFragmentURI â· Text â URI
mkFragmentURI fragment
= nullURI {
uriFragment = ('#' : encodeFragment fragment)
@@ -192,6 +182,13 @@ mkAuxiliaryURI baseURI basePath name
}
+-- | URI of a page's feed: the encoded page name with an @.rdf@ extension,
+-- joined onto the path of the base URI.
+mkFeedURI :: URI -> PageName -> URI
+mkFeedURI baseURI name
+    = baseURI {
+        uriPath = uriPath baseURI </> encodePageName name <.> "rdf"
+      }
+
+
mkRakkaURI :: PageName -> URI
mkRakkaURI name = URI {
uriScheme = "rakka:"
@@ -206,12 +203,11 @@ mkRakkaURI name = URI {
-- デフォルトでない場合のみ存在
+ revision="112"
lastModified="2000-01-01T00:00:00">
@@ -230,87 +226,107 @@ mkRakkaURI name = URI {
SKJaHKS8JK/DH8KS43JDK2aKKaSFLLS...
+
+
-}
xmlizePage :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
xmlizePage
= proc page
- -> do lastMod <- arrIO (utcToLocalZonedTime . entityLastMod) -< page
- ( eelem "/"
- += ( eelem "page"
- += sattr "name" (pageName page)
- += sattr "type" (show $ entityType page)
- += ( case entityLanguage page of
- Just x -> sattr "lang" x
- Nothing -> none
- )
- += ( case entityFileName page of
- Just x -> sattr "fileName" x
- Nothing -> none
- )
- += ( case entityType page of
- MIMEType "text" "css" _
- -> sattr "isTheme" (yesOrNo $ entityIsTheme page)
- MIMEType "text" "x-rakka" _
- -> sattr "isFeed" (yesOrNo $ entityIsFeed page)
- _
- -> none
- )
- += sattr "isLocked" (yesOrNo $ entityIsLocked page)
- += sattr "isBoring" (yesOrNo $ entityIsBoring page)
- += sattr "isBinary" (yesOrNo $ entityIsBinary page)
- += sattr "revision" (show $ entityRevision page)
- += sattr "lastModified" (formatW3CDateTime lastMod)
- += ( case entitySummary page of
- Just s -> eelem "summary" += txt s
- Nothing -> none
- )
- += ( if M.null (entityOtherLang page) then
- none
- else
- selem "otherLang"
- [ eelem "link"
- += sattr "lang" lang
- += sattr "page" name
- | (lang, name) <- M.toList (entityOtherLang page) ]
- )
- += ( if entityIsBinary page then
- ( eelem "binaryData"
- += txt (B64.encode $ L.unpack $ entityContent page)
+ -> if isRedirect page then
+ xmlizeRedirection -< page
+ else
+ xmlizeEntity -< page
+ where
+      -- Serialise a redirection as a document root holding one "page"
+      -- element. The element carries the page name, the redirect target,
+      -- the lock flag, the revision, and the last-modified time converted
+      -- to the local time zone.
+      xmlizeRedirection :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
+      xmlizeRedirection
+          = proc page
+          -> do lastMod <- arrIO (utcToLocalZonedTime . redirLastMod) -< page
+                ( eelem "/"
+                  += ( eelem "page"
+                       += sattr "name"         (T.unpack $ redirName     page)
+                       += sattr "redirect"     (T.unpack $ redirDest     page)
+                       += sattr "isLocked"     (yesOrNo  $ redirIsLocked page)
+                       += sattr "revision"     (show     $ redirRevision page)
+                       += sattr "lastModified" (W3C.format lastMod)
+                     )) -<< ()
+
+      -- Serialise an entity as a document root holding one "page" element.
+      -- Optional parts are emitted only when present: "lang", "isTheme"
+      -- (text/css only), "isFeed" (text/x-rakka only), "summary" and
+      -- "otherLang". The content goes into "binaryData" (Base64) when the
+      -- entity is binary and into "textData" (decoded from UTF-8) otherwise.
+      xmlizeEntity :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
+      xmlizeEntity
+          = proc page
+          -> do lastMod <- arrIO (utcToLocalZonedTime . entityLastMod) -< page
+                ( eelem "/"
+                  += ( eelem "page"
+                       += sattr "name" (T.unpack $ pageName page)
+                       += sattr "type" (show $ entityType page)
+                       += ( case entityLanguage page of
+                              Just x  -> sattr "lang" (T.unpack $ CI.foldedCase x)
+                              Nothing -> none
                           )
-                       else
-                         ( eelem "textData"
-                           += txt (decode $ L.unpack $ entityContent page)
+                       += ( case entityType page of
+                              MIMEType "text" "css" _
+                                  -> sattr "isTheme" (yesOrNo $ entityIsTheme page)
+                              MIMEType "text" "x-rakka" _
+                                  -> sattr "isFeed" (yesOrNo $ entityIsFeed page)
+                              _
+                                  -> none
                           )
-                     )
-                   )) -<< ()
-
+                       += sattr "isLocked" (yesOrNo $ entityIsLocked page)
+                       += sattr "isBinary" (yesOrNo $ entityIsBinary page)
+                       += sattr "revision" (show $ entityRevision page)
+                       += sattr "lastModified" (W3C.format lastMod)
+                       += ( case entitySummary page of
+                              Just s  -> eelem "summary" += txt s
+                              Nothing -> none
+                          )
+                       += ( if M.null (entityOtherLang page) then
+                                none
+                            else
+                                selem "otherLang"
+                                      [ eelem "link"
+                                        += sattr "lang" (T.unpack $ CI.foldedCase lang)
+                                        += sattr "page" (T.unpack name)
+                                      | (lang, name) ← M.toList (entityOtherLang page) ]
+                          )
+                       += ( if entityIsBinary page then
+                                ( eelem "binaryData"
+                                  += txt (L8.unpack $ encodeBase64LBS $ entityContent page)
+                                )
+                            else
+                                ( eelem "textData"
+                                  += txt (UTF8.decode $ L.unpack $ entityContent page)
+                                )
+                          )
+                     )) -<< ()
-parseXmlizedPage :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page
+-- | Rebuild a 'Page' from its XML form. A document with a
+-- @/page/\@redirect@ attribute becomes a 'Redirection'; anything else is
+-- handed to 'parseEntity'. A missing @isLocked@ attribute counts as "no".
+--
+-- The XML is not consulted for the revision or the last-modified time:
+-- 'redirRevision' and 'redirLastMod' are left 'undefined' here and must not
+-- be forced on the result.
+parseXmlizedPage ∷ (ArrowXml (⇝), ArrowChoice (⇝)) ⇒ (PageName, XmlTree) ⇝ Page
 parseXmlizedPage
     = proc (name, tree)
-    -> do updateInfo <- maybeA parseUpdateInfo -< tree
-          redirect <- maybeA (getXPathTreesInDoc "/page/@redirect/text()" >>> getText) -< tree
-          case redirect of
-            Nothing -> parseEntity -< (name, tree)
-            Just dest -> returnA -< (Redirection {
-                                       redirName = name
-                                     , redirDest = dest
-                                     , redirRevision = undefined
-                                     , redirLastMod = undefined
-                                     , redirUpdateInfo = updateInfo
-                                     })
-
+    → do updateInfo ← maybeA parseUpdateInfo ⤙ tree
+         redirect ← maybeA (getXPathTreesInDoc "/page/@redirect/text()" ⋙ getText) ⤙ tree
+         isLocked ← (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" ⋙ getText) "no"
+                     ⋙ parseYesOrNo) ⤙ tree
+         case redirect of
+           Nothing → parseEntity ⤙ (name, tree)
+           Just dest → returnA ⤙ Redirection {
+                                    redirName = name
+                                  , redirDest = T.pack dest
+                                  , redirIsLocked = isLocked
+                                  , redirRevision = undefined
+                                  , redirLastMod = undefined
+                                  , redirUpdateInfo = updateInfo
+                                  }
parseEntity :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page
parseEntity
= proc (name, tree)
-> do updateInfo <- maybeA parseUpdateInfo -< tree
- mimeType <- (getXPathTreesInDoc "/page/@type/text()" >>> getText
- >>> arr read) -< tree
+ mimeTypeStr <- withDefault (getXPathTreesInDoc "/page/@type/text()" >>> getText) "" -< tree
lang <- maybeA (getXPathTreesInDoc "/page/@lang/text()" >>> getText) -< tree
- fileName <- maybeA (getXPathTreesInDoc "/page/@filename/text()" >>> getText) -< tree
isTheme <- (withDefault (getXPathTreesInDoc "/page/@isTheme/text()" >>> getText) "no"
>>> parseYesOrNo) -< tree
@@ -318,8 +334,6 @@ parseEntity
>>> parseYesOrNo) -< tree
isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no"
>>> parseYesOrNo) -< tree
- isBoring <- (withDefault (getXPathTreesInDoc "/page/@isBoring/text()" >>> getText) "no"
- >>> parseYesOrNo) -< tree
summary <- (maybeA (getXPathTreesInDoc "/page/summary/text()"
>>> getText
@@ -336,38 +350,44 @@ parseEntity
let (isBinary, content)
= case (textData, binaryData) of
- (Just text, Nothing ) -> (False, L.pack $ encode text )
- (Nothing , Just binary) -> (True , L.pack $ B64.decode binary)
+ (Just text, Nothing ) -> (False, L.pack $ UTF8.encode text)
+ (Nothing , Just binary) -> (True , L8.pack $ decodeBase64 $ dropWhitespace binary)
_ -> error "one of textData or binaryData is required"
-
- returnA -< Entity {
+ mimeType
+ = if isBinary then
+ if null mimeTypeStr then
+ guessMIMEType content
+ else
+ read mimeTypeStr
+ else
+ read mimeTypeStr
+ returnA ⤠Entity {
entityName = name
, entityType = mimeType
- , entityLanguage = lang
- , entityFileName = fileName
+ , entityLanguage = CI.mk â T.pack <$> lang
, entityIsTheme = isTheme
, entityIsFeed = isFeed
, entityIsLocked = isLocked
- , entityIsBoring = isBoring
, entityIsBinary = isBinary
, entityRevision = undefined
, entityLastMod = undefined
, entitySummary = summary
- , entityOtherLang = M.fromList otherLang
+ , entityOtherLang = M.fromList ((CI.mk â T.pack â T.pack) <$> otherLang)
, entityContent = content
, entityUpdateInfo = updateInfo
}
-
-parseUpdateInfo :: (ArrowXml a, ArrowChoice a) => a XmlTree UpdateInfo
+-- | Read the @/page/updateInfo@ element of a serialised page: its
+-- @oldRevision@ attribute (parsed with 'read') and, when present, the
+-- @from@ attribute of a nested @move@ element as the old page name.
+parseUpdateInfo ∷ (ArrowXml (⇝), ArrowChoice (⇝)) ⇒ XmlTree ⇝ UpdateInfo
 parseUpdateInfo
     = proc tree
-    -> do uInfo <- getXPathTreesInDoc "/*/updateInfo" -< tree
-          oldRev <- (getAttrValue0 "oldRevision" >>> arr read) -< uInfo
-          oldName <- maybeA (getXPathTrees "/move/@from/text()" >>> getText) -< uInfo
-          returnA -< UpdateInfo {
-                       uiOldRevision = oldRev
-                     , uiOldName = oldName
-                     }
-
-
\ No newline at end of file
+    -> do uInfo ← getXPathTreesInDoc "/page/updateInfo" ⤙ tree
+          oldRev ← (getAttrValue0 "oldRevision" ⋙ arr read) ⤙ uInfo
+          oldName ← maybeA (getXPathTrees "/updateInfo/move/@from/text()" ⋙ getText) ⤙ uInfo
+          returnA ⤙ UpdateInfo {
+                      uiOldRevision = oldRev
+                    , uiOldName = T.pack <$> oldName
+                    }
+
+-- | Remove every character for which 'isSpace' holds, wherever it occurs
+-- in the string.
+dropWhitespace :: String -> String
+{-# INLINE dropWhitespace #-}
+dropWhitespace = filter ((¬) ∘ isSpace)