X-Git-Url: https://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=Rakka%2FPage.hs;h=b4c88fcc5f2fef07de28d67825e62b68f6c03112;hb=45bce2c29948649f74ada71f2fa851bdb812e96c;hp=5647c8e1481d61df62cf49828164e302cbdb3923;hpb=35f9c2ec3c1e81e42737c54f3f1a8230427691c6;p=Rakka.git
diff --git a/Rakka/Page.hs b/Rakka/Page.hs
index 5647c8e..b4c88fc 100644
--- a/Rakka/Page.hs
+++ b/Rakka/Page.hs
@@ -1,40 +1,393 @@
+{-# LANGUAGE
+ Arrows
+ , TypeOperators
+ , UnicodeSyntax
+ #-}
module Rakka.Page
( PageName
, Page(..)
+ , UpdateInfo(..)
+ , LanguageTag
+ , LanguageName
+
+ , isRedirect
+ , isEntity
+
+ , pageName
+ , pageUpdateInfo
+ , pageRevision
+
, encodePageName
+ , decodePageName
+
+ , mkPageURI
+ , mkPageFragmentURI
+ , mkObjectURI
+ , mkFragmentURI
+ , mkAuxiliaryURI
+ , mkFeedURI
+ , mkRakkaURI
+
+ , xmlizePage
+ , parseXmlizedPage
)
where
+import Control.Applicative
+import Control.Arrow
+import Control.Arrow.ArrowIO
+import Control.Arrow.ArrowList
+import Control.Arrow.Unicode
+import qualified Codec.Binary.UTF8.String as UTF8
+import qualified Data.ByteString.Char8 as B8
+import qualified Data.ByteString.Lazy as Lazy (ByteString)
+import qualified Data.ByteString.Lazy as L hiding (ByteString)
+import qualified Data.ByteString.Lazy.Char8 as L8 hiding (ByteString)
+import Data.CaseInsensitive (CI)
+import qualified Data.CaseInsensitive as CI
+import Data.Char
+import Data.Map (Map)
+import qualified Data.Map as M
+import Data.Text (Text)
+import qualified Data.Text as T
+import Data.Text.Encoding
+import Data.Time
+import qualified Data.Time.W3C as W3C
+import Network.HTTP.Lucu hiding (redirect)
+import Network.URI hiding (fragment)
+import OpenSSL.EVP.Base64
+import Prelude.Unicode
+import Rakka.Utils
+import Subversion.Types
+import System.FilePath.Posix
+import Text.XML.HXT.Arrow.XmlArrow
+import Text.XML.HXT.DOM.TypeDefs
+import Text.XML.HXT.XPath
+
+type PageName = Text
+type LanguageTag = CI Text -- See RFC 3066: http://www.ietf.org/rfc/rfc3066.txt
+type LanguageName = Text -- i.e. "æ¥æ¬èª"
+
+data Page
+ = Redirection {
+ redirName :: !PageName
+ , redirDest :: !PageName
+ , redirIsLocked :: !Bool
+ , redirRevision :: RevNum
+ , redirLastMod :: UTCTime
+ , redirUpdateInfo :: Maybe UpdateInfo
+ }
+ | Entity {
+ entityName :: !PageName
+ , entityType :: !MIMEType
+ , entityLanguage :: !(Maybe LanguageTag)
+ , entityIsTheme :: !Bool -- text/css 以å¤ã§ã¯ç¡æå³
+ , entityIsFeed :: !Bool -- text/x-rakka 以å¤ã§ã¯ç¡æå³
+ , entityIsLocked :: !Bool
+ , entityIsBinary :: !Bool
+ , entityRevision :: RevNum
+ , entityLastMod :: UTCTime
+ , entitySummary :: !(Maybe String)
+ , entityOtherLang :: !(Map LanguageTag PageName)
+ , entityContent :: !Lazy.ByteString
+ , entityUpdateInfo :: Maybe UpdateInfo
+ }
+ deriving (Show, Eq)
-import Data.ByteString.Base (LazyByteString)
-import qualified Data.ByteString.Char8 as C8
-import Data.Encoding
-import Data.Encoding.UTF8
-import Network.HTTP.Lucu
-import Network.URI
+data UpdateInfo
+ = UpdateInfo {
+ uiOldRevision :: !RevNum
+ , uiOldName :: !(Maybe PageName)
+ }
+ deriving (Show, Eq)
-type PageName = String
+isRedirect :: Page -> Bool
+isRedirect (Redirection _ _ _ _ _ _) = True
+isRedirect _ = False
-data Page
- = Redirect PageName
- | Page {
- pageType :: MIMEType
- , pageIsTheme :: Bool -- text/css 以å¤ã§ã¯ç¡æå³
- , pageIsFeed :: Bool -- text/x-rakka 以å¤ã§ã¯ç¡æå³
- , pageIsLocked :: Bool
- , pageIsBoring :: Bool
- , pageSummary :: Maybe String
- , pageOtherLang :: [(String, PageName)]
- , pageContent :: LazyByteString
- }
+isEntity :: Page -> Bool
+isEntity (Entity _ _ _ _ _ _ _ _ _ _ _ _ _) = True
+isEntity _ = False
+
+
+pageName :: Page -> PageName
+pageName p
+ | isRedirect p = redirName p
+ | isEntity p = entityName p
+ | otherwise = error "neither redirection nor entity"
+
+
+pageUpdateInfo :: Page -> Maybe UpdateInfo
+pageUpdateInfo p
+ | isRedirect p = redirUpdateInfo p
+ | isEntity p = entityUpdateInfo p
+ | otherwise = error "neither redirection nor entity"
+
+
+pageRevision :: Page -> RevNum
+pageRevision p
+ | isRedirect p = redirRevision p
+ | isEntity p = entityRevision p
+ | otherwise = error "neither redirection nor entity"
-- UTF-8 ã« encode ãã¦ãã 0x20 - 0x7E ã®ç¯åãé¤ã㦠URI escape ããã
-encodePageName :: PageName -> FilePath
-encodePageName = escapeURIString shouldEscape . C8.unpack . encode UTF8
+encodePageName â· PageName â FilePath
+encodePageName = escapeURIString isSafeChar â UTF8.encodeString â fixPageName â T.unpack
+ where
+ fixPageName â· String â String
+ fixPageName = capitalizeHead â map (\c â if c â¡ ' ' then '_' else c)
+
+ capitalizeHead â· String â String
+ capitalizeHead [] = (â¥)
+ capitalizeHead (x:xs) = toUpper x : xs
+
+-- FIXME: use system-filepath
+decodePageName â· FilePath â PageName
+decodePageName = T.pack â UTF8.decodeString â unEscapeString
+
+encodeFragment â· Text â String
+encodeFragment = escapeURIString isSafeChar â B8.unpack â encodeUtf8
+
+mkPageURI â· URI â PageName â URI
+mkPageURI baseURI name
+ = baseURI {
+ uriPath = uriPath baseURI > encodePageName name <.> "html"
+ }
+
+mkPageFragmentURI â· URI â PageName â Text â URI
+mkPageFragmentURI baseURI name fragment
+ = baseURI {
+ uriPath = uriPath baseURI > encodePageName name <.> "html"
+ , uriFragment = ('#' : encodeFragment fragment)
+ }
+
+mkFragmentURI â· Text â URI
+mkFragmentURI fragment
+ = nullURI {
+ uriFragment = ('#' : encodeFragment fragment)
+ }
+
+
+mkObjectURI :: URI -> PageName -> URI
+mkObjectURI baseURI name
+ = mkAuxiliaryURI baseURI ["object"] name
+
+
+mkAuxiliaryURI :: URI -> [String] -> PageName -> URI
+mkAuxiliaryURI baseURI basePath name
+ = baseURI {
+ uriPath = foldl (>) "/" ([uriPath baseURI] ++ basePath ++ [encodePageName name])
+ }
+
+
+mkFeedURI :: URI -> PageName -> URI
+mkFeedURI baseURI name
+ = baseURI {
+ uriPath = uriPath baseURI > encodePageName name <.> "rdf"
+ }
+
+
+mkRakkaURI :: PageName -> URI
+mkRakkaURI name = URI {
+ uriScheme = "rakka:"
+ , uriAuthority = Nothing
+ , uriPath = encodePageName name
+ , uriQuery = ""
+ , uriFragment = ""
+ }
+
+
+{-
+
+
+
+ blah blah...
+ -- åå¨ããªãå ´åããã
+
+ -- åå¨ããªãå ´åããã
+
+
+
+
+
+ blah blah...
+
+
+ SKJaHKS8JK/DH8KS43JDK2aKKaSFLLS...
+
+
+
+
+-}
+xmlizePage :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
+xmlizePage
+ = proc page
+ -> if isRedirect page then
+ xmlizeRedirection -< page
+ else
+ xmlizeEntity -< page
where
- shouldEscape :: Char -> Bool
- shouldEscape c
- | c >= ' ' && c <= '~' = False
- | otherwise = True
+ xmlizeRedirection :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
+ xmlizeRedirection
+ = proc page
+ -> do lastMod <- arrIO (utcToLocalZonedTime . redirLastMod) -< page
+ ( eelem "/"
+ += ( eelem "page"
+ += sattr "name" (T.unpack $ redirName page )
+ += sattr "redirect" (T.unpack $ redirDest page )
+ += sattr "isLocked" (yesOrNo $ redirIsLocked page)
+ += sattr "revision" (show $ redirRevision page)
+ += sattr "lastModified" (W3C.format lastMod)
+ )) -<< ()
+
+ xmlizeEntity :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
+ xmlizeEntity
+ = proc page
+ -> do lastMod <- arrIO (utcToLocalZonedTime . entityLastMod) -< page
+ ( eelem "/"
+ += ( eelem "page"
+ += sattr "name" (T.unpack $ pageName page)
+ += sattr "type" (show $ entityType page)
+ += ( case entityLanguage page of
+ Just x -> sattr "lang" (T.unpack $ CI.foldedCase x)
+ Nothing -> none
+ )
+ += ( case entityType page of
+ MIMEType "text" "css" _
+ -> sattr "isTheme" (yesOrNo $ entityIsTheme page)
+ MIMEType "text" "x-rakka" _
+ -> sattr "isFeed" (yesOrNo $ entityIsFeed page)
+ _
+ -> none
+ )
+ += sattr "isLocked" (yesOrNo $ entityIsLocked page)
+ += sattr "isBinary" (yesOrNo $ entityIsBinary page)
+ += sattr "revision" (show $ entityRevision page)
+ += sattr "lastModified" (W3C.format lastMod)
+ += ( case entitySummary page of
+ Just s -> eelem "summary" += txt s
+ Nothing -> none
+ )
+ += ( if M.null (entityOtherLang page) then
+ none
+ else
+ selem "otherLang"
+ [ eelem "link"
+ += sattr "lang" (T.unpack $ CI.foldedCase lang)
+ += sattr "page" (T.unpack name)
+ | (lang, name) â M.toList (entityOtherLang page) ]
+ )
+ += ( if entityIsBinary page then
+ ( eelem "binaryData"
+ += txt (L8.unpack $ encodeBase64LBS $ entityContent page)
+ )
+ else
+ ( eelem "textData"
+ += txt (UTF8.decode $ L.unpack $ entityContent page)
+ )
+ )
+ )) -<< ()
+
+parseXmlizedPage â· (ArrowXml (â), ArrowChoice (â)) â (PageName, XmlTree) â Page
+parseXmlizedPage
+ = proc (name, tree)
+ â do updateInfo â maybeA parseUpdateInfo ⤠tree
+ redirect â maybeA (getXPathTreesInDoc "/page/@redirect/text()" â getText) ⤠tree
+ isLocked â (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" â getText) "no"
+ â parseYesOrNo) ⤠tree
+ case redirect of
+ Nothing â parseEntity ⤠(name, tree)
+ Just dest â returnA ⤠Redirection {
+ redirName = name
+ , redirDest = T.pack dest
+ , redirIsLocked = isLocked
+ , redirRevision = undefined
+ , redirLastMod = undefined
+ , redirUpdateInfo = updateInfo
+ }
+
+parseEntity :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page
+parseEntity
+ = proc (name, tree)
+ -> do updateInfo <- maybeA parseUpdateInfo -< tree
+
+ mimeTypeStr <- withDefault (getXPathTreesInDoc "/page/@type/text()" >>> getText) "" -< tree
+
+ lang <- maybeA (getXPathTreesInDoc "/page/@lang/text()" >>> getText) -< tree
+
+ isTheme <- (withDefault (getXPathTreesInDoc "/page/@isTheme/text()" >>> getText) "no"
+ >>> parseYesOrNo) -< tree
+ isFeed <- (withDefault (getXPathTreesInDoc "/page/@isFeed/text()" >>> getText) "no"
+ >>> parseYesOrNo) -< tree
+ isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no"
+ >>> parseYesOrNo) -< tree
+
+ summary <- (maybeA (getXPathTreesInDoc "/page/summary/text()"
+ >>> getText
+ >>> deleteIfEmpty)) -< tree
+
+ otherLang <- listA (getXPathTreesInDoc "/page/otherLang/link"
+ >>>
+ (getAttrValue0 "lang"
+ &&&
+ getAttrValue0 "page")) -< tree
+
+ textData <- maybeA (getXPathTreesInDoc "/page/textData/text()" >>> getText) -< tree
+ binaryData <- maybeA (getXPathTreesInDoc "/page/binaryData/text()" >>> getText) -< tree
+
+ let (isBinary, content)
+ = case (textData, binaryData) of
+ (Just text, Nothing ) -> (False, L.pack $ UTF8.encode text)
+ (Nothing , Just binary) -> (True , L8.pack $ decodeBase64 $ dropWhitespace binary)
+ _ -> error "one of textData or binaryData is required"
+ mimeType
+ = if isBinary then
+ if null mimeTypeStr then
+ guessMIMEType content
+ else
+ read mimeTypeStr
+ else
+ read mimeTypeStr
+ returnA ⤠Entity {
+ entityName = name
+ , entityType = mimeType
+ , entityLanguage = CI.mk â T.pack <$> lang
+ , entityIsTheme = isTheme
+ , entityIsFeed = isFeed
+ , entityIsLocked = isLocked
+ , entityIsBinary = isBinary
+ , entityRevision = undefined
+ , entityLastMod = undefined
+ , entitySummary = summary
+ , entityOtherLang = M.fromList ((CI.mk â T.pack â T.pack) <$> otherLang)
+ , entityContent = content
+ , entityUpdateInfo = updateInfo
+ }
+
+parseUpdateInfo â· (ArrowXml (â), ArrowChoice (â)) â XmlTree â UpdateInfo
+parseUpdateInfo
+ = proc tree
+ -> do uInfo â getXPathTreesInDoc "/page/updateInfo" ⤠tree
+ oldRev â (getAttrValue0 "oldRevision" â arr read) ⤠uInfo
+ oldName â maybeA (getXPathTrees "/updateInfo/move/@from/text()" â getText) ⤠uInfo
+ returnA ⤠UpdateInfo {
+ uiOldRevision = oldRev
+ , uiOldName = T.pack <$> oldName
+ }
+
+dropWhitespace :: String -> String
+{-# INLINE dropWhitespace #-}
+dropWhitespace = filter ((¬) â isSpace)