X-Git-Url: http://git.cielonegro.org/gitweb.cgi?p=Rakka.git;a=blobdiff_plain;f=Rakka%2FPage.hs;h=b4c88fcc5f2fef07de28d67825e62b68f6c03112;hp=2e3ea45cae6250310d6d88191e2c4e4208b957d8;hb=HEAD;hpb=03585f9c5773f6c0b59497f4f563909576c402b5 diff --git a/Rakka/Page.hs b/Rakka/Page.hs index 2e3ea45..b4c88fc 100644 --- a/Rakka/Page.hs +++ b/Rakka/Page.hs @@ -1,67 +1,393 @@ +{-# LANGUAGE + Arrows + , TypeOperators + , UnicodeSyntax + #-} module Rakka.Page ( PageName , Page(..) + , UpdateInfo(..) + , LanguageTag + , LanguageName + + , isRedirect + , isEntity + + , pageName + , pageUpdateInfo + , pageRevision + , encodePageName , decodePageName + , mkPageURI + , mkPageFragmentURI + , mkObjectURI + , mkFragmentURI + , mkAuxiliaryURI + , mkFeedURI + , mkRakkaURI + + , xmlizePage + , parseXmlizedPage ) where - -import Data.ByteString.Base (LazyByteString) -import qualified Data.ByteString.Char8 as C8 -import Data.Encoding -import Data.Encoding.UTF8 -import Network.HTTP.Lucu -import Network.URI +import Control.Applicative +import Control.Arrow +import Control.Arrow.ArrowIO +import Control.Arrow.ArrowList +import Control.Arrow.Unicode +import qualified Codec.Binary.UTF8.String as UTF8 +import qualified Data.ByteString.Char8 as B8 +import qualified Data.ByteString.Lazy as Lazy (ByteString) +import qualified Data.ByteString.Lazy as L hiding (ByteString) +import qualified Data.ByteString.Lazy.Char8 as L8 hiding (ByteString) +import Data.CaseInsensitive (CI) +import qualified Data.CaseInsensitive as CI +import Data.Char +import Data.Map (Map) +import qualified Data.Map as M +import Data.Text (Text) +import qualified Data.Text as T +import Data.Text.Encoding +import Data.Time +import qualified Data.Time.W3C as W3C +import Network.HTTP.Lucu hiding (redirect) +import Network.URI hiding (fragment) +import OpenSSL.EVP.Base64 +import Prelude.Unicode +import Rakka.Utils import Subversion.Types -import System.Time - - -type PageName = String +import System.FilePath.Posix +import Text.XML.HXT.Arrow.XmlArrow +import Text.XML.HXT.DOM.TypeDefs +import Text.XML.HXT.XPath +type PageName = Text +type LanguageTag = CI Text -- See RFC 3066: http://www.ietf.org/rfc/rfc3066.txt +type LanguageName = Text -- i.e. "日本語" data Page = Redirection { - redirName :: PageName - , redirDest :: PageName - , redirRevision :: Maybe RevNum - , redirLastMod :: CalendarTime + redirName :: !PageName + , redirDest :: !PageName + , redirIsLocked :: !Bool + , redirRevision :: RevNum + , redirLastMod :: UTCTime + , redirUpdateInfo :: Maybe UpdateInfo } | Entity { - pageName :: PageName - , pageType :: MIMEType - , pageIsTheme :: Bool -- text/css 以外では無意味 - , pageIsFeed :: Bool -- text/x-rakka 以外では無意味 - , pageIsLocked :: Bool - , pageIsBoring :: Bool - , pageRevision :: Maybe RevNum - , pageLastMod :: CalendarTime - , pageSummary :: Maybe String - , pageOtherLang :: [(String, PageName)] - , pageContent :: LazyByteString + entityName :: !PageName + , entityType :: !MIMEType + , entityLanguage :: !(Maybe LanguageTag) + , entityIsTheme :: !Bool -- text/css 以外では無意味 + , entityIsFeed :: !Bool -- text/x-rakka 以外では無意味 + , entityIsLocked :: !Bool + , entityIsBinary :: !Bool + , entityRevision :: RevNum + , entityLastMod :: UTCTime + , entitySummary :: !(Maybe String) + , entityOtherLang :: !(Map LanguageTag PageName) + , entityContent :: !Lazy.ByteString + , entityUpdateInfo :: Maybe UpdateInfo + } + deriving (Show, Eq) + +data UpdateInfo + = UpdateInfo { + uiOldRevision :: !RevNum + , uiOldName :: !(Maybe PageName) } + deriving (Show, Eq) + + +isRedirect :: Page -> Bool +isRedirect (Redirection _ _ _ _ _ _) = True +isRedirect _ = False + + +isEntity :: Page -> Bool +isEntity (Entity _ _ _ _ _ _ _ _ _ _ _ _ _) = True +isEntity _ = False + + +pageName :: Page -> PageName +pageName p + | isRedirect p = redirName p + | isEntity p = entityName p + | otherwise = error "neither redirection nor entity" + + +pageUpdateInfo :: Page -> Maybe UpdateInfo +pageUpdateInfo p + | isRedirect p = redirUpdateInfo p + | isEntity p = entityUpdateInfo p + | otherwise = error "neither redirection nor entity" + + +pageRevision :: Page -> RevNum +pageRevision p + | isRedirect p = redirRevision p + | isEntity p = entityRevision p + | otherwise = error "neither redirection nor entity" -- UTF-8 に encode してから 0x20 - 0x7E の範圍を除いて URI escape する。 -encodePageName :: PageName -> FilePath -encodePageName = escapeURIString shouldEscape . C8.unpack . encode UTF8 +encodePageName ∷ PageName → FilePath +encodePageName = escapeURIString isSafeChar ∘ UTF8.encodeString ∘ fixPageName ∘ T.unpack where - shouldEscape :: Char -> Bool - shouldEscape c - | c >= ' ' && c <= '~' = False - | otherwise = True + fixPageName ∷ String → String + fixPageName = capitalizeHead ∘ map (\c → if c ≡ ' ' then '_' else c) + capitalizeHead ∷ String → String + capitalizeHead [] = (⊥) + capitalizeHead (x:xs) = toUpper x : xs --- URI unescape して UTF-8 から decode する。 -decodePageName :: FilePath -> PageName -decodePageName = decode UTF8 . C8.pack . unEscapeString +-- FIXME: use system-filepath +decodePageName ∷ FilePath → PageName +decodePageName = T.pack ∘ UTF8.decodeString ∘ unEscapeString +encodeFragment ∷ Text → String +encodeFragment = escapeURIString isSafeChar ∘ B8.unpack ∘ encodeUtf8 -mkPageURI :: URI -> PageName -> URI +mkPageURI ∷ URI → PageName → URI mkPageURI baseURI name - | uriPath baseURI == "" = baseURI { uriPath = "/" ++ encoded } - | uriPath baseURI == "/" = baseURI { uriPath = "/" ++ encoded } - | last (uriPath baseURI) == '/' = baseURI { uriPath = uriPath baseURI ++ encoded } - | otherwise = baseURI { uriPath = uriPath baseURI ++ "/" ++ encoded } + = baseURI { + uriPath = uriPath baseURI encodePageName name <.> "html" + } + +mkPageFragmentURI ∷ URI → PageName → Text → URI +mkPageFragmentURI baseURI name fragment + = baseURI { + uriPath = uriPath baseURI encodePageName name <.> "html" + , uriFragment = ('#' : encodeFragment fragment) + } + +mkFragmentURI ∷ Text → URI +mkFragmentURI fragment + = nullURI { + uriFragment = ('#' : encodeFragment fragment) + } + + +mkObjectURI :: URI -> PageName -> URI +mkObjectURI baseURI name + = mkAuxiliaryURI baseURI ["object"] name + + +mkAuxiliaryURI :: URI -> [String] -> PageName -> URI +mkAuxiliaryURI baseURI basePath name + = baseURI { + uriPath = foldl () "/" ([uriPath baseURI] ++ basePath ++ [encodePageName name]) + } + + +mkFeedURI :: URI -> PageName -> URI +mkFeedURI baseURI name + = baseURI { + uriPath = uriPath baseURI encodePageName name <.> "rdf" + } + + +mkRakkaURI :: PageName -> URI +mkRakkaURI name = URI { + uriScheme = "rakka:" + , uriAuthority = Nothing + , uriPath = encodePageName name + , uriQuery = "" + , uriFragment = "" + } + + +{- + + + + blah blah... + -- 存在しない場合もある + + -- 存在しない場合もある + + + + + + blah blah... + + + SKJaHKS8JK/DH8KS43JDK2aKKaSFLLS... + + + + +-} +xmlizePage :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree +xmlizePage + = proc page + -> if isRedirect page then + xmlizeRedirection -< page + else + xmlizeEntity -< page where - encoded = encodePageName name + xmlizeRedirection :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree + xmlizeRedirection + = proc page + -> do lastMod <- arrIO (utcToLocalZonedTime . redirLastMod) -< page + ( eelem "/" + += ( eelem "page" + += sattr "name" (T.unpack $ redirName page ) + += sattr "redirect" (T.unpack $ redirDest page ) + += sattr "isLocked" (yesOrNo $ redirIsLocked page) + += sattr "revision" (show $ redirRevision page) + += sattr "lastModified" (W3C.format lastMod) + )) -<< () + + xmlizeEntity :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree + xmlizeEntity + = proc page + -> do lastMod <- arrIO (utcToLocalZonedTime . entityLastMod) -< page + ( eelem "/" + += ( eelem "page" + += sattr "name" (T.unpack $ pageName page) + += sattr "type" (show $ entityType page) + += ( case entityLanguage page of + Just x -> sattr "lang" (T.unpack $ CI.foldedCase x) + Nothing -> none + ) + += ( case entityType page of + MIMEType "text" "css" _ + -> sattr "isTheme" (yesOrNo $ entityIsTheme page) + MIMEType "text" "x-rakka" _ + -> sattr "isFeed" (yesOrNo $ entityIsFeed page) + _ + -> none + ) + += sattr "isLocked" (yesOrNo $ entityIsLocked page) + += sattr "isBinary" (yesOrNo $ entityIsBinary page) + += sattr "revision" (show $ entityRevision page) + += sattr "lastModified" (W3C.format lastMod) + += ( case entitySummary page of + Just s -> eelem "summary" += txt s + Nothing -> none + ) + += ( if M.null (entityOtherLang page) then + none + else + selem "otherLang" + [ eelem "link" + += sattr "lang" (T.unpack $ CI.foldedCase lang) + += sattr "page" (T.unpack name) + | (lang, name) ← M.toList (entityOtherLang page) ] + ) + += ( if entityIsBinary page then + ( eelem "binaryData" + += txt (L8.unpack $ encodeBase64LBS $ entityContent page) + ) + else + ( eelem "textData" + += txt (UTF8.decode $ L.unpack $ entityContent page) + ) + ) + )) -<< () + +parseXmlizedPage ∷ (ArrowXml (⇝), ArrowChoice (⇝)) ⇒ (PageName, XmlTree) ⇝ Page +parseXmlizedPage + = proc (name, tree) + → do updateInfo ← maybeA parseUpdateInfo ⤙ tree + redirect ← maybeA (getXPathTreesInDoc "/page/@redirect/text()" ⋙ getText) ⤙ tree + isLocked ← (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" ⋙ getText) "no" + ⋙ parseYesOrNo) ⤙ tree + case redirect of + Nothing → parseEntity ⤙ (name, tree) + Just dest → returnA ⤙ Redirection { + redirName = name + , redirDest = T.pack dest + , redirIsLocked = isLocked + , redirRevision = undefined + , redirLastMod = undefined + , redirUpdateInfo = updateInfo + } + +parseEntity :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page +parseEntity + = proc (name, tree) + -> do updateInfo <- maybeA parseUpdateInfo -< tree + + mimeTypeStr <- withDefault (getXPathTreesInDoc "/page/@type/text()" >>> getText) "" -< tree + + lang <- maybeA (getXPathTreesInDoc "/page/@lang/text()" >>> getText) -< tree + + isTheme <- (withDefault (getXPathTreesInDoc "/page/@isTheme/text()" >>> getText) "no" + >>> parseYesOrNo) -< tree + isFeed <- (withDefault (getXPathTreesInDoc "/page/@isFeed/text()" >>> getText) "no" + >>> parseYesOrNo) -< tree + isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no" + >>> parseYesOrNo) -< tree + + summary <- (maybeA (getXPathTreesInDoc "/page/summary/text()" + >>> getText + >>> deleteIfEmpty)) -< tree + + otherLang <- listA (getXPathTreesInDoc "/page/otherLang/link" + >>> + (getAttrValue0 "lang" + &&& + getAttrValue0 "page")) -< tree + + textData <- maybeA (getXPathTreesInDoc "/page/textData/text()" >>> getText) -< tree + binaryData <- maybeA (getXPathTreesInDoc "/page/binaryData/text()" >>> getText) -< tree + + let (isBinary, content) + = case (textData, binaryData) of + (Just text, Nothing ) -> (False, L.pack $ UTF8.encode text) + (Nothing , Just binary) -> (True , L8.pack $ decodeBase64 $ dropWhitespace binary) + _ -> error "one of textData or binaryData is required" + mimeType + = if isBinary then + if null mimeTypeStr then + guessMIMEType content + else + read mimeTypeStr + else + read mimeTypeStr + returnA ⤙ Entity { + entityName = name + , entityType = mimeType + , entityLanguage = CI.mk ∘ T.pack <$> lang + , entityIsTheme = isTheme + , entityIsFeed = isFeed + , entityIsLocked = isLocked + , entityIsBinary = isBinary + , entityRevision = undefined + , entityLastMod = undefined + , entitySummary = summary + , entityOtherLang = M.fromList ((CI.mk ∘ T.pack ⁂ T.pack) <$> otherLang) + , entityContent = content + , entityUpdateInfo = updateInfo + } + +parseUpdateInfo ∷ (ArrowXml (⇝), ArrowChoice (⇝)) ⇒ XmlTree ⇝ UpdateInfo +parseUpdateInfo + = proc tree + -> do uInfo ← getXPathTreesInDoc "/page/updateInfo" ⤙ tree + oldRev ← (getAttrValue0 "oldRevision" ⋙ arr read) ⤙ uInfo + oldName ← maybeA (getXPathTrees "/updateInfo/move/@from/text()" ⋙ getText) ⤙ uInfo + returnA ⤙ UpdateInfo { + uiOldRevision = oldRev + , uiOldName = T.pack <$> oldName + } + +dropWhitespace :: String -> String +{-# INLINE dropWhitespace #-} +dropWhitespace = filter ((¬) ∘ isSpace)