module Rakka.Page ( PageName , Page(..) , UpdateInfo(..) , LanguageTag , LanguageName , isRedirect , isEntity , pageName , pageUpdateInfo , pageRevision , encodePageName , decodePageName , mkPageURI , mkPageFragmentURI , mkObjectURI , mkFragmentURI , mkAuxiliaryURI , mkFeedURI , mkRakkaURI , xmlizePage , parseXmlizedPage ) where import qualified Codec.Binary.Base64 as B64 import qualified Codec.Binary.UTF8.String as UTF8 import Control.Arrow import Control.Arrow.ArrowIO import Control.Arrow.ArrowList import qualified Data.ByteString.Lazy as Lazy (ByteString) import qualified Data.ByteString.Lazy as L hiding (ByteString) import Data.Char import Data.Map (Map) import qualified Data.Map as M import Data.Maybe import Data.Time import Network.HTTP.Lucu hiding (redirect) import Network.URI hiding (fragment) import Rakka.Utils import Rakka.W3CDateTime import Subversion.Types import System.FilePath.Posix import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlNodeSet import Text.XML.HXT.DOM.TypeDefs type PageName = String type LanguageTag = String -- See RFC 3066: http://www.ietf.org/rfc/rfc3066.txt type LanguageName = String -- i.e. "日本語" data Page = Redirection { redirName :: !PageName , redirDest :: !PageName , redirIsLocked :: !Bool , redirRevision :: RevNum , redirLastMod :: UTCTime , redirUpdateInfo :: Maybe UpdateInfo } | Entity { entityName :: !PageName , entityType :: !MIMEType , entityLanguage :: !(Maybe LanguageTag) , entityIsTheme :: !Bool -- text/css 以外では無意味 , entityIsFeed :: !Bool -- text/x-rakka 以外では無意味 , entityIsLocked :: !Bool , entityIsBinary :: !Bool , entityRevision :: RevNum , entityLastMod :: UTCTime , entitySummary :: !(Maybe String) , entityOtherLang :: !(Map LanguageTag PageName) , entityContent :: !Lazy.ByteString , entityUpdateInfo :: Maybe UpdateInfo } deriving (Show, Eq) data UpdateInfo = UpdateInfo { uiOldRevision :: !RevNum , uiOldName :: !(Maybe PageName) } deriving (Show, Eq) isRedirect :: Page -> Bool isRedirect (Redirection _ _ _ _ _ _) = True isRedirect _ = False isEntity :: Page -> Bool isEntity (Entity _ _ _ _ _ _ _ _ _ _ _ _ _) = True isEntity _ = False pageName :: Page -> PageName pageName p | isRedirect p = redirName p | isEntity p = entityName p | otherwise = error "neither redirection nor entity" pageUpdateInfo :: Page -> Maybe UpdateInfo pageUpdateInfo p | isRedirect p = redirUpdateInfo p | isEntity p = entityUpdateInfo p | otherwise = error "neither redirection nor entity" pageRevision :: Page -> RevNum pageRevision p | isRedirect p = redirRevision p | isEntity p = entityRevision p | otherwise = error "neither redirection nor entity" -- UTF-8 に encode してから 0x20 - 0x7E の範圍を除いて URI escape する。 encodePageName :: PageName -> FilePath encodePageName = escapeURIString isSafeChar . UTF8.encodeString . fixPageName where fixPageName :: PageName -> PageName fixPageName = (\ (x:xs) -> toUpper x : xs) . map (\ c -> if c == ' ' then '_' else c) isSafeChar :: Char -> Bool isSafeChar c | c == '/' = True | isReserved c = False | c > ' ' && c <= '~' = True | otherwise = False -- URI unescape して UTF-8 から decode する。 decodePageName :: FilePath -> PageName decodePageName = UTF8.decodeString . unEscapeString encodeFragment :: String -> String encodeFragment = escapeURIString isSafeChar . UTF8.encodeString mkPageURI :: URI -> PageName -> URI mkPageURI baseURI name = baseURI { uriPath = "/" uriPath baseURI encodePageName name <.> "html" } mkPageFragmentURI :: URI -> PageName -> String -> URI mkPageFragmentURI baseURI name fragment = baseURI { uriPath = "/" uriPath baseURI encodePageName name <.> "html" , uriFragment = ('#' : encodeFragment fragment) } mkFragmentURI :: String -> URI mkFragmentURI fragment = nullURI { uriFragment = ('#' : encodeFragment fragment) } mkObjectURI :: URI -> PageName -> URI mkObjectURI baseURI name = mkAuxiliaryURI baseURI ["object"] name mkAuxiliaryURI :: URI -> [String] -> PageName -> URI mkAuxiliaryURI baseURI basePath name = baseURI { uriPath = foldl () "/" ([uriPath baseURI] ++ basePath ++ [encodePageName name]) } mkFeedURI :: URI -> PageName -> URI mkFeedURI baseURI name = baseURI { uriPath = "/" uriPath baseURI encodePageName name <.> "rdf" } mkRakkaURI :: PageName -> URI mkRakkaURI name = URI { uriScheme = "rakka:" , uriAuthority = Nothing , uriPath = encodePageName name , uriQuery = "" , uriFragment = "" } {- blah blah... -- 存在しない場合もある -- 存在しない場合もある blah blah... SKJaHKS8JK/DH8KS43JDK2aKKaSFLLS... -} xmlizePage :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree xmlizePage = proc page -> if isRedirect page then xmlizeRedirection -< page else xmlizeEntity -< page where xmlizeRedirection :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree xmlizeRedirection = proc page -> do lastMod <- arrIO (utcToLocalZonedTime . redirLastMod) -< page ( eelem "/" += ( eelem "page" += sattr "name" (redirName page) += sattr "redirect" (redirDest page) += sattr "isLocked" (yesOrNo $ redirIsLocked page) += sattr "revision" (show $ redirRevision page) += sattr "lastModified" (formatW3CDateTime lastMod) )) -<< () xmlizeEntity :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree xmlizeEntity = proc page -> do lastMod <- arrIO (utcToLocalZonedTime . entityLastMod) -< page ( eelem "/" += ( eelem "page" += sattr "name" (pageName page) += sattr "type" (show $ entityType page) += ( case entityLanguage page of Just x -> sattr "lang" x Nothing -> none ) += ( case entityType page of MIMEType "text" "css" _ -> sattr "isTheme" (yesOrNo $ entityIsTheme page) MIMEType "text" "x-rakka" _ -> sattr "isFeed" (yesOrNo $ entityIsFeed page) _ -> none ) += sattr "isLocked" (yesOrNo $ entityIsLocked page) += sattr "isBinary" (yesOrNo $ entityIsBinary page) += sattr "revision" (show $ entityRevision page) += sattr "lastModified" (formatW3CDateTime lastMod) += ( case entitySummary page of Just s -> eelem "summary" += txt s Nothing -> none ) += ( if M.null (entityOtherLang page) then none else selem "otherLang" [ eelem "link" += sattr "lang" lang += sattr "page" name | (lang, name) <- M.toList (entityOtherLang page) ] ) += ( if entityIsBinary page then ( eelem "binaryData" += txt (B64.encode $ L.unpack $ entityContent page) ) else ( eelem "textData" += txt (UTF8.decode $ L.unpack $ entityContent page) ) ) )) -<< () parseXmlizedPage :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page parseXmlizedPage = proc (name, tree) -> do updateInfo <- maybeA parseUpdateInfo -< tree redirect <- maybeA (getXPathTreesInDoc "/page/@redirect/text()" >>> getText) -< tree isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no" >>> parseYesOrNo) -< tree case redirect of Nothing -> parseEntity -< (name, tree) Just dest -> returnA -< (Redirection { redirName = name , redirDest = dest , redirIsLocked = isLocked , redirRevision = undefined , redirLastMod = undefined , redirUpdateInfo = updateInfo }) parseEntity :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page parseEntity = proc (name, tree) -> do updateInfo <- maybeA parseUpdateInfo -< tree mimeTypeStr <- withDefault (getXPathTreesInDoc "/page/@type/text()" >>> getText) "" -< tree lang <- maybeA (getXPathTreesInDoc "/page/@lang/text()" >>> getText) -< tree isTheme <- (withDefault (getXPathTreesInDoc "/page/@isTheme/text()" >>> getText) "no" >>> parseYesOrNo) -< tree isFeed <- (withDefault (getXPathTreesInDoc "/page/@isFeed/text()" >>> getText) "no" >>> parseYesOrNo) -< tree isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no" >>> parseYesOrNo) -< tree summary <- (maybeA (getXPathTreesInDoc "/page/summary/text()" >>> getText >>> deleteIfEmpty)) -< tree otherLang <- listA (getXPathTreesInDoc "/page/otherLang/link" >>> (getAttrValue0 "lang" &&& getAttrValue0 "page")) -< tree textData <- maybeA (getXPathTreesInDoc "/page/textData/text()" >>> getText) -< tree binaryData <- maybeA (getXPathTreesInDoc "/page/binaryData/text()" >>> getText) -< tree let (isBinary, content) = case (textData, binaryData) of (Just text, Nothing ) -> (False, L.pack $ UTF8.encode text ) (Nothing , Just binary) -> (True , L.pack $ fromJust $ B64.decode $ dropWhitespace binary) _ -> error "one of textData or binaryData is required" mimeType = if isBinary then if null mimeTypeStr then guessMIMEType content else read mimeTypeStr else read mimeTypeStr returnA -< Entity { entityName = name , entityType = mimeType , entityLanguage = lang , entityIsTheme = isTheme , entityIsFeed = isFeed , entityIsLocked = isLocked , entityIsBinary = isBinary , entityRevision = undefined , entityLastMod = undefined , entitySummary = summary , entityOtherLang = M.fromList otherLang , entityContent = content , entityUpdateInfo = updateInfo } where dropWhitespace :: String -> String dropWhitespace [] = [] dropWhitespace (x:xs) | x == ' ' || x == '\t' || x == '\n' = dropWhitespace xs | otherwise = x : dropWhitespace xs parseUpdateInfo :: (ArrowXml a, ArrowChoice a) => a XmlTree UpdateInfo parseUpdateInfo = proc tree -> do uInfo <- getXPathTreesInDoc "/page/updateInfo" -< tree oldRev <- (getAttrValue0 "oldRevision" >>> arr read) -< uInfo oldName <- maybeA (getXPathTrees "/updateInfo/move/@from/text()" >>> getText) -< uInfo returnA -< UpdateInfo { uiOldRevision = oldRev , uiOldName = oldName }