-- -*- coding: utf-8 -*- {-# LANGUAGE Arrows , UnicodeSyntax #-} module Rakka.Page ( PageName , Page(..) , UpdateInfo(..) , LanguageTag , LanguageName , isRedirect , isEntity , pageName , pageUpdateInfo , pageRevision , encodePageName , decodePageName , mkPageURI , mkPageFragmentURI , mkObjectURI , mkFragmentURI , mkAuxiliaryURI , mkFeedURI , mkRakkaURI , xmlizePage , parseXmlizedPage ) where import Control.Arrow import qualified Data.Ascii as Ascii import qualified Data.Text as T import qualified Data.ByteString.Lazy as Lazy (ByteString) import qualified Data.ByteString.Lazy as L hiding (ByteString) import qualified Data.ByteString.Lazy.Char8 as L8 hiding (ByteString) import Data.Char import Data.Map (Map) import qualified Data.Map as M import Data.Time import qualified Data.Time.W3C as W3C import Network.HTTP.Lucu hiding (redirect) import Network.URI hiding (fragment) import Rakka.Utils import Subversion.Types import System.FilePath.Posix import Text.XML.HXT.DOM.TypeDefs import Text.XML.HXT.XPath import Text.XML.HXT.Arrow.XmlArrow import Prelude.Unicode type PageName = T.Text type LanguageTag = Ascii -- See RFC 3066: http://www.ietf.org/rfc/rfc3066.txt type LanguageName = T.Text -- i.e. "日本語" data Page = Redirection { redirName :: !PageName , redirDest :: !PageName , redirIsLocked :: !Bool , redirRevision :: RevNum , redirLastMod :: UTCTime , redirUpdateInfo :: Maybe UpdateInfo } | Entity { entityName :: !PageName , entityType :: !MIMEType , entityLanguage :: !(Maybe LanguageTag) , entityIsTheme :: !Bool -- text/css 以外では無意味 , entityIsFeed :: !Bool -- text/x-rakka 以外では無意味 , entityIsLocked :: !Bool , entityIsBinary :: !Bool , entityRevision :: RevNum , entityLastMod :: UTCTime , entitySummary :: !(Maybe String) , entityOtherLang :: !(Map LanguageTag PageName) , entityContent :: !Lazy.ByteString , entityUpdateInfo :: Maybe UpdateInfo } deriving (Show, Eq) data UpdateInfo = UpdateInfo { uiOldRevision :: !RevNum , uiOldName :: !(Maybe PageName) } deriving (Show, Eq) isRedirect :: Page -> Bool isRedirect (Redirection _ _ _ _ _ _) = True isRedirect _ = False isEntity :: Page -> Bool isEntity (Entity _ _ _ _ _ _ _ _ _ _ _ _ _) = True isEntity _ = False pageName :: Page -> PageName pageName p | isRedirect p = redirName p | isEntity p = entityName p | otherwise = error "neither redirection nor entity" pageUpdateInfo :: Page -> Maybe UpdateInfo pageUpdateInfo p | isRedirect p = redirUpdateInfo p | isEntity p = entityUpdateInfo p | otherwise = error "neither redirection nor entity" pageRevision :: Page -> RevNum pageRevision p | isRedirect p = redirRevision p | isEntity p = entityRevision p | otherwise = error "neither redirection nor entity" -- UTF-8 に encode してから 0x20 - 0x7E の範圍を除いて URI escape する。 encodePageName :: PageName -> FilePath encodePageName = escapeURIString isSafeChar . UTF8.encodeString . fixPageName where fixPageName :: PageName -> PageName fixPageName = (\ (x:xs) -> toUpper x : xs) . map (\ c -> if c == ' ' then '_' else c) decodePageName :: FilePath -> PageName decodePageName = UTF8.decodeString . unEscapeString encodeFragment :: String -> String encodeFragment = escapeURIString isSafeChar . UTF8.encodeString mkPageURI :: URI -> PageName -> URI mkPageURI baseURI name = baseURI { uriPath = uriPath baseURI encodePageName name <.> "html" } mkPageFragmentURI :: URI -> PageName -> String -> URI mkPageFragmentURI baseURI name fragment = baseURI { uriPath = uriPath baseURI encodePageName name <.> "html" , uriFragment = ('#' : encodeFragment fragment) } mkFragmentURI :: String -> URI mkFragmentURI fragment = nullURI { uriFragment = ('#' : encodeFragment fragment) } mkObjectURI :: URI -> PageName -> URI mkObjectURI baseURI name = mkAuxiliaryURI baseURI ["object"] name mkAuxiliaryURI :: URI -> [String] -> PageName -> URI mkAuxiliaryURI baseURI basePath name = baseURI { uriPath = foldl () "/" ([uriPath baseURI] ++ basePath ++ [encodePageName name]) } mkFeedURI :: URI -> PageName -> URI mkFeedURI baseURI name = baseURI { uriPath = uriPath baseURI encodePageName name <.> "rdf" } mkRakkaURI :: PageName -> URI mkRakkaURI name = URI { uriScheme = "rakka:" , uriAuthority = Nothing , uriPath = encodePageName name , uriQuery = "" , uriFragment = "" } {- blah blah... -- 存在しない場合もある -- 存在しない場合もある blah blah... SKJaHKS8JK/DH8KS43JDK2aKKaSFLLS... -} xmlizePage :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree xmlizePage = proc page -> if isRedirect page then xmlizeRedirection -< page else xmlizeEntity -< page where xmlizeRedirection :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree xmlizeRedirection = proc page -> do lastMod <- arrIO (utcToLocalZonedTime . redirLastMod) -< page ( eelem "/" += ( eelem "page" += sattr "name" (redirName page) += sattr "redirect" (redirDest page) += sattr "isLocked" (yesOrNo $ redirIsLocked page) += sattr "revision" (show $ redirRevision page) += sattr "lastModified" (W3C.format lastMod) )) -<< () xmlizeEntity :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree xmlizeEntity = proc page -> do lastMod <- arrIO (utcToLocalZonedTime . entityLastMod) -< page ( eelem "/" += ( eelem "page" += sattr "name" (pageName page) += sattr "type" (show $ entityType page) += ( case entityLanguage page of Just x -> sattr "lang" x Nothing -> none ) += ( case entityType page of MIMEType "text" "css" _ -> sattr "isTheme" (yesOrNo $ entityIsTheme page) MIMEType "text" "x-rakka" _ -> sattr "isFeed" (yesOrNo $ entityIsFeed page) _ -> none ) += sattr "isLocked" (yesOrNo $ entityIsLocked page) += sattr "isBinary" (yesOrNo $ entityIsBinary page) += sattr "revision" (show $ entityRevision page) += sattr "lastModified" (W3C.format lastMod) += ( case entitySummary page of Just s -> eelem "summary" += txt s Nothing -> none ) += ( if M.null (entityOtherLang page) then none else selem "otherLang" [ eelem "link" += sattr "lang" lang += sattr "page" name | (lang, name) <- M.toList (entityOtherLang page) ] ) += ( if entityIsBinary page then ( eelem "binaryData" += txt (L8.unpack $ encodeBase64LBS $ entityContent page) ) else ( eelem "textData" += txt (UTF8.decode $ L.unpack $ entityContent page) ) ) )) -<< () parseXmlizedPage :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page parseXmlizedPage = proc (name, tree) -> do updateInfo <- maybeA parseUpdateInfo -< tree redirect <- maybeA (getXPathTreesInDoc "/page/@redirect/text()" >>> getText) -< tree isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no" >>> parseYesOrNo) -< tree case redirect of Nothing -> parseEntity -< (name, tree) Just dest -> returnA -< (Redirection { redirName = name , redirDest = dest , redirIsLocked = isLocked , redirRevision = undefined , redirLastMod = undefined , redirUpdateInfo = updateInfo }) parseEntity :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page parseEntity = proc (name, tree) -> do updateInfo <- maybeA parseUpdateInfo -< tree mimeTypeStr <- withDefault (getXPathTreesInDoc "/page/@type/text()" >>> getText) "" -< tree lang <- maybeA (getXPathTreesInDoc "/page/@lang/text()" >>> getText) -< tree isTheme <- (withDefault (getXPathTreesInDoc "/page/@isTheme/text()" >>> getText) "no" >>> parseYesOrNo) -< tree isFeed <- (withDefault (getXPathTreesInDoc "/page/@isFeed/text()" >>> getText) "no" >>> parseYesOrNo) -< tree isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no" >>> parseYesOrNo) -< tree summary <- (maybeA (getXPathTreesInDoc "/page/summary/text()" >>> getText >>> deleteIfEmpty)) -< tree otherLang <- listA (getXPathTreesInDoc "/page/otherLang/link" >>> (getAttrValue0 "lang" &&& getAttrValue0 "page")) -< tree textData <- maybeA (getXPathTreesInDoc "/page/textData/text()" >>> getText) -< tree binaryData <- maybeA (getXPathTreesInDoc "/page/binaryData/text()" >>> getText) -< tree let (isBinary, content) = case (textData, binaryData) of (Just text, Nothing ) -> (False, L.pack $ UTF8.encode text) (Nothing , Just binary) -> (True , L8.pack $ decodeBase64 $ dropWhitespace binary) _ -> error "one of textData or binaryData is required" mimeType = if isBinary then if null mimeTypeStr then guessMIMEType content else read mimeTypeStr else read mimeTypeStr returnA -< Entity { entityName = name , entityType = mimeType , entityLanguage = lang , entityIsTheme = isTheme , entityIsFeed = isFeed , entityIsLocked = isLocked , entityIsBinary = isBinary , entityRevision = undefined , entityLastMod = undefined , entitySummary = summary , entityOtherLang = M.fromList otherLang , entityContent = content , entityUpdateInfo = updateInfo } dropWhitespace :: String -> String {-# INLINE dropWhitespace #-} dropWhitespace = filter ((¬) ∘ isSpace) parseUpdateInfo :: (ArrowXml a, ArrowChoice a) => a XmlTree UpdateInfo parseUpdateInfo = proc tree -> do uInfo <- getXPathTreesInDoc "/page/updateInfo" -< tree oldRev <- (getAttrValue0 "oldRevision" >>> arr read) -< uInfo oldName <- maybeA (getXPathTrees "/updateInfo/move/@from/text()" >>> getText) -< uInfo returnA -< UpdateInfo { uiOldRevision = oldRev , uiOldName = oldName }