{-# LANGUAGE Arrows , TypeOperators , UnicodeSyntax #-} module Rakka.Page ( PageName , Page(..) , UpdateInfo(..) , LanguageTag , LanguageName , isRedirect , isEntity , pageName , pageUpdateInfo , pageRevision , encodePageName , decodePageName , mkPageURI , mkPageFragmentURI , mkObjectURI , mkFragmentURI , mkAuxiliaryURI , mkFeedURI , mkRakkaURI , xmlizePage , parseXmlizedPage ) where import Control.Applicative import Control.Arrow import Control.Arrow.ArrowIO import Control.Arrow.ArrowList import Control.Arrow.Unicode import qualified Codec.Binary.UTF8.String as UTF8 import qualified Data.ByteString.Char8 as B8 import qualified Data.ByteString.Lazy as Lazy (ByteString) import qualified Data.ByteString.Lazy as L hiding (ByteString) import qualified Data.ByteString.Lazy.Char8 as L8 hiding (ByteString) import Data.CaseInsensitive (CI) import qualified Data.CaseInsensitive as CI import Data.Char import Data.Map (Map) import qualified Data.Map as M import Data.Text (Text) import qualified Data.Text as T import Data.Text.Encoding import Data.Time import qualified Data.Time.W3C as W3C import Network.HTTP.Lucu hiding (redirect) import Network.URI hiding (fragment) import OpenSSL.EVP.Base64 import Prelude.Unicode import Rakka.Utils import Subversion.Types import System.FilePath.Posix import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.DOM.TypeDefs import Text.XML.HXT.XPath type PageName = Text type LanguageTag = CI Text -- See RFC 3066: http://www.ietf.org/rfc/rfc3066.txt type LanguageName = Text -- i.e. "日本語" data Page = Redirection { redirName :: !PageName , redirDest :: !PageName , redirIsLocked :: !Bool , redirRevision :: RevNum , redirLastMod :: UTCTime , redirUpdateInfo :: Maybe UpdateInfo } | Entity { entityName :: !PageName , entityType :: !MIMEType , entityLanguage :: !(Maybe LanguageTag) , entityIsTheme :: !Bool -- text/css 以外では無意味 , entityIsFeed :: !Bool -- text/x-rakka 以外では無意味 , entityIsLocked :: !Bool , entityIsBinary :: !Bool , entityRevision :: RevNum , entityLastMod :: UTCTime , entitySummary :: !(Maybe String) , entityOtherLang :: !(Map LanguageTag PageName) , entityContent :: !Lazy.ByteString , entityUpdateInfo :: Maybe UpdateInfo } deriving (Show, Eq) data UpdateInfo = UpdateInfo { uiOldRevision :: !RevNum , uiOldName :: !(Maybe PageName) } deriving (Show, Eq) isRedirect :: Page -> Bool isRedirect (Redirection _ _ _ _ _ _) = True isRedirect _ = False isEntity :: Page -> Bool isEntity (Entity _ _ _ _ _ _ _ _ _ _ _ _ _) = True isEntity _ = False pageName :: Page -> PageName pageName p | isRedirect p = redirName p | isEntity p = entityName p | otherwise = error "neither redirection nor entity" pageUpdateInfo :: Page -> Maybe UpdateInfo pageUpdateInfo p | isRedirect p = redirUpdateInfo p | isEntity p = entityUpdateInfo p | otherwise = error "neither redirection nor entity" pageRevision :: Page -> RevNum pageRevision p | isRedirect p = redirRevision p | isEntity p = entityRevision p | otherwise = error "neither redirection nor entity" -- UTF-8 に encode してから 0x20 - 0x7E の範圍を除いて URI escape する。 encodePageName ∷ PageName → FilePath encodePageName = escapeURIString isSafeChar ∘ UTF8.encodeString ∘ fixPageName ∘ T.unpack where fixPageName ∷ String → String fixPageName = capitalizeHead ∘ map (\c → if c ≡ ' ' then '_' else c) capitalizeHead ∷ String → String capitalizeHead [] = (⊥) capitalizeHead (x:xs) = toUpper x : xs -- FIXME: use system-filepath decodePageName ∷ FilePath → PageName decodePageName = T.pack ∘ UTF8.decodeString ∘ unEscapeString encodeFragment ∷ Text → String encodeFragment = escapeURIString isSafeChar ∘ B8.unpack ∘ encodeUtf8 mkPageURI ∷ URI → PageName → URI mkPageURI baseURI name = baseURI { uriPath = uriPath baseURI encodePageName name <.> "html" } mkPageFragmentURI ∷ URI → PageName → Text → URI mkPageFragmentURI baseURI name fragment = baseURI { uriPath = uriPath baseURI encodePageName name <.> "html" , uriFragment = ('#' : encodeFragment fragment) } mkFragmentURI ∷ Text → URI mkFragmentURI fragment = nullURI { uriFragment = ('#' : encodeFragment fragment) } mkObjectURI :: URI -> PageName -> URI mkObjectURI baseURI name = mkAuxiliaryURI baseURI ["object"] name mkAuxiliaryURI :: URI -> [String] -> PageName -> URI mkAuxiliaryURI baseURI basePath name = baseURI { uriPath = foldl () "/" ([uriPath baseURI] ++ basePath ++ [encodePageName name]) } mkFeedURI :: URI -> PageName -> URI mkFeedURI baseURI name = baseURI { uriPath = uriPath baseURI encodePageName name <.> "rdf" } mkRakkaURI :: PageName -> URI mkRakkaURI name = URI { uriScheme = "rakka:" , uriAuthority = Nothing , uriPath = encodePageName name , uriQuery = "" , uriFragment = "" } {- blah blah... -- 存在しない場合もある -- 存在しない場合もある blah blah... SKJaHKS8JK/DH8KS43JDK2aKKaSFLLS... -} xmlizePage :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree xmlizePage = proc page -> if isRedirect page then xmlizeRedirection -< page else xmlizeEntity -< page where xmlizeRedirection :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree xmlizeRedirection = proc page -> do lastMod <- arrIO (utcToLocalZonedTime . redirLastMod) -< page ( eelem "/" += ( eelem "page" += sattr "name" (T.unpack $ redirName page ) += sattr "redirect" (T.unpack $ redirDest page ) += sattr "isLocked" (yesOrNo $ redirIsLocked page) += sattr "revision" (show $ redirRevision page) += sattr "lastModified" (W3C.format lastMod) )) -<< () xmlizeEntity :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree xmlizeEntity = proc page -> do lastMod <- arrIO (utcToLocalZonedTime . entityLastMod) -< page ( eelem "/" += ( eelem "page" += sattr "name" (T.unpack $ pageName page) += sattr "type" (show $ entityType page) += ( case entityLanguage page of Just x -> sattr "lang" (T.unpack $ CI.foldedCase x) Nothing -> none ) += ( case entityType page of MIMEType "text" "css" _ -> sattr "isTheme" (yesOrNo $ entityIsTheme page) MIMEType "text" "x-rakka" _ -> sattr "isFeed" (yesOrNo $ entityIsFeed page) _ -> none ) += sattr "isLocked" (yesOrNo $ entityIsLocked page) += sattr "isBinary" (yesOrNo $ entityIsBinary page) += sattr "revision" (show $ entityRevision page) += sattr "lastModified" (W3C.format lastMod) += ( case entitySummary page of Just s -> eelem "summary" += txt s Nothing -> none ) += ( if M.null (entityOtherLang page) then none else selem "otherLang" [ eelem "link" += sattr "lang" (T.unpack $ CI.foldedCase lang) += sattr "page" (T.unpack name) | (lang, name) ← M.toList (entityOtherLang page) ] ) += ( if entityIsBinary page then ( eelem "binaryData" += txt (L8.unpack $ encodeBase64LBS $ entityContent page) ) else ( eelem "textData" += txt (UTF8.decode $ L.unpack $ entityContent page) ) ) )) -<< () parseXmlizedPage ∷ (ArrowXml (⇝), ArrowChoice (⇝)) ⇒ (PageName, XmlTree) ⇝ Page parseXmlizedPage = proc (name, tree) → do updateInfo ← maybeA parseUpdateInfo ⤙ tree redirect ← maybeA (getXPathTreesInDoc "/page/@redirect/text()" ⋙ getText) ⤙ tree isLocked ← (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" ⋙ getText) "no" ⋙ parseYesOrNo) ⤙ tree case redirect of Nothing → parseEntity ⤙ (name, tree) Just dest → returnA ⤙ Redirection { redirName = name , redirDest = T.pack dest , redirIsLocked = isLocked , redirRevision = undefined , redirLastMod = undefined , redirUpdateInfo = updateInfo } parseEntity :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page parseEntity = proc (name, tree) -> do updateInfo <- maybeA parseUpdateInfo -< tree mimeTypeStr <- withDefault (getXPathTreesInDoc "/page/@type/text()" >>> getText) "" -< tree lang <- maybeA (getXPathTreesInDoc "/page/@lang/text()" >>> getText) -< tree isTheme <- (withDefault (getXPathTreesInDoc "/page/@isTheme/text()" >>> getText) "no" >>> parseYesOrNo) -< tree isFeed <- (withDefault (getXPathTreesInDoc "/page/@isFeed/text()" >>> getText) "no" >>> parseYesOrNo) -< tree isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no" >>> parseYesOrNo) -< tree summary <- (maybeA (getXPathTreesInDoc "/page/summary/text()" >>> getText >>> deleteIfEmpty)) -< tree otherLang <- listA (getXPathTreesInDoc "/page/otherLang/link" >>> (getAttrValue0 "lang" &&& getAttrValue0 "page")) -< tree textData <- maybeA (getXPathTreesInDoc "/page/textData/text()" >>> getText) -< tree binaryData <- maybeA (getXPathTreesInDoc "/page/binaryData/text()" >>> getText) -< tree let (isBinary, content) = case (textData, binaryData) of (Just text, Nothing ) -> (False, L.pack $ UTF8.encode text) (Nothing , Just binary) -> (True , L8.pack $ decodeBase64 $ dropWhitespace binary) _ -> error "one of textData or binaryData is required" mimeType = if isBinary then if null mimeTypeStr then guessMIMEType content else read mimeTypeStr else read mimeTypeStr returnA ⤙ Entity { entityName = name , entityType = mimeType , entityLanguage = CI.mk ∘ T.pack <$> lang , entityIsTheme = isTheme , entityIsFeed = isFeed , entityIsLocked = isLocked , entityIsBinary = isBinary , entityRevision = undefined , entityLastMod = undefined , entitySummary = summary , entityOtherLang = M.fromList ((CI.mk ∘ T.pack ⁂ T.pack) <$> otherLang) , entityContent = content , entityUpdateInfo = updateInfo } parseUpdateInfo ∷ (ArrowXml (⇝), ArrowChoice (⇝)) ⇒ XmlTree ⇝ UpdateInfo parseUpdateInfo = proc tree -> do uInfo ← getXPathTreesInDoc "/page/updateInfo" ⤙ tree oldRev ← (getAttrValue0 "oldRevision" ⋙ arr read) ⤙ uInfo oldName ← maybeA (getXPathTrees "/updateInfo/move/@from/text()" ⋙ getText) ⤙ uInfo returnA ⤙ UpdateInfo { uiOldRevision = oldRev , uiOldName = T.pack <$> oldName } dropWhitespace :: String -> String {-# INLINE dropWhitespace #-} dropWhitespace = filter ((¬) ∘ isSpace)