module Rakka.Page
( PageName
, Page(..)
, UpdateInfo(..)
, LanguageTag
, LanguageName
, isRedirect
, isEntity
, pageName
, pageUpdateInfo
, pageRevision
, encodePageName
, decodePageName
, mkPageURI
, mkPageFragmentURI
, mkObjectURI
, mkFragmentURI
, mkAuxiliaryURI
, mkFeedURI
, mkRakkaURI
, xmlizePage
, parseXmlizedPage
)
where
import qualified Codec.Binary.UTF8.String as UTF8
import qualified Data.ByteString.Lazy as Lazy (ByteString)
import qualified Data.ByteString.Lazy as L hiding (ByteString)
import qualified Data.ByteString.Lazy.Char8 as L8 hiding (ByteString)
import Data.Char
import Data.Map (Map)
import qualified Data.Map as M
import Data.Maybe
import Data.Time
import Network.HTTP.Lucu hiding (redirect)
import Network.URI hiding (fragment)
import OpenSSL.EVP.Base64
import Rakka.Utils
import Rakka.W3CDateTime
import Subversion.Types
import System.FilePath.Posix
import Text.XML.HXT.Arrow
import Text.XML.HXT.DOM.TypeDefs
type PageName = String
type LanguageTag = String -- See RFC 3066: http://www.ietf.org/rfc/rfc3066.txt
type LanguageName = String -- i.e. "日本語"
data Page
= Redirection {
redirName :: !PageName
, redirDest :: !PageName
, redirIsLocked :: !Bool
, redirRevision :: RevNum
, redirLastMod :: UTCTime
, redirUpdateInfo :: Maybe UpdateInfo
}
| Entity {
entityName :: !PageName
, entityType :: !MIMEType
, entityLanguage :: !(Maybe LanguageTag)
, entityIsTheme :: !Bool -- text/css 以外では無意味
, entityIsFeed :: !Bool -- text/x-rakka 以外では無意味
, entityIsLocked :: !Bool
, entityIsBinary :: !Bool
, entityRevision :: RevNum
, entityLastMod :: UTCTime
, entitySummary :: !(Maybe String)
, entityOtherLang :: !(Map LanguageTag PageName)
, entityContent :: !Lazy.ByteString
, entityUpdateInfo :: Maybe UpdateInfo
}
deriving (Show, Eq)
data UpdateInfo
= UpdateInfo {
uiOldRevision :: !RevNum
, uiOldName :: !(Maybe PageName)
}
deriving (Show, Eq)
isRedirect :: Page -> Bool
isRedirect (Redirection _ _ _ _ _ _) = True
isRedirect _ = False
isEntity :: Page -> Bool
isEntity (Entity _ _ _ _ _ _ _ _ _ _ _ _ _) = True
isEntity _ = False
pageName :: Page -> PageName
pageName p
| isRedirect p = redirName p
| isEntity p = entityName p
| otherwise = error "neither redirection nor entity"
pageUpdateInfo :: Page -> Maybe UpdateInfo
pageUpdateInfo p
| isRedirect p = redirUpdateInfo p
| isEntity p = entityUpdateInfo p
| otherwise = error "neither redirection nor entity"
pageRevision :: Page -> RevNum
pageRevision p
| isRedirect p = redirRevision p
| isEntity p = entityRevision p
| otherwise = error "neither redirection nor entity"
-- UTF-8 に encode してから 0x20 - 0x7E の範圍を除いて URI escape する。
encodePageName :: PageName -> FilePath
encodePageName = escapeURIString isSafeChar . UTF8.encodeString . fixPageName
where
fixPageName :: PageName -> PageName
fixPageName = (\ (x:xs) -> toUpper x : xs) . map (\ c -> if c == ' ' then '_' else c)
-- URI unescape して UTF-8 から decode する。
decodePageName :: FilePath -> PageName
decodePageName = UTF8.decodeString . unEscapeString
encodeFragment :: String -> String
encodeFragment = escapeURIString isSafeChar . UTF8.encodeString
mkPageURI :: URI -> PageName -> URI
mkPageURI baseURI name
= baseURI {
uriPath = uriPath baseURI > encodePageName name <.> "html"
}
mkPageFragmentURI :: URI -> PageName -> String -> URI
mkPageFragmentURI baseURI name fragment
= baseURI {
uriPath = uriPath baseURI > encodePageName name <.> "html"
, uriFragment = ('#' : encodeFragment fragment)
}
mkFragmentURI :: String -> URI
mkFragmentURI fragment
= nullURI {
uriFragment = ('#' : encodeFragment fragment)
}
mkObjectURI :: URI -> PageName -> URI
mkObjectURI baseURI name
= mkAuxiliaryURI baseURI ["object"] name
mkAuxiliaryURI :: URI -> [String] -> PageName -> URI
mkAuxiliaryURI baseURI basePath name
= baseURI {
uriPath = foldl (>) "/" ([uriPath baseURI] ++ basePath ++ [encodePageName name])
}
mkFeedURI :: URI -> PageName -> URI
mkFeedURI baseURI name
= baseURI {
uriPath = uriPath baseURI > encodePageName name <.> "rdf"
}
mkRakkaURI :: PageName -> URI
mkRakkaURI name = URI {
uriScheme = "rakka:"
, uriAuthority = Nothing
, uriPath = encodePageName name
, uriQuery = ""
, uriFragment = ""
}
{-
blah blah...
-- 存在しない場合もある
-- 存在しない場合もある
blah blah...
SKJaHKS8JK/DH8KS43JDK2aKKaSFLLS...
-}
xmlizePage :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
xmlizePage
= proc page
-> if isRedirect page then
xmlizeRedirection -< page
else
xmlizeEntity -< page
where
xmlizeRedirection :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
xmlizeRedirection
= proc page
-> do lastMod <- arrIO (utcToLocalZonedTime . redirLastMod) -< page
( eelem "/"
+= ( eelem "page"
+= sattr "name" (redirName page)
+= sattr "redirect" (redirDest page)
+= sattr "isLocked" (yesOrNo $ redirIsLocked page)
+= sattr "revision" (show $ redirRevision page)
+= sattr "lastModified" (formatW3CDateTime lastMod)
)) -<< ()
xmlizeEntity :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
xmlizeEntity
= proc page
-> do lastMod <- arrIO (utcToLocalZonedTime . entityLastMod) -< page
( eelem "/"
+= ( eelem "page"
+= sattr "name" (pageName page)
+= sattr "type" (show $ entityType page)
+= ( case entityLanguage page of
Just x -> sattr "lang" x
Nothing -> none
)
+= ( case entityType page of
MIMEType "text" "css" _
-> sattr "isTheme" (yesOrNo $ entityIsTheme page)
MIMEType "text" "x-rakka" _
-> sattr "isFeed" (yesOrNo $ entityIsFeed page)
_
-> none
)
+= sattr "isLocked" (yesOrNo $ entityIsLocked page)
+= sattr "isBinary" (yesOrNo $ entityIsBinary page)
+= sattr "revision" (show $ entityRevision page)
+= sattr "lastModified" (formatW3CDateTime lastMod)
+= ( case entitySummary page of
Just s -> eelem "summary" += txt s
Nothing -> none
)
+= ( if M.null (entityOtherLang page) then
none
else
selem "otherLang"
[ eelem "link"
+= sattr "lang" lang
+= sattr "page" name
| (lang, name) <- M.toList (entityOtherLang page) ]
)
+= ( if entityIsBinary page then
( eelem "binaryData"
+= txt (L8.unpack $ encodeBase64LBS $ entityContent page)
)
else
( eelem "textData"
+= txt (UTF8.decode $ L.unpack $ entityContent page)
)
)
)) -<< ()
parseXmlizedPage :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page
parseXmlizedPage
= proc (name, tree)
-> do updateInfo <- maybeA parseUpdateInfo -< tree
redirect <- maybeA (getXPathTreesInDoc "/page/@redirect/text()" >>> getText) -< tree
isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no"
>>> parseYesOrNo) -< tree
case redirect of
Nothing -> parseEntity -< (name, tree)
Just dest -> returnA -< (Redirection {
redirName = name
, redirDest = dest
, redirIsLocked = isLocked
, redirRevision = undefined
, redirLastMod = undefined
, redirUpdateInfo = updateInfo
})
parseEntity :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page
parseEntity
= proc (name, tree)
-> do updateInfo <- maybeA parseUpdateInfo -< tree
mimeTypeStr <- withDefault (getXPathTreesInDoc "/page/@type/text()" >>> getText) "" -< tree
lang <- maybeA (getXPathTreesInDoc "/page/@lang/text()" >>> getText) -< tree
isTheme <- (withDefault (getXPathTreesInDoc "/page/@isTheme/text()" >>> getText) "no"
>>> parseYesOrNo) -< tree
isFeed <- (withDefault (getXPathTreesInDoc "/page/@isFeed/text()" >>> getText) "no"
>>> parseYesOrNo) -< tree
isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no"
>>> parseYesOrNo) -< tree
summary <- (maybeA (getXPathTreesInDoc "/page/summary/text()"
>>> getText
>>> deleteIfEmpty)) -< tree
otherLang <- listA (getXPathTreesInDoc "/page/otherLang/link"
>>>
(getAttrValue0 "lang"
&&&
getAttrValue0 "page")) -< tree
textData <- maybeA (getXPathTreesInDoc "/page/textData/text()" >>> getText) -< tree
binaryData <- maybeA (getXPathTreesInDoc "/page/binaryData/text()" >>> getText) -< tree
let (isBinary, content)
= case (textData, binaryData) of
(Just text, Nothing ) -> (False, L.pack $ UTF8.encode text)
(Nothing , Just binary) -> (True , L8.pack $ decodeBase64 $ dropWhitespace binary)
_ -> error "one of textData or binaryData is required"
mimeType
= if isBinary then
if null mimeTypeStr then
guessMIMEType content
else
read mimeTypeStr
else
read mimeTypeStr
returnA -< Entity {
entityName = name
, entityType = mimeType
, entityLanguage = lang
, entityIsTheme = isTheme
, entityIsFeed = isFeed
, entityIsLocked = isLocked
, entityIsBinary = isBinary
, entityRevision = undefined
, entityLastMod = undefined
, entitySummary = summary
, entityOtherLang = M.fromList otherLang
, entityContent = content
, entityUpdateInfo = updateInfo
}
where
dropWhitespace :: String -> String
dropWhitespace [] = []
dropWhitespace (x:xs)
| x == ' ' || x == '\t' || x == '\n'
= dropWhitespace xs
| otherwise
= x : dropWhitespace xs
parseUpdateInfo :: (ArrowXml a, ArrowChoice a) => a XmlTree UpdateInfo
parseUpdateInfo
= proc tree
-> do uInfo <- getXPathTreesInDoc "/page/updateInfo" -< tree
oldRev <- (getAttrValue0 "oldRevision" >>> arr read) -< uInfo
oldName <- maybeA (getXPathTrees "/updateInfo/move/@from/text()" >>> getText) -< uInfo
returnA -< UpdateInfo {
uiOldRevision = oldRev
, uiOldName = oldName
}