{-# LANGUAGE
Arrows
, TypeOperators
, UnicodeSyntax
#-}
module Rakka.Page
( PageName
, Page(..)
, UpdateInfo(..)
, LanguageTag
, LanguageName
, isRedirect
, isEntity
, pageName
, pageUpdateInfo
, pageRevision
, encodePageName
, decodePageName
, mkPageURI
, mkPageFragmentURI
, mkObjectURI
, mkFragmentURI
, mkAuxiliaryURI
, mkFeedURI
, mkRakkaURI
, xmlizePage
, parseXmlizedPage
)
where
import Control.Applicative
import Control.Arrow
import Control.Arrow.ArrowIO
import Control.Arrow.ArrowList
import Control.Arrow.Unicode
import qualified Codec.Binary.UTF8.String as UTF8
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Lazy as Lazy (ByteString)
import qualified Data.ByteString.Lazy as L hiding (ByteString)
import qualified Data.ByteString.Lazy.Char8 as L8 hiding (ByteString)
import Data.CaseInsensitive (CI)
import qualified Data.CaseInsensitive as CI
import Data.Char
import Data.Map (Map)
import qualified Data.Map as M
import Data.Text (Text)
import qualified Data.Text as T
import Data.Text.Encoding
import Data.Time
import qualified Data.Time.W3C as W3C
import Network.HTTP.Lucu hiding (redirect)
import Network.URI hiding (fragment)
import OpenSSL.EVP.Base64
import Prelude.Unicode
import Rakka.Utils
import Subversion.Types
import System.FilePath.Posix
import Text.XML.HXT.Arrow.XmlArrow
import Text.XML.HXT.DOM.TypeDefs
import Text.XML.HXT.XPath
type PageName = Text
type LanguageTag = CI Text -- See RFC 3066: http://www.ietf.org/rfc/rfc3066.txt
type LanguageName = Text -- i.e. "日本語"
data Page
= Redirection {
redirName :: !PageName
, redirDest :: !PageName
, redirIsLocked :: !Bool
, redirRevision :: RevNum
, redirLastMod :: UTCTime
, redirUpdateInfo :: Maybe UpdateInfo
}
| Entity {
entityName :: !PageName
, entityType :: !MIMEType
, entityLanguage :: !(Maybe LanguageTag)
, entityIsTheme :: !Bool -- text/css 以外では無意味
, entityIsFeed :: !Bool -- text/x-rakka 以外では無意味
, entityIsLocked :: !Bool
, entityIsBinary :: !Bool
, entityRevision :: RevNum
, entityLastMod :: UTCTime
, entitySummary :: !(Maybe String)
, entityOtherLang :: !(Map LanguageTag PageName)
, entityContent :: !Lazy.ByteString
, entityUpdateInfo :: Maybe UpdateInfo
}
deriving (Show, Eq)
data UpdateInfo
= UpdateInfo {
uiOldRevision :: !RevNum
, uiOldName :: !(Maybe PageName)
}
deriving (Show, Eq)
isRedirect :: Page -> Bool
isRedirect (Redirection _ _ _ _ _ _) = True
isRedirect _ = False
isEntity :: Page -> Bool
isEntity (Entity _ _ _ _ _ _ _ _ _ _ _ _ _) = True
isEntity _ = False
pageName :: Page -> PageName
pageName p
| isRedirect p = redirName p
| isEntity p = entityName p
| otherwise = error "neither redirection nor entity"
pageUpdateInfo :: Page -> Maybe UpdateInfo
pageUpdateInfo p
| isRedirect p = redirUpdateInfo p
| isEntity p = entityUpdateInfo p
| otherwise = error "neither redirection nor entity"
pageRevision :: Page -> RevNum
pageRevision p
| isRedirect p = redirRevision p
| isEntity p = entityRevision p
| otherwise = error "neither redirection nor entity"
-- UTF-8 に encode してから 0x20 - 0x7E の範圍を除いて URI escape する。
encodePageName ∷ PageName → FilePath
encodePageName = escapeURIString isSafeChar ∘ UTF8.encodeString ∘ fixPageName ∘ T.unpack
where
fixPageName ∷ String → String
fixPageName = capitalizeHead ∘ map (\c → if c ≡ ' ' then '_' else c)
capitalizeHead ∷ String → String
capitalizeHead [] = (⊥)
capitalizeHead (x:xs) = toUpper x : xs
-- FIXME: use system-filepath
decodePageName ∷ FilePath → PageName
decodePageName = T.pack ∘ UTF8.decodeString ∘ unEscapeString
encodeFragment ∷ Text → String
encodeFragment = escapeURIString isSafeChar ∘ B8.unpack ∘ encodeUtf8
mkPageURI ∷ URI → PageName → URI
mkPageURI baseURI name
= baseURI {
uriPath = uriPath baseURI > encodePageName name <.> "html"
}
mkPageFragmentURI ∷ URI → PageName → Text → URI
mkPageFragmentURI baseURI name fragment
= baseURI {
uriPath = uriPath baseURI > encodePageName name <.> "html"
, uriFragment = ('#' : encodeFragment fragment)
}
mkFragmentURI ∷ Text → URI
mkFragmentURI fragment
= nullURI {
uriFragment = ('#' : encodeFragment fragment)
}
mkObjectURI :: URI -> PageName -> URI
mkObjectURI baseURI name
= mkAuxiliaryURI baseURI ["object"] name
mkAuxiliaryURI :: URI -> [String] -> PageName -> URI
mkAuxiliaryURI baseURI basePath name
= baseURI {
uriPath = foldl (>) "/" ([uriPath baseURI] ++ basePath ++ [encodePageName name])
}
mkFeedURI :: URI -> PageName -> URI
mkFeedURI baseURI name
= baseURI {
uriPath = uriPath baseURI > encodePageName name <.> "rdf"
}
mkRakkaURI :: PageName -> URI
mkRakkaURI name = URI {
uriScheme = "rakka:"
, uriAuthority = Nothing
, uriPath = encodePageName name
, uriQuery = ""
, uriFragment = ""
}
{-
blah blah...
-- 存在しない場合もある
-- 存在しない場合もある
blah blah...
SKJaHKS8JK/DH8KS43JDK2aKKaSFLLS...
-}
xmlizePage :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
xmlizePage
= proc page
-> if isRedirect page then
xmlizeRedirection -< page
else
xmlizeEntity -< page
where
xmlizeRedirection :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
xmlizeRedirection
= proc page
-> do lastMod <- arrIO (utcToLocalZonedTime . redirLastMod) -< page
( eelem "/"
+= ( eelem "page"
+= sattr "name" (T.unpack $ redirName page )
+= sattr "redirect" (T.unpack $ redirDest page )
+= sattr "isLocked" (yesOrNo $ redirIsLocked page)
+= sattr "revision" (show $ redirRevision page)
+= sattr "lastModified" (W3C.format lastMod)
)) -<< ()
xmlizeEntity :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
xmlizeEntity
= proc page
-> do lastMod <- arrIO (utcToLocalZonedTime . entityLastMod) -< page
( eelem "/"
+= ( eelem "page"
+= sattr "name" (T.unpack $ pageName page)
+= sattr "type" (show $ entityType page)
+= ( case entityLanguage page of
Just x -> sattr "lang" (T.unpack $ CI.foldedCase x)
Nothing -> none
)
+= ( case entityType page of
MIMEType "text" "css" _
-> sattr "isTheme" (yesOrNo $ entityIsTheme page)
MIMEType "text" "x-rakka" _
-> sattr "isFeed" (yesOrNo $ entityIsFeed page)
_
-> none
)
+= sattr "isLocked" (yesOrNo $ entityIsLocked page)
+= sattr "isBinary" (yesOrNo $ entityIsBinary page)
+= sattr "revision" (show $ entityRevision page)
+= sattr "lastModified" (W3C.format lastMod)
+= ( case entitySummary page of
Just s -> eelem "summary" += txt s
Nothing -> none
)
+= ( if M.null (entityOtherLang page) then
none
else
selem "otherLang"
[ eelem "link"
+= sattr "lang" (T.unpack $ CI.foldedCase lang)
+= sattr "page" (T.unpack name)
| (lang, name) ← M.toList (entityOtherLang page) ]
)
+= ( if entityIsBinary page then
( eelem "binaryData"
+= txt (L8.unpack $ encodeBase64LBS $ entityContent page)
)
else
( eelem "textData"
+= txt (UTF8.decode $ L.unpack $ entityContent page)
)
)
)) -<< ()
parseXmlizedPage ∷ (ArrowXml (⇝), ArrowChoice (⇝)) ⇒ (PageName, XmlTree) ⇝ Page
parseXmlizedPage
= proc (name, tree)
→ do updateInfo ← maybeA parseUpdateInfo ⤙ tree
redirect ← maybeA (getXPathTreesInDoc "/page/@redirect/text()" ⋙ getText) ⤙ tree
isLocked ← (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" ⋙ getText) "no"
⋙ parseYesOrNo) ⤙ tree
case redirect of
Nothing → parseEntity ⤙ (name, tree)
Just dest → returnA ⤙ Redirection {
redirName = name
, redirDest = T.pack dest
, redirIsLocked = isLocked
, redirRevision = undefined
, redirLastMod = undefined
, redirUpdateInfo = updateInfo
}
parseEntity :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page
parseEntity
= proc (name, tree)
-> do updateInfo <- maybeA parseUpdateInfo -< tree
mimeTypeStr <- withDefault (getXPathTreesInDoc "/page/@type/text()" >>> getText) "" -< tree
lang <- maybeA (getXPathTreesInDoc "/page/@lang/text()" >>> getText) -< tree
isTheme <- (withDefault (getXPathTreesInDoc "/page/@isTheme/text()" >>> getText) "no"
>>> parseYesOrNo) -< tree
isFeed <- (withDefault (getXPathTreesInDoc "/page/@isFeed/text()" >>> getText) "no"
>>> parseYesOrNo) -< tree
isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no"
>>> parseYesOrNo) -< tree
summary <- (maybeA (getXPathTreesInDoc "/page/summary/text()"
>>> getText
>>> deleteIfEmpty)) -< tree
otherLang <- listA (getXPathTreesInDoc "/page/otherLang/link"
>>>
(getAttrValue0 "lang"
&&&
getAttrValue0 "page")) -< tree
textData <- maybeA (getXPathTreesInDoc "/page/textData/text()" >>> getText) -< tree
binaryData <- maybeA (getXPathTreesInDoc "/page/binaryData/text()" >>> getText) -< tree
let (isBinary, content)
= case (textData, binaryData) of
(Just text, Nothing ) -> (False, L.pack $ UTF8.encode text)
(Nothing , Just binary) -> (True , L8.pack $ decodeBase64 $ dropWhitespace binary)
_ -> error "one of textData or binaryData is required"
mimeType
= if isBinary then
if null mimeTypeStr then
guessMIMEType content
else
read mimeTypeStr
else
read mimeTypeStr
returnA ⤙ Entity {
entityName = name
, entityType = mimeType
, entityLanguage = CI.mk ∘ T.pack <$> lang
, entityIsTheme = isTheme
, entityIsFeed = isFeed
, entityIsLocked = isLocked
, entityIsBinary = isBinary
, entityRevision = undefined
, entityLastMod = undefined
, entitySummary = summary
, entityOtherLang = M.fromList ((CI.mk ∘ T.pack ⁂ T.pack) <$> otherLang)
, entityContent = content
, entityUpdateInfo = updateInfo
}
parseUpdateInfo ∷ (ArrowXml (⇝), ArrowChoice (⇝)) ⇒ XmlTree ⇝ UpdateInfo
parseUpdateInfo
= proc tree
-> do uInfo ← getXPathTreesInDoc "/page/updateInfo" ⤙ tree
oldRev ← (getAttrValue0 "oldRevision" ⋙ arr read) ⤙ uInfo
oldName ← maybeA (getXPathTrees "/updateInfo/move/@from/text()" ⋙ getText) ⤙ uInfo
returnA ⤙ UpdateInfo {
uiOldRevision = oldRev
, uiOldName = T.pack <$> oldName
}
dropWhitespace :: String -> String
{-# INLINE dropWhitespace #-}
dropWhitespace = filter ((¬) ∘ isSpace)