module Rakka.Page
( PageName
, Page(..)
+ , LanguageTag
+ , LanguageName
+
, encodePageName
, decodePageName
+
+ , pageFileName'
+ , defaultFileName
+
, mkPageURI
+ , mkPageFragmentURI
, mkObjectURI
+ , mkFragmentURI
+ , mkAuxiliaryURI
+ , mkRakkaURI
+
+ , xmlizePage
+ , parseXmlizedPage
)
where
-import Data.ByteString.Base (LazyByteString)
-import qualified Data.ByteString.Char8 as C8
-import Data.Encoding
-import Data.Encoding.UTF8
-import Network.HTTP.Lucu
-import Network.URI
+import qualified Codec.Binary.Base64 as B64
+import Codec.Binary.UTF8.String
+import Control.Arrow
+import Control.Arrow.ArrowIO
+import Control.Arrow.ArrowList
+import qualified Data.ByteString.Lazy as Lazy (ByteString)
+import qualified Data.ByteString.Lazy as L hiding (ByteString)
+import Data.Char
+import Data.Map (Map)
+import qualified Data.Map as M
+import Data.Maybe
+import Data.Time
+import Network.HTTP.Lucu hiding (redirect)
+import Network.URI hiding (fragment)
+import Rakka.Utils
+import Rakka.W3CDateTime
import Subversion.Types
-import System.FilePath
-import System.Time
+import System.FilePath.Posix
+import Text.XML.HXT.Arrow.XmlArrow
+import Text.XML.HXT.Arrow.XmlNodeSet
+import Text.XML.HXT.DOM.TypeDefs
type PageName = String
+type LanguageTag = String -- See RFC 3066: http://www.ietf.org/rfc/rfc3066.txt
+type LanguageName = String -- display name of a language, e.g. "日本語"
+
data Page
= Redirection {
redirName :: !PageName
, redirDest :: !PageName
- , redirRevision :: !(Maybe RevNum)
- , redirLastMod :: !CalendarTime
+ , redirRevision :: RevNum -- lazy (no bang): parseXmlizedPage stores 'undefined' here
+ , redirLastMod :: UTCTime -- lazy (no bang): parseXmlizedPage stores 'undefined' here
}
| Entity {
pageName :: !PageName
, pageType :: !MIMEType
+ , pageLanguage :: !(Maybe LanguageTag)
+ , pageFileName :: !(Maybe String)
, pageIsTheme :: !Bool -- meaningless unless the type is text/css
, pageIsFeed :: !Bool -- meaningless unless the type is text/x-rakka
, pageIsLocked :: !Bool
, pageIsBoring :: !Bool
, pageIsBinary :: !Bool
- , pageRevision :: !(Maybe RevNum)
- , pageLastMod :: !CalendarTime
+ , pageRevision :: RevNum -- lazy (no bang): parseEntity stores 'undefined' here
+ , pageLastMod :: UTCTime -- lazy (no bang): parseEntity stores 'undefined' here
, pageSummary :: !(Maybe String)
- , pageOtherLang :: ![(String, PageName)]
- , pageContent :: !LazyByteString
+ , pageOtherLang :: !(Map LanguageTag PageName) -- language tag -> name of the page in that language
+ , pageContent :: !Lazy.ByteString
}
+ deriving (Show, Eq)
-- Encode a page name as UTF-8 and URI-escape it so it can be used as a path.
+-- Before encoding, the name is normalised: every space becomes an underscore
+-- and the first character is upper-cased.  Which characters stay unescaped
+-- is decided by 'isSafeChar'.  An empty name encodes to the empty string.
encodePageName :: PageName -> FilePath
-encodePageName = escapeURIString isSafe . C8.unpack . encode UTF8
+encodePageName = escapeURIString isSafeChar . encodeString . fixPageName
where
- isSafe :: Char -> Bool
- isSafe c
- | c >= ' ' && c <= '~' = True
- | otherwise = False
+    fixPageName :: PageName -> PageName
+    fixPageName = capitalise . map spaceToUnderscore
+
+    -- Total on purpose: an empty name stays empty instead of dying on a
+    -- failed (x:xs) pattern match.
+    capitalise :: String -> String
+    capitalise []       = []
+    capitalise (x : xs) = toUpper x : xs
+
+    spaceToUnderscore :: Char -> Char
+    spaceToUnderscore ' ' = '_'
+    spaceToUnderscore c   = c
+
+
+-- Predicate handed to 'escapeURIString': True means the character is left
+-- as it is, False means it is percent-escaped.
+--
+--   * '/' is always kept, so page names such as "Foo/Bar" keep their slashes.
+--   * '%' is always escaped.  It introduces an escape sequence, so a literal
+--     percent sign left untouched would be mis-read by 'decodePageName'
+--     (e.g. the name "50%20off" would decode to "50 off").
+--   * Characters 'isReserved' reports as reserved are escaped.
+--   * Of the rest, only printable ASCII above the space character is kept.
+isSafeChar :: Char -> Bool
+isSafeChar c
+    | c == '/'            = True
+    | c == '%'            = False
+    | isReserved c        = False
+    | c > ' ' && c <= '~' = True
+    | otherwise           = False
-- URI-unescape the path, then decode the result from UTF-8.
decodePageName :: FilePath -> PageName
-decodePageName = decode UTF8 . C8.pack . unEscapeString
+decodePageName escaped = decodeString (unEscapeString escaped)
+
+
+-- Encode a fragment identifier as UTF-8 and URI-escape every character
+-- rejected by 'isSafeChar'.  Unlike 'encodePageName', the text is not
+-- normalised first.
+encodeFragment :: String -> String
+encodeFragment fragment = escapeURIString isSafeChar (encodeString fragment)
+
+
+-- File name of a page: the explicit 'pageFileName' when one is set,
+-- otherwise the name 'defaultFileName' derives from the page's type and name.
+-- Only 'Entity' carries the selectors used here, so applying this to a
+-- 'Redirection' fails at runtime.
+pageFileName' :: Page -> String
+pageFileName' page
+    = case pageFileName page of
+        Just explicit -> explicit
+        Nothing       -> defaultFileName (pageType page) (pageName page)
+
+
+-- Derive a file name from a page's MIME type and name: take the last path
+-- component of the name and append ".rakka" for text/x-rakka, ".css" for
+-- text/css, and nothing for any other type.
+defaultFileName :: MIMEType -> PageName -> String
+defaultFileName pType pName
+    = addExtensionFor (takeFileName pName)
+    where
+      addExtensionFor :: String -> String
+      addExtensionFor
+          = case pType of
+              MIMEType "text" "x-rakka" _ -> (<.> "rakka")
+              MIMEType "text" "css"     _ -> (<.> "css")
+              _                           -> id
+-- URI of the rendered page: the base URI with its path extended by the
+-- encoded page name plus ".html".
mkPageURI :: URI -> PageName -> URI
mkPageURI baseURI name
= baseURI {
- uriPath = foldl combine "/" [uriPath baseURI, encodePageName name]
+        uriPath = ("/" </> uriPath baseURI) </> (encodePageName name ++ ".html")
+      }
+
+
+-- Same URI as 'mkPageURI', with the given fragment attached
+-- (escaped by 'encodeFragment' and prefixed with '#').
+mkPageFragmentURI :: URI -> PageName -> String -> URI
+mkPageFragmentURI baseURI name fragment
+    = (mkPageURI baseURI name) { uriFragment = '#' : encodeFragment fragment }
+
+
+-- A relative URI consisting of nothing but a fragment: 'nullURI' with the
+-- escaped fragment, prefixed with '#'.
+mkFragmentURI :: String -> URI
+mkFragmentURI fragment
+    = nullURI {
+        uriFragment = anchor
}
+    where
+      anchor = '#' : encodeFragment fragment
+-- URI of a page below the "object" path segment of the base URI; a thin
+-- wrapper around 'mkAuxiliaryURI'.
mkObjectURI :: URI -> PageName -> URI
mkObjectURI baseURI name
+    = mkAuxiliaryURI baseURI objectPath name
+    where
+      objectPath = ["object"]
+
+
+-- URI of a page below an arbitrary list of extra path segments: the path is
+-- "/" joined with the base URI's path, then each segment of @basePath@, then
+-- the encoded page name (no ".html" suffix is added here).
+mkAuxiliaryURI :: URI -> [String] -> PageName -> URI
+mkAuxiliaryURI baseURI basePath name
= baseURI {
- uriPath = foldl combine "/" [uriPath baseURI, "object", encodePageName name]
+        uriPath = foldl (</>) "/" (uriPath baseURI : basePath ++ [encodePageName name])
}
+
+
+-- A "rakka:" URI whose path is the encoded page name; authority, query and
+-- fragment are all empty.
+mkRakkaURI :: PageName -> URI
+mkRakkaURI name
+    = nullURI {
+        uriScheme = "rakka:"
+      , uriPath   = encodePageName name
+      }
+
+
+{-
+  XML representation of a page:
+
+  <page name="Foo/Bar"
+        type="text/x-rakka"
+        lang="ja"            -- may be absent
+        fileName="bar.rakka" -- may be absent
+        isTheme="no"         -- present only for text/css
+        isFeed="no"          -- present only for text/x-rakka
+        isLocked="no"
+        isBoring="no"
+        isBinary="no"
+        revision="112"       -- original note: "present only when not the default"; xmlizePage currently always writes it
+        lastModified="2000-01-01T00:00:00">
+
+    <summary>
+        blah blah...
+    </summary> -- may be absent
+
+    <otherLang> -- may be absent
+      <link lang="ja" page="Bar/Baz" />
+    </otherLang>
+
+    <!-- exactly one of the following two -->
+    <textData>
+      blah blah...
+    </textData>
+    <binaryData>
+      SKJaHKS8JK/DH8KS43JDK2aKKaSFLLS...
+    </binaryData>
+  </page>
+-}
+-- Serialise a 'Page' as an XML document whose root holds a single <page>
+-- element.
+--
+-- Both constructors are handled.  An 'Entity' becomes the full <page> element
+-- with its attributes, optional <summary> and <otherLang>, and exactly one of
+-- <textData> / <binaryData>.  A 'Redirection' becomes a <page> element that
+-- carries a "redirect" attribute, which is the attribute 'parseXmlizedPage'
+-- looks for.  (Previously only 'Entity' selectors were used, so passing a
+-- 'Redirection' failed at runtime.)
+--
+-- Runs in 'ArrowIO' because the last-modified time is converted to the local
+-- time zone before it is formatted.
+xmlizePage :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
+xmlizePage
+    = proc page
+    -> case page of
+         Redirection {} -> xmlizeRedirection -< page
+         Entity      {} -> xmlizeEntity      -< page
+
+
+-- Private helper of 'xmlizePage': serialise a 'Redirection'.
+xmlizeRedirection :: (ArrowXml a, ArrowIO a) => a Page XmlTree
+xmlizeRedirection
+    = proc page
+    -> do lastMod <- arrIO (utcToLocalZonedTime . redirLastMod) -< page
+          ( eelem "/"
+            += ( eelem "page"
+                 += sattr "name"         (redirName page)
+                 += sattr "redirect"     (redirDest page)
+                 += sattr "revision"     (show $ redirRevision page)
+                 += sattr "lastModified" (formatW3CDateTime lastMod)
+               )) -<< ()
+
+
+-- Private helper of 'xmlizePage': serialise an 'Entity'.
+xmlizeEntity :: (ArrowXml a, ArrowIO a) => a Page XmlTree
+xmlizeEntity
+    = proc page
+    -> do lastMod <- arrIO (utcToLocalZonedTime . pageLastMod) -< page
+          ( eelem "/"
+            += ( eelem "page"
+                 += sattr "name" (pageName page)
+                 += sattr "type" (show $ pageType page)
+                 += ( case pageLanguage page of
+                        Just x  -> sattr "lang" x
+                        Nothing -> none
+                    )
+                 += ( case pageFileName page of
+                        Just x  -> sattr "fileName" x
+                        Nothing -> none
+                    )
+                 -- isTheme / isFeed are only written for the one MIME type
+                 -- each of them applies to.
+                 += ( case pageType page of
+                        MIMEType "text" "css" _
+                            -> sattr "isTheme" (yesOrNo $ pageIsTheme page)
+                        MIMEType "text" "x-rakka" _
+                            -> sattr "isFeed" (yesOrNo $ pageIsFeed page)
+                        _
+                            -> none
+                    )
+                 += sattr "isLocked" (yesOrNo $ pageIsLocked page)
+                 += sattr "isBoring" (yesOrNo $ pageIsBoring page)
+                 += sattr "isBinary" (yesOrNo $ pageIsBinary page)
+                 += sattr "revision" (show $ pageRevision page)
+                 += sattr "lastModified" (formatW3CDateTime lastMod)
+                 += ( case pageSummary page of
+                        Just s  -> eelem "summary" += txt s
+                        Nothing -> none
+                    )
+                 += ( if M.null (pageOtherLang page) then
+                          none
+                      else
+                          selem "otherLang"
+                                    [ eelem "link"
+                                      += sattr "lang" lang
+                                      += sattr "page" name
+                                          | (lang, name) <- M.toList (pageOtherLang page) ]
+                    )
+                 -- Binary content is Base64-encoded; text content is decoded
+                 -- from UTF-8 bytes into a String.
+                 += ( if pageIsBinary page then
+                          ( eelem "binaryData"
+                            += txt (B64.encode $ L.unpack $ pageContent page)
+                          )
+                      else
+                          ( eelem "textData"
+                            += txt (decode $ L.unpack $ pageContent page)
+                          )
+                    )
+               )) -<< ()
+
+
+-- Parse the XML form of a page.  The page name is supplied by the caller
+-- rather than read from the document.
+--
+-- If the <page> element has a "redirect" attribute the result is a
+-- 'Redirection' to that destination; otherwise the document is handed to
+-- 'parseEntity'.
+--
+-- The revision and last-modified fields are NOT read from the XML.  They are
+-- left as error thunks with a descriptive message, so the caller must
+-- overwrite them before anything (including the derived 'Show' and 'Eq'
+-- instances) forces them.
+parseXmlizedPage :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page
+parseXmlizedPage
+    = proc (name, tree)
+    -> do redirect <- maybeA (getXPathTreesInDoc "/page/@redirect/text()" >>> getText) -< tree
+          case redirect of
+            Nothing   -> parseEntity -< (name, tree)
+            Just dest -> returnA -< (Redirection {
+                                       redirName     = name
+                                     , redirDest     = dest
+                                     , redirRevision = error "parseXmlizedPage: redirRevision is not read from the XML"
+                                     , redirLastMod  = error "parseXmlizedPage: redirLastMod is not read from the XML"
+                                     })
+
+
+-- Parse the XML form of an 'Entity' page.  The page name is supplied by the
+-- caller rather than read from the document.
+--
+-- Attributes read from <page>: type (required), lang and fileName (optional),
+-- isTheme / isFeed / isLocked / isBoring (each treated as "no" when missing).
+-- 'pageIsBinary' is not read from an attribute: it is True exactly when the
+-- content came from <binaryData>.
+--
+-- Failure modes, all raised as runtime errors when the value is forced:
+--   * the type attribute goes through 'read', which is partial;
+--   * having neither or both of <textData> / <binaryData> calls 'error'.
+--
+-- The revision and last-modified fields are NOT read from the XML.  They are
+-- left as error thunks with a descriptive message, so the caller must
+-- overwrite them before anything forces them.
+parseEntity :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page
+parseEntity
+    = proc (name, tree)
+    -> do mimeType <- (getXPathTreesInDoc "/page/@type/text()" >>> getText
+                       >>> arr read) -< tree
+
+          lang     <- maybeA (getXPathTreesInDoc "/page/@lang/text()"     >>> getText) -< tree
+          -- XML attribute names are case-sensitive: this must be spelled
+          -- "fileName", exactly as xmlizePage writes it, or the file name is
+          -- silently dropped on every round trip.
+          fileName <- maybeA (getXPathTreesInDoc "/page/@fileName/text()" >>> getText) -< tree
+
+          isTheme  <- (withDefault (getXPathTreesInDoc "/page/@isTheme/text()"  >>> getText) "no"
+                       >>> parseYesOrNo) -< tree
+          isFeed   <- (withDefault (getXPathTreesInDoc "/page/@isFeed/text()"   >>> getText) "no"
+                       >>> parseYesOrNo) -< tree
+          isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no"
+                       >>> parseYesOrNo) -< tree
+          isBoring <- (withDefault (getXPathTreesInDoc "/page/@isBoring/text()" >>> getText) "no"
+                       >>> parseYesOrNo) -< tree
+
+          summary <- (maybeA (getXPathTreesInDoc "/page/summary/text()"
+                              >>> getText
+                              >>> deleteIfEmpty)) -< tree
+
+          otherLang <- listA (getXPathTreesInDoc "/page/otherLang/link"
+                              >>>
+                              (getAttrValue0 "lang"
+                               &&&
+                               getAttrValue0 "page")) -< tree
+
+          textData   <- maybeA (getXPathTreesInDoc "/page/textData/text()"   >>> getText) -< tree
+          binaryData <- maybeA (getXPathTreesInDoc "/page/binaryData/text()" >>> getText) -< tree
+
+          -- Text is re-encoded to UTF-8 bytes; binary data is Base64-decoded.
+          let (isBinary, content)
+                  = case (textData, binaryData) of
+                      (Just text, Nothing    ) -> (False, L.pack $ encode text      )
+                      (Nothing  , Just binary) -> (True , L.pack $ B64.decode binary)
+                      _                        -> error "one of textData or binaryData is required"
+
+          returnA -< Entity {
+                        pageName      = name
+                      , pageType      = mimeType
+                      , pageLanguage  = lang
+                      , pageFileName  = fileName
+                      , pageIsTheme   = isTheme
+                      , pageIsFeed    = isFeed
+                      , pageIsLocked  = isLocked
+                      , pageIsBoring  = isBoring
+                      , pageIsBinary  = isBinary
+                      , pageRevision  = error "parseEntity: pageRevision is not read from the XML"
+                      , pageLastMod   = error "parseEntity: pageLastMod is not read from the XML"
+                      , pageSummary   = summary
+                      , pageOtherLang = M.fromList otherLang
+                      , pageContent   = content
+                      }