X-Git-Url: https://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=Rakka%2FPage.hs;h=f845f7eee589b7d81a141ca653a7854a97213392;hb=42f51754dea02201aececaacbf194d714cd58aaf;hp=b293b1fb0258445edfec5261687c3996c1893e9a;hpb=b4a3d2cf3854b10d923cb4c546bf1fe32b021a68;p=Rakka.git diff --git a/Rakka/Page.hs b/Rakka/Page.hs index b293b1f..f845f7e 100644 --- a/Rakka/Page.hs +++ b/Rakka/Page.hs @@ -1,3 +1,8 @@ +{-# LANGUAGE + Arrows + , TypeOperators + , UnicodeSyntax + #-} module Rakka.Page ( PageName , Page(..) @@ -10,58 +15,64 @@ module Rakka.Page , pageName , pageUpdateInfo + , pageRevision , encodePageName , decodePageName - , entityFileName' - , defaultFileName - , mkPageURI , mkPageFragmentURI , mkObjectURI , mkFragmentURI , mkAuxiliaryURI + , mkFeedURI , mkRakkaURI , xmlizePage , parseXmlizedPage ) where - -import qualified Codec.Binary.Base64 as B64 -import Codec.Binary.UTF8.String -import Control.Arrow -import Control.Arrow.ArrowIO -import Control.Arrow.ArrowList +import Control.Applicative +import Control.Arrow +import Control.Arrow.ArrowIO +import Control.Arrow.ArrowList +import Control.Arrow.Unicode +import qualified Codec.Binary.UTF8.String as UTF8 +import qualified Data.ByteString.Char8 as B8 import qualified Data.ByteString.Lazy as Lazy (ByteString) import qualified Data.ByteString.Lazy as L hiding (ByteString) +import qualified Data.ByteString.Lazy.Char8 as L8 hiding (ByteString) +import Data.CaseInsensitive (CI) +import qualified Data.CaseInsensitive as CI import Data.Char import Data.Map (Map) import qualified Data.Map as M -import Data.Maybe +import Data.Text (Text) +import qualified Data.Text as T +import Data.Text.Encoding import Data.Time import Network.HTTP.Lucu hiding (redirect) import Network.URI hiding (fragment) +import OpenSSL.EVP.Base64 +import Prelude.Unicode import Rakka.Utils import Rakka.W3CDateTime import Subversion.Types import System.FilePath.Posix -import Text.XML.HXT.Arrow.XmlArrow -import Text.XML.HXT.Arrow.XmlNodeSet -import Text.XML.HXT.DOM.TypeDefs - +import Text.XML.HXT.Arrow.XmlArrow +import Text.XML.HXT.DOM.TypeDefs +import Text.XML.HXT.XPath -type PageName = String - -type LanguageTag = String -- See RFC 3066: http://www.ietf.org/rfc/rfc3066.txt -type LanguageName = String -- i.e. "日本語" +type PageName = Text +type LanguageTag = CI Text -- See RFC 3066: http://www.ietf.org/rfc/rfc3066.txt +type LanguageName = Text -- i.e. "日本語" data Page = Redirection { redirName :: !PageName , redirDest :: !PageName + , redirIsLocked :: !Bool , redirRevision :: RevNum , redirLastMod :: UTCTime , redirUpdateInfo :: Maybe UpdateInfo @@ -70,11 +81,9 @@ data Page entityName :: !PageName , entityType :: !MIMEType , entityLanguage :: !(Maybe LanguageTag) - , entityFileName :: !(Maybe String) , entityIsTheme :: !Bool -- text/css 以外では無意味 , entityIsFeed :: !Bool -- text/x-rakka 以外では無意味 , entityIsLocked :: !Bool - , entityIsBoring :: !Bool , entityIsBinary :: !Bool , entityRevision :: RevNum , entityLastMod :: UTCTime @@ -95,85 +104,68 @@ data UpdateInfo isRedirect :: Page -> Bool -isRedirect (Redirection _ _ _ _ _) = True -isRedirect _ = False +isRedirect (Redirection _ _ _ _ _ _) = True +isRedirect _ = False isEntity :: Page -> Bool -isEntity (Entity _ _ _ _ _ _ _ _ _ _ _ _ _ _ _) = True -isEntity _ = False +isEntity (Entity _ _ _ _ _ _ _ _ _ _ _ _ _) = True +isEntity _ = False pageName :: Page -> PageName pageName p | isRedirect p = redirName p | isEntity p = entityName p - | otherwise = fail "neither redirection nor entity" + | otherwise = error "neither redirection nor entity" pageUpdateInfo :: Page -> Maybe UpdateInfo pageUpdateInfo p | isRedirect p = redirUpdateInfo p | isEntity p = entityUpdateInfo p - | otherwise = fail "neither redirection nor entity" - - --- UTF-8 に encode してから 0x20 - 0x7E の範圍を除いて URI escape する。 -encodePageName :: PageName -> FilePath -encodePageName = escapeURIString isSafeChar . encodeString . fixPageName - where - fixPageName :: PageName -> PageName - fixPageName = (\ (x:xs) -> toUpper x : xs) . map (\ c -> if c == ' ' then '_' else c) - + | otherwise = error "neither redirection nor entity" -isSafeChar :: Char -> Bool -isSafeChar c - | c == '/' = True - | isReserved c = False - | c > ' ' && c <= '~' = True - | otherwise = False +pageRevision :: Page -> RevNum +pageRevision p + | isRedirect p = redirRevision p + | isEntity p = entityRevision p + | otherwise = error "neither redirection nor entity" --- URI unescape して UTF-8 から decode する。 -decodePageName :: FilePath -> PageName -decodePageName = decodeString . unEscapeString +-- UTF-8 に encode してから 0x20 - 0x7E の範圍を除いて URI escape する。 +encodePageName ∷ PageName → FilePath +encodePageName = escapeURIString isSafeChar ∘ UTF8.encodeString ∘ fixPageName ∘ T.unpack + where + fixPageName ∷ String → String + fixPageName = capitalizeHead ∘ map (\c → if c ≡ ' ' then '_' else c) -encodeFragment :: String -> String -encodeFragment = escapeURIString isSafeChar . encodeString - - -entityFileName' :: Page -> String -entityFileName' page - = fromMaybe (defaultFileName (entityType page) (entityName page)) (entityFileName page) - + capitalizeHead ∷ String → String + capitalizeHead [] = (⊥) + capitalizeHead (x:xs) = toUpper x : xs -defaultFileName :: MIMEType -> PageName -> String -defaultFileName pType pName - = let baseName = takeFileName pName - in - case pType of - MIMEType "text" "x-rakka" _ -> baseName <.> "rakka" - MIMEType "text" "css" _ -> baseName <.> "css" - _ -> baseName +-- FIXME: use system-filepath +decodePageName ∷ FilePath → PageName +decodePageName = T.pack ∘ UTF8.decodeString ∘ unEscapeString +encodeFragment ∷ Text → String +encodeFragment = escapeURIString isSafeChar ∘ B8.unpack ∘ encodeUtf8 -mkPageURI :: URI -> PageName -> URI +mkPageURI ∷ URI → PageName → URI mkPageURI baseURI name = baseURI { - uriPath = foldl () "/" [uriPath baseURI, encodePageName name ++ ".html"] + uriPath = uriPath baseURI encodePageName name <.> "html" } - -mkPageFragmentURI :: URI -> PageName -> String -> URI +mkPageFragmentURI ∷ URI → PageName → Text → URI mkPageFragmentURI baseURI name fragment = baseURI { - uriPath = foldl () "/" [uriPath baseURI, encodePageName name ++ ".html"] + uriPath = uriPath baseURI encodePageName name <.> "html" , uriFragment = ('#' : encodeFragment fragment) } - -mkFragmentURI :: String -> URI +mkFragmentURI ∷ Text → URI mkFragmentURI fragment = nullURI { uriFragment = ('#' : encodeFragment fragment) @@ -192,6 +184,13 @@ mkAuxiliaryURI baseURI basePath name } +mkFeedURI :: URI -> PageName -> URI +mkFeedURI baseURI name + = baseURI { + uriPath = uriPath baseURI encodePageName name <.> "rdf" + } + + mkRakkaURI :: PageName -> URI mkRakkaURI name = URI { uriScheme = "rakka:" @@ -206,12 +205,11 @@ mkRakkaURI name = URI { -- デフォルトでない場合のみ存在 + revision="112" lastModified="2000-01-01T00:00:00"> @@ -230,87 +228,107 @@ mkRakkaURI name = URI { SKJaHKS8JK/DH8KS43JDK2aKKaSFLLS... + + -} xmlizePage :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree xmlizePage = proc page - -> do lastMod <- arrIO (utcToLocalZonedTime . entityLastMod) -< page - ( eelem "/" - += ( eelem "page" - += sattr "name" (pageName page) - += sattr "type" (show $ entityType page) - += ( case entityLanguage page of - Just x -> sattr "lang" x - Nothing -> none - ) - += ( case entityFileName page of - Just x -> sattr "fileName" x - Nothing -> none - ) - += ( case entityType page of - MIMEType "text" "css" _ - -> sattr "isTheme" (yesOrNo $ entityIsTheme page) - MIMEType "text" "x-rakka" _ - -> sattr "isFeed" (yesOrNo $ entityIsFeed page) - _ - -> none - ) - += sattr "isLocked" (yesOrNo $ entityIsLocked page) - += sattr "isBoring" (yesOrNo $ entityIsBoring page) - += sattr "isBinary" (yesOrNo $ entityIsBinary page) - += sattr "revision" (show $ entityRevision page) - += sattr "lastModified" (formatW3CDateTime lastMod) - += ( case entitySummary page of - Just s -> eelem "summary" += txt s - Nothing -> none - ) - += ( if M.null (entityOtherLang page) then - none - else - selem "otherLang" - [ eelem "link" - += sattr "lang" lang - += sattr "page" name - | (lang, name) <- M.toList (entityOtherLang page) ] - ) - += ( if entityIsBinary page then - ( eelem "binaryData" - += txt (B64.encode $ L.unpack $ entityContent page) + -> if isRedirect page then + xmlizeRedirection -< page + else + xmlizeEntity -< page + where + xmlizeRedirection :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree + xmlizeRedirection + = proc page + -> do lastMod <- arrIO (utcToLocalZonedTime . redirLastMod) -< page + ( eelem "/" + += ( eelem "page" + += sattr "name" (T.unpack $ redirName page ) + += sattr "redirect" (T.unpack $ redirDest page ) + += sattr "isLocked" (yesOrNo $ redirIsLocked page) + += sattr "revision" (show $ redirRevision page) + += sattr "lastModified" (formatW3CDateTime lastMod) + )) -<< () + + xmlizeEntity :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree + xmlizeEntity + = proc page + -> do lastMod <- arrIO (utcToLocalZonedTime . entityLastMod) -< page + ( eelem "/" + += ( eelem "page" + += sattr "name" (T.unpack $ pageName page) + += sattr "type" (show $ entityType page) + += ( case entityLanguage page of + Just x -> sattr "lang" (T.unpack $ CI.foldedCase x) + Nothing -> none ) - else - ( eelem "textData" - += txt (decode $ L.unpack $ entityContent page) + += ( case entityType page of + MIMEType "text" "css" _ + -> sattr "isTheme" (yesOrNo $ entityIsTheme page) + MIMEType "text" "x-rakka" _ + -> sattr "isFeed" (yesOrNo $ entityIsFeed page) + _ + -> none ) - ) - )) -<< () - + += sattr "isLocked" (yesOrNo $ entityIsLocked page) + += sattr "isBinary" (yesOrNo $ entityIsBinary page) + += sattr "revision" (show $ entityRevision page) + += sattr "lastModified" (formatW3CDateTime lastMod) + += ( case entitySummary page of + Just s -> eelem "summary" += txt s + Nothing -> none + ) + += ( if M.null (entityOtherLang page) then + none + else + selem "otherLang" + [ eelem "link" + += sattr "lang" (T.unpack $ CI.foldedCase lang) + += sattr "page" (T.unpack name) + | (lang, name) ← M.toList (entityOtherLang page) ] + ) + += ( if entityIsBinary page then + ( eelem "binaryData" + += txt (L8.unpack $ encodeBase64LBS $ entityContent page) + ) + else + ( eelem "textData" + += txt (UTF8.decode $ L.unpack $ entityContent page) + ) + ) + )) -<< () -parseXmlizedPage :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page +parseXmlizedPage ∷ (ArrowXml (⇝), ArrowChoice (⇝)) ⇒ (PageName, XmlTree) ⇝ Page parseXmlizedPage = proc (name, tree) - -> do updateInfo <- maybeA parseUpdateInfo -< tree - redirect <- maybeA (getXPathTreesInDoc "/page/@redirect/text()" >>> getText) -< tree - case redirect of - Nothing -> parseEntity -< (name, tree) - Just dest -> returnA -< (Redirection { - redirName = name - , redirDest = dest - , redirRevision = undefined - , redirLastMod = undefined - , redirUpdateInfo = updateInfo - }) - + → do updateInfo ← maybeA parseUpdateInfo ⤙ tree + redirect ← maybeA (getXPathTreesInDoc "/page/@redirect/text()" ⋙ getText) ⤙ tree + isLocked ← (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" ⋙ getText) "no" + ⋙ parseYesOrNo) ⤙ tree + case redirect of + Nothing → parseEntity ⤙ (name, tree) + Just dest → returnA ⤙ Redirection { + redirName = name + , redirDest = T.pack dest + , redirIsLocked = isLocked + , redirRevision = undefined + , redirLastMod = undefined + , redirUpdateInfo = updateInfo + } parseEntity :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page parseEntity = proc (name, tree) -> do updateInfo <- maybeA parseUpdateInfo -< tree - mimeType <- (getXPathTreesInDoc "/page/@type/text()" >>> getText - >>> arr read) -< tree + mimeTypeStr <- withDefault (getXPathTreesInDoc "/page/@type/text()" >>> getText) "" -< tree lang <- maybeA (getXPathTreesInDoc "/page/@lang/text()" >>> getText) -< tree - fileName <- maybeA (getXPathTreesInDoc "/page/@filename/text()" >>> getText) -< tree isTheme <- (withDefault (getXPathTreesInDoc "/page/@isTheme/text()" >>> getText) "no" >>> parseYesOrNo) -< tree @@ -318,8 +336,6 @@ parseEntity >>> parseYesOrNo) -< tree isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no" >>> parseYesOrNo) -< tree - isBoring <- (withDefault (getXPathTreesInDoc "/page/@isBoring/text()" >>> getText) "no" - >>> parseYesOrNo) -< tree summary <- (maybeA (getXPathTreesInDoc "/page/summary/text()" >>> getText @@ -336,38 +352,48 @@ parseEntity let (isBinary, content) = case (textData, binaryData) of - (Just text, Nothing ) -> (False, L.pack $ encode text ) - (Nothing , Just binary) -> (True , L.pack $ B64.decode binary) + (Just text, Nothing ) -> (False, L.pack $ UTF8.encode text) + (Nothing , Just binary) -> (True , L8.pack $ decodeBase64 $ dropWhitespace binary) _ -> error "one of textData or binaryData is required" - - returnA -< Entity { + mimeType + = if isBinary then + if null mimeTypeStr then + guessMIMEType content + else + read mimeTypeStr + else + read mimeTypeStr + returnA ⤙ Entity { entityName = name , entityType = mimeType - , entityLanguage = lang - , entityFileName = fileName + , entityLanguage = CI.mk ∘ T.pack <$> lang , entityIsTheme = isTheme , entityIsFeed = isFeed , entityIsLocked = isLocked - , entityIsBoring = isBoring , entityIsBinary = isBinary , entityRevision = undefined , entityLastMod = undefined , entitySummary = summary - , entityOtherLang = M.fromList otherLang + , entityOtherLang = M.fromList ((CI.mk ∘ T.pack ⁂ T.pack) <$> otherLang) , entityContent = content , entityUpdateInfo = updateInfo } - - -parseUpdateInfo :: (ArrowXml a, ArrowChoice a) => a XmlTree UpdateInfo + where + dropWhitespace :: String -> String + dropWhitespace [] = [] + dropWhitespace (x:xs) + | x == ' ' || x == '\t' || x == '\n' + = dropWhitespace xs + | otherwise + = x : dropWhitespace xs + +parseUpdateInfo ∷ (ArrowXml (⇝), ArrowChoice (⇝)) ⇒ XmlTree ⇝ UpdateInfo parseUpdateInfo = proc tree - -> do uInfo <- getXPathTreesInDoc "/*/updateInfo" -< tree - oldRev <- (getAttrValue0 "oldRevision" >>> arr read) -< uInfo - oldName <- maybeA (getXPathTrees "/move/@from/text()" >>> getText) -< uInfo - returnA -< UpdateInfo { - uiOldRevision = oldRev - , uiOldName = oldName - } - - \ No newline at end of file + -> do uInfo ← getXPathTreesInDoc "/page/updateInfo" ⤙ tree + oldRev ← (getAttrValue0 "oldRevision" ⋙ arr read) ⤙ uInfo + oldName ← maybeA (getXPathTrees "/updateInfo/move/@from/text()" ⋙ getText) ⤙ uInfo + returnA ⤙ UpdateInfo { + uiOldRevision = oldRev + , uiOldName = T.pack <$> oldName + }