X-Git-Url: http://git.cielonegro.org/gitweb.cgi?p=Rakka.git;a=blobdiff_plain;f=Rakka%2FPage.hs;fp=Rakka%2FPage.hs;h=b4c88fcc5f2fef07de28d67825e62b68f6c03112;hp=24f037bc4a31096cd9f7c60b527062fce3dda1e9;hb=45bce2c29948649f74ada71f2fa851bdb812e96c;hpb=9932fbe6504e8b812703291e2497a5f010880d3b diff --git a/Rakka/Page.hs b/Rakka/Page.hs index 24f037b..b4c88fc 100644 --- a/Rakka/Page.hs +++ b/Rakka/Page.hs @@ -1,6 +1,6 @@ --- -*- coding: utf-8 -*- {-# LANGUAGE Arrows + , TypeOperators , UnicodeSyntax #-} module Rakka.Page @@ -32,32 +32,40 @@ module Rakka.Page , parseXmlizedPage ) where +import Control.Applicative import Control.Arrow -import qualified Data.Ascii as Ascii -import qualified Data.Text as T +import Control.Arrow.ArrowIO +import Control.Arrow.ArrowList +import Control.Arrow.Unicode +import qualified Codec.Binary.UTF8.String as UTF8 +import qualified Data.ByteString.Char8 as B8 import qualified Data.ByteString.Lazy as Lazy (ByteString) import qualified Data.ByteString.Lazy as L hiding (ByteString) import qualified Data.ByteString.Lazy.Char8 as L8 hiding (ByteString) -import Data.Char -import Data.Map (Map) +import Data.CaseInsensitive (CI) +import qualified Data.CaseInsensitive as CI +import Data.Char +import Data.Map (Map) import qualified Data.Map as M -import Data.Time +import Data.Text (Text) +import qualified Data.Text as T +import Data.Text.Encoding +import Data.Time import qualified Data.Time.W3C as W3C -import Network.HTTP.Lucu hiding (redirect) -import Network.URI hiding (fragment) -import Rakka.Utils -import Subversion.Types -import System.FilePath.Posix +import Network.HTTP.Lucu hiding (redirect) +import Network.URI hiding (fragment) +import OpenSSL.EVP.Base64 +import Prelude.Unicode +import Rakka.Utils +import Subversion.Types +import System.FilePath.Posix +import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.DOM.TypeDefs import Text.XML.HXT.XPath -import Text.XML.HXT.Arrow.XmlArrow -import Prelude.Unicode - -type PageName = T.Text - -type LanguageTag = Ascii -- See RFC 3066: http://www.ietf.org/rfc/rfc3066.txt -type LanguageName = T.Text -- i.e. "日本語" +type PageName = Text +type LanguageTag = CI Text -- See RFC 3066: http://www.ietf.org/rfc/rfc3066.txt +type LanguageName = Text -- i.e. "日本語" data Page = Redirection { @@ -85,7 +93,6 @@ data Page } deriving (Show, Eq) - data UpdateInfo = UpdateInfo { uiOldRevision :: !RevNum @@ -126,37 +133,37 @@ pageRevision p -- UTF-8 に encode してから 0x20 - 0x7E の範圍を除いて URI escape する。 -encodePageName :: PageName -> FilePath -encodePageName = escapeURIString isSafeChar . UTF8.encodeString . fixPageName +encodePageName ∷ PageName → FilePath +encodePageName = escapeURIString isSafeChar ∘ UTF8.encodeString ∘ fixPageName ∘ T.unpack where - fixPageName :: PageName -> PageName - fixPageName = (\ (x:xs) -> toUpper x : xs) . map (\ c -> if c == ' ' then '_' else c) - - -decodePageName :: FilePath -> PageName -decodePageName = UTF8.decodeString . unEscapeString + fixPageName ∷ String → String + fixPageName = capitalizeHead ∘ map (\c → if c ≡ ' ' then '_' else c) + capitalizeHead ∷ String → String + capitalizeHead [] = (⊥) + capitalizeHead (x:xs) = toUpper x : xs -encodeFragment :: String -> String -encodeFragment = escapeURIString isSafeChar . UTF8.encodeString +-- FIXME: use system-filepath +decodePageName ∷ FilePath → PageName +decodePageName = T.pack ∘ UTF8.decodeString ∘ unEscapeString +encodeFragment ∷ Text → String +encodeFragment = escapeURIString isSafeChar ∘ B8.unpack ∘ encodeUtf8 -mkPageURI :: URI -> PageName -> URI +mkPageURI ∷ URI → PageName → URI mkPageURI baseURI name = baseURI { uriPath = uriPath baseURI encodePageName name <.> "html" } - -mkPageFragmentURI :: URI -> PageName -> String -> URI +mkPageFragmentURI ∷ URI → PageName → Text → URI mkPageFragmentURI baseURI name fragment = baseURI { uriPath = uriPath baseURI encodePageName name <.> "html" , uriFragment = ('#' : encodeFragment fragment) } - -mkFragmentURI :: String -> URI +mkFragmentURI ∷ Text → URI mkFragmentURI fragment = nullURI { uriFragment = ('#' : encodeFragment fragment) @@ -239,10 +246,10 @@ xmlizePage -> do lastMod <- arrIO (utcToLocalZonedTime . redirLastMod) -< page ( eelem "/" += ( eelem "page" - += sattr "name" (redirName page) - += sattr "redirect" (redirDest page) - += sattr "isLocked" (yesOrNo $ redirIsLocked page) - += sattr "revision" (show $ redirRevision page) + += sattr "name" (T.unpack $ redirName page ) + += sattr "redirect" (T.unpack $ redirDest page ) + += sattr "isLocked" (yesOrNo $ redirIsLocked page) + += sattr "revision" (show $ redirRevision page) += sattr "lastModified" (W3C.format lastMod) )) -<< () @@ -252,10 +259,10 @@ xmlizePage -> do lastMod <- arrIO (utcToLocalZonedTime . entityLastMod) -< page ( eelem "/" += ( eelem "page" - += sattr "name" (pageName page) + += sattr "name" (T.unpack $ pageName page) += sattr "type" (show $ entityType page) += ( case entityLanguage page of - Just x -> sattr "lang" x + Just x -> sattr "lang" (T.unpack $ CI.foldedCase x) Nothing -> none ) += ( case entityType page of @@ -279,9 +286,9 @@ xmlizePage else selem "otherLang" [ eelem "link" - += sattr "lang" lang - += sattr "page" name - | (lang, name) <- M.toList (entityOtherLang page) ] + += sattr "lang" (T.unpack $ CI.foldedCase lang) + += sattr "page" (T.unpack name) + | (lang, name) ← M.toList (entityOtherLang page) ] ) += ( if entityIsBinary page then ( eelem "binaryData" @@ -294,25 +301,23 @@ xmlizePage ) )) -<< () - -parseXmlizedPage :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page +parseXmlizedPage ∷ (ArrowXml (⇝), ArrowChoice (⇝)) ⇒ (PageName, XmlTree) ⇝ Page parseXmlizedPage = proc (name, tree) - -> do updateInfo <- maybeA parseUpdateInfo -< tree - redirect <- maybeA (getXPathTreesInDoc "/page/@redirect/text()" >>> getText) -< tree - isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no" - >>> parseYesOrNo) -< tree - case redirect of - Nothing -> parseEntity -< (name, tree) - Just dest -> returnA -< (Redirection { - redirName = name - , redirDest = dest - , redirIsLocked = isLocked - , redirRevision = undefined - , redirLastMod = undefined - , redirUpdateInfo = updateInfo - }) - + → do updateInfo ← maybeA parseUpdateInfo ⤙ tree + redirect ← maybeA (getXPathTreesInDoc "/page/@redirect/text()" ⋙ getText) ⤙ tree + isLocked ← (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" ⋙ getText) "no" + ⋙ parseYesOrNo) ⤙ tree + case redirect of + Nothing → parseEntity ⤙ (name, tree) + Just dest → returnA ⤙ Redirection { + redirName = name + , redirDest = T.pack dest + , redirIsLocked = isLocked + , redirRevision = undefined + , redirLastMod = undefined + , redirUpdateInfo = updateInfo + } parseEntity :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page parseEntity @@ -349,18 +354,17 @@ parseEntity (Nothing , Just binary) -> (True , L8.pack $ decodeBase64 $ dropWhitespace binary) _ -> error "one of textData or binaryData is required" mimeType - = if isBinary then - if null mimeTypeStr then - guessMIMEType content - else - read mimeTypeStr - else - read mimeTypeStr - - returnA -< Entity { + = if isBinary then + if null mimeTypeStr then + guessMIMEType content + else + read mimeTypeStr + else + read mimeTypeStr + returnA ⤙ Entity { entityName = name , entityType = mimeType - , entityLanguage = lang + , entityLanguage = CI.mk ∘ T.pack <$> lang , entityIsTheme = isTheme , entityIsFeed = isFeed , entityIsLocked = isLocked @@ -368,22 +372,22 @@ parseEntity , entityRevision = undefined , entityLastMod = undefined , entitySummary = summary - , entityOtherLang = M.fromList otherLang + , entityOtherLang = M.fromList ((CI.mk ∘ T.pack ⁂ T.pack) <$> otherLang) , entityContent = content , entityUpdateInfo = updateInfo } +parseUpdateInfo ∷ (ArrowXml (⇝), ArrowChoice (⇝)) ⇒ XmlTree ⇝ UpdateInfo +parseUpdateInfo + = proc tree + -> do uInfo ← getXPathTreesInDoc "/page/updateInfo" ⤙ tree + oldRev ← (getAttrValue0 "oldRevision" ⋙ arr read) ⤙ uInfo + oldName ← maybeA (getXPathTrees "/updateInfo/move/@from/text()" ⋙ getText) ⤙ uInfo + returnA ⤙ UpdateInfo { + uiOldRevision = oldRev + , uiOldName = T.pack <$> oldName + } + dropWhitespace :: String -> String {-# INLINE dropWhitespace #-} dropWhitespace = filter ((¬) ∘ isSpace) - -parseUpdateInfo :: (ArrowXml a, ArrowChoice a) => a XmlTree UpdateInfo -parseUpdateInfo - = proc tree - -> do uInfo <- getXPathTreesInDoc "/page/updateInfo" -< tree - oldRev <- (getAttrValue0 "oldRevision" >>> arr read) -< uInfo - oldName <- maybeA (getXPathTrees "/updateInfo/move/@from/text()" >>> getText) -< uInfo - returnA -< UpdateInfo { - uiOldRevision = oldRev - , uiOldName = oldName - }