+{-# LANGUAGE
+ Arrows
+ , TypeOperators
+ , UnicodeSyntax
+ #-}
module Rakka.Page
( PageName
, Page(..)
, mkObjectURI
, mkFragmentURI
, mkAuxiliaryURI
- , mkRDFURI
+ , mkFeedURI
, mkRakkaURI
, xmlizePage
, parseXmlizedPage
)
where
-
-import qualified Codec.Binary.Base64 as B64
+import Control.Applicative
+import Control.Arrow
+import Control.Arrow.ArrowIO
+import Control.Arrow.ArrowList
+import Control.Arrow.Unicode
import qualified Codec.Binary.UTF8.String as UTF8
-import Control.Arrow
-import Control.Arrow.ArrowIO
-import Control.Arrow.ArrowList
+import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Lazy as Lazy (ByteString)
import qualified Data.ByteString.Lazy as L hiding (ByteString)
+import qualified Data.ByteString.Lazy.Char8 as L8 hiding (ByteString)
+import Data.CaseInsensitive (CI)
+import qualified Data.CaseInsensitive as CI
import Data.Char
import Data.Map (Map)
import qualified Data.Map as M
-import Data.Maybe
+import Data.Text (Text)
+import qualified Data.Text as T
+import Data.Text.Encoding
import Data.Time
+import qualified Data.Time.W3C as W3C
import Network.HTTP.Lucu hiding (redirect)
import Network.URI hiding (fragment)
+import OpenSSL.EVP.Base64
+import Prelude.Unicode
import Rakka.Utils
-import Rakka.W3CDateTime
import Subversion.Types
import System.FilePath.Posix
-import Text.XML.HXT.Arrow.XmlArrow
-import Text.XML.HXT.Arrow.XmlNodeSet
-import Text.XML.HXT.DOM.TypeDefs
-
-
-type PageName = String
-
-type LanguageTag = String -- See RFC 3066: http://www.ietf.org/rfc/rfc3066.txt
-type LanguageName = String -- i.e. "日本語"
+import Text.XML.HXT.Arrow.XmlArrow
+import Text.XML.HXT.DOM.TypeDefs
+import Text.XML.HXT.XPath
+type PageName = Text
+type LanguageTag = CI Text -- See RFC 3066: http://www.ietf.org/rfc/rfc3066.txt
+type LanguageName = Text -- i.e. "日本語"
data Page
= Redirection {
}
deriving (Show, Eq)
-
data UpdateInfo
= UpdateInfo {
uiOldRevision :: !RevNum
-- UTF-8 に encode してから 0x20 - 0x7E の範圍を除いて URI escape する。
-encodePageName :: PageName -> FilePath
-encodePageName = escapeURIString isSafeChar . UTF8.encodeString . fixPageName
+encodePageName ∷ PageName → FilePath
+encodePageName = escapeURIString isSafeChar ∘ UTF8.encodeString ∘ fixPageName ∘ T.unpack
where
- fixPageName :: PageName -> PageName
- fixPageName = (\ (x:xs) -> toUpper x : xs) . map (\ c -> if c == ' ' then '_' else c)
-
-
-isSafeChar :: Char -> Bool
-isSafeChar c
- | c == '/' = True
- | isReserved c = False
- | c > ' ' && c <= '~' = True
- | otherwise = False
-
-
--- URI unescape して UTF-8 から decode する。
-decodePageName :: FilePath -> PageName
-decodePageName = UTF8.decodeString . unEscapeString
+ fixPageName ∷ String → String
+ fixPageName = capitalizeHead ∘ map (\c → if c ≡ ' ' then '_' else c)
+ capitalizeHead ∷ String → String
+ capitalizeHead [] = (⊥)
+ capitalizeHead (x:xs) = toUpper x : xs
-encodeFragment :: String -> String
-encodeFragment = escapeURIString isSafeChar . UTF8.encodeString
+-- FIXME: use system-filepath
+decodePageName ∷ FilePath → PageName
+decodePageName = T.pack ∘ UTF8.decodeString ∘ unEscapeString
+encodeFragment ∷ Text → String
+encodeFragment = escapeURIString isSafeChar ∘ B8.unpack ∘ encodeUtf8
-mkPageURI :: URI -> PageName -> URI
+mkPageURI ∷ URI → PageName → URI
mkPageURI baseURI name
= baseURI {
- uriPath = foldl (</>) "/" [uriPath baseURI, encodePageName name ++ ".html"]
+ uriPath = uriPath baseURI </> encodePageName name <.> "html"
}
-
-mkPageFragmentURI :: URI -> PageName -> String -> URI
+mkPageFragmentURI ∷ URI → PageName → Text → URI
mkPageFragmentURI baseURI name fragment
= baseURI {
- uriPath = foldl (</>) "/" [uriPath baseURI, encodePageName name ++ ".html"]
+ uriPath = uriPath baseURI </> encodePageName name <.> "html"
, uriFragment = ('#' : encodeFragment fragment)
}
-
-mkFragmentURI :: String -> URI
+mkFragmentURI ∷ Text → URI
mkFragmentURI fragment
= nullURI {
uriFragment = ('#' : encodeFragment fragment)
}
-mkRDFURI :: URI -> PageName -> URI
-mkRDFURI baseURI name
+mkFeedURI :: URI -> PageName -> URI
+mkFeedURI baseURI name
= baseURI {
- uriPath = foldl (</>) "/" [uriPath baseURI, encodePageName name ++ ".rdf"]
+ uriPath = uriPath baseURI </> encodePageName name <.> "rdf"
}
-> do lastMod <- arrIO (utcToLocalZonedTime . redirLastMod) -< page
( eelem "/"
+= ( eelem "page"
- += sattr "name" (redirName page)
- += sattr "redirect" (redirDest page)
- += sattr "isLocked" (yesOrNo $ redirIsLocked page)
- += sattr "revision" (show $ redirRevision page)
- += sattr "lastModified" (formatW3CDateTime lastMod)
+ += sattr "name" (T.unpack $ redirName page )
+ += sattr "redirect" (T.unpack $ redirDest page )
+ += sattr "isLocked" (yesOrNo $ redirIsLocked page)
+ += sattr "revision" (show $ redirRevision page)
+ += sattr "lastModified" (W3C.format lastMod)
)) -<< ()
xmlizeEntity :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
-> do lastMod <- arrIO (utcToLocalZonedTime . entityLastMod) -< page
( eelem "/"
+= ( eelem "page"
- += sattr "name" (pageName page)
+ += sattr "name" (T.unpack $ pageName page)
+= sattr "type" (show $ entityType page)
+= ( case entityLanguage page of
- Just x -> sattr "lang" x
+ Just x -> sattr "lang" (T.unpack $ CI.foldedCase x)
Nothing -> none
)
+= ( case entityType page of
+= sattr "isLocked" (yesOrNo $ entityIsLocked page)
+= sattr "isBinary" (yesOrNo $ entityIsBinary page)
+= sattr "revision" (show $ entityRevision page)
- += sattr "lastModified" (formatW3CDateTime lastMod)
+ += sattr "lastModified" (W3C.format lastMod)
+= ( case entitySummary page of
Just s -> eelem "summary" += txt s
Nothing -> none
else
selem "otherLang"
[ eelem "link"
- += sattr "lang" lang
- += sattr "page" name
- | (lang, name) <- M.toList (entityOtherLang page) ]
+ += sattr "lang" (T.unpack $ CI.foldedCase lang)
+ += sattr "page" (T.unpack name)
+ | (lang, name) ← M.toList (entityOtherLang page) ]
)
+= ( if entityIsBinary page then
( eelem "binaryData"
- += txt (B64.encode $ L.unpack $ entityContent page)
+ += txt (L8.unpack $ encodeBase64LBS $ entityContent page)
)
else
( eelem "textData"
)
)) -<< ()
-
-parseXmlizedPage :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page
+parseXmlizedPage ∷ (ArrowXml (⇝), ArrowChoice (⇝)) ⇒ (PageName, XmlTree) ⇝ Page
parseXmlizedPage
= proc (name, tree)
- -> do updateInfo <- maybeA parseUpdateInfo -< tree
- redirect <- maybeA (getXPathTreesInDoc "/page/@redirect/text()" >>> getText) -< tree
- isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no"
- >>> parseYesOrNo) -< tree
- case redirect of
- Nothing -> parseEntity -< (name, tree)
- Just dest -> returnA -< (Redirection {
- redirName = name
- , redirDest = dest
- , redirIsLocked = isLocked
- , redirRevision = undefined
- , redirLastMod = undefined
- , redirUpdateInfo = updateInfo
- })
-
+ → do updateInfo ← maybeA parseUpdateInfo ⤙ tree
+ redirect ← maybeA (getXPathTreesInDoc "/page/@redirect/text()" ⋙ getText) ⤙ tree
+ isLocked ← (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" ⋙ getText) "no"
+ ⋙ parseYesOrNo) ⤙ tree
+ case redirect of
+ Nothing → parseEntity ⤙ (name, tree)
+ Just dest → returnA ⤙ Redirection {
+ redirName = name
+ , redirDest = T.pack dest
+ , redirIsLocked = isLocked
+ , redirRevision = undefined
+ , redirLastMod = undefined
+ , redirUpdateInfo = updateInfo
+ }
parseEntity :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page
parseEntity
let (isBinary, content)
= case (textData, binaryData) of
- (Just text, Nothing ) -> (False, L.pack $ UTF8.encode text )
- (Nothing , Just binary) -> (True , L.pack $ fromJust $ B64.decode $ dropWhitespace binary)
+ (Just text, Nothing ) -> (False, L.pack $ UTF8.encode text)
+ (Nothing , Just binary) -> (True , L8.pack $ decodeBase64 $ dropWhitespace binary)
_ -> error "one of textData or binaryData is required"
mimeType
- = if isBinary then
- if null mimeTypeStr then
- guessMIMEType content
- else
- read mimeTypeStr
- else
- read mimeTypeStr
-
- returnA -< Entity {
+ = if isBinary then
+ if null mimeTypeStr then
+ guessMIMEType content
+ else
+ read mimeTypeStr
+ else
+ read mimeTypeStr
+ returnA ⤙ Entity {
entityName = name
, entityType = mimeType
- , entityLanguage = lang
+ , entityLanguage = CI.mk ∘ T.pack <$> lang
, entityIsTheme = isTheme
, entityIsFeed = isFeed
, entityIsLocked = isLocked
, entityRevision = undefined
, entityLastMod = undefined
, entitySummary = summary
- , entityOtherLang = M.fromList otherLang
+ , entityOtherLang = M.fromList ((CI.mk ∘ T.pack ⁂ T.pack) <$> otherLang)
, entityContent = content
, entityUpdateInfo = updateInfo
}
- where
- dropWhitespace :: String -> String
- dropWhitespace [] = []
- dropWhitespace (x:xs)
- | x == ' ' || x == '\t' || x == '\n'
- = dropWhitespace xs
- | otherwise
- = x : dropWhitespace xs
-
-parseUpdateInfo :: (ArrowXml a, ArrowChoice a) => a XmlTree UpdateInfo
+parseUpdateInfo ∷ (ArrowXml (⇝), ArrowChoice (⇝)) ⇒ XmlTree ⇝ UpdateInfo
parseUpdateInfo
= proc tree
- -> do uInfo <- getXPathTreesInDoc "/page/updateInfo" -< tree
- oldRev <- (getAttrValue0 "oldRevision" >>> arr read) -< uInfo
- oldName <- maybeA (getXPathTrees "/updateInfo/move/@from/text()" >>> getText) -< uInfo
- returnA -< UpdateInfo {
- uiOldRevision = oldRev
- , uiOldName = oldName
- }
-
-
\ No newline at end of file
+ -> do uInfo ← getXPathTreesInDoc "/page/updateInfo" ⤙ tree
+ oldRev ← (getAttrValue0 "oldRevision" ⋙ arr read) ⤙ uInfo
+ oldName ← maybeA (getXPathTrees "/updateInfo/move/@from/text()" ⋙ getText) ⤙ uInfo
+ returnA ⤙ UpdateInfo {
+ uiOldRevision = oldRev
+ , uiOldName = T.pack <$> oldName
+ }
+
+dropWhitespace :: String -> String
+{-# INLINE dropWhitespace #-}
+dropWhitespace = filter ((¬) ∘ isSpace)