)
where
-import qualified Codec.Binary.Base64 as B64
-import Codec.Binary.UTF8.String
import Control.Arrow
import Control.Arrow.ArrowIO
import Control.Arrow.ArrowList
-import qualified Data.ByteString.Lazy as L
-import qualified Data.Map as M
import Data.Set (Set)
import qualified Data.Set as S
-import Data.Time
import Data.Time.Clock.POSIX
import Paths_Rakka -- Cabal が用意する。
import Rakka.Page
-import Rakka.Utils
import System.Directory
import System.FilePath
import System.FilePath.Find hiding (fileName, modificationTime)
import System.Posix.Files
import Text.XML.HXT.Arrow.ReadDocument
-import Text.XML.HXT.Arrow.XmlArrow
import Text.XML.HXT.Arrow.XmlIOStateArrow
-import Text.XML.HXT.Arrow.XmlNodeSet
-import Text.XML.HXT.DOM.TypeDefs
import Text.XML.HXT.DOM.XmlKeywords
>>=
return . posixSecondsToUTCTime . fromRational . toRational . modificationTime)
-< fpath
- parsePage -< (name, lastMod, tree)
-
-
-parsePage :: (ArrowXml a, ArrowChoice a) => a (PageName, UTCTime, XmlTree) Page
-parsePage
- = proc (name, lastMod, tree)
- -> do redirect <- maybeA (getXPathTreesInDoc "/page/@redirect/text()" >>> getText) -< tree
- case redirect of
- Nothing -> parseEntity -< (name, lastMod, tree)
- Just dest -> returnA -< (Redirection {
- redirName = name
- , redirDest = dest
- , redirRevision = Nothing
- , redirLastMod = lastMod
- })
-
-
-parseEntity :: (ArrowXml a, ArrowChoice a) => a (PageName, UTCTime, XmlTree) Page
-parseEntity
- = proc (name, lastMod, tree)
- -> do mimeType <- (getXPathTreesInDoc "/page/@type/text()" >>> getText
- >>> arr read) -< tree
-
- lang <- maybeA (getXPathTreesInDoc "/page/@lang/text()" >>> getText) -< tree
- fileName <- maybeA (getXPathTreesInDoc "/page/@filename/text()" >>> getText) -< tree
-
- isTheme <- (withDefault (getXPathTreesInDoc "/page/@isTheme/text()" >>> getText) "no"
- >>> parseYesOrNo) -< tree
- isFeed <- (withDefault (getXPathTreesInDoc "/page/@isFeed/text()" >>> getText) "no"
- >>> parseYesOrNo) -< tree
- isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no"
- >>> parseYesOrNo) -< tree
- isBoring <- (withDefault (getXPathTreesInDoc "/page/@isBoring/text()" >>> getText) "no"
- >>> parseYesOrNo) -< tree
-
- summary <- (maybeA (getXPathTreesInDoc "/page/summary/text()"
- >>> getText
- >>> deleteIfEmpty)) -< tree
-
- otherLang <- listA (getXPathTreesInDoc "/page/otherLang/link"
- >>>
- (getAttrValue0 "lang"
- &&&
- getAttrValue0 "page")) -< tree
-
- textData <- maybeA (getXPathTreesInDoc "/page/textData/text()" >>> getText) -< tree
- binaryData <- maybeA (getXPathTreesInDoc "/page/binaryData/text()" >>> getText) -< tree
-
- let (isBinary, content)
- = case (textData, binaryData) of
- (Just text, Nothing ) -> (False, L.pack $ encode text )
- (Nothing , Just binary) -> (True , L.pack $ B64.decode binary)
- _ -> error "one of textData or binaryData is required"
-
- returnA -< Entity {
- pageName = name
- , pageType = mimeType
- , pageLanguage = lang
- , pageFileName = fileName
- , pageIsTheme = isTheme
- , pageIsFeed = isFeed
- , pageIsLocked = isLocked
- , pageIsBoring = isBoring
- , pageIsBinary = isBinary
- , pageRevision = 0
- , pageLastMod = lastMod
- , pageSummary = summary
- , pageOtherLang = M.fromList otherLang
- , pageContent = content
- }
\ No newline at end of file
+ page <- parseXmlizedPage -< (name, tree)
+
+ case page of
+ Redirection _ _ _ _
+ -> returnA -< page {
+ redirRevision = 0
+ , redirLastMod = lastMod
+ }
+
+ Entity _ _ _ _ _ _ _ _ _ _ _ _ _ _
+ -> returnA -< page {
+ pageRevision = 0
+ , pageLastMod = lastMod
+ }