- = proc (name, fpath) ->
- do tree <- readFromDocument [ (a_validate , v_0)
- , (a_check_namespaces , v_1)
- , (a_remove_whitespace, v_1)
- ] -< fpath
- lastMod <- arrIO (\ x -> getModificationTime x >>= toCalendarTime) -< fpath
- parsePage -< (name, lastMod, tree)
-
-
--- | Turn a page document into a 'Page'.
---
--- The input is the page name, the modification time to record, and the
--- document tree. The tree is queried for @/page/\@redirect/text()@: when that
--- lookup produces a destination the result is a 'Redirection' to it (with no
--- revision set); otherwise the whole input is handed on to 'parseEntity'.
-parsePage :: (ArrowXml a, ArrowChoice a) => a (PageName, CalendarTime, XmlTree) Page
-parsePage
-    = proc page@(name, lastMod, tree) -> do
-        target <- maybeA (getXPathTreesInDoc "/page/@redirect/text()" >>> getText) -< tree
-        case target of
-          Just dest ->
-              returnA -< Redirection
-                           { redirName     = name
-                           , redirDest     = dest
-                           , redirRevision = Nothing
-                           , redirLastMod  = lastMod
-                           }
-          Nothing ->
-              parseEntity -< page
-
-
--- | Build an 'Entity' page from a page document.
---
--- The input is the page name, the modification time to record, and the
--- document tree. The fields are read from the tree as follows:
---
---   * the MIME type comes from the @type@ attribute of @/page@;
---
---   * the flags @isTheme@, @isFeed@, @isLocked@ and @isBoring@ are attributes
---     of @/page@ that fall back to @\"no\"@ before being passed to
---     'parseYesOrNo';
---
---   * the summary is the text of @/page/summary@, passed through
---     'deleteIfEmpty';
---
---   * the other-language links are the @(lang, page)@ attribute pairs of
---     every @/page/otherLang/link@ element;
---
---   * the content is taken from @/page/textData@ (UTF-8 encoded, not binary)
---     or, failing that, from @/page/binaryData@ (Base64 decoded, binary).
---
--- A document that has neither @textData@ nor @binaryData@ yields a page whose
--- 'pageIsBinary' and 'pageContent' fields raise a descriptive error when
--- forced, rather than an anonymous pattern-match failure.
-parseEntity :: (ArrowXml a, ArrowChoice a) => a (PageName, CalendarTime, XmlTree) Page
-parseEntity
-    = proc (name, lastMod, tree) -> do
-        -- NOTE(review): 'read' is partial; a malformed @type@ attribute
-        -- presumably surfaces as a "no parse" error when 'pageType' is
-        -- forced -- confirm against the Read instance of the MIME type.
-        mimeType <- (getXPathTreesInDoc "/page/@type/text()" >>> getText
-                     >>> arr read) -< tree
-
-        isTheme  <- yesNoAttr "isTheme"  -< tree
-        isFeed   <- yesNoAttr "isFeed"   -< tree
-        isLocked <- yesNoAttr "isLocked" -< tree
-        isBoring <- yesNoAttr "isBoring" -< tree
-
-        summary <- maybeA (getXPathTreesInDoc "/page/summary/text()"
-                           >>> getText
-                           >>> deleteIfEmpty) -< tree
-
-        otherLang <- listA (getXPathTreesInDoc "/page/otherLang/link"
-                            >>>
-                            (getAttrValue0 "lang"
-                             &&&
-                             getAttrValue0 "page")) -< tree
-
-        textData   <- maybeA (getXPathTreesInDoc "/page/textData/text()"   >>> getText) -< tree
-        binaryData <- maybeA (getXPathTreesInDoc "/page/binaryData/text()" >>> getText) -< tree
-
-        -- textData wins when both are present. The binding is lazy, so the
-        -- error branch only fires if isBinary or content is actually used.
-        let (isBinary, content)
-                = case (textData, binaryData) of
-                    (Just text, _          ) -> (False, encodeLazy UTF8 text      )
-                    (_        , Just binary) -> (True , L.pack $ B64.decode binary)
-                    (Nothing  , Nothing    ) ->
-                        error "parseEntity: page has neither textData nor binaryData"
-
-        returnA -< Entity
-                     { pageName      = name
-                     , pageType      = mimeType
-                     , pageIsTheme   = isTheme
-                     , pageIsFeed    = isFeed
-                     , pageIsLocked  = isLocked
-                     , pageIsBoring  = isBoring
-                     , pageIsBinary  = isBinary
-                     , pageRevision  = Nothing
-                     , pageLastMod   = lastMod
-                     , pageSummary   = summary
-                     , pageOtherLang = otherLang
-                     , pageContent   = content
-                     }
-    where
-      -- Read the named attribute of /page as a yes/no flag, using "no" when
-      -- the lookup yields nothing.
-      yesNoAttr attr
-          = maybeA (getXPathTreesInDoc ("/page/@" ++ attr ++ "/text()") >>> getText)
-            >>> defaultTo "no"
-            >>> parseYesOrNo
\ No newline at end of file