30 import qualified Codec.Binary.Base64 as B64
31 import qualified Codec.Binary.UTF8.String as UTF8
33 import Control.Arrow.ArrowIO
34 import Control.Arrow.ArrowList
35 import qualified Data.ByteString.Lazy as Lazy (ByteString)
36 import qualified Data.ByteString.Lazy as L hiding (ByteString)
39 import qualified Data.Map as M
42 import Network.HTTP.Lucu hiding (redirect)
43 import Network.URI hiding (fragment)
45 import Rakka.W3CDateTime
46 import Subversion.Types
47 import System.FilePath.Posix
48 import Text.XML.HXT.Arrow.XmlArrow
49 import Text.XML.HXT.Arrow.XmlNodeSet
50 import Text.XML.HXT.DOM.TypeDefs
53 type PageName = String
55 type LanguageTag = String -- See RFC 3066: http://www.ietf.org/rfc/rfc3066.txt
56 type LanguageName = String -- i.e. "日本語"
61 redirName :: !PageName
62 , redirDest :: !PageName
63 , redirRevision :: RevNum
64 , redirLastMod :: UTCTime
65 , redirUpdateInfo :: Maybe UpdateInfo
68 entityName :: !PageName
69 , entityType :: !MIMEType
70 , entityLanguage :: !(Maybe LanguageTag)
71 , entityIsTheme :: !Bool -- text/css 以外では無意味
72 , entityIsFeed :: !Bool -- text/x-rakka 以外では無意味
73 , entityIsLocked :: !Bool
74 , entityIsBoring :: !Bool
75 , entityIsBinary :: !Bool
76 , entityRevision :: RevNum
77 , entityLastMod :: UTCTime
78 , entitySummary :: !(Maybe String)
79 , entityOtherLang :: !(Map LanguageTag PageName)
80 , entityContent :: !Lazy.ByteString
81 , entityUpdateInfo :: Maybe UpdateInfo
88 uiOldRevision :: !RevNum
89 , uiOldName :: !(Maybe PageName)
94 isRedirect :: Page -> Bool
95 isRedirect (Redirection _ _ _ _ _) = True
99 isEntity :: Page -> Bool
100 isEntity (Entity _ _ _ _ _ _ _ _ _ _ _ _ _ _) = True
104 pageName :: Page -> PageName
106 | isRedirect p = redirName p
107 | isEntity p = entityName p
108 | otherwise = error "neither redirection nor entity"
111 pageUpdateInfo :: Page -> Maybe UpdateInfo
113 | isRedirect p = redirUpdateInfo p
114 | isEntity p = entityUpdateInfo p
115 | otherwise = error "neither redirection nor entity"
118 pageRevision :: Page -> RevNum
120 | isRedirect p = redirRevision p
121 | isEntity p = entityRevision p
122 | otherwise = error "neither redirection nor entity"
125 -- UTF-8 に encode してから 0x20 - 0x7E の範圍を除いて URI escape する。
126 encodePageName :: PageName -> FilePath
127 encodePageName = escapeURIString isSafeChar . UTF8.encodeString . fixPageName
129 fixPageName :: PageName -> PageName
130 fixPageName = (\ (x:xs) -> toUpper x : xs) . map (\ c -> if c == ' ' then '_' else c)
133 isSafeChar :: Char -> Bool
136 | isReserved c = False
137 | c > ' ' && c <= '~' = True
141 -- URI unescape して UTF-8 から decode する。
142 decodePageName :: FilePath -> PageName
143 decodePageName = UTF8.decodeString . unEscapeString
146 encodeFragment :: String -> String
147 encodeFragment = escapeURIString isSafeChar . UTF8.encodeString
150 mkPageURI :: URI -> PageName -> URI
151 mkPageURI baseURI name
153 uriPath = foldl (</>) "/" [uriPath baseURI, encodePageName name ++ ".html"]
157 mkPageFragmentURI :: URI -> PageName -> String -> URI
158 mkPageFragmentURI baseURI name fragment
160 uriPath = foldl (</>) "/" [uriPath baseURI, encodePageName name ++ ".html"]
161 , uriFragment = ('#' : encodeFragment fragment)
165 mkFragmentURI :: String -> URI
166 mkFragmentURI fragment
168 uriFragment = ('#' : encodeFragment fragment)
172 mkObjectURI :: URI -> PageName -> URI
173 mkObjectURI baseURI name
174 = mkAuxiliaryURI baseURI ["object"] name
177 mkAuxiliaryURI :: URI -> [String] -> PageName -> URI
178 mkAuxiliaryURI baseURI basePath name
180 uriPath = foldl (</>) "/" ([uriPath baseURI] ++ basePath ++ [encodePageName name])
184 mkRakkaURI :: PageName -> URI
185 mkRakkaURI name = URI {
187 , uriAuthority = Nothing
188 , uriPath = encodePageName name
197 lang="ja" -- 存在しない場合もある
198 isTheme="no" -- text/css の場合のみ存在
199 isFeed="no" -- text/x-rakka の場合のみ存在
203 lastModified="2000-01-01T00:00:00">
207 </summary> -- 存在しない場合もある
209 <otherLang> -- 存在しない場合もある
210 <link lang="ja" page="Bar/Baz" />
218 SKJaHKS8JK/DH8KS43JDK2aKKaSFLLS...
225 lastModified="2000-01-01T00:00:00" />
227 xmlizePage :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
230 -> if isRedirect page then
231 xmlizeRedirection -< page
235 xmlizeRedirection :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
238 -> do lastMod <- arrIO (utcToLocalZonedTime . redirLastMod) -< page
241 += sattr "name" (redirName page)
242 += sattr "redirect" (redirDest page)
243 += sattr "revision" (show $ redirRevision page)
244 += sattr "lastModified" (formatW3CDateTime lastMod)
247 xmlizeEntity :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
250 -> do lastMod <- arrIO (utcToLocalZonedTime . entityLastMod) -< page
253 += sattr "name" (pageName page)
254 += sattr "type" (show $ entityType page)
255 += ( case entityLanguage page of
256 Just x -> sattr "lang" x
259 += ( case entityType page of
260 MIMEType "text" "css" _
261 -> sattr "isTheme" (yesOrNo $ entityIsTheme page)
262 MIMEType "text" "x-rakka" _
263 -> sattr "isFeed" (yesOrNo $ entityIsFeed page)
267 += sattr "isLocked" (yesOrNo $ entityIsLocked page)
268 += sattr "isBoring" (yesOrNo $ entityIsBoring page)
269 += sattr "isBinary" (yesOrNo $ entityIsBinary page)
270 += sattr "revision" (show $ entityRevision page)
271 += sattr "lastModified" (formatW3CDateTime lastMod)
272 += ( case entitySummary page of
273 Just s -> eelem "summary" += txt s
276 += ( if M.null (entityOtherLang page) then
283 | (lang, name) <- M.toList (entityOtherLang page) ]
285 += ( if entityIsBinary page then
287 += txt (B64.encode $ L.unpack $ entityContent page)
291 += txt (UTF8.decode $ L.unpack $ entityContent page)
297 parseXmlizedPage :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page
300 -> do updateInfo <- maybeA parseUpdateInfo -< tree
301 redirect <- maybeA (getXPathTreesInDoc "/page/@redirect/text()" >>> getText) -< tree
303 Nothing -> parseEntity -< (name, tree)
304 Just dest -> returnA -< (Redirection {
307 , redirRevision = undefined
308 , redirLastMod = undefined
309 , redirUpdateInfo = updateInfo
313 parseEntity :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page
316 -> do updateInfo <- maybeA parseUpdateInfo -< tree
318 mimeTypeStr <- withDefault (getXPathTreesInDoc "/page/@type/text()" >>> getText) "" -< tree
320 lang <- maybeA (getXPathTreesInDoc "/page/@lang/text()" >>> getText) -< tree
322 isTheme <- (withDefault (getXPathTreesInDoc "/page/@isTheme/text()" >>> getText) "no"
323 >>> parseYesOrNo) -< tree
324 isFeed <- (withDefault (getXPathTreesInDoc "/page/@isFeed/text()" >>> getText) "no"
325 >>> parseYesOrNo) -< tree
326 isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no"
327 >>> parseYesOrNo) -< tree
328 isBoring <- (withDefault (getXPathTreesInDoc "/page/@isBoring/text()" >>> getText) "no"
329 >>> parseYesOrNo) -< tree
331 summary <- (maybeA (getXPathTreesInDoc "/page/summary/text()"
333 >>> deleteIfEmpty)) -< tree
335 otherLang <- listA (getXPathTreesInDoc "/page/otherLang/link"
337 (getAttrValue0 "lang"
339 getAttrValue0 "page")) -< tree
341 textData <- maybeA (getXPathTreesInDoc "/page/textData/text()" >>> getText) -< tree
342 binaryData <- maybeA (getXPathTreesInDoc "/page/binaryData/text()" >>> getText) -< tree
344 let (isBinary, content)
345 = case (textData, binaryData) of
346 (Just text, Nothing ) -> (False, L.pack $ UTF8.encode text )
347 (Nothing , Just binary) -> (True , L.pack $ B64.decode binary)
348 _ -> error "one of textData or binaryData is required"
351 if null mimeTypeStr then
352 guessMIMEType content
360 , entityType = mimeType
361 , entityLanguage = lang
362 , entityIsTheme = isTheme
363 , entityIsFeed = isFeed
364 , entityIsLocked = isLocked
365 , entityIsBoring = isBoring
366 , entityIsBinary = isBinary
367 , entityRevision = undefined
368 , entityLastMod = undefined
369 , entitySummary = summary
370 , entityOtherLang = M.fromList otherLang
371 , entityContent = content
372 , entityUpdateInfo = updateInfo
376 parseUpdateInfo :: (ArrowXml a, ArrowChoice a) => a XmlTree UpdateInfo
379 -> do uInfo <- getXPathTreesInDoc "/page/updateInfo" -< tree
380 oldRev <- (getAttrValue0 "oldRevision" >>> arr read) -< uInfo
381 oldName <- maybeA (getXPathTrees "/updateInfo/move/@from/text()" >>> getText) -< uInfo
382 returnA -< UpdateInfo {
383 uiOldRevision = oldRev
384 , uiOldName = oldName