32 import qualified Codec.Binary.Base64 as B64
33 import qualified Codec.Binary.UTF8.String as UTF8
35 import Control.Arrow.ArrowIO
36 import Control.Arrow.ArrowList
37 import qualified Data.ByteString.Lazy as Lazy (ByteString)
38 import qualified Data.ByteString.Lazy as L hiding (ByteString)
41 import qualified Data.Map as M
44 import Network.HTTP.Lucu hiding (redirect)
45 import Network.URI hiding (fragment)
47 import Rakka.W3CDateTime
48 import Subversion.Types
49 import System.FilePath.Posix
50 import Text.XML.HXT.Arrow.XmlArrow
51 import Text.XML.HXT.Arrow.XmlNodeSet
52 import Text.XML.HXT.DOM.TypeDefs
55 type PageName = String
57 type LanguageTag = String -- See RFC 3066: http://www.ietf.org/rfc/rfc3066.txt
58 type LanguageName = String -- i.e. "日本語"
63 redirName :: !PageName
64 , redirDest :: !PageName
65 , redirIsLocked :: !Bool
66 , redirRevision :: RevNum
67 , redirLastMod :: UTCTime
68 , redirUpdateInfo :: Maybe UpdateInfo
71 entityName :: !PageName
72 , entityType :: !MIMEType
73 , entityLanguage :: !(Maybe LanguageTag)
74 , entityIsTheme :: !Bool -- text/css 以外では無意味
75 , entityIsFeed :: !Bool -- text/x-rakka 以外では無意味
76 , entityIsLocked :: !Bool
77 , entityIsBinary :: !Bool
78 , entityRevision :: RevNum
79 , entityLastMod :: UTCTime
80 , entitySummary :: !(Maybe String)
81 , entityOtherLang :: !(Map LanguageTag PageName)
82 , entityContent :: !Lazy.ByteString
83 , entityUpdateInfo :: Maybe UpdateInfo
90 uiOldRevision :: !RevNum
91 , uiOldName :: !(Maybe PageName)
96 isRedirect :: Page -> Bool
97 isRedirect (Redirection _ _ _ _ _ _) = True
101 isEntity :: Page -> Bool
102 isEntity (Entity _ _ _ _ _ _ _ _ _ _ _ _ _) = True
106 pageName :: Page -> PageName
108 | isRedirect p = redirName p
109 | isEntity p = entityName p
110 | otherwise = error "neither redirection nor entity"
113 pageUpdateInfo :: Page -> Maybe UpdateInfo
115 | isRedirect p = redirUpdateInfo p
116 | isEntity p = entityUpdateInfo p
117 | otherwise = error "neither redirection nor entity"
120 pageRevision :: Page -> RevNum
122 | isRedirect p = redirRevision p
123 | isEntity p = entityRevision p
124 | otherwise = error "neither redirection nor entity"
127 -- UTF-8 に encode してから 0x20 - 0x7E の範圍を除いて URI escape する。
128 encodePageName :: PageName -> FilePath
129 encodePageName = escapeURIString isSafeChar . UTF8.encodeString . fixPageName
131 fixPageName :: PageName -> PageName
132 fixPageName = (\ (x:xs) -> toUpper x : xs) . map (\ c -> if c == ' ' then '_' else c)
135 isSafeChar :: Char -> Bool
138 | isReserved c = False
139 | c > ' ' && c <= '~' = True
143 -- URI unescape して UTF-8 から decode する。
144 decodePageName :: FilePath -> PageName
145 decodePageName = UTF8.decodeString . unEscapeString
148 encodeFragment :: String -> String
149 encodeFragment = escapeURIString isSafeChar . UTF8.encodeString
152 mkPageURI :: URI -> PageName -> URI
153 mkPageURI baseURI name
155 uriPath = uriPath baseURI </> encodePageName name <.> "html"
159 mkPageFragmentURI :: URI -> PageName -> String -> URI
160 mkPageFragmentURI baseURI name fragment
162 uriPath = uriPath baseURI </> encodePageName name <.> "html"
163 , uriFragment = ('#' : encodeFragment fragment)
167 mkFragmentURI :: String -> URI
168 mkFragmentURI fragment
170 uriFragment = ('#' : encodeFragment fragment)
174 mkObjectURI :: URI -> PageName -> URI
175 mkObjectURI baseURI name
176 = mkAuxiliaryURI baseURI ["object"] name
179 mkAuxiliaryURI :: URI -> [String] -> PageName -> URI
180 mkAuxiliaryURI baseURI basePath name
182 uriPath = foldl (</>) "/" ([uriPath baseURI] ++ basePath ++ [encodePageName name])
186 mkFeedURI :: URI -> PageName -> URI
187 mkFeedURI baseURI name
189 uriPath = uriPath baseURI </> encodePageName name <.> "rdf"
193 mkRakkaURI :: PageName -> URI
194 mkRakkaURI name = URI {
196 , uriAuthority = Nothing
197 , uriPath = encodePageName name
206 lang="ja" -- 存在しない場合もある
207 isTheme="no" -- text/css の場合のみ存在
208 isFeed="no" -- text/x-rakka の場合のみ存在
212 lastModified="2000-01-01T00:00:00">
216 </summary> -- 存在しない場合もある
218 <otherLang> -- 存在しない場合もある
219 <link lang="ja" page="Bar/Baz" />
227 SKJaHKS8JK/DH8KS43JDK2aKKaSFLLS...
234 lastModified="2000-01-01T00:00:00" />
236 xmlizePage :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
239 -> if isRedirect page then
240 xmlizeRedirection -< page
244 xmlizeRedirection :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
247 -> do lastMod <- arrIO (utcToLocalZonedTime . redirLastMod) -< page
250 += sattr "name" (redirName page)
251 += sattr "redirect" (redirDest page)
252 += sattr "isLocked" (yesOrNo $ redirIsLocked page)
253 += sattr "revision" (show $ redirRevision page)
254 += sattr "lastModified" (formatW3CDateTime lastMod)
257 xmlizeEntity :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
260 -> do lastMod <- arrIO (utcToLocalZonedTime . entityLastMod) -< page
263 += sattr "name" (pageName page)
264 += sattr "type" (show $ entityType page)
265 += ( case entityLanguage page of
266 Just x -> sattr "lang" x
269 += ( case entityType page of
270 MIMEType "text" "css" _
271 -> sattr "isTheme" (yesOrNo $ entityIsTheme page)
272 MIMEType "text" "x-rakka" _
273 -> sattr "isFeed" (yesOrNo $ entityIsFeed page)
277 += sattr "isLocked" (yesOrNo $ entityIsLocked page)
278 += sattr "isBinary" (yesOrNo $ entityIsBinary page)
279 += sattr "revision" (show $ entityRevision page)
280 += sattr "lastModified" (formatW3CDateTime lastMod)
281 += ( case entitySummary page of
282 Just s -> eelem "summary" += txt s
285 += ( if M.null (entityOtherLang page) then
292 | (lang, name) <- M.toList (entityOtherLang page) ]
294 += ( if entityIsBinary page then
296 += txt (B64.encode $ L.unpack $ entityContent page)
300 += txt (UTF8.decode $ L.unpack $ entityContent page)
306 parseXmlizedPage :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page
309 -> do updateInfo <- maybeA parseUpdateInfo -< tree
310 redirect <- maybeA (getXPathTreesInDoc "/page/@redirect/text()" >>> getText) -< tree
311 isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no"
312 >>> parseYesOrNo) -< tree
314 Nothing -> parseEntity -< (name, tree)
315 Just dest -> returnA -< (Redirection {
318 , redirIsLocked = isLocked
319 , redirRevision = undefined
320 , redirLastMod = undefined
321 , redirUpdateInfo = updateInfo
325 parseEntity :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page
328 -> do updateInfo <- maybeA parseUpdateInfo -< tree
330 mimeTypeStr <- withDefault (getXPathTreesInDoc "/page/@type/text()" >>> getText) "" -< tree
332 lang <- maybeA (getXPathTreesInDoc "/page/@lang/text()" >>> getText) -< tree
334 isTheme <- (withDefault (getXPathTreesInDoc "/page/@isTheme/text()" >>> getText) "no"
335 >>> parseYesOrNo) -< tree
336 isFeed <- (withDefault (getXPathTreesInDoc "/page/@isFeed/text()" >>> getText) "no"
337 >>> parseYesOrNo) -< tree
338 isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no"
339 >>> parseYesOrNo) -< tree
341 summary <- (maybeA (getXPathTreesInDoc "/page/summary/text()"
343 >>> deleteIfEmpty)) -< tree
345 otherLang <- listA (getXPathTreesInDoc "/page/otherLang/link"
347 (getAttrValue0 "lang"
349 getAttrValue0 "page")) -< tree
351 textData <- maybeA (getXPathTreesInDoc "/page/textData/text()" >>> getText) -< tree
352 binaryData <- maybeA (getXPathTreesInDoc "/page/binaryData/text()" >>> getText) -< tree
354 let (isBinary, content)
355 = case (textData, binaryData) of
356 (Just text, Nothing ) -> (False, L.pack $ UTF8.encode text )
357 (Nothing , Just binary) -> (True , L.pack $ fromJust $ B64.decode $ dropWhitespace binary)
358 _ -> error "one of textData or binaryData is required"
361 if null mimeTypeStr then
362 guessMIMEType content
370 , entityType = mimeType
371 , entityLanguage = lang
372 , entityIsTheme = isTheme
373 , entityIsFeed = isFeed
374 , entityIsLocked = isLocked
375 , entityIsBinary = isBinary
376 , entityRevision = undefined
377 , entityLastMod = undefined
378 , entitySummary = summary
379 , entityOtherLang = M.fromList otherLang
380 , entityContent = content
381 , entityUpdateInfo = updateInfo
384 dropWhitespace :: String -> String
385 dropWhitespace [] = []
386 dropWhitespace (x:xs)
387 | x == ' ' || x == '\t' || x == '\n'
390 = x : dropWhitespace xs
393 parseUpdateInfo :: (ArrowXml a, ArrowChoice a) => a XmlTree UpdateInfo
396 -> do uInfo <- getXPathTreesInDoc "/page/updateInfo" -< tree
397 oldRev <- (getAttrValue0 "oldRevision" >>> arr read) -< uInfo
398 oldName <- maybeA (getXPathTrees "/updateInfo/move/@from/text()" >>> getText) -< uInfo
399 returnA -< UpdateInfo {
400 uiOldRevision = oldRev
401 , uiOldName = oldName