30 import qualified Data.ByteString.Lazy as Lazy (ByteString)
31 import qualified Data.ByteString.Lazy as L hiding (ByteString)
32 import qualified Data.ByteString.Lazy.Char8 as L8 hiding (ByteString)
35 import qualified Data.Map as M
37 import qualified Data.Time.W3C as W3C
38 import Network.HTTP.Lucu hiding (redirect)
39 import Network.URI hiding (fragment)
40 import OpenSSL.EVP.Base64
42 import Subversion.Types
43 import System.FilePath.Posix
44 import Text.XML.HXT.XPath
47 type PageName = String
49 type LanguageTag = String -- See RFC 3066: http://www.ietf.org/rfc/rfc3066.txt
50 type LanguageName = String -- i.e. "日本語"
55 redirName :: !PageName
56 , redirDest :: !PageName
57 , redirIsLocked :: !Bool
58 , redirRevision :: RevNum
59 , redirLastMod :: UTCTime
60 , redirUpdateInfo :: Maybe UpdateInfo
63 entityName :: !PageName
64 , entityType :: !MIMEType
65 , entityLanguage :: !(Maybe LanguageTag)
66 , entityIsTheme :: !Bool -- text/css 以外では無意味
67 , entityIsFeed :: !Bool -- text/x-rakka 以外では無意味
68 , entityIsLocked :: !Bool
69 , entityIsBinary :: !Bool
70 , entityRevision :: RevNum
71 , entityLastMod :: UTCTime
72 , entitySummary :: !(Maybe String)
73 , entityOtherLang :: !(Map LanguageTag PageName)
74 , entityContent :: !Lazy.ByteString
75 , entityUpdateInfo :: Maybe UpdateInfo
82 uiOldRevision :: !RevNum
83 , uiOldName :: !(Maybe PageName)
88 isRedirect :: Page -> Bool
89 isRedirect (Redirection _ _ _ _ _ _) = True
93 isEntity :: Page -> Bool
94 isEntity (Entity _ _ _ _ _ _ _ _ _ _ _ _ _) = True
98 pageName :: Page -> PageName
100 | isRedirect p = redirName p
101 | isEntity p = entityName p
102 | otherwise = error "neither redirection nor entity"
105 pageUpdateInfo :: Page -> Maybe UpdateInfo
107 | isRedirect p = redirUpdateInfo p
108 | isEntity p = entityUpdateInfo p
109 | otherwise = error "neither redirection nor entity"
112 pageRevision :: Page -> RevNum
114 | isRedirect p = redirRevision p
115 | isEntity p = entityRevision p
116 | otherwise = error "neither redirection nor entity"
119 -- UTF-8 に encode してから 0x20 - 0x7E の範圍を除いて URI escape する。
120 encodePageName :: PageName -> FilePath
121 encodePageName = escapeURIString isSafeChar . UTF8.encodeString . fixPageName
123 fixPageName :: PageName -> PageName
124 fixPageName = (\ (x:xs) -> toUpper x : xs) . map (\ c -> if c == ' ' then '_' else c)
127 decodePageName :: FilePath -> PageName
128 decodePageName = UTF8.decodeString . unEscapeString
131 encodeFragment :: String -> String
132 encodeFragment = escapeURIString isSafeChar . UTF8.encodeString
135 mkPageURI :: URI -> PageName -> URI
136 mkPageURI baseURI name
138 uriPath = uriPath baseURI </> encodePageName name <.> "html"
142 mkPageFragmentURI :: URI -> PageName -> String -> URI
143 mkPageFragmentURI baseURI name fragment
145 uriPath = uriPath baseURI </> encodePageName name <.> "html"
146 , uriFragment = ('#' : encodeFragment fragment)
150 mkFragmentURI :: String -> URI
151 mkFragmentURI fragment
153 uriFragment = ('#' : encodeFragment fragment)
157 mkObjectURI :: URI -> PageName -> URI
158 mkObjectURI baseURI name
159 = mkAuxiliaryURI baseURI ["object"] name
162 mkAuxiliaryURI :: URI -> [String] -> PageName -> URI
163 mkAuxiliaryURI baseURI basePath name
165 uriPath = foldl (</>) "/" ([uriPath baseURI] ++ basePath ++ [encodePageName name])
169 mkFeedURI :: URI -> PageName -> URI
170 mkFeedURI baseURI name
172 uriPath = uriPath baseURI </> encodePageName name <.> "rdf"
176 mkRakkaURI :: PageName -> URI
177 mkRakkaURI name = URI {
179 , uriAuthority = Nothing
180 , uriPath = encodePageName name
189 lang="ja" -- 存在しない場合もある
190 isTheme="no" -- text/css の場合のみ存在
191 isFeed="no" -- text/x-rakka の場合のみ存在
195 lastModified="2000-01-01T00:00:00">
199 </summary> -- 存在しない場合もある
201 <otherLang> -- 存在しない場合もある
202 <link lang="ja" page="Bar/Baz" />
210 SKJaHKS8JK/DH8KS43JDK2aKKaSFLLS...
217 lastModified="2000-01-01T00:00:00" />
219 xmlizePage :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
222 -> if isRedirect page then
223 xmlizeRedirection -< page
227 xmlizeRedirection :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
230 -> do lastMod <- arrIO (utcToLocalZonedTime . redirLastMod) -< page
233 += sattr "name" (redirName page)
234 += sattr "redirect" (redirDest page)
235 += sattr "isLocked" (yesOrNo $ redirIsLocked page)
236 += sattr "revision" (show $ redirRevision page)
237 += sattr "lastModified" (W3C.format lastMod)
240 xmlizeEntity :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
243 -> do lastMod <- arrIO (utcToLocalZonedTime . entityLastMod) -< page
246 += sattr "name" (pageName page)
247 += sattr "type" (show $ entityType page)
248 += ( case entityLanguage page of
249 Just x -> sattr "lang" x
252 += ( case entityType page of
253 MIMEType "text" "css" _
254 -> sattr "isTheme" (yesOrNo $ entityIsTheme page)
255 MIMEType "text" "x-rakka" _
256 -> sattr "isFeed" (yesOrNo $ entityIsFeed page)
260 += sattr "isLocked" (yesOrNo $ entityIsLocked page)
261 += sattr "isBinary" (yesOrNo $ entityIsBinary page)
262 += sattr "revision" (show $ entityRevision page)
263 += sattr "lastModified" (W3C.format lastMod)
264 += ( case entitySummary page of
265 Just s -> eelem "summary" += txt s
268 += ( if M.null (entityOtherLang page) then
275 | (lang, name) <- M.toList (entityOtherLang page) ]
277 += ( if entityIsBinary page then
279 += txt (L8.unpack $ encodeBase64LBS $ entityContent page)
283 += txt (UTF8.decode $ L.unpack $ entityContent page)
289 parseXmlizedPage :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page
292 -> do updateInfo <- maybeA parseUpdateInfo -< tree
293 redirect <- maybeA (getXPathTreesInDoc "/page/@redirect/text()" >>> getText) -< tree
294 isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no"
295 >>> parseYesOrNo) -< tree
297 Nothing -> parseEntity -< (name, tree)
298 Just dest -> returnA -< (Redirection {
301 , redirIsLocked = isLocked
302 , redirRevision = undefined
303 , redirLastMod = undefined
304 , redirUpdateInfo = updateInfo
308 parseEntity :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page
311 -> do updateInfo <- maybeA parseUpdateInfo -< tree
313 mimeTypeStr <- withDefault (getXPathTreesInDoc "/page/@type/text()" >>> getText) "" -< tree
315 lang <- maybeA (getXPathTreesInDoc "/page/@lang/text()" >>> getText) -< tree
317 isTheme <- (withDefault (getXPathTreesInDoc "/page/@isTheme/text()" >>> getText) "no"
318 >>> parseYesOrNo) -< tree
319 isFeed <- (withDefault (getXPathTreesInDoc "/page/@isFeed/text()" >>> getText) "no"
320 >>> parseYesOrNo) -< tree
321 isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no"
322 >>> parseYesOrNo) -< tree
324 summary <- (maybeA (getXPathTreesInDoc "/page/summary/text()"
326 >>> deleteIfEmpty)) -< tree
328 otherLang <- listA (getXPathTreesInDoc "/page/otherLang/link"
330 (getAttrValue0 "lang"
332 getAttrValue0 "page")) -< tree
334 textData <- maybeA (getXPathTreesInDoc "/page/textData/text()" >>> getText) -< tree
335 binaryData <- maybeA (getXPathTreesInDoc "/page/binaryData/text()" >>> getText) -< tree
337 let (isBinary, content)
338 = case (textData, binaryData) of
339 (Just text, Nothing ) -> (False, L.pack $ UTF8.encode text)
340 (Nothing , Just binary) -> (True , L8.pack $ decodeBase64 $ dropWhitespace binary)
341 _ -> error "one of textData or binaryData is required"
344 if null mimeTypeStr then
345 guessMIMEType content
353 , entityType = mimeType
354 , entityLanguage = lang
355 , entityIsTheme = isTheme
356 , entityIsFeed = isFeed
357 , entityIsLocked = isLocked
358 , entityIsBinary = isBinary
359 , entityRevision = undefined
360 , entityLastMod = undefined
361 , entitySummary = summary
362 , entityOtherLang = M.fromList otherLang
363 , entityContent = content
364 , entityUpdateInfo = updateInfo
367 dropWhitespace :: String -> String
368 dropWhitespace [] = []
369 dropWhitespace (x:xs)
370 | x == ' ' || x == '\t' || x == '\n'
373 = x : dropWhitespace xs
376 parseUpdateInfo :: (ArrowXml a, ArrowChoice a) => a XmlTree UpdateInfo
379 -> do uInfo <- getXPathTreesInDoc "/page/updateInfo" -< tree
380 oldRev <- (getAttrValue0 "oldRevision" >>> arr read) -< uInfo
381 oldName <- maybeA (getXPathTrees "/updateInfo/move/@from/text()" >>> getText) -< uInfo
382 returnA -< UpdateInfo {
383 uiOldRevision = oldRev
384 , uiOldName = oldName