31 import qualified Codec.Binary.UTF8.String as UTF8
32 import qualified Data.ByteString.Lazy as Lazy (ByteString)
33 import qualified Data.ByteString.Lazy as L hiding (ByteString)
34 import qualified Data.ByteString.Lazy.Char8 as L8 hiding (ByteString)
37 import qualified Data.Map as M
39 import qualified Data.Time.W3C as W3C
40 import Network.HTTP.Lucu hiding (redirect)
41 import Network.URI hiding (fragment)
42 import OpenSSL.EVP.Base64
44 import Subversion.Types
45 import System.FilePath.Posix
46 import Text.XML.HXT.Arrow
47 import Text.XML.HXT.XPath
50 type PageName = String
52 type LanguageTag = String -- See RFC 3066: http://www.ietf.org/rfc/rfc3066.txt
53 type LanguageName = String -- i.e. "日本語"
58 redirName :: !PageName
59 , redirDest :: !PageName
60 , redirIsLocked :: !Bool
61 , redirRevision :: RevNum
62 , redirLastMod :: UTCTime
63 , redirUpdateInfo :: Maybe UpdateInfo
66 entityName :: !PageName
67 , entityType :: !MIMEType
68 , entityLanguage :: !(Maybe LanguageTag)
69 , entityIsTheme :: !Bool -- text/css 以外では無意味
70 , entityIsFeed :: !Bool -- text/x-rakka 以外では無意味
71 , entityIsLocked :: !Bool
72 , entityIsBinary :: !Bool
73 , entityRevision :: RevNum
74 , entityLastMod :: UTCTime
75 , entitySummary :: !(Maybe String)
76 , entityOtherLang :: !(Map LanguageTag PageName)
77 , entityContent :: !Lazy.ByteString
78 , entityUpdateInfo :: Maybe UpdateInfo
85 uiOldRevision :: !RevNum
86 , uiOldName :: !(Maybe PageName)
91 isRedirect :: Page -> Bool
92 isRedirect (Redirection _ _ _ _ _ _) = True
96 isEntity :: Page -> Bool
97 isEntity (Entity _ _ _ _ _ _ _ _ _ _ _ _ _) = True
101 pageName :: Page -> PageName
103 | isRedirect p = redirName p
104 | isEntity p = entityName p
105 | otherwise = error "neither redirection nor entity"
108 pageUpdateInfo :: Page -> Maybe UpdateInfo
110 | isRedirect p = redirUpdateInfo p
111 | isEntity p = entityUpdateInfo p
112 | otherwise = error "neither redirection nor entity"
115 pageRevision :: Page -> RevNum
117 | isRedirect p = redirRevision p
118 | isEntity p = entityRevision p
119 | otherwise = error "neither redirection nor entity"
122 -- UTF-8 に encode してから 0x20 - 0x7E の範圍を除いて URI escape する。
123 encodePageName :: PageName -> FilePath
124 encodePageName = escapeURIString isSafeChar . UTF8.encodeString . fixPageName
126 fixPageName :: PageName -> PageName
127 fixPageName = (\ (x:xs) -> toUpper x : xs) . map (\ c -> if c == ' ' then '_' else c)
130 decodePageName :: FilePath -> PageName
131 decodePageName = UTF8.decodeString . unEscapeString
134 encodeFragment :: String -> String
135 encodeFragment = escapeURIString isSafeChar . UTF8.encodeString
138 mkPageURI :: URI -> PageName -> URI
139 mkPageURI baseURI name
141 uriPath = uriPath baseURI </> encodePageName name <.> "html"
145 mkPageFragmentURI :: URI -> PageName -> String -> URI
146 mkPageFragmentURI baseURI name fragment
148 uriPath = uriPath baseURI </> encodePageName name <.> "html"
149 , uriFragment = ('#' : encodeFragment fragment)
153 mkFragmentURI :: String -> URI
154 mkFragmentURI fragment
156 uriFragment = ('#' : encodeFragment fragment)
160 mkObjectURI :: URI -> PageName -> URI
161 mkObjectURI baseURI name
162 = mkAuxiliaryURI baseURI ["object"] name
165 mkAuxiliaryURI :: URI -> [String] -> PageName -> URI
166 mkAuxiliaryURI baseURI basePath name
168 uriPath = foldl (</>) "/" ([uriPath baseURI] ++ basePath ++ [encodePageName name])
172 mkFeedURI :: URI -> PageName -> URI
173 mkFeedURI baseURI name
175 uriPath = uriPath baseURI </> encodePageName name <.> "rdf"
179 mkRakkaURI :: PageName -> URI
180 mkRakkaURI name = URI {
182 , uriAuthority = Nothing
183 , uriPath = encodePageName name
192 lang="ja" -- 存在しない場合もある
193 isTheme="no" -- text/css の場合のみ存在
194 isFeed="no" -- text/x-rakka の場合のみ存在
198 lastModified="2000-01-01T00:00:00">
202 </summary> -- 存在しない場合もある
204 <otherLang> -- 存在しない場合もある
205 <link lang="ja" page="Bar/Baz" />
213 SKJaHKS8JK/DH8KS43JDK2aKKaSFLLS...
220 lastModified="2000-01-01T00:00:00" />
222 xmlizePage :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
225 -> if isRedirect page then
226 xmlizeRedirection -< page
230 xmlizeRedirection :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
233 -> do lastMod <- arrIO (utcToLocalZonedTime . redirLastMod) -< page
236 += sattr "name" (redirName page)
237 += sattr "redirect" (redirDest page)
238 += sattr "isLocked" (yesOrNo $ redirIsLocked page)
239 += sattr "revision" (show $ redirRevision page)
240 += sattr "lastModified" (W3C.format lastMod)
243 xmlizeEntity :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
246 -> do lastMod <- arrIO (utcToLocalZonedTime . entityLastMod) -< page
249 += sattr "name" (pageName page)
250 += sattr "type" (show $ entityType page)
251 += ( case entityLanguage page of
252 Just x -> sattr "lang" x
255 += ( case entityType page of
256 MIMEType "text" "css" _
257 -> sattr "isTheme" (yesOrNo $ entityIsTheme page)
258 MIMEType "text" "x-rakka" _
259 -> sattr "isFeed" (yesOrNo $ entityIsFeed page)
263 += sattr "isLocked" (yesOrNo $ entityIsLocked page)
264 += sattr "isBinary" (yesOrNo $ entityIsBinary page)
265 += sattr "revision" (show $ entityRevision page)
266 += sattr "lastModified" (W3C.format lastMod)
267 += ( case entitySummary page of
268 Just s -> eelem "summary" += txt s
271 += ( if M.null (entityOtherLang page) then
278 | (lang, name) <- M.toList (entityOtherLang page) ]
280 += ( if entityIsBinary page then
282 += txt (L8.unpack $ encodeBase64LBS $ entityContent page)
286 += txt (UTF8.decode $ L.unpack $ entityContent page)
292 parseXmlizedPage :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page
295 -> do updateInfo <- maybeA parseUpdateInfo -< tree
296 redirect <- maybeA (getXPathTreesInDoc "/page/@redirect/text()" >>> getText) -< tree
297 isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no"
298 >>> parseYesOrNo) -< tree
300 Nothing -> parseEntity -< (name, tree)
301 Just dest -> returnA -< (Redirection {
304 , redirIsLocked = isLocked
305 , redirRevision = undefined
306 , redirLastMod = undefined
307 , redirUpdateInfo = updateInfo
311 parseEntity :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page
314 -> do updateInfo <- maybeA parseUpdateInfo -< tree
316 mimeTypeStr <- withDefault (getXPathTreesInDoc "/page/@type/text()" >>> getText) "" -< tree
318 lang <- maybeA (getXPathTreesInDoc "/page/@lang/text()" >>> getText) -< tree
320 isTheme <- (withDefault (getXPathTreesInDoc "/page/@isTheme/text()" >>> getText) "no"
321 >>> parseYesOrNo) -< tree
322 isFeed <- (withDefault (getXPathTreesInDoc "/page/@isFeed/text()" >>> getText) "no"
323 >>> parseYesOrNo) -< tree
324 isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no"
325 >>> parseYesOrNo) -< tree
327 summary <- (maybeA (getXPathTreesInDoc "/page/summary/text()"
329 >>> deleteIfEmpty)) -< tree
331 otherLang <- listA (getXPathTreesInDoc "/page/otherLang/link"
333 (getAttrValue0 "lang"
335 getAttrValue0 "page")) -< tree
337 textData <- maybeA (getXPathTreesInDoc "/page/textData/text()" >>> getText) -< tree
338 binaryData <- maybeA (getXPathTreesInDoc "/page/binaryData/text()" >>> getText) -< tree
340 let (isBinary, content)
341 = case (textData, binaryData) of
342 (Just text, Nothing ) -> (False, L.pack $ UTF8.encode text)
343 (Nothing , Just binary) -> (True , L8.pack $ decodeBase64 $ dropWhitespace binary)
344 _ -> error "one of textData or binaryData is required"
347 if null mimeTypeStr then
348 guessMIMEType content
356 , entityType = mimeType
357 , entityLanguage = lang
358 , entityIsTheme = isTheme
359 , entityIsFeed = isFeed
360 , entityIsLocked = isLocked
361 , entityIsBinary = isBinary
362 , entityRevision = undefined
363 , entityLastMod = undefined
364 , entitySummary = summary
365 , entityOtherLang = M.fromList otherLang
366 , entityContent = content
367 , entityUpdateInfo = updateInfo
370 dropWhitespace :: String -> String
371 dropWhitespace [] = []
372 dropWhitespace (x:xs)
373 | x == ' ' || x == '\t' || x == '\n'
376 = x : dropWhitespace xs
379 parseUpdateInfo :: (ArrowXml a, ArrowChoice a) => a XmlTree UpdateInfo
382 -> do uInfo <- getXPathTreesInDoc "/page/updateInfo" -< tree
383 oldRev <- (getAttrValue0 "oldRevision" >>> arr read) -< uInfo
384 oldName <- maybeA (getXPathTrees "/updateInfo/move/@from/text()" >>> getText) -< uInfo
385 returnA -< UpdateInfo {
386 uiOldRevision = oldRev
387 , uiOldName = oldName