1 -- -*- coding: utf-8 -*-
36 import qualified Data.Ascii as Ascii
37 import qualified Data.Text as T
38 import qualified Data.ByteString.Lazy as Lazy (ByteString)
39 import qualified Data.ByteString.Lazy as L hiding (ByteString)
40 import qualified Data.ByteString.Lazy.Char8 as L8 hiding (ByteString)
43 import qualified Data.Map as M
45 import qualified Data.Time.W3C as W3C
46 import Network.HTTP.Lucu hiding (redirect)
47 import Network.URI hiding (fragment)
49 import Subversion.Types
50 import System.FilePath.Posix
51 import Text.XML.HXT.DOM.TypeDefs
52 import Text.XML.HXT.XPath
53 import Text.XML.HXT.Arrow.XmlArrow
54 import Prelude.Unicode
56 type PageName = T.Text
58 type LanguageTag = Ascii -- See RFC 3066: http://www.ietf.org/rfc/rfc3066.txt
59 type LanguageName = T.Text -- i.e. "日本語"
64 redirName :: !PageName
65 , redirDest :: !PageName
66 , redirIsLocked :: !Bool
67 , redirRevision :: RevNum
68 , redirLastMod :: UTCTime
69 , redirUpdateInfo :: Maybe UpdateInfo
72 entityName :: !PageName
73 , entityType :: !MIMEType
74 , entityLanguage :: !(Maybe LanguageTag)
75 , entityIsTheme :: !Bool -- text/css 以外では無意味
76 , entityIsFeed :: !Bool -- text/x-rakka 以外では無意味
77 , entityIsLocked :: !Bool
78 , entityIsBinary :: !Bool
79 , entityRevision :: RevNum
80 , entityLastMod :: UTCTime
81 , entitySummary :: !(Maybe String)
82 , entityOtherLang :: !(Map LanguageTag PageName)
83 , entityContent :: !Lazy.ByteString
84 , entityUpdateInfo :: Maybe UpdateInfo
91 uiOldRevision :: !RevNum
92 , uiOldName :: !(Maybe PageName)
97 isRedirect :: Page -> Bool
98 isRedirect (Redirection _ _ _ _ _ _) = True
102 isEntity :: Page -> Bool
103 isEntity (Entity _ _ _ _ _ _ _ _ _ _ _ _ _) = True
107 pageName :: Page -> PageName
109 | isRedirect p = redirName p
110 | isEntity p = entityName p
111 | otherwise = error "neither redirection nor entity"
114 pageUpdateInfo :: Page -> Maybe UpdateInfo
116 | isRedirect p = redirUpdateInfo p
117 | isEntity p = entityUpdateInfo p
118 | otherwise = error "neither redirection nor entity"
121 pageRevision :: Page -> RevNum
123 | isRedirect p = redirRevision p
124 | isEntity p = entityRevision p
125 | otherwise = error "neither redirection nor entity"
128 -- UTF-8 に encode してから 0x20 - 0x7E の範圍を除いて URI escape する。
129 encodePageName :: PageName -> FilePath
130 encodePageName = escapeURIString isSafeChar . UTF8.encodeString . fixPageName
132 fixPageName :: PageName -> PageName
133 fixPageName = (\ (x:xs) -> toUpper x : xs) . map (\ c -> if c == ' ' then '_' else c)
136 decodePageName :: FilePath -> PageName
137 decodePageName = UTF8.decodeString . unEscapeString
140 encodeFragment :: String -> String
141 encodeFragment = escapeURIString isSafeChar . UTF8.encodeString
144 mkPageURI :: URI -> PageName -> URI
145 mkPageURI baseURI name
147 uriPath = uriPath baseURI </> encodePageName name <.> "html"
151 mkPageFragmentURI :: URI -> PageName -> String -> URI
152 mkPageFragmentURI baseURI name fragment
154 uriPath = uriPath baseURI </> encodePageName name <.> "html"
155 , uriFragment = ('#' : encodeFragment fragment)
159 mkFragmentURI :: String -> URI
160 mkFragmentURI fragment
162 uriFragment = ('#' : encodeFragment fragment)
166 mkObjectURI :: URI -> PageName -> URI
167 mkObjectURI baseURI name
168 = mkAuxiliaryURI baseURI ["object"] name
171 mkAuxiliaryURI :: URI -> [String] -> PageName -> URI
172 mkAuxiliaryURI baseURI basePath name
174 uriPath = foldl (</>) "/" ([uriPath baseURI] ++ basePath ++ [encodePageName name])
178 mkFeedURI :: URI -> PageName -> URI
179 mkFeedURI baseURI name
181 uriPath = uriPath baseURI </> encodePageName name <.> "rdf"
185 mkRakkaURI :: PageName -> URI
186 mkRakkaURI name = URI {
188 , uriAuthority = Nothing
189 , uriPath = encodePageName name
198 lang="ja" -- 存在しない場合もある
199 isTheme="no" -- text/css の場合のみ存在
200 isFeed="no" -- text/x-rakka の場合のみ存在
204 lastModified="2000-01-01T00:00:00">
208 </summary> -- 存在しない場合もある
210 <otherLang> -- 存在しない場合もある
211 <link lang="ja" page="Bar/Baz" />
219 SKJaHKS8JK/DH8KS43JDK2aKKaSFLLS...
226 lastModified="2000-01-01T00:00:00" />
228 xmlizePage :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
231 -> if isRedirect page then
232 xmlizeRedirection -< page
236 xmlizeRedirection :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
239 -> do lastMod <- arrIO (utcToLocalZonedTime . redirLastMod) -< page
242 += sattr "name" (redirName page)
243 += sattr "redirect" (redirDest page)
244 += sattr "isLocked" (yesOrNo $ redirIsLocked page)
245 += sattr "revision" (show $ redirRevision page)
246 += sattr "lastModified" (W3C.format lastMod)
249 xmlizeEntity :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
252 -> do lastMod <- arrIO (utcToLocalZonedTime . entityLastMod) -< page
255 += sattr "name" (pageName page)
256 += sattr "type" (show $ entityType page)
257 += ( case entityLanguage page of
258 Just x -> sattr "lang" x
261 += ( case entityType page of
262 MIMEType "text" "css" _
263 -> sattr "isTheme" (yesOrNo $ entityIsTheme page)
264 MIMEType "text" "x-rakka" _
265 -> sattr "isFeed" (yesOrNo $ entityIsFeed page)
269 += sattr "isLocked" (yesOrNo $ entityIsLocked page)
270 += sattr "isBinary" (yesOrNo $ entityIsBinary page)
271 += sattr "revision" (show $ entityRevision page)
272 += sattr "lastModified" (W3C.format lastMod)
273 += ( case entitySummary page of
274 Just s -> eelem "summary" += txt s
277 += ( if M.null (entityOtherLang page) then
284 | (lang, name) <- M.toList (entityOtherLang page) ]
286 += ( if entityIsBinary page then
288 += txt (L8.unpack $ encodeBase64LBS $ entityContent page)
292 += txt (UTF8.decode $ L.unpack $ entityContent page)
298 parseXmlizedPage :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page
301 -> do updateInfo <- maybeA parseUpdateInfo -< tree
302 redirect <- maybeA (getXPathTreesInDoc "/page/@redirect/text()" >>> getText) -< tree
303 isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no"
304 >>> parseYesOrNo) -< tree
306 Nothing -> parseEntity -< (name, tree)
307 Just dest -> returnA -< (Redirection {
310 , redirIsLocked = isLocked
311 , redirRevision = undefined
312 , redirLastMod = undefined
313 , redirUpdateInfo = updateInfo
317 parseEntity :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page
320 -> do updateInfo <- maybeA parseUpdateInfo -< tree
322 mimeTypeStr <- withDefault (getXPathTreesInDoc "/page/@type/text()" >>> getText) "" -< tree
324 lang <- maybeA (getXPathTreesInDoc "/page/@lang/text()" >>> getText) -< tree
326 isTheme <- (withDefault (getXPathTreesInDoc "/page/@isTheme/text()" >>> getText) "no"
327 >>> parseYesOrNo) -< tree
328 isFeed <- (withDefault (getXPathTreesInDoc "/page/@isFeed/text()" >>> getText) "no"
329 >>> parseYesOrNo) -< tree
330 isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no"
331 >>> parseYesOrNo) -< tree
333 summary <- (maybeA (getXPathTreesInDoc "/page/summary/text()"
335 >>> deleteIfEmpty)) -< tree
337 otherLang <- listA (getXPathTreesInDoc "/page/otherLang/link"
339 (getAttrValue0 "lang"
341 getAttrValue0 "page")) -< tree
343 textData <- maybeA (getXPathTreesInDoc "/page/textData/text()" >>> getText) -< tree
344 binaryData <- maybeA (getXPathTreesInDoc "/page/binaryData/text()" >>> getText) -< tree
346 let (isBinary, content)
347 = case (textData, binaryData) of
348 (Just text, Nothing ) -> (False, L.pack $ UTF8.encode text)
349 (Nothing , Just binary) -> (True , L8.pack $ decodeBase64 $ dropWhitespace binary)
350 _ -> error "one of textData or binaryData is required"
353 if null mimeTypeStr then
354 guessMIMEType content
362 , entityType = mimeType
363 , entityLanguage = lang
364 , entityIsTheme = isTheme
365 , entityIsFeed = isFeed
366 , entityIsLocked = isLocked
367 , entityIsBinary = isBinary
368 , entityRevision = undefined
369 , entityLastMod = undefined
370 , entitySummary = summary
371 , entityOtherLang = M.fromList otherLang
372 , entityContent = content
373 , entityUpdateInfo = updateInfo
376 dropWhitespace :: String -> String
377 {-# INLINE dropWhitespace #-}
378 dropWhitespace = filter ((¬) ∘ isSpace)
380 parseUpdateInfo :: (ArrowXml a, ArrowChoice a) => a XmlTree UpdateInfo
383 -> do uInfo <- getXPathTreesInDoc "/page/updateInfo" -< tree
384 oldRev <- (getAttrValue0 "oldRevision" >>> arr read) -< uInfo
385 oldName <- maybeA (getXPathTrees "/updateInfo/move/@from/text()" >>> getText) -< uInfo
386 returnA -< UpdateInfo {
387 uiOldRevision = oldRev
388 , uiOldName = oldName