31 import qualified Codec.Binary.UTF8.String as UTF8
33 import Control.Arrow.ArrowIO
34 import Control.Arrow.ArrowList
35 import qualified Data.ByteString.Lazy as Lazy (ByteString)
36 import qualified Data.ByteString.Lazy as L hiding (ByteString)
37 import qualified Data.ByteString.Lazy.Char8 as L8 hiding (ByteString)
40 import qualified Data.Map as M
43 import Network.HTTP.Lucu hiding (redirect)
44 import Network.URI hiding (fragment)
45 import OpenSSL.EVP.Base64
47 import Rakka.W3CDateTime
48 import Subversion.Types
49 import System.FilePath.Posix
50 import Text.XML.HXT.Arrow.XmlArrow
51 import Text.XML.HXT.Arrow.XmlNodeSet
52 import Text.XML.HXT.DOM.TypeDefs
55 type PageName = String
57 type LanguageTag = String -- See RFC 3066: http://www.ietf.org/rfc/rfc3066.txt
58 type LanguageName = String -- i.e. "日本語"
63 redirName :: !PageName
64 , redirDest :: !PageName
65 , redirIsLocked :: !Bool
66 , redirRevision :: RevNum
67 , redirLastMod :: UTCTime
68 , redirUpdateInfo :: Maybe UpdateInfo
71 entityName :: !PageName
72 , entityType :: !MIMEType
73 , entityLanguage :: !(Maybe LanguageTag)
74 , entityIsTheme :: !Bool -- text/css 以外では無意味
75 , entityIsFeed :: !Bool -- text/x-rakka 以外では無意味
76 , entityIsLocked :: !Bool
77 , entityIsBinary :: !Bool
78 , entityRevision :: RevNum
79 , entityLastMod :: UTCTime
80 , entitySummary :: !(Maybe String)
81 , entityOtherLang :: !(Map LanguageTag PageName)
82 , entityContent :: !Lazy.ByteString
83 , entityUpdateInfo :: Maybe UpdateInfo
90 uiOldRevision :: !RevNum
91 , uiOldName :: !(Maybe PageName)
96 isRedirect :: Page -> Bool
97 isRedirect (Redirection _ _ _ _ _ _) = True
101 isEntity :: Page -> Bool
102 isEntity (Entity _ _ _ _ _ _ _ _ _ _ _ _ _) = True
106 pageName :: Page -> PageName
108 | isRedirect p = redirName p
109 | isEntity p = entityName p
110 | otherwise = error "neither redirection nor entity"
113 pageUpdateInfo :: Page -> Maybe UpdateInfo
115 | isRedirect p = redirUpdateInfo p
116 | isEntity p = entityUpdateInfo p
117 | otherwise = error "neither redirection nor entity"
120 pageRevision :: Page -> RevNum
122 | isRedirect p = redirRevision p
123 | isEntity p = entityRevision p
124 | otherwise = error "neither redirection nor entity"
127 -- UTF-8 に encode してから 0x20 - 0x7E の範圍を除いて URI escape する。
128 encodePageName :: PageName -> FilePath
129 encodePageName = escapeURIString isSafeChar . UTF8.encodeString . fixPageName
131 fixPageName :: PageName -> PageName
132 fixPageName = (\ (x:xs) -> toUpper x : xs) . map (\ c -> if c == ' ' then '_' else c)
135 -- URI unescape して UTF-8 から decode する。
136 decodePageName :: FilePath -> PageName
137 decodePageName = UTF8.decodeString . unEscapeString
140 encodeFragment :: String -> String
141 encodeFragment = escapeURIString isSafeChar . UTF8.encodeString
144 mkPageURI :: URI -> PageName -> URI
145 mkPageURI baseURI name
147 uriPath = uriPath baseURI </> encodePageName name <.> "html"
151 mkPageFragmentURI :: URI -> PageName -> String -> URI
152 mkPageFragmentURI baseURI name fragment
154 uriPath = uriPath baseURI </> encodePageName name <.> "html"
155 , uriFragment = ('#' : encodeFragment fragment)
159 mkFragmentURI :: String -> URI
160 mkFragmentURI fragment
162 uriFragment = ('#' : encodeFragment fragment)
166 mkObjectURI :: URI -> PageName -> URI
167 mkObjectURI baseURI name
168 = mkAuxiliaryURI baseURI ["object"] name
171 mkAuxiliaryURI :: URI -> [String] -> PageName -> URI
172 mkAuxiliaryURI baseURI basePath name
174 uriPath = foldl (</>) "/" ([uriPath baseURI] ++ basePath ++ [encodePageName name])
178 mkFeedURI :: URI -> PageName -> URI
179 mkFeedURI baseURI name
181 uriPath = uriPath baseURI </> encodePageName name <.> "rdf"
185 mkRakkaURI :: PageName -> URI
186 mkRakkaURI name = URI {
188 , uriAuthority = Nothing
189 , uriPath = encodePageName name
198 lang="ja" -- 存在しない場合もある
199 isTheme="no" -- text/css の場合のみ存在
200 isFeed="no" -- text/x-rakka の場合のみ存在
204 lastModified="2000-01-01T00:00:00">
208 </summary> -- 存在しない場合もある
210 <otherLang> -- 存在しない場合もある
211 <link lang="ja" page="Bar/Baz" />
219 SKJaHKS8JK/DH8KS43JDK2aKKaSFLLS...
226 lastModified="2000-01-01T00:00:00" />
228 xmlizePage :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
231 -> if isRedirect page then
232 xmlizeRedirection -< page
236 xmlizeRedirection :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
239 -> do lastMod <- arrIO (utcToLocalZonedTime . redirLastMod) -< page
242 += sattr "name" (redirName page)
243 += sattr "redirect" (redirDest page)
244 += sattr "isLocked" (yesOrNo $ redirIsLocked page)
245 += sattr "revision" (show $ redirRevision page)
246 += sattr "lastModified" (formatW3CDateTime lastMod)
249 xmlizeEntity :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
252 -> do lastMod <- arrIO (utcToLocalZonedTime . entityLastMod) -< page
255 += sattr "name" (pageName page)
256 += sattr "type" (show $ entityType page)
257 += ( case entityLanguage page of
258 Just x -> sattr "lang" x
261 += ( case entityType page of
262 MIMEType "text" "css" _
263 -> sattr "isTheme" (yesOrNo $ entityIsTheme page)
264 MIMEType "text" "x-rakka" _
265 -> sattr "isFeed" (yesOrNo $ entityIsFeed page)
269 += sattr "isLocked" (yesOrNo $ entityIsLocked page)
270 += sattr "isBinary" (yesOrNo $ entityIsBinary page)
271 += sattr "revision" (show $ entityRevision page)
272 += sattr "lastModified" (formatW3CDateTime lastMod)
273 += ( case entitySummary page of
274 Just s -> eelem "summary" += txt s
277 += ( if M.null (entityOtherLang page) then
284 | (lang, name) <- M.toList (entityOtherLang page) ]
286 += ( if entityIsBinary page then
288 += txt (L8.unpack $ encodeBase64LBS $ entityContent page)
292 += txt (UTF8.decode $ L.unpack $ entityContent page)
298 parseXmlizedPage :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page
301 -> do updateInfo <- maybeA parseUpdateInfo -< tree
302 redirect <- maybeA (getXPathTreesInDoc "/page/@redirect/text()" >>> getText) -< tree
303 isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no"
304 >>> parseYesOrNo) -< tree
306 Nothing -> parseEntity -< (name, tree)
307 Just dest -> returnA -< (Redirection {
310 , redirIsLocked = isLocked
311 , redirRevision = undefined
312 , redirLastMod = undefined
313 , redirUpdateInfo = updateInfo
317 parseEntity :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page
320 -> do updateInfo <- maybeA parseUpdateInfo -< tree
322 mimeTypeStr <- withDefault (getXPathTreesInDoc "/page/@type/text()" >>> getText) "" -< tree
324 lang <- maybeA (getXPathTreesInDoc "/page/@lang/text()" >>> getText) -< tree
326 isTheme <- (withDefault (getXPathTreesInDoc "/page/@isTheme/text()" >>> getText) "no"
327 >>> parseYesOrNo) -< tree
328 isFeed <- (withDefault (getXPathTreesInDoc "/page/@isFeed/text()" >>> getText) "no"
329 >>> parseYesOrNo) -< tree
330 isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no"
331 >>> parseYesOrNo) -< tree
333 summary <- (maybeA (getXPathTreesInDoc "/page/summary/text()"
335 >>> deleteIfEmpty)) -< tree
337 otherLang <- listA (getXPathTreesInDoc "/page/otherLang/link"
339 (getAttrValue0 "lang"
341 getAttrValue0 "page")) -< tree
343 textData <- maybeA (getXPathTreesInDoc "/page/textData/text()" >>> getText) -< tree
344 binaryData <- maybeA (getXPathTreesInDoc "/page/binaryData/text()" >>> getText) -< tree
346 let (isBinary, content)
347 = case (textData, binaryData) of
348 (Just text, Nothing ) -> (False, L.pack $ UTF8.encode text)
349 (Nothing , Just binary) -> (True , L8.pack $ decodeBase64 $ dropWhitespace binary)
350 _ -> error "one of textData or binaryData is required"
353 if null mimeTypeStr then
354 guessMIMEType content
362 , entityType = mimeType
363 , entityLanguage = lang
364 , entityIsTheme = isTheme
365 , entityIsFeed = isFeed
366 , entityIsLocked = isLocked
367 , entityIsBinary = isBinary
368 , entityRevision = undefined
369 , entityLastMod = undefined
370 , entitySummary = summary
371 , entityOtherLang = M.fromList otherLang
372 , entityContent = content
373 , entityUpdateInfo = updateInfo
376 dropWhitespace :: String -> String
377 dropWhitespace [] = []
378 dropWhitespace (x:xs)
379 | x == ' ' || x == '\t' || x == '\n'
382 = x : dropWhitespace xs
385 parseUpdateInfo :: (ArrowXml a, ArrowChoice a) => a XmlTree UpdateInfo
388 -> do uInfo <- getXPathTreesInDoc "/page/updateInfo" -< tree
389 oldRev <- (getAttrValue0 "oldRevision" >>> arr read) -< uInfo
390 oldName <- maybeA (getXPathTrees "/updateInfo/move/@from/text()" >>> getText) -< uInfo
391 returnA -< UpdateInfo {
392 uiOldRevision = oldRev
393 , uiOldName = oldName