32 import qualified Codec.Binary.UTF8.String as UTF8
34 import Control.Arrow.ArrowIO
35 import Control.Arrow.ArrowList
36 import qualified Data.ByteString.Lazy as Lazy (ByteString)
37 import qualified Data.ByteString.Lazy as L hiding (ByteString)
38 import qualified Data.ByteString.Lazy.Char8 as L8 hiding (ByteString)
41 import qualified Data.Map as M
44 import Network.HTTP.Lucu hiding (redirect)
45 import Network.URI hiding (fragment)
46 import OpenSSL.EVP.Base64
48 import Rakka.W3CDateTime
49 import Subversion.Types
50 import System.FilePath.Posix
51 import Text.XML.HXT.Arrow.XmlArrow
52 import Text.XML.HXT.Arrow.XmlNodeSet
53 import Text.XML.HXT.DOM.TypeDefs
56 type PageName = String
58 type LanguageTag = String -- See RFC 3066: http://www.ietf.org/rfc/rfc3066.txt
59 type LanguageName = String -- i.e. "日本語"
64 redirName :: !PageName
65 , redirDest :: !PageName
66 , redirIsLocked :: !Bool
67 , redirRevision :: RevNum
68 , redirLastMod :: UTCTime
69 , redirUpdateInfo :: Maybe UpdateInfo
72 entityName :: !PageName
73 , entityType :: !MIMEType
74 , entityLanguage :: !(Maybe LanguageTag)
75 , entityIsTheme :: !Bool -- text/css 以外では無意味
76 , entityIsFeed :: !Bool -- text/x-rakka 以外では無意味
77 , entityIsLocked :: !Bool
78 , entityIsBinary :: !Bool
79 , entityRevision :: RevNum
80 , entityLastMod :: UTCTime
81 , entitySummary :: !(Maybe String)
82 , entityOtherLang :: !(Map LanguageTag PageName)
83 , entityContent :: !Lazy.ByteString
84 , entityUpdateInfo :: Maybe UpdateInfo
91 uiOldRevision :: !RevNum
92 , uiOldName :: !(Maybe PageName)
97 isRedirect :: Page -> Bool
98 isRedirect (Redirection _ _ _ _ _ _) = True
102 isEntity :: Page -> Bool
103 isEntity (Entity _ _ _ _ _ _ _ _ _ _ _ _ _) = True
107 pageName :: Page -> PageName
109 | isRedirect p = redirName p
110 | isEntity p = entityName p
111 | otherwise = error "neither redirection nor entity"
114 pageUpdateInfo :: Page -> Maybe UpdateInfo
116 | isRedirect p = redirUpdateInfo p
117 | isEntity p = entityUpdateInfo p
118 | otherwise = error "neither redirection nor entity"
121 pageRevision :: Page -> RevNum
123 | isRedirect p = redirRevision p
124 | isEntity p = entityRevision p
125 | otherwise = error "neither redirection nor entity"
128 -- UTF-8 に encode してから 0x20 - 0x7E の範圍を除いて URI escape する。
129 encodePageName :: PageName -> FilePath
130 encodePageName = escapeURIString isSafeChar . UTF8.encodeString . fixPageName
132 fixPageName :: PageName -> PageName
133 fixPageName = (\ (x:xs) -> toUpper x : xs) . map (\ c -> if c == ' ' then '_' else c)
136 isSafeChar :: Char -> Bool
139 | isReserved c = False
140 | c > ' ' && c <= '~' = True
144 -- URI unescape して UTF-8 から decode する。
145 decodePageName :: FilePath -> PageName
146 decodePageName = UTF8.decodeString . unEscapeString
149 encodeFragment :: String -> String
150 encodeFragment = escapeURIString isSafeChar . UTF8.encodeString
153 mkPageURI :: URI -> PageName -> URI
154 mkPageURI baseURI name
156 uriPath = uriPath baseURI </> encodePageName name <.> "html"
160 mkPageFragmentURI :: URI -> PageName -> String -> URI
161 mkPageFragmentURI baseURI name fragment
163 uriPath = uriPath baseURI </> encodePageName name <.> "html"
164 , uriFragment = ('#' : encodeFragment fragment)
168 mkFragmentURI :: String -> URI
169 mkFragmentURI fragment
171 uriFragment = ('#' : encodeFragment fragment)
175 mkObjectURI :: URI -> PageName -> URI
176 mkObjectURI baseURI name
177 = mkAuxiliaryURI baseURI ["object"] name
180 mkAuxiliaryURI :: URI -> [String] -> PageName -> URI
181 mkAuxiliaryURI baseURI basePath name
183 uriPath = foldl (</>) "/" ([uriPath baseURI] ++ basePath ++ [encodePageName name])
187 mkFeedURI :: URI -> PageName -> URI
188 mkFeedURI baseURI name
190 uriPath = uriPath baseURI </> encodePageName name <.> "rdf"
194 mkRakkaURI :: PageName -> URI
195 mkRakkaURI name = URI {
197 , uriAuthority = Nothing
198 , uriPath = encodePageName name
207 lang="ja" -- 存在しない場合もある
208 isTheme="no" -- text/css の場合のみ存在
209 isFeed="no" -- text/x-rakka の場合のみ存在
213 lastModified="2000-01-01T00:00:00">
217 </summary> -- 存在しない場合もある
219 <otherLang> -- 存在しない場合もある
220 <link lang="ja" page="Bar/Baz" />
228 SKJaHKS8JK/DH8KS43JDK2aKKaSFLLS...
235 lastModified="2000-01-01T00:00:00" />
237 xmlizePage :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
240 -> if isRedirect page then
241 xmlizeRedirection -< page
245 xmlizeRedirection :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
248 -> do lastMod <- arrIO (utcToLocalZonedTime . redirLastMod) -< page
251 += sattr "name" (redirName page)
252 += sattr "redirect" (redirDest page)
253 += sattr "isLocked" (yesOrNo $ redirIsLocked page)
254 += sattr "revision" (show $ redirRevision page)
255 += sattr "lastModified" (formatW3CDateTime lastMod)
258 xmlizeEntity :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
261 -> do lastMod <- arrIO (utcToLocalZonedTime . entityLastMod) -< page
264 += sattr "name" (pageName page)
265 += sattr "type" (show $ entityType page)
266 += ( case entityLanguage page of
267 Just x -> sattr "lang" x
270 += ( case entityType page of
271 MIMEType "text" "css" _
272 -> sattr "isTheme" (yesOrNo $ entityIsTheme page)
273 MIMEType "text" "x-rakka" _
274 -> sattr "isFeed" (yesOrNo $ entityIsFeed page)
278 += sattr "isLocked" (yesOrNo $ entityIsLocked page)
279 += sattr "isBinary" (yesOrNo $ entityIsBinary page)
280 += sattr "revision" (show $ entityRevision page)
281 += sattr "lastModified" (formatW3CDateTime lastMod)
282 += ( case entitySummary page of
283 Just s -> eelem "summary" += txt s
286 += ( if M.null (entityOtherLang page) then
293 | (lang, name) <- M.toList (entityOtherLang page) ]
295 += ( if entityIsBinary page then
297 += txt (L8.unpack $ encodeBase64LBS $ entityContent page)
301 += txt (UTF8.decode $ L.unpack $ entityContent page)
307 parseXmlizedPage :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page
310 -> do updateInfo <- maybeA parseUpdateInfo -< tree
311 redirect <- maybeA (getXPathTreesInDoc "/page/@redirect/text()" >>> getText) -< tree
312 isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no"
313 >>> parseYesOrNo) -< tree
315 Nothing -> parseEntity -< (name, tree)
316 Just dest -> returnA -< (Redirection {
319 , redirIsLocked = isLocked
320 , redirRevision = undefined
321 , redirLastMod = undefined
322 , redirUpdateInfo = updateInfo
326 parseEntity :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page
329 -> do updateInfo <- maybeA parseUpdateInfo -< tree
331 mimeTypeStr <- withDefault (getXPathTreesInDoc "/page/@type/text()" >>> getText) "" -< tree
333 lang <- maybeA (getXPathTreesInDoc "/page/@lang/text()" >>> getText) -< tree
335 isTheme <- (withDefault (getXPathTreesInDoc "/page/@isTheme/text()" >>> getText) "no"
336 >>> parseYesOrNo) -< tree
337 isFeed <- (withDefault (getXPathTreesInDoc "/page/@isFeed/text()" >>> getText) "no"
338 >>> parseYesOrNo) -< tree
339 isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no"
340 >>> parseYesOrNo) -< tree
342 summary <- (maybeA (getXPathTreesInDoc "/page/summary/text()"
344 >>> deleteIfEmpty)) -< tree
346 otherLang <- listA (getXPathTreesInDoc "/page/otherLang/link"
348 (getAttrValue0 "lang"
350 getAttrValue0 "page")) -< tree
352 textData <- maybeA (getXPathTreesInDoc "/page/textData/text()" >>> getText) -< tree
353 binaryData <- maybeA (getXPathTreesInDoc "/page/binaryData/text()" >>> getText) -< tree
355 let (isBinary, content)
356 = case (textData, binaryData) of
357 (Just text, Nothing ) -> (False, L.pack $ UTF8.encode text)
358 (Nothing , Just binary) -> (True , L8.pack $ decodeBase64 $ dropWhitespace binary)
359 _ -> error "one of textData or binaryData is required"
362 if null mimeTypeStr then
363 guessMIMEType content
371 , entityType = mimeType
372 , entityLanguage = lang
373 , entityIsTheme = isTheme
374 , entityIsFeed = isFeed
375 , entityIsLocked = isLocked
376 , entityIsBinary = isBinary
377 , entityRevision = undefined
378 , entityLastMod = undefined
379 , entitySummary = summary
380 , entityOtherLang = M.fromList otherLang
381 , entityContent = content
382 , entityUpdateInfo = updateInfo
385 dropWhitespace :: String -> String
386 dropWhitespace [] = []
387 dropWhitespace (x:xs)
388 | x == ' ' || x == '\t' || x == '\n'
391 = x : dropWhitespace xs
394 parseUpdateInfo :: (ArrowXml a, ArrowChoice a) => a XmlTree UpdateInfo
397 -> do uInfo <- getXPathTreesInDoc "/page/updateInfo" -< tree
398 oldRev <- (getAttrValue0 "oldRevision" >>> arr read) -< uInfo
399 oldName <- maybeA (getXPathTrees "/updateInfo/move/@from/text()" >>> getText) -< uInfo
400 returnA -< UpdateInfo {
401 uiOldRevision = oldRev
402 , uiOldName = oldName