31 import qualified Codec.Binary.UTF8.String as UTF8
32 import qualified Data.ByteString.Lazy as Lazy (ByteString)
33 import qualified Data.ByteString.Lazy as L hiding (ByteString)
34 import qualified Data.ByteString.Lazy.Char8 as L8 hiding (ByteString)
37 import qualified Data.Map as M
39 import Network.HTTP.Lucu hiding (redirect)
40 import Network.URI hiding (fragment)
41 import OpenSSL.EVP.Base64
43 import Rakka.W3CDateTime
44 import Subversion.Types
45 import System.FilePath.Posix
46 import Text.XML.HXT.Arrow
47 import Text.XML.HXT.XPath
50 type PageName = String
52 type LanguageTag = String -- See RFC 3066: http://www.ietf.org/rfc/rfc3066.txt
53 type LanguageName = String -- i.e. "日本語"
58 redirName :: !PageName
59 , redirDest :: !PageName
60 , redirIsLocked :: !Bool
61 , redirRevision :: RevNum
62 , redirLastMod :: UTCTime
63 , redirUpdateInfo :: Maybe UpdateInfo
66 entityName :: !PageName
67 , entityType :: !MIMEType
68 , entityLanguage :: !(Maybe LanguageTag)
69 , entityIsTheme :: !Bool -- text/css 以外では無意味
70 , entityIsFeed :: !Bool -- text/x-rakka 以外では無意味
71 , entityIsLocked :: !Bool
72 , entityIsBinary :: !Bool
73 , entityRevision :: RevNum
74 , entityLastMod :: UTCTime
75 , entitySummary :: !(Maybe String)
76 , entityOtherLang :: !(Map LanguageTag PageName)
77 , entityContent :: !Lazy.ByteString
78 , entityUpdateInfo :: Maybe UpdateInfo
85 uiOldRevision :: !RevNum
86 , uiOldName :: !(Maybe PageName)
91 isRedirect :: Page -> Bool
92 isRedirect (Redirection _ _ _ _ _ _) = True
96 isEntity :: Page -> Bool
97 isEntity (Entity _ _ _ _ _ _ _ _ _ _ _ _ _) = True
101 pageName :: Page -> PageName
103 | isRedirect p = redirName p
104 | isEntity p = entityName p
105 | otherwise = error "neither redirection nor entity"
108 pageUpdateInfo :: Page -> Maybe UpdateInfo
110 | isRedirect p = redirUpdateInfo p
111 | isEntity p = entityUpdateInfo p
112 | otherwise = error "neither redirection nor entity"
115 pageRevision :: Page -> RevNum
117 | isRedirect p = redirRevision p
118 | isEntity p = entityRevision p
119 | otherwise = error "neither redirection nor entity"
122 -- UTF-8 に encode してから 0x20 - 0x7E の範圍を除いて URI escape する。
123 encodePageName :: PageName -> FilePath
124 encodePageName = escapeURIString isSafeChar . UTF8.encodeString . fixPageName
126 fixPageName :: PageName -> PageName
127 fixPageName = (\ (x:xs) -> toUpper x : xs) . map (\ c -> if c == ' ' then '_' else c)
130 decodePageName :: FilePath -> PageName
131 decodePageName = UTF8.decodeString . unEscapeString
134 encodeFragment :: String -> String
135 encodeFragment = escapeURIString isSafeChar . UTF8.encodeString
138 mkPageURI :: URI -> PageName -> URI
139 mkPageURI baseURI name
141 uriPath = uriPath baseURI </> encodePageName name <.> "html"
145 mkPageFragmentURI :: URI -> PageName -> String -> URI
146 mkPageFragmentURI baseURI name fragment
148 uriPath = uriPath baseURI </> encodePageName name <.> "html"
149 , uriFragment = ('#' : encodeFragment fragment)
153 mkFragmentURI :: String -> URI
154 mkFragmentURI fragment
156 uriFragment = ('#' : encodeFragment fragment)
160 mkObjectURI :: URI -> PageName -> URI
161 mkObjectURI baseURI name
162 = mkAuxiliaryURI baseURI ["object"] name
165 mkAuxiliaryURI :: URI -> [String] -> PageName -> URI
166 mkAuxiliaryURI baseURI basePath name
168 uriPath = foldl (</>) "/" ([uriPath baseURI] ++ basePath ++ [encodePageName name])
172 mkFeedURI :: URI -> PageName -> URI
173 mkFeedURI baseURI name
175 uriPath = uriPath baseURI </> encodePageName name <.> "rdf"
179 mkRakkaURI :: PageName -> URI
180 mkRakkaURI name = URI {
182 , uriAuthority = Nothing
183 , uriPath = encodePageName name
192 lang="ja" -- 存在しない場合もある
193 isTheme="no" -- text/css の場合のみ存在
194 isFeed="no" -- text/x-rakka の場合のみ存在
198 lastModified="2000-01-01T00:00:00">
202 </summary> -- 存在しない場合もある
204 <otherLang> -- 存在しない場合もある
205 <link lang="ja" page="Bar/Baz" />
213 SKJaHKS8JK/DH8KS43JDK2aKKaSFLLS...
220 lastModified="2000-01-01T00:00:00" />
222 xmlizePage :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
225 -> if isRedirect page then
226 xmlizeRedirection -< page
230 xmlizeRedirection :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
233 -> do lastMod <- arrIO (utcToLocalZonedTime . redirLastMod) -< page
236 += sattr "name" (redirName page)
237 += sattr "redirect" (redirDest page)
238 += sattr "isLocked" (yesOrNo $ redirIsLocked page)
239 += sattr "revision" (show $ redirRevision page)
240 += sattr "lastModified" (formatW3CDateTime lastMod)
243 xmlizeEntity :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
246 -> do lastMod <- arrIO (utcToLocalZonedTime . entityLastMod) -< page
249 += sattr "name" (pageName page)
250 += sattr "type" (show $ entityType page)
251 += ( case entityLanguage page of
252 Just x -> sattr "lang" x
255 += ( case entityType page of
256 MIMEType "text" "css" _
257 -> sattr "isTheme" (yesOrNo $ entityIsTheme page)
258 MIMEType "text" "x-rakka" _
259 -> sattr "isFeed" (yesOrNo $ entityIsFeed page)
263 += sattr "isLocked" (yesOrNo $ entityIsLocked page)
264 += sattr "isBinary" (yesOrNo $ entityIsBinary page)
265 += sattr "revision" (show $ entityRevision page)
266 += sattr "lastModified" (formatW3CDateTime lastMod)
267 += ( case entitySummary page of
268 Just s -> eelem "summary" += txt s
271 += ( if M.null (entityOtherLang page) then
278 | (lang, name) <- M.toList (entityOtherLang page) ]
280 += ( if entityIsBinary page then
282 += txt (L8.unpack $ encodeBase64LBS $ entityContent page)
286 += txt (UTF8.decode $ L.unpack $ entityContent page)
292 parseXmlizedPage :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page
295 -> do updateInfo <- maybeA parseUpdateInfo -< tree
296 redirect <- maybeA (getXPathTreesInDoc "/page/@redirect/text()" >>> getText) -< tree
297 isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no"
298 >>> parseYesOrNo) -< tree
300 Nothing -> parseEntity -< (name, tree)
301 Just dest -> returnA -< (Redirection {
304 , redirIsLocked = isLocked
305 , redirRevision = undefined
306 , redirLastMod = undefined
307 , redirUpdateInfo = updateInfo
311 parseEntity :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page
314 -> do updateInfo <- maybeA parseUpdateInfo -< tree
316 mimeTypeStr <- withDefault (getXPathTreesInDoc "/page/@type/text()" >>> getText) "" -< tree
318 lang <- maybeA (getXPathTreesInDoc "/page/@lang/text()" >>> getText) -< tree
320 isTheme <- (withDefault (getXPathTreesInDoc "/page/@isTheme/text()" >>> getText) "no"
321 >>> parseYesOrNo) -< tree
322 isFeed <- (withDefault (getXPathTreesInDoc "/page/@isFeed/text()" >>> getText) "no"
323 >>> parseYesOrNo) -< tree
324 isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no"
325 >>> parseYesOrNo) -< tree
327 summary <- (maybeA (getXPathTreesInDoc "/page/summary/text()"
329 >>> deleteIfEmpty)) -< tree
331 otherLang <- listA (getXPathTreesInDoc "/page/otherLang/link"
333 (getAttrValue0 "lang"
335 getAttrValue0 "page")) -< tree
337 textData <- maybeA (getXPathTreesInDoc "/page/textData/text()" >>> getText) -< tree
338 binaryData <- maybeA (getXPathTreesInDoc "/page/binaryData/text()" >>> getText) -< tree
340 let (isBinary, content)
341 = case (textData, binaryData) of
342 (Just text, Nothing ) -> (False, L.pack $ UTF8.encode text)
343 (Nothing , Just binary) -> (True , L8.pack $ decodeBase64 $ dropWhitespace binary)
344 _ -> error "one of textData or binaryData is required"
347 if null mimeTypeStr then
348 guessMIMEType content
356 , entityType = mimeType
357 , entityLanguage = lang
358 , entityIsTheme = isTheme
359 , entityIsFeed = isFeed
360 , entityIsLocked = isLocked
361 , entityIsBinary = isBinary
362 , entityRevision = undefined
363 , entityLastMod = undefined
364 , entitySummary = summary
365 , entityOtherLang = M.fromList otherLang
366 , entityContent = content
367 , entityUpdateInfo = updateInfo
370 dropWhitespace :: String -> String
371 dropWhitespace [] = []
372 dropWhitespace (x:xs)
373 | x == ' ' || x == '\t' || x == '\n'
376 = x : dropWhitespace xs
379 parseUpdateInfo :: (ArrowXml a, ArrowChoice a) => a XmlTree UpdateInfo
382 -> do uInfo <- getXPathTreesInDoc "/page/updateInfo" -< tree
383 oldRev <- (getAttrValue0 "oldRevision" >>> arr read) -< uInfo
384 oldName <- maybeA (getXPathTrees "/updateInfo/move/@from/text()" >>> getText) -< uInfo
385 returnA -< UpdateInfo {
386 uiOldRevision = oldRev
387 , uiOldName = oldName