31 import qualified Codec.Binary.UTF8.String as UTF8
32 import qualified Data.ByteString.Lazy as Lazy (ByteString)
33 import qualified Data.ByteString.Lazy as L hiding (ByteString)
34 import qualified Data.ByteString.Lazy.Char8 as L8 hiding (ByteString)
37 import qualified Data.Map as M
40 import Network.HTTP.Lucu hiding (redirect)
41 import Network.URI hiding (fragment)
42 import OpenSSL.EVP.Base64
44 import Rakka.W3CDateTime
45 import Subversion.Types
46 import System.FilePath.Posix
47 import Text.XML.HXT.Arrow
48 import Text.XML.HXT.DOM.TypeDefs
51 type PageName = String
53 type LanguageTag = String -- See RFC 3066: http://www.ietf.org/rfc/rfc3066.txt
54 type LanguageName = String -- i.e. "日本語"
59 redirName :: !PageName
60 , redirDest :: !PageName
61 , redirIsLocked :: !Bool
62 , redirRevision :: RevNum
63 , redirLastMod :: UTCTime
64 , redirUpdateInfo :: Maybe UpdateInfo
67 entityName :: !PageName
68 , entityType :: !MIMEType
69 , entityLanguage :: !(Maybe LanguageTag)
70 , entityIsTheme :: !Bool -- text/css 以外では無意味
71 , entityIsFeed :: !Bool -- text/x-rakka 以外では無意味
72 , entityIsLocked :: !Bool
73 , entityIsBinary :: !Bool
74 , entityRevision :: RevNum
75 , entityLastMod :: UTCTime
76 , entitySummary :: !(Maybe String)
77 , entityOtherLang :: !(Map LanguageTag PageName)
78 , entityContent :: !Lazy.ByteString
79 , entityUpdateInfo :: Maybe UpdateInfo
86 uiOldRevision :: !RevNum
87 , uiOldName :: !(Maybe PageName)
92 isRedirect :: Page -> Bool
93 isRedirect (Redirection _ _ _ _ _ _) = True
97 isEntity :: Page -> Bool
98 isEntity (Entity _ _ _ _ _ _ _ _ _ _ _ _ _) = True
102 pageName :: Page -> PageName
104 | isRedirect p = redirName p
105 | isEntity p = entityName p
106 | otherwise = error "neither redirection nor entity"
109 pageUpdateInfo :: Page -> Maybe UpdateInfo
111 | isRedirect p = redirUpdateInfo p
112 | isEntity p = entityUpdateInfo p
113 | otherwise = error "neither redirection nor entity"
116 pageRevision :: Page -> RevNum
118 | isRedirect p = redirRevision p
119 | isEntity p = entityRevision p
120 | otherwise = error "neither redirection nor entity"
123 -- UTF-8 に encode してから 0x20 - 0x7E の範圍を除いて URI escape する。
124 encodePageName :: PageName -> FilePath
125 encodePageName = escapeURIString isSafeChar . UTF8.encodeString . fixPageName
127 fixPageName :: PageName -> PageName
128 fixPageName = (\ (x:xs) -> toUpper x : xs) . map (\ c -> if c == ' ' then '_' else c)
131 -- URI unescape して UTF-8 から decode する。
132 decodePageName :: FilePath -> PageName
133 decodePageName = UTF8.decodeString . unEscapeString
136 encodeFragment :: String -> String
137 encodeFragment = escapeURIString isSafeChar . UTF8.encodeString
140 mkPageURI :: URI -> PageName -> URI
141 mkPageURI baseURI name
143 uriPath = uriPath baseURI </> encodePageName name <.> "html"
147 mkPageFragmentURI :: URI -> PageName -> String -> URI
148 mkPageFragmentURI baseURI name fragment
150 uriPath = uriPath baseURI </> encodePageName name <.> "html"
151 , uriFragment = ('#' : encodeFragment fragment)
155 mkFragmentURI :: String -> URI
156 mkFragmentURI fragment
158 uriFragment = ('#' : encodeFragment fragment)
162 mkObjectURI :: URI -> PageName -> URI
163 mkObjectURI baseURI name
164 = mkAuxiliaryURI baseURI ["object"] name
167 mkAuxiliaryURI :: URI -> [String] -> PageName -> URI
168 mkAuxiliaryURI baseURI basePath name
170 uriPath = foldl (</>) "/" ([uriPath baseURI] ++ basePath ++ [encodePageName name])
174 mkFeedURI :: URI -> PageName -> URI
175 mkFeedURI baseURI name
177 uriPath = uriPath baseURI </> encodePageName name <.> "rdf"
181 mkRakkaURI :: PageName -> URI
182 mkRakkaURI name = URI {
184 , uriAuthority = Nothing
185 , uriPath = encodePageName name
194 lang="ja" -- 存在しない場合もある
195 isTheme="no" -- text/css の場合のみ存在
196 isFeed="no" -- text/x-rakka の場合のみ存在
200 lastModified="2000-01-01T00:00:00">
204 </summary> -- 存在しない場合もある
206 <otherLang> -- 存在しない場合もある
207 <link lang="ja" page="Bar/Baz" />
215 SKJaHKS8JK/DH8KS43JDK2aKKaSFLLS...
222 lastModified="2000-01-01T00:00:00" />
224 xmlizePage :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
227 -> if isRedirect page then
228 xmlizeRedirection -< page
232 xmlizeRedirection :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
235 -> do lastMod <- arrIO (utcToLocalZonedTime . redirLastMod) -< page
238 += sattr "name" (redirName page)
239 += sattr "redirect" (redirDest page)
240 += sattr "isLocked" (yesOrNo $ redirIsLocked page)
241 += sattr "revision" (show $ redirRevision page)
242 += sattr "lastModified" (formatW3CDateTime lastMod)
245 xmlizeEntity :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
248 -> do lastMod <- arrIO (utcToLocalZonedTime . entityLastMod) -< page
251 += sattr "name" (pageName page)
252 += sattr "type" (show $ entityType page)
253 += ( case entityLanguage page of
254 Just x -> sattr "lang" x
257 += ( case entityType page of
258 MIMEType "text" "css" _
259 -> sattr "isTheme" (yesOrNo $ entityIsTheme page)
260 MIMEType "text" "x-rakka" _
261 -> sattr "isFeed" (yesOrNo $ entityIsFeed page)
265 += sattr "isLocked" (yesOrNo $ entityIsLocked page)
266 += sattr "isBinary" (yesOrNo $ entityIsBinary page)
267 += sattr "revision" (show $ entityRevision page)
268 += sattr "lastModified" (formatW3CDateTime lastMod)
269 += ( case entitySummary page of
270 Just s -> eelem "summary" += txt s
273 += ( if M.null (entityOtherLang page) then
280 | (lang, name) <- M.toList (entityOtherLang page) ]
282 += ( if entityIsBinary page then
284 += txt (L8.unpack $ encodeBase64LBS $ entityContent page)
288 += txt (UTF8.decode $ L.unpack $ entityContent page)
294 parseXmlizedPage :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page
297 -> do updateInfo <- maybeA parseUpdateInfo -< tree
298 redirect <- maybeA (getXPathTreesInDoc "/page/@redirect/text()" >>> getText) -< tree
299 isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no"
300 >>> parseYesOrNo) -< tree
302 Nothing -> parseEntity -< (name, tree)
303 Just dest -> returnA -< (Redirection {
306 , redirIsLocked = isLocked
307 , redirRevision = undefined
308 , redirLastMod = undefined
309 , redirUpdateInfo = updateInfo
313 parseEntity :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page
316 -> do updateInfo <- maybeA parseUpdateInfo -< tree
318 mimeTypeStr <- withDefault (getXPathTreesInDoc "/page/@type/text()" >>> getText) "" -< tree
320 lang <- maybeA (getXPathTreesInDoc "/page/@lang/text()" >>> getText) -< tree
322 isTheme <- (withDefault (getXPathTreesInDoc "/page/@isTheme/text()" >>> getText) "no"
323 >>> parseYesOrNo) -< tree
324 isFeed <- (withDefault (getXPathTreesInDoc "/page/@isFeed/text()" >>> getText) "no"
325 >>> parseYesOrNo) -< tree
326 isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no"
327 >>> parseYesOrNo) -< tree
329 summary <- (maybeA (getXPathTreesInDoc "/page/summary/text()"
331 >>> deleteIfEmpty)) -< tree
333 otherLang <- listA (getXPathTreesInDoc "/page/otherLang/link"
335 (getAttrValue0 "lang"
337 getAttrValue0 "page")) -< tree
339 textData <- maybeA (getXPathTreesInDoc "/page/textData/text()" >>> getText) -< tree
340 binaryData <- maybeA (getXPathTreesInDoc "/page/binaryData/text()" >>> getText) -< tree
342 let (isBinary, content)
343 = case (textData, binaryData) of
344 (Just text, Nothing ) -> (False, L.pack $ UTF8.encode text)
345 (Nothing , Just binary) -> (True , L8.pack $ decodeBase64 $ dropWhitespace binary)
346 _ -> error "one of textData or binaryData is required"
349 if null mimeTypeStr then
350 guessMIMEType content
358 , entityType = mimeType
359 , entityLanguage = lang
360 , entityIsTheme = isTheme
361 , entityIsFeed = isFeed
362 , entityIsLocked = isLocked
363 , entityIsBinary = isBinary
364 , entityRevision = undefined
365 , entityLastMod = undefined
366 , entitySummary = summary
367 , entityOtherLang = M.fromList otherLang
368 , entityContent = content
369 , entityUpdateInfo = updateInfo
372 dropWhitespace :: String -> String
373 dropWhitespace [] = []
374 dropWhitespace (x:xs)
375 | x == ' ' || x == '\t' || x == '\n'
378 = x : dropWhitespace xs
381 parseUpdateInfo :: (ArrowXml a, ArrowChoice a) => a XmlTree UpdateInfo
384 -> do uInfo <- getXPathTreesInDoc "/page/updateInfo" -< tree
385 oldRev <- (getAttrValue0 "oldRevision" >>> arr read) -< uInfo
386 oldName <- maybeA (getXPathTrees "/updateInfo/move/@from/text()" >>> getText) -< uInfo
387 returnA -< UpdateInfo {
388 uiOldRevision = oldRev
389 , uiOldName = oldName