35 import Control.Applicative
37 import Control.Arrow.ArrowIO
38 import Control.Arrow.ArrowList
39 import Control.Arrow.Unicode
40 import qualified Codec.Binary.UTF8.String as UTF8
41 import qualified Data.ByteString.Char8 as B8
42 import qualified Data.ByteString.Lazy as Lazy (ByteString)
43 import qualified Data.ByteString.Lazy as L hiding (ByteString)
44 import qualified Data.ByteString.Lazy.Char8 as L8 hiding (ByteString)
45 import Data.CaseInsensitive (CI)
46 import qualified Data.CaseInsensitive as CI
49 import qualified Data.Map as M
50 import Data.Text (Text)
51 import qualified Data.Text as T
52 import Data.Text.Encoding
54 import Network.HTTP.Lucu hiding (redirect)
55 import Network.URI hiding (fragment)
56 import OpenSSL.EVP.Base64
57 import Prelude.Unicode
59 import Rakka.W3CDateTime
60 import Subversion.Types
61 import System.FilePath.Posix
62 import Text.XML.HXT.Arrow.XmlArrow
63 import Text.XML.HXT.DOM.TypeDefs
64 import Text.XML.HXT.XPath
67 type LanguageTag = CI Text -- See RFC 3066: http://www.ietf.org/rfc/rfc3066.txt
68 type LanguageName = Text -- i.e. "日本語"
73 redirName :: !PageName
74 , redirDest :: !PageName
75 , redirIsLocked :: !Bool
76 , redirRevision :: RevNum
77 , redirLastMod :: UTCTime
78 , redirUpdateInfo :: Maybe UpdateInfo
81 entityName :: !PageName
82 , entityType :: !MIMEType
83 , entityLanguage :: !(Maybe LanguageTag)
84 , entityIsTheme :: !Bool -- text/css 以外では無意味
85 , entityIsFeed :: !Bool -- text/x-rakka 以外では無意味
86 , entityIsLocked :: !Bool
87 , entityIsBinary :: !Bool
88 , entityRevision :: RevNum
89 , entityLastMod :: UTCTime
90 , entitySummary :: !(Maybe String)
91 , entityOtherLang :: !(Map LanguageTag PageName)
92 , entityContent :: !Lazy.ByteString
93 , entityUpdateInfo :: Maybe UpdateInfo
100 uiOldRevision :: !RevNum
101 , uiOldName :: !(Maybe PageName)
106 isRedirect :: Page -> Bool
107 isRedirect (Redirection _ _ _ _ _ _) = True
111 isEntity :: Page -> Bool
112 isEntity (Entity _ _ _ _ _ _ _ _ _ _ _ _ _) = True
116 pageName :: Page -> PageName
118 | isRedirect p = redirName p
119 | isEntity p = entityName p
120 | otherwise = error "neither redirection nor entity"
123 pageUpdateInfo :: Page -> Maybe UpdateInfo
125 | isRedirect p = redirUpdateInfo p
126 | isEntity p = entityUpdateInfo p
127 | otherwise = error "neither redirection nor entity"
130 pageRevision :: Page -> RevNum
132 | isRedirect p = redirRevision p
133 | isEntity p = entityRevision p
134 | otherwise = error "neither redirection nor entity"
137 -- UTF-8 に encode してから 0x20 - 0x7E の範圍を除いて URI escape する。
138 encodePageName ∷ PageName → FilePath
139 encodePageName = escapeURIString isSafeChar ∘ UTF8.encodeString ∘ fixPageName ∘ T.unpack
141 fixPageName ∷ String → String
142 fixPageName = capitalizeHead ∘ map (\c → if c ≡ ' ' then '_' else c)
144 capitalizeHead ∷ String → String
145 capitalizeHead [] = (⊥)
146 capitalizeHead (x:xs) = toUpper x : xs
148 -- FIXME: use system-filepath
149 decodePageName ∷ FilePath → PageName
150 decodePageName = T.pack ∘ UTF8.decodeString ∘ unEscapeString
152 encodeFragment ∷ Text → String
153 encodeFragment = escapeURIString isSafeChar ∘ B8.unpack ∘ encodeUtf8
155 mkPageURI ∷ URI → PageName → URI
156 mkPageURI baseURI name
158 uriPath = uriPath baseURI </> encodePageName name <.> "html"
161 mkPageFragmentURI ∷ URI → PageName → Text → URI
162 mkPageFragmentURI baseURI name fragment
164 uriPath = uriPath baseURI </> encodePageName name <.> "html"
165 , uriFragment = ('#' : encodeFragment fragment)
168 mkFragmentURI ∷ Text → URI
169 mkFragmentURI fragment
171 uriFragment = ('#' : encodeFragment fragment)
175 mkObjectURI :: URI -> PageName -> URI
176 mkObjectURI baseURI name
177 = mkAuxiliaryURI baseURI ["object"] name
180 mkAuxiliaryURI :: URI -> [String] -> PageName -> URI
181 mkAuxiliaryURI baseURI basePath name
183 uriPath = foldl (</>) "/" ([uriPath baseURI] ++ basePath ++ [encodePageName name])
187 mkFeedURI :: URI -> PageName -> URI
188 mkFeedURI baseURI name
190 uriPath = uriPath baseURI </> encodePageName name <.> "rdf"
194 mkRakkaURI :: PageName -> URI
195 mkRakkaURI name = URI {
197 , uriAuthority = Nothing
198 , uriPath = encodePageName name
207 lang="ja" -- 存在しない場合もある
208 isTheme="no" -- text/css の場合のみ存在
209 isFeed="no" -- text/x-rakka の場合のみ存在
213 lastModified="2000-01-01T00:00:00">
217 </summary> -- 存在しない場合もある
219 <otherLang> -- 存在しない場合もある
220 <link lang="ja" page="Bar/Baz" />
228 SKJaHKS8JK/DH8KS43JDK2aKKaSFLLS...
235 lastModified="2000-01-01T00:00:00" />
237 xmlizePage :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
240 -> if isRedirect page then
241 xmlizeRedirection -< page
245 xmlizeRedirection :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
248 -> do lastMod <- arrIO (utcToLocalZonedTime . redirLastMod) -< page
251 += sattr "name" (T.unpack $ redirName page )
252 += sattr "redirect" (T.unpack $ redirDest page )
253 += sattr "isLocked" (yesOrNo $ redirIsLocked page)
254 += sattr "revision" (show $ redirRevision page)
255 += sattr "lastModified" (formatW3CDateTime lastMod)
258 xmlizeEntity :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
261 -> do lastMod <- arrIO (utcToLocalZonedTime . entityLastMod) -< page
264 += sattr "name" (T.unpack $ pageName page)
265 += sattr "type" (show $ entityType page)
266 += ( case entityLanguage page of
267 Just x -> sattr "lang" (T.unpack $ CI.foldedCase x)
270 += ( case entityType page of
271 MIMEType "text" "css" _
272 -> sattr "isTheme" (yesOrNo $ entityIsTheme page)
273 MIMEType "text" "x-rakka" _
274 -> sattr "isFeed" (yesOrNo $ entityIsFeed page)
278 += sattr "isLocked" (yesOrNo $ entityIsLocked page)
279 += sattr "isBinary" (yesOrNo $ entityIsBinary page)
280 += sattr "revision" (show $ entityRevision page)
281 += sattr "lastModified" (formatW3CDateTime lastMod)
282 += ( case entitySummary page of
283 Just s -> eelem "summary" += txt s
286 += ( if M.null (entityOtherLang page) then
291 += sattr "lang" (T.unpack $ CI.foldedCase lang)
292 += sattr "page" (T.unpack name)
293 | (lang, name) ← M.toList (entityOtherLang page) ]
295 += ( if entityIsBinary page then
297 += txt (L8.unpack $ encodeBase64LBS $ entityContent page)
301 += txt (UTF8.decode $ L.unpack $ entityContent page)
306 parseXmlizedPage ∷ (ArrowXml (⇝), ArrowChoice (⇝)) ⇒ (PageName, XmlTree) ⇝ Page
309 → do updateInfo ← maybeA parseUpdateInfo ⤙ tree
310 redirect ← maybeA (getXPathTreesInDoc "/page/@redirect/text()" ⋙ getText) ⤙ tree
311 isLocked ← (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" ⋙ getText) "no"
312 ⋙ parseYesOrNo) ⤙ tree
314 Nothing → parseEntity ⤙ (name, tree)
315 Just dest → returnA ⤙ Redirection {
317 , redirDest = T.pack dest
318 , redirIsLocked = isLocked
319 , redirRevision = undefined
320 , redirLastMod = undefined
321 , redirUpdateInfo = updateInfo
324 parseEntity :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page
327 -> do updateInfo <- maybeA parseUpdateInfo -< tree
329 mimeTypeStr <- withDefault (getXPathTreesInDoc "/page/@type/text()" >>> getText) "" -< tree
331 lang <- maybeA (getXPathTreesInDoc "/page/@lang/text()" >>> getText) -< tree
333 isTheme <- (withDefault (getXPathTreesInDoc "/page/@isTheme/text()" >>> getText) "no"
334 >>> parseYesOrNo) -< tree
335 isFeed <- (withDefault (getXPathTreesInDoc "/page/@isFeed/text()" >>> getText) "no"
336 >>> parseYesOrNo) -< tree
337 isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no"
338 >>> parseYesOrNo) -< tree
340 summary <- (maybeA (getXPathTreesInDoc "/page/summary/text()"
342 >>> deleteIfEmpty)) -< tree
344 otherLang <- listA (getXPathTreesInDoc "/page/otherLang/link"
346 (getAttrValue0 "lang"
348 getAttrValue0 "page")) -< tree
350 textData <- maybeA (getXPathTreesInDoc "/page/textData/text()" >>> getText) -< tree
351 binaryData <- maybeA (getXPathTreesInDoc "/page/binaryData/text()" >>> getText) -< tree
353 let (isBinary, content)
354 = case (textData, binaryData) of
355 (Just text, Nothing ) -> (False, L.pack $ UTF8.encode text)
356 (Nothing , Just binary) -> (True , L8.pack $ decodeBase64 $ dropWhitespace binary)
357 _ -> error "one of textData or binaryData is required"
360 if null mimeTypeStr then
361 guessMIMEType content
368 , entityType = mimeType
369 , entityLanguage = CI.mk ∘ T.pack <$> lang
370 , entityIsTheme = isTheme
371 , entityIsFeed = isFeed
372 , entityIsLocked = isLocked
373 , entityIsBinary = isBinary
374 , entityRevision = undefined
375 , entityLastMod = undefined
376 , entitySummary = summary
377 , entityOtherLang = M.fromList ((CI.mk ∘ T.pack ⁂ T.pack) <$> otherLang)
378 , entityContent = content
379 , entityUpdateInfo = updateInfo
382 dropWhitespace :: String -> String
383 dropWhitespace [] = []
384 dropWhitespace (x:xs)
385 | x == ' ' || x == '\t' || x == '\n'
388 = x : dropWhitespace xs
390 parseUpdateInfo ∷ (ArrowXml (⇝), ArrowChoice (⇝)) ⇒ XmlTree ⇝ UpdateInfo
393 -> do uInfo ← getXPathTreesInDoc "/page/updateInfo" ⤙ tree
394 oldRev ← (getAttrValue0 "oldRevision" ⋙ arr read) ⤙ uInfo
395 oldName ← maybeA (getXPathTrees "/updateInfo/move/@from/text()" ⋙ getText) ⤙ uInfo
396 returnA ⤙ UpdateInfo {
397 uiOldRevision = oldRev
398 , uiOldName = T.pack <$> oldName