35 import Control.Applicative
37 import Control.Arrow.ArrowIO
38 import Control.Arrow.ArrowList
39 import Control.Arrow.Unicode
40 import qualified Codec.Binary.UTF8.String as UTF8
41 import qualified Data.ByteString.Char8 as B8
42 import qualified Data.ByteString.Lazy as Lazy (ByteString)
43 import qualified Data.ByteString.Lazy as L hiding (ByteString)
44 import qualified Data.ByteString.Lazy.Char8 as L8 hiding (ByteString)
45 import Data.CaseInsensitive (CI)
46 import qualified Data.CaseInsensitive as CI
49 import qualified Data.Map as M
50 import Data.Text (Text)
51 import qualified Data.Text as T
52 import Data.Text.Encoding
54 import qualified Data.Time.W3C as W3C
55 import Network.HTTP.Lucu hiding (redirect)
56 import Network.URI hiding (fragment)
57 import OpenSSL.EVP.Base64
58 import Prelude.Unicode
60 import Subversion.Types
61 import System.FilePath.Posix
62 import Text.XML.HXT.Arrow.XmlArrow
63 import Text.XML.HXT.DOM.TypeDefs
64 import Text.XML.HXT.XPath
67 type LanguageTag = CI Text -- See RFC 3066: http://www.ietf.org/rfc/rfc3066.txt
68 type LanguageName = Text -- i.e. "日本語"
72 redirName :: !PageName
73 , redirDest :: !PageName
74 , redirIsLocked :: !Bool
75 , redirRevision :: RevNum
76 , redirLastMod :: UTCTime
77 , redirUpdateInfo :: Maybe UpdateInfo
80 entityName :: !PageName
81 , entityType :: !MIMEType
82 , entityLanguage :: !(Maybe LanguageTag)
83 , entityIsTheme :: !Bool -- text/css 以外では無意味
84 , entityIsFeed :: !Bool -- text/x-rakka 以外では無意味
85 , entityIsLocked :: !Bool
86 , entityIsBinary :: !Bool
87 , entityRevision :: RevNum
88 , entityLastMod :: UTCTime
89 , entitySummary :: !(Maybe String)
90 , entityOtherLang :: !(Map LanguageTag PageName)
91 , entityContent :: !Lazy.ByteString
92 , entityUpdateInfo :: Maybe UpdateInfo
98 uiOldRevision :: !RevNum
99 , uiOldName :: !(Maybe PageName)
104 isRedirect :: Page -> Bool
105 isRedirect (Redirection _ _ _ _ _ _) = True
109 isEntity :: Page -> Bool
110 isEntity (Entity _ _ _ _ _ _ _ _ _ _ _ _ _) = True
114 pageName :: Page -> PageName
116 | isRedirect p = redirName p
117 | isEntity p = entityName p
118 | otherwise = error "neither redirection nor entity"
121 pageUpdateInfo :: Page -> Maybe UpdateInfo
123 | isRedirect p = redirUpdateInfo p
124 | isEntity p = entityUpdateInfo p
125 | otherwise = error "neither redirection nor entity"
128 pageRevision :: Page -> RevNum
130 | isRedirect p = redirRevision p
131 | isEntity p = entityRevision p
132 | otherwise = error "neither redirection nor entity"
135 -- UTF-8 に encode してから 0x20 - 0x7E の範圍を除いて URI escape する。
136 encodePageName ∷ PageName → FilePath
137 encodePageName = escapeURIString isSafeChar ∘ UTF8.encodeString ∘ fixPageName ∘ T.unpack
139 fixPageName ∷ String → String
140 fixPageName = capitalizeHead ∘ map (\c → if c ≡ ' ' then '_' else c)
142 capitalizeHead ∷ String → String
143 capitalizeHead [] = (⊥)
144 capitalizeHead (x:xs) = toUpper x : xs
146 -- FIXME: use system-filepath
147 decodePageName ∷ FilePath → PageName
148 decodePageName = T.pack ∘ UTF8.decodeString ∘ unEscapeString
150 encodeFragment ∷ Text → String
151 encodeFragment = escapeURIString isSafeChar ∘ B8.unpack ∘ encodeUtf8
153 mkPageURI ∷ URI → PageName → URI
154 mkPageURI baseURI name
156 uriPath = uriPath baseURI </> encodePageName name <.> "html"
159 mkPageFragmentURI ∷ URI → PageName → Text → URI
160 mkPageFragmentURI baseURI name fragment
162 uriPath = uriPath baseURI </> encodePageName name <.> "html"
163 , uriFragment = ('#' : encodeFragment fragment)
166 mkFragmentURI ∷ Text → URI
167 mkFragmentURI fragment
169 uriFragment = ('#' : encodeFragment fragment)
173 mkObjectURI :: URI -> PageName -> URI
174 mkObjectURI baseURI name
175 = mkAuxiliaryURI baseURI ["object"] name
178 mkAuxiliaryURI :: URI -> [String] -> PageName -> URI
179 mkAuxiliaryURI baseURI basePath name
181 uriPath = foldl (</>) "/" ([uriPath baseURI] ++ basePath ++ [encodePageName name])
185 mkFeedURI :: URI -> PageName -> URI
186 mkFeedURI baseURI name
188 uriPath = uriPath baseURI </> encodePageName name <.> "rdf"
192 mkRakkaURI :: PageName -> URI
193 mkRakkaURI name = URI {
195 , uriAuthority = Nothing
196 , uriPath = encodePageName name
205 lang="ja" -- 存在しない場合もある
206 isTheme="no" -- text/css の場合のみ存在
207 isFeed="no" -- text/x-rakka の場合のみ存在
211 lastModified="2000-01-01T00:00:00">
215 </summary> -- 存在しない場合もある
217 <otherLang> -- 存在しない場合もある
218 <link lang="ja" page="Bar/Baz" />
226 SKJaHKS8JK/DH8KS43JDK2aKKaSFLLS...
233 lastModified="2000-01-01T00:00:00" />
235 xmlizePage :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
238 -> if isRedirect page then
239 xmlizeRedirection -< page
243 xmlizeRedirection :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
246 -> do lastMod <- arrIO (utcToLocalZonedTime . redirLastMod) -< page
249 += sattr "name" (T.unpack $ redirName page )
250 += sattr "redirect" (T.unpack $ redirDest page )
251 += sattr "isLocked" (yesOrNo $ redirIsLocked page)
252 += sattr "revision" (show $ redirRevision page)
253 += sattr "lastModified" (W3C.format lastMod)
256 xmlizeEntity :: (ArrowXml a, ArrowChoice a, ArrowIO a) => a Page XmlTree
259 -> do lastMod <- arrIO (utcToLocalZonedTime . entityLastMod) -< page
262 += sattr "name" (T.unpack $ pageName page)
263 += sattr "type" (show $ entityType page)
264 += ( case entityLanguage page of
265 Just x -> sattr "lang" (T.unpack $ CI.foldedCase x)
268 += ( case entityType page of
269 MIMEType "text" "css" _
270 -> sattr "isTheme" (yesOrNo $ entityIsTheme page)
271 MIMEType "text" "x-rakka" _
272 -> sattr "isFeed" (yesOrNo $ entityIsFeed page)
276 += sattr "isLocked" (yesOrNo $ entityIsLocked page)
277 += sattr "isBinary" (yesOrNo $ entityIsBinary page)
278 += sattr "revision" (show $ entityRevision page)
279 += sattr "lastModified" (W3C.format lastMod)
280 += ( case entitySummary page of
281 Just s -> eelem "summary" += txt s
284 += ( if M.null (entityOtherLang page) then
289 += sattr "lang" (T.unpack $ CI.foldedCase lang)
290 += sattr "page" (T.unpack name)
291 | (lang, name) ← M.toList (entityOtherLang page) ]
293 += ( if entityIsBinary page then
295 += txt (L8.unpack $ encodeBase64LBS $ entityContent page)
299 += txt (UTF8.decode $ L.unpack $ entityContent page)
304 parseXmlizedPage ∷ (ArrowXml (⇝), ArrowChoice (⇝)) ⇒ (PageName, XmlTree) ⇝ Page
307 → do updateInfo ← maybeA parseUpdateInfo ⤙ tree
308 redirect ← maybeA (getXPathTreesInDoc "/page/@redirect/text()" ⋙ getText) ⤙ tree
309 isLocked ← (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" ⋙ getText) "no"
310 ⋙ parseYesOrNo) ⤙ tree
312 Nothing → parseEntity ⤙ (name, tree)
313 Just dest → returnA ⤙ Redirection {
315 , redirDest = T.pack dest
316 , redirIsLocked = isLocked
317 , redirRevision = undefined
318 , redirLastMod = undefined
319 , redirUpdateInfo = updateInfo
322 parseEntity :: (ArrowXml a, ArrowChoice a) => a (PageName, XmlTree) Page
325 -> do updateInfo <- maybeA parseUpdateInfo -< tree
327 mimeTypeStr <- withDefault (getXPathTreesInDoc "/page/@type/text()" >>> getText) "" -< tree
329 lang <- maybeA (getXPathTreesInDoc "/page/@lang/text()" >>> getText) -< tree
331 isTheme <- (withDefault (getXPathTreesInDoc "/page/@isTheme/text()" >>> getText) "no"
332 >>> parseYesOrNo) -< tree
333 isFeed <- (withDefault (getXPathTreesInDoc "/page/@isFeed/text()" >>> getText) "no"
334 >>> parseYesOrNo) -< tree
335 isLocked <- (withDefault (getXPathTreesInDoc "/page/@isLocked/text()" >>> getText) "no"
336 >>> parseYesOrNo) -< tree
338 summary <- (maybeA (getXPathTreesInDoc "/page/summary/text()"
340 >>> deleteIfEmpty)) -< tree
342 otherLang <- listA (getXPathTreesInDoc "/page/otherLang/link"
344 (getAttrValue0 "lang"
346 getAttrValue0 "page")) -< tree
348 textData <- maybeA (getXPathTreesInDoc "/page/textData/text()" >>> getText) -< tree
349 binaryData <- maybeA (getXPathTreesInDoc "/page/binaryData/text()" >>> getText) -< tree
351 let (isBinary, content)
352 = case (textData, binaryData) of
353 (Just text, Nothing ) -> (False, L.pack $ UTF8.encode text)
354 (Nothing , Just binary) -> (True , L8.pack $ decodeBase64 $ dropWhitespace binary)
355 _ -> error "one of textData or binaryData is required"
358 if null mimeTypeStr then
359 guessMIMEType content
366 , entityType = mimeType
367 , entityLanguage = CI.mk ∘ T.pack <$> lang
368 , entityIsTheme = isTheme
369 , entityIsFeed = isFeed
370 , entityIsLocked = isLocked
371 , entityIsBinary = isBinary
372 , entityRevision = undefined
373 , entityLastMod = undefined
374 , entitySummary = summary
375 , entityOtherLang = M.fromList ((CI.mk ∘ T.pack ⁂ T.pack) <$> otherLang)
376 , entityContent = content
377 , entityUpdateInfo = updateInfo
380 parseUpdateInfo ∷ (ArrowXml (⇝), ArrowChoice (⇝)) ⇒ XmlTree ⇝ UpdateInfo
383 -> do uInfo ← getXPathTreesInDoc "/page/updateInfo" ⤙ tree
384 oldRev ← (getAttrValue0 "oldRevision" ⋙ arr read) ⤙ uInfo
385 oldName ← maybeA (getXPathTrees "/updateInfo/move/@from/text()" ⋙ getText) ⤙ uInfo
386 returnA ⤙ UpdateInfo {
387 uiOldRevision = oldRev
388 , uiOldName = T.pack <$> oldName
391 dropWhitespace :: String -> String
392 {-# INLINE dropWhitespace #-}
393 dropWhitespace = filter ((¬) ∘ isSpace)