From e751af5e3d23d7757f363bf4e86f9d732d90be7f Mon Sep 17 00:00:00 2001 From: pho Date: Sun, 3 Feb 2008 14:57:57 +0900 Subject: [PATCH] continue working on page search darcs-hash:20080203055757-62b54-3e75f47b24b53a26de98d03e0c6d786872b24d66.gz --- Main.hs | 16 +++-- Rakka.cabal | 1 + Rakka/Resource/PageEntity.hs | 18 ++--- Rakka/Resource/Search.hs | 102 +++++++++++++++++++++++++++++ Rakka/Storage.hs | 6 +- Rakka/Storage/Impl.hs | 31 ++++++--- Rakka/Storage/Types.hs | 19 +++++- Rakka/Wiki/Interpreter/PageList.hs | 14 ++-- 8 files changed, 171 insertions(+), 36 deletions(-) create mode 100644 Rakka/Resource/Search.hs diff --git a/Main.hs b/Main.hs index 4328e70..1e670c8 100644 --- a/Main.hs +++ b/Main.hs @@ -11,6 +11,7 @@ import Rakka.Resource.JavaScript import Rakka.Resource.PageEntity import Rakka.Resource.Object import Rakka.Resource.Render +import Rakka.Resource.Search import Rakka.Resource.TrackBack import Rakka.Storage import Subversion @@ -138,12 +139,15 @@ main = withSubversion $ resTree :: Environment -> ResTree resTree env - = mkResTree [ ([] , resIndex env) - , (["checkAuth"], resCheckAuth env) - , (["js" ], javaScript ) - , (["object" ], resObject env) - , (["render" ], resRender env) - , (["trackback"], resTrackBack env) + = mkResTree [ ([] , resIndex env) + , (["checkAuth" ], resCheckAuth env) + , (["js" ], javaScript ) + , (["object" ], resObject env) + , (["render" ], resRender env) + , (["search" ], resSearch env) + , (["search.html"], resSearch env) + , (["search.xml" ], resSearch env) + , (["trackback" ], resTrackBack env) ] diff --git a/Rakka.cabal b/Rakka.cabal index 32a2c2a..7425209 100644 --- a/Rakka.cabal +++ b/Rakka.cabal @@ -71,6 +71,7 @@ Executable rakka Rakka.Resource.Object Rakka.Resource.PageEntity Rakka.Resource.Render + Rakka.Resource.Search Rakka.Resource.TrackBack Rakka.Storage Rakka.Storage.DefaultPage diff --git a/Rakka/Resource/PageEntity.hs b/Rakka/Resource/PageEntity.hs index d71b53a..1dd185f 100644 --- a/Rakka/Resource/PageEntity.hs +++ b/Rakka/Resource/PageEntity.hs @@ -103,19 +103,9 @@ handleGetEntity :: (ArrowXml a, ArrowChoice a, ArrowIO a) => Environment -> a Pa handleGetEntity env = proc page -> do tree <- xmlizePage -< page - returnA -< do -- text/x-rakka の場合は、内容が動的に生成され - -- てゐる可能性があるので、ETag も - -- Last-Modified も返す事が出來ない。 - case entityType page of - MIMEType "text" "x-rakka" _ - -> return () - _ -> case entityRevision page of - 0 -> foundTimeStamp (entityLastMod page) -- 0 はデフォルトページ - rev -> foundEntity (strongETag $ show rev) (entityLastMod page) - - outputXmlPage tree [ (MIMEType "application" "xhtml+xml" [], entityToXHTML env) - , (MIMEType "application" "rss+xml" [], entityToRSS env) - ] + returnA -< outputXmlPage tree [ (MIMEType "application" "xhtml+xml" [], entityToXHTML env) + , (MIMEType "application" "rss+xml" [], entityToRSS env) + ] entityToXHTML :: (ArrowXml a, ArrowChoice a, ArrowIO a) => Environment -> a XmlTree XmlTree @@ -560,7 +550,7 @@ findFeeds sto addAttrCond cond "rakka:isFeed STREQ yes" setOrder cond "@uri STRA" result <- searchPages sto cond - return (map fst result) + return (map srPageName result) mkFeedURIStr :: URI -> PageName -> String diff --git a/Rakka/Resource/Search.hs b/Rakka/Resource/Search.hs new file mode 100644 index 0000000..8271640 --- /dev/null +++ b/Rakka/Resource/Search.hs @@ -0,0 +1,102 @@ +module Rakka.Resource.Search + ( resSearch + ) + where + +import Control.Arrow +import Control.Arrow.ArrowIO +import Control.Arrow.ArrowList +import Control.Monad.Trans +import Data.Maybe +import Network.HTTP.Lucu +import Rakka.Environment +import Rakka.Resource +import Rakka.Storage +import Text.HyperEstraier +import Text.XML.HXT.Arrow.XmlArrow +import Text.XML.HXT.DOM.TypeDefs + + +resSearch :: Environment -> ResourceDef +resSearch env + = ResourceDef { + resUsesNativeThread = False + , resIsGreedy = False + , resGet = Just $ handleSearch env + , resHead = Nothing + , resPost = Nothing + , resPut = Nothing + , resDelete = Nothing + } + + +{- + + + + aaa foo bbb + + + ... + +-} +handleSearch :: Environment -> Resource () +handleSearch env + = do params <- getQueryForm + + let query = fromMaybe "" $ lookup "q" params + from = read $ fromMaybe "0" $ lookup "from" params + to = read $ fromMaybe "20" $ lookup "to" params + + cond <- liftIO $ mkCond query from to + result <- searchPages (envStorage env) cond + + let to' = min (from + length result) to + + runIdempotentA $ proc () + -> do tree <- ( eelem "/" + += ( eelem "searchResult" + += sattr "query" query + += sattr "from" (show from) + += sattr "to" (show to') + += sattr "total" (show $ length result) + += ( constL result + >>> + mkPageElem + ) + ) + ) -< () + returnA -< outputXmlPage' tree (searchResultToXHTML env) + where + mkCond :: String -> Int -> Int -> IO Condition + mkCond query from to + = do cond <- newCondition + setPhrase cond query + setSkip cond from + setMax cond (to - from) + return cond + + mkPageElem :: ArrowXml a => a SearchResult XmlTree + mkPageElem = ( eelem "page" + += attr "name" (arr srPageName >>> mkText) + += ( arrL srSnippet + >>> + mkSnippetTree + ) + ) + + mkSnippetTree :: ArrowXml a => a SnippetFragment XmlTree + mkSnippetTree = proc fragment + -> case fragment of + NormalText t -> txt t + HighlightedWord w -> eelem "hit" += txt w + -<< () + + +searchResultToXHTML :: (ArrowXml a, ArrowChoice a, ArrowIO a) => Environment -> a XmlTree XmlTree +searchResultToXHTML env + = proc tree + -> this -< tree diff --git a/Rakka/Storage.hs b/Rakka/Storage.hs index d26f713..03b8c54 100644 --- a/Rakka/Storage.hs +++ b/Rakka/Storage.hs @@ -1,6 +1,10 @@ module Rakka.Storage ( Storage + -- re-export from Rakka.Storage.Types + , SearchResult(..) + , SnippetFragment(..) + , mkStorage -- private , getPage @@ -88,7 +92,7 @@ getDirContentsA :: ArrowIO a => Storage -> a (PageName, Maybe RevNum) [PageName] getDirContentsA = arrIO2 . getDirContents -searchPages :: MonadIO m => Storage -> Condition -> m [(PageName, RevNum)] +searchPages :: MonadIO m => Storage -> Condition -> m [SearchResult] searchPages sto cond = liftIO $ do var <- newEmptyTMVarIO diff --git a/Rakka/Storage/Impl.hs b/Rakka/Storage/Impl.hs index fed687f..c763a30 100644 --- a/Rakka/Storage/Impl.hs +++ b/Rakka/Storage/Impl.hs @@ -18,6 +18,7 @@ import Data.Set (Set) import qualified Data.Set as S import Network.HTTP.Lucu import Network.URI +import Prelude hiding (words) import Rakka.Attachment import Rakka.Page import Rakka.Storage.DefaultPage @@ -29,6 +30,7 @@ import Subversion.Repository import System.Directory import System.FilePath import System.IO +import System.IO.Unsafe import System.Log.Logger import Text.HyperEstraier hiding (WriteLock) @@ -183,16 +185,29 @@ syncIndex' index revFile repos mkDraft mapM_ (updateIndex index repos mkDraft newRev) (S.toList pages) -searchIndex :: Database -> Condition -> IO [(PageName, RevNum)] +searchIndex :: Database -> Condition -> IO [SearchResult] searchIndex index cond - = searchDatabase index cond >>= mapM fromId + = do (ids, hint) <- searchDatabase' index cond + mapM (fromId $ map fst hint) ids where - fromId :: DocumentID -> IO (PageName, RevNum) - fromId docId - = do uri <- getDocURI index docId - rev <- getDocAttr index docId "rakka:revision" - >>= return . read . fromJust - return (decodePageName $ uriPath uri, rev) + fromId :: [String] -> DocumentID -> IO SearchResult + fromId words docId + = do uri <- getDocURI index docId + rev <- getDocAttr index docId "rakka:revision" + >>= return . read . fromJust + snippet <- unsafeInterleaveIO $ + do doc <- getDocument index docId [NoAttributes, NoKeywords] + sn <- makeSnippet doc words 300 80 80 + return (map toFragment sn) + return SearchResult { + srPageName = decodePageName $ uriPath uri + , srPageRev = rev + , srSnippet = snippet + } + + toFragment :: Either String (String, String) -> SnippetFragment + toFragment (Left t) = NormalText t + toFragment (Right (w, _)) = HighlightedWord w updateIndex :: Database diff --git a/Rakka/Storage/Types.hs b/Rakka/Storage/Types.hs index 06870e6..2944f28 100644 --- a/Rakka/Storage/Types.hs +++ b/Rakka/Storage/Types.hs @@ -1,6 +1,8 @@ module Rakka.Storage.Types ( Storage(..) , IndexReq(..) + , SearchResult(..) + , SnippetFragment(..) ) where @@ -21,4 +23,19 @@ data Storage data IndexReq = RebuildIndex | SyncIndex - | SearchIndex !Condition !(TMVar [(PageName, RevNum)]) + | SearchIndex !Condition !(TMVar [SearchResult]) + + +data SearchResult + = SearchResult { + srPageName :: !PageName + , srPageRev :: !RevNum + , srSnippet :: [SnippetFragment] + } + deriving (Show, Eq) + + +data SnippetFragment + = NormalText !String + | HighlightedWord !String + deriving (Show, Eq) \ No newline at end of file diff --git a/Rakka/Wiki/Interpreter/PageList.hs b/Rakka/Wiki/Interpreter/PageList.hs index 1ad6728..6afae0f 100644 --- a/Rakka/Wiki/Interpreter/PageList.hs +++ b/Rakka/Wiki/Interpreter/PageList.hs @@ -44,22 +44,24 @@ recentUpdatesInterp sto = ctxStorage ctx cond <- newCondition - setPhrase cond "[UVSET]" when onlyEntity $ addAttrCond cond "@type STRNE application/x-rakka-redirection" when onlySummarized $ addAttrCond cond "rakka:summary STRNE" -- summary が空でない - setOrder cond "@mdate NUMD" - setMax cond items + setPhrase cond "[UVSET]" + setOrder cond "@mdate NUMD" + setMax cond items result <- searchPages sto cond - pages <- mapM ( \ (name, rev) - -> getPage sto name (Just rev) >>= return . fromJust - ) result + pages <- mapM (getPageBySR sto) result mkPageList showSummary pages } where + getPageBySR :: Storage -> SearchResult -> IO Page + getPageBySR sto sr + = getPage sto (srPageName sr) (Just (srPageRev sr)) >>= return . fromJust + mkPageList :: Bool -> [Page] -> IO BlockElement mkPageList showSummary pages = do items <- mapM (mkListItem showSummary) pages -- 2.40.0