X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=Rakka%2FWiki%2FParser.hs;h=e7ca8ebbc456e063587cfed6767e0ee1bff92adb;hb=HEAD;hp=4d936f95f2995929ae22b03d9a3a90ed08bddc35;hpb=fa3a9d0aecede2431e669ef33885a116f7d5f0be;p=Rakka.git diff --git a/Rakka/Wiki/Parser.hs b/Rakka/Wiki/Parser.hs index 4d936f9..e7ca8eb 100644 --- a/Rakka/Wiki/Parser.hs +++ b/Rakka/Wiki/Parser.hs @@ -1,17 +1,30 @@ +{-# LANGUAGE + OverloadedStrings + , RankNTypes + , UnicodeSyntax + , ViewPatterns + #-} module Rakka.Wiki.Parser ( CommandTypeOf , wikiPage ) where - -import Data.Maybe -import Network.URI -import Rakka.Wiki -import Text.ParserCombinators.Parsec - - -type CommandTypeOf = String -> Maybe CommandType - +-- FIXME: use attoparsec +import Control.Applicative hiding ((<|>), many) +import Control.Applicative.Unicode +import Control.Monad +import Data.CaseInsensitive (CI) +import qualified Data.CaseInsensitive as CI +import Data.Maybe +import Data.Monoid.Unicode ((⊕)) +import Data.Text (Text) +import qualified Data.Text as T +import Network.URI hiding (fragment) +import Prelude.Unicode +import Rakka.Wiki +import Text.ParserCombinators.Parsec hiding (label) + +type CommandTypeOf = Alternative f ⇒ Text → f CommandType wikiPage :: CommandTypeOf -> Parser WikiPage wikiPage cmdTypeOf @@ -40,30 +53,29 @@ blockElement cmdTypeOf , blockCmd cmdTypeOf ] - -heading :: Parser BlockElement +heading ∷ Parser BlockElement heading = foldr (<|>) pzero (map heading' [1..5]) "heading" where - heading' :: Int -> Parser BlockElement - heading' n = do try $ do count n (char '=') - notFollowedBy (char '=') + heading' ∷ Int → Parser BlockElement + heading' n = do try ( void (count n (char '=')) *> + notFollowedBy (char '=') + ) ws - x <- notFollowedBy (char '=') >> anyChar - xs <- manyTill anyChar (try $ ws >> ( count n (char '=') - - ("trailing " ++ take n (repeat '=')) - ) - ) + x ← notFollowedBy (char '=') *> anyChar + xs ← manyTill anyChar (try $ ws *> ( count n (char '=') + + ("trailing " ++ replicate n '=') + ) + ) ws eol - return (Heading n (x:xs)) - + pure ∘ Heading n $ T.pack (x:xs) horizontalLine :: Parser BlockElement -horizontalLine = try ( do count 4 (char '-') - many (char '-') +horizontalLine = try ( do _ <- count 4 (char '-') + _ <- many (char '-') ws eol return HorizontalLine @@ -86,15 +98,15 @@ listElement cmdTypeOf = listElement' [] items stack = do xs <- many1 $ inlineElement cmdTypeOf nested <- option Nothing $ try $ do skipMany comment - newline - string stack - listElement' stack >>= return . Just + _ <- newline + _ <- string stack + liftM Just (listElement' stack) rest <- items stack return $ (map Inline xs ++ map Block (catMaybes [nested])) : rest <|> (try $ do skipMany comment - newline - string stack + _ <- newline + _ <- string stack ws items stack ) @@ -104,14 +116,15 @@ listElement cmdTypeOf = listElement' [] toType :: Char -> ListType toType '*' = Bullet toType '#' = Numbered + toType _ = undefined definitionList :: CommandTypeOf -> Parser BlockElement -definitionList cmdTypeOf = many1 definition >>= return . DefinitionList +definitionList cmdTypeOf = liftM DefinitionList (many1 definition) where definition :: Parser Definition - definition = do char ';' - ws + definition = do _ <- char ';' + _ <- ws tHead <- inlineElement cmdTypeOf tRest <- term d <- description @@ -135,9 +148,9 @@ definitionList cmdTypeOf = many1 definition >>= return . DefinitionList xs <- description return (x:xs) <|> - try ( do newline - char ':' - ws + try ( do _ <- newline + _ <- char ':' + _ <- ws xs <- description return (Text "\n" : xs) ) @@ -149,23 +162,19 @@ definitionList cmdTypeOf = many1 definition >>= return . DefinitionList "description of term" -verbatim :: Parser BlockElement -verbatim = do try (string " + many (oneOf " \t\n") *> + (Preformatted ∘ (:[]) ∘ Text ∘ T.pack <$> verbatim') where verbatim' :: Parser String - verbatim' = do try (many (oneOf " \t\n") >> string "]>") - return [] + verbatim' = try (many (oneOf " \t\n") *> string "]>") *> pure [] <|> - do x <- anyChar - xs <- verbatim' - return (x:xs) + ((:) <$> anyChar ⊛ verbatim') leadingSpaced :: CommandTypeOf -> Parser BlockElement -leadingSpaced cmdTypeOf = (char ' ' >> leadingSpaced' >>= return . Preformatted) +leadingSpaced cmdTypeOf = liftM Preformatted (char ' ' >> leadingSpaced') "leading space" where @@ -174,42 +183,39 @@ leadingSpaced cmdTypeOf = (char ' ' >> leadingSpaced' >>= return . Preformatted) xs <- leadingSpaced' return (x:xs) <|> - try ( newline - >> - char ' ' - >> - leadingSpaced' - >>= - return . (Text "\n" :) + try ( liftM (Text "\n" :) ( newline + >> + char ' ' + >> + leadingSpaced' + ) ) <|> return [] -blockCommand :: Parser BlockElement -blockCommand = pzero -- not implemented - - paragraph :: CommandTypeOf -> Parser BlockElement -paragraph cmdTypeOf = paragraph' >>= return . Paragraph +paragraph cmdTypeOf = liftM Paragraph paragraph' where paragraph' :: Parser [InlineElement] paragraph' = do x <- inlineElement cmdTypeOf - xs <- try ( do newline - eof + xs <- try ( do _ <- newline + _ <- eof return [] -- \n で文字列が終はってゐたら、ここ -- で終了。 ) <|> - try ( do newline - ((oneOf ('\n':blockSymbols) >> pzero) <|> return ()) + try ( do _ <- newline + _ <- ((oneOf ('\n':blockSymbols) >> pzero) <|> return ()) ys <- (paragraph' <|> return []) return (Text "\n" : ys) -- \n があり、その次に \n または - -- blockSymbols があれば、fail して - -- 最初の newline を讀んだ所まで卷き - -- 戻す。 + -- blockSymbols があれば、fail して最 + -- 初の newline を讀んだ所まで卷き戻 + -- す。oneOf が一文字消費しているので、 + -- <|> は右辺を適用せずに try まで戻 + -- る。 ) <|> paragraph' @@ -227,55 +233,48 @@ blockCmd cmdTypeOf Just BlockCommandType -> do xs <- contents closeTag tagName - return $ BlockCmd $ BlockCommand { + return $ BlockCmd BlockCommand { bCmdName = tagName , bCmdAttributes = tagAttrs , bCmdContents = xs } - Just InlineCommandType -> pzero - _ -> return $ undefinedCmdErr tagName ) <|> (try $ do (tagName, tagAttrs) <- emptyTag case cmdTypeOf tagName of Just BlockCommandType - -> return $ BlockCmd $ BlockCommand { + -> return $ BlockCmd BlockCommand { bCmdName = tagName , bCmdAttributes = tagAttrs , bCmdContents = [] } - Just InlineCommandType -> pzero - _ -> return $ undefinedCmdErr tagName ) "block command" where - contents :: Parser [BlockElement] - contents = do x <- blockElement cmdTypeOf - xs <- contents - return (x:xs) + contents ∷ Parser [BlockElement] + contents = ((:) <$> blockElement cmdTypeOf ⊛ contents) <|> - (newline >> contents) + (newline *> contents) <|> - (comment >> contents) + (comment *> contents) <|> - return [] + pure [] - undefinedCmdErr :: String -> BlockElement + undefinedCmdErr ∷ Text → BlockElement undefinedCmdErr name = Div [("class", "error")] - [ Block (Paragraph [Text ("The command `" ++ name ++ "' is not defined. " ++ + [ Block (Paragraph [Text ("The command `" ⊕ name ⊕ "' is not defined. " ⊕ "Make sure you haven't mistyped.") ]) ] - inlineElement :: CommandTypeOf -> Parser InlineElement inlineElement cmdTypeOf = try $ do skipMany comment @@ -288,36 +287,24 @@ inlineElement cmdTypeOf , inlineCmd cmdTypeOf ] - -nowiki :: Parser InlineElement -nowiki = try (string "> nowiki' >>= return . Text +nowiki ∷ Parser InlineElement +nowiki = Text ∘ T.pack <$> (try (string " nowiki') where - nowiki' :: Parser String - nowiki' = do try (string "]>") - return [] + nowiki' ∷ Parser String + nowiki' = (try (string "]>") *> pure []) <|> - do x <- anyChar - xs <- nowiki' - return (x:xs) - - -text :: Parser InlineElement -text = ( char ':' - >> - many (noneOf ('\n':inlineSymbols)) - >>= - return . Text . (':' :) - -- 定義リストとの關係上、コロンは先頭にしか來れない。 - ) + ((:) <$> anyChar ⊛ nowiki') + +text ∷ Parser InlineElement +text = (Text ∘ T.pack ∘ (':' :) <$> ( char ':' *> + many (noneOf ('\n':inlineSymbols)) + )) + -- 定義リストとの關係上、コロンは先頭にしか來られない。 <|> - ( many1 (noneOf ('\n':inlineSymbols)) - >>= - return . Text - ) + (Text ∘ T.pack <$> (many1 (noneOf ('\n':inlineSymbols)))) "text" - apostrophes :: CommandTypeOf -> Parser InlineElement apostrophes cmdTypeOf = foldr (<|>) pzero (map try [apos1, apos2, apos3, apos4, apos5]) where @@ -344,69 +331,63 @@ apostrophes cmdTypeOf = foldr (<|>) pzero (map try [apos1, apos2, apos3, apos4, apos n = count n (char '\'') >> notFollowedBy (char '\'') -objLink :: Parser InlineElement -objLink = do try (string "[[[") - page <- many1 (noneOf "|]") - text <- option Nothing - (char '|' >> many1 (satisfy (/= ']')) >>= return . Just) - string "]]]" - return $ ObjectLink page text +objLink ∷ Parser InlineElement +objLink = do void $ try (string "[[[") + page ← many1 (noneOf "|]") + label ← option Nothing $ + Just <$> (char '|' *> many1 (satisfy (≠ ']'))) + void $ string "]]]" + pure $ ObjectLink (T.pack page) (T.pack <$> label) "object link" - -pageLink :: Parser InlineElement -pageLink = do try (string "[[") - page <- option Nothing - (many1 (noneOf "#|]") >>= return . Just) - fragment <- option Nothing - (char '#' >> many1 (noneOf "|]") >>= return . Just) - text <- option Nothing - (char '|' >> many1 (satisfy (/= ']')) >>= return . Just) - - case (page, fragment) of - (Nothing, Nothing) -> pzero - (_, _) -> return () - - string "]]" - return $ PageLink page fragment text +pageLink ∷ Parser InlineElement +pageLink = do void $ try (string "[[") + page ← option Nothing $ + Just <$> many1 (noneOf "#|]") + fragment ← option Nothing $ + Just <$> (char '#' *> many1 (noneOf "|]")) + label ← option Nothing $ + Just <$> (char '|' *> many1 (satisfy (≠ ']'))) + when (isNothing page ∧ isNothing fragment) (∅) + void $ string "]]" + pure $ PageLink (T.pack <$> page ) + (T.pack <$> fragment) + (T.pack <$> label ) "page link" - -extLink :: Parser InlineElement -extLink = do char '[' - uriStr <- many1 (noneOf " \t]") - skipMany (oneOf " \t") - text <- option Nothing - (many1 (noneOf "]") >>= return . Just) - +extLink ∷ Parser InlineElement +extLink = do void $ char '[' + uriStr ← many1 (noneOf " \t]") + void $ skipMany (oneOf " \t") + label ← option Nothing $ + Just <$> many1 (noneOf "]") case parseURI uriStr of - Just uri -> char ']' >> return (ExternalLink uri text) - Nothing -> pzero "absolute URI" + Just uri → char ']' *> pure (ExternalLink uri (T.pack <$> label)) + Nothing → pzero "absolute URI" "external link" - -inlineCmd :: CommandTypeOf -> Parser InlineElement +inlineCmd ∷ CommandTypeOf → Parser InlineElement inlineCmd cmdTypeOf - = (try $ do (tagName, tagAttrs) <- openTag + = (try $ do (tagName, tagAttrs) ← openTag case cmdTypeOf tagName of Just InlineCommandType - -> do xs <- contents - closeTag tagName - return $ InlineCmd $ InlineCommand { + → do xs ← contents + closeTag tagName + pure $ InlineCmd InlineCommand { iCmdName = tagName , iCmdAttributes = tagAttrs , iCmdContents = xs } - _ -> pzero + _ → pzero ) <|> (try $ do (tagName, tagAttrs) <- emptyTag case cmdTypeOf tagName of Just InlineCommandType - -> return $ InlineCmd $ InlineCommand { + -> return $ InlineCmd InlineCommand { iCmdName = tagName , iCmdAttributes = tagAttrs , iCmdContents = [] @@ -416,62 +397,58 @@ inlineCmd cmdTypeOf "inline command" where - contents :: Parser [InlineElement] - contents = do x <- inlineElement cmdTypeOf - xs <- contents - return (x:xs) + contents ∷ Parser [InlineElement] + contents = ((:) <$> inlineElement cmdTypeOf ⊛ contents) <|> - (comment >> contents) + (comment *> contents) <|> - (newline >> contents >>= return . (Text "\n" :)) + ((Text "\n" :) <$> (newline *> contents)) <|> - return [] - - -openTag :: Parser (String, [Attribute]) -openTag = try $ do char '<' - many space - name <- many1 letter - many space - attrs <- many $ do attr <- tagAttr - many space - return attr - char '>' - return (name, attrs) - - -emptyTag :: Parser (String, [Attribute]) -emptyTag = try $ do char '<' - many space - name <- many1 letter - many space - attrs <- many $ do attr <- tagAttr - many space - return attr - char '/' - many space - char '>' - return (name, attrs) - - -closeTag :: String -> Parser () -closeTag name = try $ do char '<' - many space - char '/' - many space - string name - many space - char '>' - return () - - -tagAttr :: Parser (String, String) -tagAttr = do name <- many1 letter - char '=' - char '"' - value <- many (satisfy (/= '"')) - char '"' - return (name, value) + pure [] + +openTag ∷ Parser (Text, [Attribute]) +openTag = try $ do void $ char '<' + void $ many space + name ← many1 letter + void $ many space + attrs ← many $ do attr ← tagAttr + void $ many space + pure attr + void $ char '>' + return (T.pack name, attrs) + +emptyTag ∷ Parser (Text, [Attribute]) +emptyTag = try $ do void $ char '<' + void $ many space + name ← many1 letter + void $ many space + attrs ← many $ do attr ← tagAttr + void $ many space + pure attr + void $ char '/' + void $ many space + void $ char '>' + return (T.pack name, attrs) + +closeTag ∷ Text → Parser () +closeTag (T.unpack → name) + = try ( char '<' *> + many space *> + char '/' *> + many space *> + string name *> + many space *> + char '>' *> + pure () + ) + +tagAttr ∷ Parser (CI Text, Text) +tagAttr = do name ← many1 letter + void $ char '=' + void $ char '"' + value ← many (satisfy (≠ '"')) + void $ char '"' + return (CI.mk $ T.pack name, T.pack value) comment :: Parser () @@ -480,14 +457,13 @@ comment = (try (string "") >> case level of - 1 -> return () - n -> skipTillEnd (n - 1)) - <|> - (anyChar >> skipTillEnd level) - ) + skipTillEnd level = (try (string "") >> case level of + 1 -> return () + n -> skipTillEnd (n - 1)) + <|> + (anyChar >> skipTillEnd level) blockSymbols :: [Char] @@ -506,7 +482,6 @@ ws = skipMany ( (oneOf " \t" >> return ()) -- end of line eol :: Parser () -eol = ( (newline >> return ()) - <|> - eof - ) +eol = (newline >> return ()) + <|> + eof