X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=Rakka%2FWiki%2FParser.hs;h=e7ca8ebbc456e063587cfed6767e0ee1bff92adb;hb=45bce2c29948649f74ada71f2fa851bdb812e96c;hp=db26a497b7eb66e15107b12b8fcd023f4eddfaa5;hpb=c2cc1241c50c8ff3843002526011574e77f669aa;p=Rakka.git diff --git a/Rakka/Wiki/Parser.hs b/Rakka/Wiki/Parser.hs index db26a49..e7ca8eb 100644 --- a/Rakka/Wiki/Parser.hs +++ b/Rakka/Wiki/Parser.hs @@ -1,68 +1,81 @@ +{-# LANGUAGE + OverloadedStrings + , RankNTypes + , UnicodeSyntax + , ViewPatterns + #-} module Rakka.Wiki.Parser - ( wikiPage + ( CommandTypeOf + , wikiPage ) where - -import Data.Maybe -import Rakka.Wiki -import Text.ParserCombinators.Parsec - - -wikiPage :: Parser WikiPage -wikiPage = do xs <- many (try blockElement) - skipMany ( comment - <|> - (newline >> return ()) - ) - eof - return xs +-- FIXME: use attoparsec +import Control.Applicative hiding ((<|>), many) +import Control.Applicative.Unicode +import Control.Monad +import Data.CaseInsensitive (CI) +import qualified Data.CaseInsensitive as CI +import Data.Maybe +import Data.Monoid.Unicode ((⊕)) +import Data.Text (Text) +import qualified Data.Text as T +import Network.URI hiding (fragment) +import Prelude.Unicode +import Rakka.Wiki +import Text.ParserCombinators.Parsec hiding (label) + +type CommandTypeOf = Alternative f ⇒ Text → f CommandType + +wikiPage :: CommandTypeOf -> Parser WikiPage +wikiPage cmdTypeOf + = do xs <- many (blockElement cmdTypeOf) + skipMany ( comment + <|> + (newline >> return ()) + ) + eof + return xs -blockElement :: Parser BlockElement -blockElement = skipMany ( comment +blockElement :: CommandTypeOf -> Parser BlockElement +blockElement cmdTypeOf + = try $ do skipMany ( comment <|> (newline >> return ()) ) - >> - ( heading - <|> - horizontalLine - <|> - listElement - <|> - definitionList - <|> - pdata - <|> - leadingSpaced - <|> - paragraph - ) - - -heading :: Parser BlockElement + foldr (<|>) pzero [ heading + , horizontalLine + , listElement cmdTypeOf + , definitionList cmdTypeOf + , verbatim + , leadingSpaced cmdTypeOf + , paragraph cmdTypeOf + , blockCmd cmdTypeOf + ] + +heading ∷ Parser BlockElement heading = foldr (<|>) pzero (map heading' [1..5]) "heading" where - heading' :: Int -> Parser BlockElement - heading' n = do try $ do count n (char '=') - notFollowedBy (char '=') + heading' ∷ Int → Parser BlockElement + heading' n = do try ( void (count n (char '=')) *> + notFollowedBy (char '=') + ) ws - x <- notFollowedBy (char '=') >> anyChar - xs <- manyTill anyChar (try $ ws >> ( count n (char '=') - - ("trailing " ++ take n (repeat '=')) - ) - ) + x ← notFollowedBy (char '=') *> anyChar + xs ← manyTill anyChar (try $ ws *> ( count n (char '=') + + ("trailing " ++ replicate n '=') + ) + ) ws eol - return (Heading n (x:xs)) - + pure ∘ Heading n $ T.pack (x:xs) horizontalLine :: Parser BlockElement -horizontalLine = try ( do count 4 (char '-') - many (char '-') +horizontalLine = try ( do _ <- count 4 (char '-') + _ <- many (char '-') ws eol return HorizontalLine @@ -71,28 +84,29 @@ horizontalLine = try ( do count 4 (char '-') "horizontal line" -listElement :: Parser BlockElement -listElement = listElement' [] >>= return . List +listElement :: CommandTypeOf -> Parser BlockElement +listElement cmdTypeOf = listElement' [] where - listElement' :: [Char] -> Parser ListElement + listElement' :: [Char] -> Parser BlockElement listElement' stack - = try $ do t <- oneOf "*#" - ws - xs <- items (stack ++ [t]) - return (ListElement (toType t) xs) + = do t <- oneOf "*#" + ws + xs <- items (stack ++ [t]) + return (List (toType t) xs) - -- ListItem の終了條件は、 items :: [Char] -> Parser [ListItem] - items stack = do xs <- many1 inlineElement + items stack = do xs <- many1 $ inlineElement cmdTypeOf nested <- option Nothing - $ try $ do newline - string stack - listElement' stack >>= return . Just + $ try $ do skipMany comment + _ <- newline + _ <- string stack + liftM Just (listElement' stack) rest <- items stack - return $ (map Right xs ++ map Left (catMaybes [nested])) : rest + return $ (map Inline xs ++ map Block (catMaybes [nested])) : rest <|> - (try $ do newline - string stack + (try $ do skipMany comment + _ <- newline + _ <- string stack ws items stack ) @@ -102,15 +116,16 @@ listElement = listElement' [] >>= return . List toType :: Char -> ListType toType '*' = Bullet toType '#' = Numbered + toType _ = undefined -definitionList :: Parser BlockElement -definitionList = many1 definition >>= return . DefinitionList +definitionList :: CommandTypeOf -> Parser BlockElement +definitionList cmdTypeOf = liftM DefinitionList (many1 definition) where definition :: Parser Definition - definition = do char ';' - ws - tHead <- inlineElement + definition = do _ <- char ';' + _ <- ws + tHead <- inlineElement cmdTypeOf tRest <- term d <- description return (Definition (tHead:tRest) d) @@ -122,20 +137,20 @@ definitionList = many1 definition >>= return . DefinitionList <|> (newline >> char ':' >> ws >> return []) <|> - do x <- inlineElement + do x <- inlineElement cmdTypeOf xs <- term return (x:xs) "term to be defined" description :: Parser [InlineElement] - description = do x <- inlineElement + description = do x <- inlineElement cmdTypeOf xs <- description return (x:xs) <|> - try ( do newline - char ':' - ws + try ( do _ <- newline + _ <- char ':' + _ <- ws xs <- description return (Text "\n" : xs) ) @@ -147,71 +162,63 @@ definitionList = many1 definition >>= return . DefinitionList "description of term" -pdata :: Parser BlockElement -pdata = do try (string " + many (oneOf " \t\n") *> + (Preformatted ∘ (:[]) ∘ Text ∘ T.pack <$> verbatim') where - pdata' :: Parser String - pdata' = do try (many (oneOf " \t\n") >> string "]]>") - return [] - <|> - do x <- anyChar - xs <- pdata' - return (x:xs) - - -leadingSpaced :: Parser BlockElement -leadingSpaced = (char ' ' >> leadingSpaced' >>= return . Preformatted) - - "leading space" + verbatim' :: Parser String + verbatim' = try (many (oneOf " \t\n") *> string "]>") *> pure [] + <|> + ((:) <$> anyChar ⊛ verbatim') + + +leadingSpaced :: CommandTypeOf -> Parser BlockElement +leadingSpaced cmdTypeOf = liftM Preformatted (char ' ' >> leadingSpaced') + + "leading space" where leadingSpaced' :: Parser [InlineElement] - leadingSpaced' = do x <- inlineElement + leadingSpaced' = do x <- inlineElement cmdTypeOf xs <- leadingSpaced' return (x:xs) <|> - try ( newline - >> - char ' ' - >> - leadingSpaced' - >>= - return . (Text "\n" :) + try ( liftM (Text "\n" :) ( newline + >> + char ' ' + >> + leadingSpaced' + ) ) <|> return [] -blockTag :: Parser BlockElement -blockTag = pzero -- not implemented - - -paragraph :: Parser BlockElement -paragraph = paragraph' >>= return . Paragraph +paragraph :: CommandTypeOf -> Parser BlockElement +paragraph cmdTypeOf = liftM Paragraph paragraph' where paragraph' :: Parser [InlineElement] - paragraph' = do x <- inlineElement - xs <- try ( do newline - eof + paragraph' = do x <- inlineElement cmdTypeOf + xs <- try ( do _ <- newline + _ <- eof return [] -- \n で文字列が終はってゐたら、ここ -- で終了。 ) <|> - try ( do newline - ((oneOf ('\n':blockSymbols) >> pzero) <|> return ()) - ((blockTag >> pzero) <|> return ()) + try ( do _ <- newline + _ <- ((oneOf ('\n':blockSymbols) >> pzero) <|> return ()) ys <- (paragraph' <|> return []) return (Text "\n" : ys) - -- \n があり、その次に \n、ブロックタ - -- グまたは blockSymbols があれば、 - -- fail して 最初の newline を讀んだ - -- 所まで卷き戻す。 + -- \n があり、その次に \n または + -- blockSymbols があれば、fail して最 + -- 初の newline を讀んだ所まで卷き戻 + -- す。oneOf が一文字消費しているので、 + -- <|> は右辺を適用せずに try まで戻 + -- る。 ) <|> - try paragraph' + paragraph' -- それ以外の場合は次の inlineElement から -- を讀んで見る。但し一つも無くても良い。 <|> @@ -219,67 +226,104 @@ paragraph = paragraph' >>= return . Paragraph return (x:xs) -inlineElement :: Parser InlineElement -inlineElement = skipMany comment - >> - ( cdata - <|> - apostrophes - <|> - text - <|> - pageLink - ) - - -cdata :: Parser InlineElement -cdata = try (string "> cdata' >>= return . Text +blockCmd :: CommandTypeOf -> Parser BlockElement +blockCmd cmdTypeOf + = (try $ do (tagName, tagAttrs) <- openTag + case cmdTypeOf tagName of + Just BlockCommandType + -> do xs <- contents + closeTag tagName + return $ BlockCmd BlockCommand { + bCmdName = tagName + , bCmdAttributes = tagAttrs + , bCmdContents = xs + } + Just InlineCommandType + -> pzero + _ -> return $ undefinedCmdErr tagName + ) + <|> + (try $ do (tagName, tagAttrs) <- emptyTag + case cmdTypeOf tagName of + Just BlockCommandType + -> return $ BlockCmd BlockCommand { + bCmdName = tagName + , bCmdAttributes = tagAttrs + , bCmdContents = [] + } + Just InlineCommandType + -> pzero + _ -> return $ undefinedCmdErr tagName + ) + + "block command" where - cdata' :: Parser String - cdata' = do try (string "]]>") - return [] - <|> - do x <- anyChar - xs <- cdata' - return (x:xs) - - -text :: Parser InlineElement -text = ( char ':' - >> - many (noneOf ('\n':inlineSymbols)) - >>= - return . Text . (':' :) - -- 定義リストとの關係上、コロンは先頭にしか來れない。 - ) + contents ∷ Parser [BlockElement] + contents = ((:) <$> blockElement cmdTypeOf ⊛ contents) + <|> + (newline *> contents) + <|> + (comment *> contents) + <|> + pure [] + + undefinedCmdErr ∷ Text → BlockElement + undefinedCmdErr name + = Div [("class", "error")] + [ Block (Paragraph [Text ("The command `" ⊕ name ⊕ "' is not defined. " ⊕ + "Make sure you haven't mistyped.") + ]) + ] + +inlineElement :: CommandTypeOf -> Parser InlineElement +inlineElement cmdTypeOf + = try $ do skipMany comment + foldr (<|>) pzero [ nowiki + , apostrophes cmdTypeOf + , text + , objLink + , pageLink + , extLink + , inlineCmd cmdTypeOf + ] + +nowiki ∷ Parser InlineElement +nowiki = Text ∘ T.pack <$> (try (string " nowiki') + where + nowiki' ∷ Parser String + nowiki' = (try (string "]>") *> pure []) + <|> + ((:) <$> anyChar ⊛ nowiki') + +text ∷ Parser InlineElement +text = (Text ∘ T.pack ∘ (':' :) <$> ( char ':' *> + many (noneOf ('\n':inlineSymbols)) + )) + -- 定義リストとの關係上、コロンは先頭にしか來られない。 <|> - ( many1 (noneOf ('\n':inlineSymbols)) - >>= - return . Text - ) + (Text ∘ T.pack <$> (many1 (noneOf ('\n':inlineSymbols)))) "text" - -apostrophes :: Parser InlineElement -apostrophes = foldr (<|>) pzero (map try [apos1, apos2, apos3, apos4, apos5]) +apostrophes :: CommandTypeOf -> Parser InlineElement +apostrophes cmdTypeOf = foldr (<|>) pzero (map try [apos1, apos2, apos3, apos4, apos5]) where apos1 = apos 1 >> return (Text "'") apos2 = do apos 2 - xs <- many1 inlineElement + xs <- many1 $ inlineElement cmdTypeOf apos 2 return (Italic xs) apos3 = do apos 3 - xs <- many1 inlineElement + xs <- many1 $ inlineElement cmdTypeOf apos 3 return (Bold xs) apos4 = apos 4 >> return (Text "'") apos5 = do apos 5 - xs <- many1 inlineElement + xs <- many1 $ inlineElement cmdTypeOf apos 5 return (Italic [Bold xs]) @@ -287,24 +331,125 @@ apostrophes = foldr (<|>) pzero (map try [apos1, apos2, apos3, apos4, apos5]) apos n = count n (char '\'') >> notFollowedBy (char '\'') -pageLink :: Parser InlineElement -pageLink = do try (string "[[") - page <- option Nothing - (many1 (noneOf "#|]") >>= return . Just) - fragment <- option Nothing - (char '#' >> many1 (noneOf "|]") >>= return . Just) - text <- option Nothing - (char '|' >> many1 (noneOf "]") >>= return . Just) - - case (page, fragment) of - (Nothing, Nothing) -> pzero - (_, _) -> return () - - string "]]" - return $ PageLink page fragment text +objLink ∷ Parser InlineElement +objLink = do void $ try (string "[[[") + page ← many1 (noneOf "|]") + label ← option Nothing $ + Just <$> (char '|' *> many1 (satisfy (≠ ']'))) + void $ string "]]]" + pure $ ObjectLink (T.pack page) (T.pack <$> label) + + "object link" + +pageLink ∷ Parser InlineElement +pageLink = do void $ try (string "[[") + page ← option Nothing $ + Just <$> many1 (noneOf "#|]") + fragment ← option Nothing $ + Just <$> (char '#' *> many1 (noneOf "|]")) + label ← option Nothing $ + Just <$> (char '|' *> many1 (satisfy (≠ ']'))) + when (isNothing page ∧ isNothing fragment) (∅) + void $ string "]]" + pure $ PageLink (T.pack <$> page ) + (T.pack <$> fragment) + (T.pack <$> label ) "page link" +extLink ∷ Parser InlineElement +extLink = do void $ char '[' + uriStr ← many1 (noneOf " \t]") + void $ skipMany (oneOf " \t") + label ← option Nothing $ + Just <$> many1 (noneOf "]") + case parseURI uriStr of + Just uri → char ']' *> pure (ExternalLink uri (T.pack <$> label)) + Nothing → pzero "absolute URI" + + "external link" + +inlineCmd ∷ CommandTypeOf → Parser InlineElement +inlineCmd cmdTypeOf + = (try $ do (tagName, tagAttrs) ← openTag + case cmdTypeOf tagName of + Just InlineCommandType + → do xs ← contents + closeTag tagName + pure $ InlineCmd InlineCommand { + iCmdName = tagName + , iCmdAttributes = tagAttrs + , iCmdContents = xs + } + _ → pzero + ) + <|> + (try $ do (tagName, tagAttrs) <- emptyTag + case cmdTypeOf tagName of + Just InlineCommandType + -> return $ InlineCmd InlineCommand { + iCmdName = tagName + , iCmdAttributes = tagAttrs + , iCmdContents = [] + } + _ -> pzero + ) + + "inline command" + where + contents ∷ Parser [InlineElement] + contents = ((:) <$> inlineElement cmdTypeOf ⊛ contents) + <|> + (comment *> contents) + <|> + ((Text "\n" :) <$> (newline *> contents)) + <|> + pure [] + +openTag ∷ Parser (Text, [Attribute]) +openTag = try $ do void $ char '<' + void $ many space + name ← many1 letter + void $ many space + attrs ← many $ do attr ← tagAttr + void $ many space + pure attr + void $ char '>' + return (T.pack name, attrs) + +emptyTag ∷ Parser (Text, [Attribute]) +emptyTag = try $ do void $ char '<' + void $ many space + name ← many1 letter + void $ many space + attrs ← many $ do attr ← tagAttr + void $ many space + pure attr + void $ char '/' + void $ many space + void $ char '>' + return (T.pack name, attrs) + +closeTag ∷ Text → Parser () +closeTag (T.unpack → name) + = try ( char '<' *> + many space *> + char '/' *> + many space *> + string name *> + many space *> + char '>' *> + pure () + ) + +tagAttr ∷ Parser (CI Text, Text) +tagAttr = do name ← many1 letter + void $ char '=' + void $ char '"' + value ← many (satisfy (≠ '"')) + void $ char '"' + return (CI.mk $ T.pack name, T.pack value) + comment :: Parser () comment = (try (string "") >> case level of - 1 -> return () - n -> skipTillEnd (n - 1)) - <|> - (anyChar >> skipTillEnd level) - ) + skipTillEnd level = (try (string "") >> case level of + 1 -> return () + n -> skipTillEnd (n - 1)) + <|> + (anyChar >> skipTillEnd level) blockSymbols :: [Char] -blockSymbols = " =-*#;" +blockSymbols = " =-*#;<" inlineSymbols :: [Char] @@ -338,7 +482,6 @@ ws = skipMany ( (oneOf " \t" >> return ()) -- end of line eol :: Parser () -eol = ( (newline >> return ()) - <|> - eof - ) +eol = (newline >> return ()) + <|> + eof