X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=Rakka%2FWiki%2FParser.hs;h=3b3d7c401260b1efe09ec2901f9c6d2885bb2d58;hb=42f51754dea02201aececaacbf194d714cd58aaf;hp=0433612ba5746bcd98312f5993b6847831336717;hpb=7c3065043cdfbd96539a9bf6bff9b1d4281c0b2a;p=Rakka.git diff --git a/Rakka/Wiki/Parser.hs b/Rakka/Wiki/Parser.hs index 0433612..3b3d7c4 100644 --- a/Rakka/Wiki/Parser.hs +++ b/Rakka/Wiki/Parser.hs @@ -1,16 +1,30 @@ +{-# LANGUAGE + OverloadedStrings + , RankNTypes + , UnicodeSyntax + , ViewPatterns + #-} module Rakka.Wiki.Parser ( CommandTypeOf , wikiPage ) where - -import Data.Maybe -import Rakka.Wiki -import Text.ParserCombinators.Parsec - - -type CommandTypeOf = String -> Maybe CommandType - +-- FIXME: use attoparsec +import Control.Applicative hiding ((<|>), many) +import Control.Applicative.Unicode +import Control.Monad +import Data.CaseInsensitive (CI) +import qualified Data.CaseInsensitive as CI +import Data.Maybe +import Data.Monoid.Unicode ((⊕)) +import Data.Text (Text) +import qualified Data.Text as T +import Network.URI hiding (fragment) +import Prelude.Unicode +import Rakka.Wiki +import Text.ParserCombinators.Parsec hiding (label) + +type CommandTypeOf = Alternative f ⇒ Text → f CommandType wikiPage :: CommandTypeOf -> Parser WikiPage wikiPage cmdTypeOf @@ -33,36 +47,35 @@ blockElement cmdTypeOf , horizontalLine , listElement cmdTypeOf , definitionList cmdTypeOf - , pdata + , verbatim , leadingSpaced cmdTypeOf , paragraph cmdTypeOf , blockCmd cmdTypeOf ] - -heading :: Parser BlockElement +heading ∷ Parser BlockElement heading = foldr (<|>) pzero (map heading' [1..5]) "heading" where - heading' :: Int -> Parser BlockElement - heading' n = do try $ do count n (char '=') - notFollowedBy (char '=') + heading' ∷ Int → Parser BlockElement + heading' n = do try ( void (count n (char '=')) *> + notFollowedBy (char '=') + ) ws - x <- notFollowedBy (char '=') >> anyChar - xs <- manyTill anyChar (try $ ws >> ( count n (char '=') - - ("trailing " ++ take n (repeat '=')) - ) - ) + x ← notFollowedBy (char '=') *> anyChar + xs ← manyTill anyChar (try $ ws *> ( count n (char '=') + + ("trailing " ++ replicate n '=') + ) + ) ws eol - return (Heading n (x:xs)) - + pure ∘ Heading n $ T.pack (x:xs) horizontalLine :: Parser BlockElement -horizontalLine = try ( do count 4 (char '-') - many (char '-') +horizontalLine = try ( do _ <- count 4 (char '-') + _ <- many (char '-') ws eol return HorizontalLine @@ -72,29 +85,28 @@ horizontalLine = try ( do count 4 (char '-') listElement :: CommandTypeOf -> Parser BlockElement -listElement cmdTypeOf = listElement' [] >>= return . List +listElement cmdTypeOf = listElement' [] where - listElement' :: [Char] -> Parser ListElement + listElement' :: [Char] -> Parser BlockElement listElement' stack = do t <- oneOf "*#" ws xs <- items (stack ++ [t]) - return (ListElement (toType t) xs) + return (List (toType t) xs) - -- ListItem の終了條件は、 items :: [Char] -> Parser [ListItem] items stack = do xs <- many1 $ inlineElement cmdTypeOf nested <- option Nothing $ try $ do skipMany comment - newline - string stack - listElement' stack >>= return . Just + _ <- newline + _ <- string stack + liftM Just (listElement' stack) rest <- items stack - return $ (map Right xs ++ map Left (catMaybes [nested])) : rest + return $ (map Inline xs ++ map Block (catMaybes [nested])) : rest <|> (try $ do skipMany comment - newline - string stack + _ <- newline + _ <- string stack ws items stack ) @@ -104,14 +116,15 @@ listElement cmdTypeOf = listElement' [] >>= return . List toType :: Char -> ListType toType '*' = Bullet toType '#' = Numbered + toType _ = undefined definitionList :: CommandTypeOf -> Parser BlockElement -definitionList cmdTypeOf = many1 definition >>= return . DefinitionList +definitionList cmdTypeOf = liftM DefinitionList (many1 definition) where definition :: Parser Definition - definition = do char ';' - ws + definition = do _ <- char ';' + _ <- ws tHead <- inlineElement cmdTypeOf tRest <- term d <- description @@ -135,9 +148,9 @@ definitionList cmdTypeOf = many1 definition >>= return . DefinitionList xs <- description return (x:xs) <|> - try ( do newline - char ':' - ws + try ( do _ <- newline + _ <- char ':' + _ <- ws xs <- description return (Text "\n" : xs) ) @@ -149,23 +162,19 @@ definitionList cmdTypeOf = many1 definition >>= return . DefinitionList "description of term" -pdata :: Parser BlockElement -pdata = do try (string " + many (oneOf " \t\n") *> + (Preformatted ∘ (:[]) ∘ Text ∘ T.pack <$> verbatim') where - pdata' :: Parser String - pdata' = do try (many (oneOf " \t\n") >> string "]]>") - return [] - <|> - do x <- anyChar - xs <- pdata' - return (x:xs) + verbatim' :: Parser String + verbatim' = try (many (oneOf " \t\n") *> string "]>") *> pure [] + <|> + ((:) <$> anyChar ⊛ verbatim') leadingSpaced :: CommandTypeOf -> Parser BlockElement -leadingSpaced cmdTypeOf = (char ' ' >> leadingSpaced' >>= return . Preformatted) +leadingSpaced cmdTypeOf = liftM Preformatted (char ' ' >> leadingSpaced') "leading space" where @@ -174,42 +183,43 @@ leadingSpaced cmdTypeOf = (char ' ' >> leadingSpaced' >>= return . Preformatted) xs <- leadingSpaced' return (x:xs) <|> - try ( newline - >> - char ' ' - >> - leadingSpaced' - >>= - return . (Text "\n" :) + try ( liftM (Text "\n" :) ( newline + >> + char ' ' + >> + leadingSpaced' + ) ) <|> return [] -blockCommand :: Parser BlockElement -blockCommand = pzero -- not implemented - - paragraph :: CommandTypeOf -> Parser BlockElement -paragraph cmdTypeOf = paragraph' >>= return . Paragraph +paragraph cmdTypeOf = liftM Paragraph paragraph' where paragraph' :: Parser [InlineElement] paragraph' = do x <- inlineElement cmdTypeOf - xs <- try ( do newline - eof + xs <- try ( do _ <- newline + _ <- eof return [] -- \n で文字列が終はってゐたら、ここ -- で終了。 ) <|> - try ( do newline - ((oneOf ('\n':blockSymbols) >> pzero) <|> return ()) + try ( do _ <- newline + _ <- ((oneOf ('\n':blockSymbols) >> pzero) <|> return ()) ys <- (paragraph' <|> return []) return (Text "\n" : ys) -- \n があり、その次に \n または -- blockSymbols があれば、fail して -- 最初の newline を讀んだ所まで卷き -- 戻す。 + + -- FIXME: 本當にそのやうな動作になつ + -- てゐるか?偶然動いてゐるだけではな + -- いか?確かにこの實裝でユニットテス + -- トは通るのだが、私の理解を越えてし + -- まったやうだ。 ) <|> paragraph' @@ -227,79 +237,78 @@ blockCmd cmdTypeOf Just BlockCommandType -> do xs <- contents closeTag tagName - return $ BlockCmd $ BlockCommand { + return $ BlockCmd BlockCommand { bCmdName = tagName , bCmdAttributes = tagAttrs , bCmdContents = xs } - _ -> pzero + Just InlineCommandType + -> pzero + _ -> return $ undefinedCmdErr tagName ) <|> (try $ do (tagName, tagAttrs) <- emptyTag case cmdTypeOf tagName of Just BlockCommandType - -> return $ BlockCmd $ BlockCommand { + -> return $ BlockCmd BlockCommand { bCmdName = tagName , bCmdAttributes = tagAttrs , bCmdContents = [] } - _ -> pzero + Just InlineCommandType + -> pzero + _ -> return $ undefinedCmdErr tagName ) "block command" where - contents :: Parser [BlockElement] - contents = do x <- blockElement cmdTypeOf - xs <- contents - return (x:xs) + contents ∷ Parser [BlockElement] + contents = ((:) <$> blockElement cmdTypeOf ⊛ contents) <|> - (newline >> contents) + (newline *> contents) <|> - (comment >> contents) + (comment *> contents) <|> - return [] + pure [] + undefinedCmdErr ∷ Text → BlockElement + undefinedCmdErr name + = Div [("class", "error")] + [ Block (Paragraph [Text ("The command `" ⊕ name ⊕ "' is not defined. " ⊕ + "Make sure you haven't mistyped.") + ]) + ] inlineElement :: CommandTypeOf -> Parser InlineElement inlineElement cmdTypeOf = try $ do skipMany comment - foldr (<|>) pzero [ cdata + foldr (<|>) pzero [ nowiki , apostrophes cmdTypeOf , text + , objLink , pageLink + , extLink , inlineCmd cmdTypeOf ] - -cdata :: Parser InlineElement -cdata = try (string "> cdata' >>= return . Text +nowiki ∷ Parser InlineElement +nowiki = Text ∘ T.pack <$> (try (string " nowiki') where - cdata' :: Parser String - cdata' = do try (string "]]>") - return [] - <|> - do x <- anyChar - xs <- cdata' - return (x:xs) - - -text :: Parser InlineElement -text = ( char ':' - >> - many (noneOf ('\n':inlineSymbols)) - >>= - return . Text . (':' :) - -- 定義リストとの關係上、コロンは先頭にしか來れない。 - ) + nowiki' ∷ Parser String + nowiki' = (try (string "]>") *> pure []) + <|> + ((:) <$> anyChar ⊛ nowiki') + +text ∷ Parser InlineElement +text = (Text ∘ T.pack ∘ (':' :) <$> ( char ':' *> + many (noneOf ('\n':inlineSymbols)) + )) + -- 定義リストとの關係上、コロンは先頭にしか來られない。 <|> - ( many1 (noneOf ('\n':inlineSymbols)) - >>= - return . Text - ) + (Text ∘ T.pack <$> (many1 (noneOf ('\n':inlineSymbols)))) "text" - apostrophes :: CommandTypeOf -> Parser InlineElement apostrophes cmdTypeOf = foldr (<|>) pzero (map try [apos1, apos2, apos3, apos4, apos5]) where @@ -326,44 +335,63 @@ apostrophes cmdTypeOf = foldr (<|>) pzero (map try [apos1, apos2, apos3, apos4, apos n = count n (char '\'') >> notFollowedBy (char '\'') -pageLink :: Parser InlineElement -pageLink = do try (string "[[") - page <- option Nothing - (many1 (noneOf "#|]") >>= return . Just) - fragment <- option Nothing - (char '#' >> many1 (noneOf "|]") >>= return . Just) - text <- option Nothing - (char '|' >> many1 (satisfy (/= ']')) >>= return . Just) - - case (page, fragment) of - (Nothing, Nothing) -> pzero - (_, _) -> return () - - string "]]" - return $ PageLink page fragment text +objLink ∷ Parser InlineElement +objLink = do void $ try (string "[[[") + page ← many1 (noneOf "|]") + label ← option Nothing $ + Just <$> (char '|' *> many1 (satisfy (≠ ']'))) + void $ string "]]]" + pure $ ObjectLink (T.pack page) (T.pack <$> label) + + "object link" + +pageLink ∷ Parser InlineElement +pageLink = do void $ try (string "[[") + page ← option Nothing $ + Just <$> many1 (noneOf "#|]") + fragment ← option Nothing $ + Just <$> (char '#' *> many1 (noneOf "|]")) + label ← option Nothing $ + Just <$> (char '|' *> many1 (satisfy (≠ ']'))) + when (isNothing page ∧ isNothing fragment) (∅) + void $ string "]]" + pure $ PageLink (T.pack <$> page ) + (T.pack <$> fragment) + (T.pack <$> label ) "page link" +extLink ∷ Parser InlineElement +extLink = do void $ char '[' + uriStr ← many1 (noneOf " \t]") + void $ skipMany (oneOf " \t") + label ← option Nothing $ + Just <$> many1 (noneOf "]") + case parseURI uriStr of + Just uri → char ']' *> pure (ExternalLink uri (T.pack <$> label)) + Nothing → pzero "absolute URI" + + "external link" -inlineCmd :: CommandTypeOf -> Parser InlineElement +inlineCmd ∷ CommandTypeOf → Parser InlineElement inlineCmd cmdTypeOf - = (try $ do (tagName, tagAttrs) <- openTag + = (try $ do (tagName, tagAttrs) ← openTag case cmdTypeOf tagName of Just InlineCommandType - -> do xs <- contents - closeTag tagName - return $ InlineCmd $ InlineCommand { + → do xs ← contents + closeTag tagName + pure $ InlineCmd InlineCommand { iCmdName = tagName , iCmdAttributes = tagAttrs , iCmdContents = xs } - _ -> pzero + _ → pzero ) <|> (try $ do (tagName, tagAttrs) <- emptyTag case cmdTypeOf tagName of Just InlineCommandType - -> return $ InlineCmd $ InlineCommand { + -> return $ InlineCmd InlineCommand { iCmdName = tagName , iCmdAttributes = tagAttrs , iCmdContents = [] @@ -373,62 +401,58 @@ inlineCmd cmdTypeOf "inline command" where - contents :: Parser [InlineElement] - contents = do x <- inlineElement cmdTypeOf - xs <- contents - return (x:xs) + contents ∷ Parser [InlineElement] + contents = ((:) <$> inlineElement cmdTypeOf ⊛ contents) <|> - (comment >> contents) + (comment *> contents) <|> - (newline >> contents >>= return . (Text "\n" :)) + ((Text "\n" :) <$> (newline *> contents)) <|> - return [] - - -openTag :: Parser (String, [Attribute]) -openTag = try $ do char '<' - many space - name <- many1 letter - many space - attrs <- many $ do attr <- tagAttr - many space - return attr - char '>' - return (name, attrs) - - -emptyTag :: Parser (String, [Attribute]) -emptyTag = try $ do char '<' - many space - name <- many1 letter - many space - attrs <- many $ do attr <- tagAttr - many space - return attr - char '/' - many space - char '>' - return (name, attrs) - - -closeTag :: String -> Parser () -closeTag name = try $ do char '<' - many space - char '/' - many space - string name - many space - char '>' - return () - - -tagAttr :: Parser (String, String) -tagAttr = do name <- many1 letter - char '=' - char '"' - value <- many (satisfy (/= '"')) - char '"' - return (name, value) + pure [] + +openTag ∷ Parser (Text, [Attribute]) +openTag = try $ do void $ char '<' + void $ many space + name ← many1 letter + void $ many space + attrs ← many $ do attr ← tagAttr + void $ many space + pure attr + void $ char '>' + return (T.pack name, attrs) + +emptyTag ∷ Parser (Text, [Attribute]) +emptyTag = try $ do void $ char '<' + void $ many space + name ← many1 letter + void $ many space + attrs ← many $ do attr ← tagAttr + void $ many space + pure attr + void $ char '/' + void $ many space + void $ char '>' + return (T.pack name, attrs) + +closeTag ∷ Text → Parser () +closeTag (T.unpack → name) + = try ( char '<' *> + many space *> + char '/' *> + many space *> + string name *> + many space *> + char '>' *> + pure () + ) + +tagAttr ∷ Parser (CI Text, Text) +tagAttr = do name ← many1 letter + void $ char '=' + void $ char '"' + value ← many (satisfy (≠ '"')) + void $ char '"' + return (CI.mk $ T.pack name, T.pack value) comment :: Parser () @@ -437,14 +461,13 @@ comment = (try (string "") >> case level of - 1 -> return () - n -> skipTillEnd (n - 1)) - <|> - (anyChar >> skipTillEnd level) - ) + skipTillEnd level = (try (string "") >> case level of + 1 -> return () + n -> skipTillEnd (n - 1)) + <|> + (anyChar >> skipTillEnd level) blockSymbols :: [Char] @@ -463,7 +486,6 @@ ws = skipMany ( (oneOf " \t" >> return ()) -- end of line eol :: Parser () -eol = ( (newline >> return ()) - <|> - eof - ) +eol = (newline >> return ()) + <|> + eof