{-# LANGUAGE OverloadedStrings , RankNTypes , UnicodeSyntax , ViewPatterns #-} module Rakka.Wiki.Parser ( CommandTypeOf , wikiPage ) where -- FIXME: use attoparsec import Control.Applicative hiding ((<|>), many) import Control.Applicative.Unicode import Control.Monad import Data.CaseInsensitive (CI) import qualified Data.CaseInsensitive as CI import Data.Maybe import Data.Monoid.Unicode ((⊕)) import Data.Text (Text) import qualified Data.Text as T import Network.URI hiding (fragment) import Prelude.Unicode import Rakka.Wiki import Text.ParserCombinators.Parsec hiding (label) type CommandTypeOf = Alternative f ⇒ Text → f CommandType wikiPage :: CommandTypeOf -> Parser WikiPage wikiPage cmdTypeOf = do xs <- many (blockElement cmdTypeOf) skipMany ( comment <|> (newline >> return ()) ) eof return xs blockElement :: CommandTypeOf -> Parser BlockElement blockElement cmdTypeOf = try $ do skipMany ( comment <|> (newline >> return ()) ) foldr (<|>) pzero [ heading , horizontalLine , listElement cmdTypeOf , definitionList cmdTypeOf , verbatim , leadingSpaced cmdTypeOf , paragraph cmdTypeOf , blockCmd cmdTypeOf ] heading ∷ Parser BlockElement heading = foldr (<|>) pzero (map heading' [1..5]) "heading" where heading' ∷ Int → Parser BlockElement heading' n = do try ( void (count n (char '=')) *> notFollowedBy (char '=') ) ws x ← notFollowedBy (char '=') *> anyChar xs ← manyTill anyChar (try $ ws *> ( count n (char '=') ("trailing " ++ replicate n '=') ) ) ws eol pure ∘ Heading n $ T.pack (x:xs) horizontalLine :: Parser BlockElement horizontalLine = try ( do _ <- count 4 (char '-') _ <- many (char '-') ws eol return HorizontalLine ) "horizontal line" listElement :: CommandTypeOf -> Parser BlockElement listElement cmdTypeOf = listElement' [] where listElement' :: [Char] -> Parser BlockElement listElement' stack = do t <- oneOf "*#" ws xs <- items (stack ++ [t]) return (List (toType t) xs) items :: [Char] -> Parser [ListItem] items stack = do xs <- many1 $ inlineElement cmdTypeOf nested <- option Nothing $ try $ do skipMany comment _ <- newline _ <- string stack liftM Just (listElement' stack) rest <- items stack return $ (map Inline xs ++ map Block (catMaybes [nested])) : rest <|> (try $ do skipMany comment _ <- newline _ <- string stack ws items stack ) <|> return [] toType :: Char -> ListType toType '*' = Bullet toType '#' = Numbered toType _ = undefined definitionList :: CommandTypeOf -> Parser BlockElement definitionList cmdTypeOf = liftM DefinitionList (many1 definition) where definition :: Parser Definition definition = do _ <- char ';' _ <- ws tHead <- inlineElement cmdTypeOf tRest <- term d <- description return (Definition (tHead:tRest) d) "definition list" term :: Parser [InlineElement] term = (char ':' >> ws >> return []) <|> (newline >> char ':' >> ws >> return []) <|> do x <- inlineElement cmdTypeOf xs <- term return (x:xs) "term to be defined" description :: Parser [InlineElement] description = do x <- inlineElement cmdTypeOf xs <- description return (x:xs) <|> try ( do _ <- newline _ <- char ':' _ <- ws xs <- description return (Text "\n" : xs) ) <|> (newline >> return []) <|> (eof >> return []) "description of term" verbatim ∷ Parser BlockElement verbatim = try (string " many (oneOf " \t\n") *> (Preformatted ∘ (:[]) ∘ Text ∘ T.pack <$> verbatim') where verbatim' :: Parser String verbatim' = try (many (oneOf " \t\n") *> string "]>") *> pure [] <|> ((:) <$> anyChar ⊛ verbatim') leadingSpaced :: CommandTypeOf -> Parser BlockElement leadingSpaced cmdTypeOf = liftM Preformatted (char ' ' >> leadingSpaced') "leading space" where leadingSpaced' :: Parser [InlineElement] leadingSpaced' = do x <- inlineElement cmdTypeOf xs <- leadingSpaced' return (x:xs) <|> try ( liftM (Text "\n" :) ( newline >> char ' ' >> leadingSpaced' ) ) <|> return [] paragraph :: CommandTypeOf -> Parser BlockElement paragraph cmdTypeOf = liftM Paragraph paragraph' where paragraph' :: Parser [InlineElement] paragraph' = do x <- inlineElement cmdTypeOf xs <- try ( do _ <- newline _ <- eof return [] -- \n で文字列が終はってゐたら、ここ -- で終了。 ) <|> try ( do _ <- newline _ <- ((oneOf ('\n':blockSymbols) >> pzero) <|> return ()) ys <- (paragraph' <|> return []) return (Text "\n" : ys) -- \n があり、その次に \n または -- blockSymbols があれば、fail して最 -- 初の newline を讀んだ所まで卷き戻 -- す。oneOf が一文字消費しているので、 -- <|> は右辺を適用せずに try まで戻 -- る。 ) <|> paragraph' -- それ以外の場合は次の inlineElement から -- を讀んで見る。但し一つも無くても良い。 <|> return [] -- 全部失敗したらここで終了。 return (x:xs) blockCmd :: CommandTypeOf -> Parser BlockElement blockCmd cmdTypeOf = (try $ do (tagName, tagAttrs) <- openTag case cmdTypeOf tagName of Just BlockCommandType -> do xs <- contents closeTag tagName return $ BlockCmd BlockCommand { bCmdName = tagName , bCmdAttributes = tagAttrs , bCmdContents = xs } Just InlineCommandType -> pzero _ -> return $ undefinedCmdErr tagName ) <|> (try $ do (tagName, tagAttrs) <- emptyTag case cmdTypeOf tagName of Just BlockCommandType -> return $ BlockCmd BlockCommand { bCmdName = tagName , bCmdAttributes = tagAttrs , bCmdContents = [] } Just InlineCommandType -> pzero _ -> return $ undefinedCmdErr tagName ) "block command" where contents ∷ Parser [BlockElement] contents = ((:) <$> blockElement cmdTypeOf ⊛ contents) <|> (newline *> contents) <|> (comment *> contents) <|> pure [] undefinedCmdErr ∷ Text → BlockElement undefinedCmdErr name = Div [("class", "error")] [ Block (Paragraph [Text ("The command `" ⊕ name ⊕ "' is not defined. " ⊕ "Make sure you haven't mistyped.") ]) ] inlineElement :: CommandTypeOf -> Parser InlineElement inlineElement cmdTypeOf = try $ do skipMany comment foldr (<|>) pzero [ nowiki , apostrophes cmdTypeOf , text , objLink , pageLink , extLink , inlineCmd cmdTypeOf ] nowiki ∷ Parser InlineElement nowiki = Text ∘ T.pack <$> (try (string " nowiki') where nowiki' ∷ Parser String nowiki' = (try (string "]>") *> pure []) <|> ((:) <$> anyChar ⊛ nowiki') text ∷ Parser InlineElement text = (Text ∘ T.pack ∘ (':' :) <$> ( char ':' *> many (noneOf ('\n':inlineSymbols)) )) -- 定義リストとの關係上、コロンは先頭にしか來られない。 <|> (Text ∘ T.pack <$> (many1 (noneOf ('\n':inlineSymbols)))) "text" apostrophes :: CommandTypeOf -> Parser InlineElement apostrophes cmdTypeOf = foldr (<|>) pzero (map try [apos1, apos2, apos3, apos4, apos5]) where apos1 = apos 1 >> return (Text "'") apos2 = do apos 2 xs <- many1 $ inlineElement cmdTypeOf apos 2 return (Italic xs) apos3 = do apos 3 xs <- many1 $ inlineElement cmdTypeOf apos 3 return (Bold xs) apos4 = apos 4 >> return (Text "'") apos5 = do apos 5 xs <- many1 $ inlineElement cmdTypeOf apos 5 return (Italic [Bold xs]) apos :: Int -> Parser () apos n = count n (char '\'') >> notFollowedBy (char '\'') objLink ∷ Parser InlineElement objLink = do void $ try (string "[[[") page ← many1 (noneOf "|]") label ← option Nothing $ Just <$> (char '|' *> many1 (satisfy (≠ ']'))) void $ string "]]]" pure $ ObjectLink (T.pack page) (T.pack <$> label) "object link" pageLink ∷ Parser InlineElement pageLink = do void $ try (string "[[") page ← option Nothing $ Just <$> many1 (noneOf "#|]") fragment ← option Nothing $ Just <$> (char '#' *> many1 (noneOf "|]")) label ← option Nothing $ Just <$> (char '|' *> many1 (satisfy (≠ ']'))) when (isNothing page ∧ isNothing fragment) (∅) void $ string "]]" pure $ PageLink (T.pack <$> page ) (T.pack <$> fragment) (T.pack <$> label ) "page link" extLink ∷ Parser InlineElement extLink = do void $ char '[' uriStr ← many1 (noneOf " \t]") void $ skipMany (oneOf " \t") label ← option Nothing $ Just <$> many1 (noneOf "]") case parseURI uriStr of Just uri → char ']' *> pure (ExternalLink uri (T.pack <$> label)) Nothing → pzero "absolute URI" "external link" inlineCmd ∷ CommandTypeOf → Parser InlineElement inlineCmd cmdTypeOf = (try $ do (tagName, tagAttrs) ← openTag case cmdTypeOf tagName of Just InlineCommandType → do xs ← contents closeTag tagName pure $ InlineCmd InlineCommand { iCmdName = tagName , iCmdAttributes = tagAttrs , iCmdContents = xs } _ → pzero ) <|> (try $ do (tagName, tagAttrs) <- emptyTag case cmdTypeOf tagName of Just InlineCommandType -> return $ InlineCmd InlineCommand { iCmdName = tagName , iCmdAttributes = tagAttrs , iCmdContents = [] } _ -> pzero ) "inline command" where contents ∷ Parser [InlineElement] contents = ((:) <$> inlineElement cmdTypeOf ⊛ contents) <|> (comment *> contents) <|> ((Text "\n" :) <$> (newline *> contents)) <|> pure [] openTag ∷ Parser (Text, [Attribute]) openTag = try $ do void $ char '<' void $ many space name ← many1 letter void $ many space attrs ← many $ do attr ← tagAttr void $ many space pure attr void $ char '>' return (T.pack name, attrs) emptyTag ∷ Parser (Text, [Attribute]) emptyTag = try $ do void $ char '<' void $ many space name ← many1 letter void $ many space attrs ← many $ do attr ← tagAttr void $ many space pure attr void $ char '/' void $ many space void $ char '>' return (T.pack name, attrs) closeTag ∷ Text → Parser () closeTag (T.unpack → name) = try ( char '<' *> many space *> char '/' *> many space *> string name *> many space *> char '>' *> pure () ) tagAttr ∷ Parser (CI Text, Text) tagAttr = do name ← many1 letter void $ char '=' void $ char '"' value ← many (satisfy (≠ '"')) void $ char '"' return (CI.mk $ T.pack name, T.pack value) comment :: Parser () comment = (try (string "") >> case level of 1 -> return () n -> skipTillEnd (n - 1)) <|> (anyChar >> skipTillEnd level) blockSymbols :: [Char] blockSymbols = " =-*#;<" inlineSymbols :: [Char] inlineSymbols = "<[:'" -- white space ws :: Parser () ws = skipMany ( (oneOf " \t" >> return ()) <|> comment ) -- end of line eol :: Parser () eol = (newline >> return ()) <|> eof