+{-# LANGUAGE
+ OverloadedStrings
+ , RankNTypes
+ , UnicodeSyntax
+ , ViewPatterns
+ #-}
module Rakka.Wiki.Parser
- ( wikiPage
+ ( CommandTypeOf
+ , wikiPage
)
where
-
-import Data.Maybe
-import Rakka.Wiki
-import Text.ParserCombinators.Parsec
-
-
-wikiPage :: Parser WikiPage
-wikiPage = do xs <- many (try blockElement)
- skipMany ( comment
- <|>
- (newline >> return ())
- )
- eof
- return xs
+-- FIXME: use attoparsec
+import Control.Applicative hiding ((<|>), many)
+import Control.Applicative.Unicode
+import Control.Monad
+import Data.CaseInsensitive (CI)
+import qualified Data.CaseInsensitive as CI
+import Data.Maybe
+import Data.Monoid.Unicode ((⊕))
+import Data.Text (Text)
+import qualified Data.Text as T
+import Network.URI hiding (fragment)
+import Prelude.Unicode
+import Rakka.Wiki
+import Text.ParserCombinators.Parsec hiding (label)
+
+type CommandTypeOf = Alternative f ⇒ Text → f CommandType
+
+wikiPage :: CommandTypeOf -> Parser WikiPage
+wikiPage cmdTypeOf
+ = do xs <- many (blockElement cmdTypeOf)
+ skipMany ( comment
+ <|>
+ (newline >> return ())
+ )
+ eof
+ return xs
-blockElement :: Parser BlockElement
-blockElement = skipMany ( comment
+blockElement :: CommandTypeOf -> Parser BlockElement
+blockElement cmdTypeOf
+ = try $ do skipMany ( comment
<|>
(newline >> return ())
)
- >>
- ( heading
- <|>
- horizontalLine
- <|>
- listElement
- <|>
- leadingSpaced
- <|>
- paragraph
- )
-
-
-heading :: Parser BlockElement
+ foldr (<|>) pzero [ heading
+ , horizontalLine
+ , listElement cmdTypeOf
+ , definitionList cmdTypeOf
+ , verbatim
+ , leadingSpaced cmdTypeOf
+ , paragraph cmdTypeOf
+ , blockCmd cmdTypeOf
+ ]
+
+heading ∷ Parser BlockElement
heading = foldr (<|>) pzero (map heading' [1..5])
<?>
"heading"
where
- heading' :: Int -> Parser BlockElement
- heading' n = do try $ do count n (char '=')
- notFollowedBy (char '=')
+ heading' ∷ Int → Parser BlockElement
+ heading' n = do try ( void (count n (char '=')) *>
+ notFollowedBy (char '=')
+ )
ws
- x <- notFollowedBy (char '=') >> anyChar
- xs <- manyTill anyChar (try $ ws >> ( count n (char '=')
- <?>
- ("trailing " ++ take n (repeat '='))
- )
- )
+ x ← notFollowedBy (char '=') *> anyChar
+ xs ← manyTill anyChar (try $ ws *> ( count n (char '=')
+ <?>
+ ("trailing " ++ replicate n '=')
+ )
+ )
ws
eol
- return (Heading n (x:xs))
-
+ pure ∘ Heading n $ T.pack (x:xs)
horizontalLine :: Parser BlockElement
-horizontalLine = try $ do count 4 (char '-')
- many (char '-')
+horizontalLine = try ( do _ <- count 4 (char '-')
+ _ <- many (char '-')
ws
eol
return HorizontalLine
+ )
+ <?>
+ "horizontal line"
-listElement :: Parser BlockElement
-listElement = listElement' [] >>= return . List
+listElement :: CommandTypeOf -> Parser BlockElement
+listElement cmdTypeOf = listElement' []
where
- listElement' :: [Char] -> Parser ListElement
+ listElement' :: [Char] -> Parser BlockElement
listElement' stack
- = try $ do t <- oneOf "*#"
- ws
- xs <- items (stack ++ [t])
- return (ListElement (toType t) xs)
+ = do t <- oneOf "*#"
+ ws
+ xs <- items (stack ++ [t])
+ return (List (toType t) xs)
- -- ListItem の終了條件は、
items :: [Char] -> Parser [ListItem]
- items stack = do xs <- many1 inlineElement
+ items stack = do xs <- many1 $ inlineElement cmdTypeOf
nested <- option Nothing
- $ try $ do newline
- string stack
- listElement' stack >>= return . Just
+ $ try $ do skipMany comment
+ _ <- newline
+ _ <- string stack
+ liftM Just (listElement' stack)
rest <- items stack
- return $ (map Right xs ++ map Left (catMaybes [nested])) : rest
+ return $ (map Inline xs ++ map Block (catMaybes [nested])) : rest
<|>
- (try $ do newline
- string stack
+ (try $ do skipMany comment
+ _ <- newline
+ _ <- string stack
ws
items stack
)
<|>
return []
-{-
- items stack = do nested <- listElement' stack
- rest <- items stack
- return (Left nested : rest)
- <|>
- do xs <- many1 inlineElement
- rest <- items stack
- return (Right xs : rest)
- <|>
- try ( newline
- >>
- string stack
- >>
- items stack
- )
- <|>
- return []
--}
toType :: Char -> ListType
toType '*' = Bullet
toType '#' = Numbered
+ toType _ = undefined
+
+
+definitionList :: CommandTypeOf -> Parser BlockElement
+definitionList cmdTypeOf = liftM DefinitionList (many1 definition)
+ where
+ definition :: Parser Definition
+ definition = do _ <- char ';'
+ _ <- ws
+ tHead <- inlineElement cmdTypeOf
+ tRest <- term
+ d <- description
+ return (Definition (tHead:tRest) d)
+ <?>
+ "definition list"
+
+ term :: Parser [InlineElement]
+ term = (char ':' >> ws >> return [])
+ <|>
+ (newline >> char ':' >> ws >> return [])
+ <|>
+ do x <- inlineElement cmdTypeOf
+ xs <- term
+ return (x:xs)
+ <?>
+ "term to be defined"
+
+ description :: Parser [InlineElement]
+ description = do x <- inlineElement cmdTypeOf
+ xs <- description
+ return (x:xs)
+ <|>
+ try ( do _ <- newline
+ _ <- char ':'
+ _ <- ws
+ xs <- description
+ return (Text "\n" : xs)
+ )
+ <|>
+ (newline >> return [])
+ <|>
+ (eof >> return [])
+ <?>
+ "description of term"
+
+
+verbatim ∷ Parser BlockElement
+verbatim = try (string "<!verbatim[") *>
+ many (oneOf " \t\n") *>
+ (Preformatted ∘ (:[]) ∘ Text ∘ T.pack <$> verbatim')
+ where
+ verbatim' :: Parser String
+ verbatim' = try (many (oneOf " \t\n") *> string "]>") *> pure []
+ <|>
+ ((:) <$> anyChar ⊛ verbatim')
-leadingSpaced :: Parser BlockElement
-leadingSpaced = char ' ' >> leadingSpaced' >>= return . LeadingSpaced
+leadingSpaced :: CommandTypeOf -> Parser BlockElement
+leadingSpaced cmdTypeOf = liftM Preformatted (char ' ' >> leadingSpaced')
+ <?>
+ "leading space"
where
leadingSpaced' :: Parser [InlineElement]
- leadingSpaced' = do x <- inlineElement
+ leadingSpaced' = do x <- inlineElement cmdTypeOf
xs <- leadingSpaced'
return (x:xs)
<|>
- try ( newline
- >>
- char ' '
- >>
- leadingSpaced'
- >>=
- return . (Text "\n" :)
+ try ( liftM (Text "\n" :) ( newline
+ >>
+ char ' '
+ >>
+ leadingSpaced'
+ )
)
<|>
return []
-blockTag :: Parser BlockElement
-blockTag = pzero -- not implemented
-
-
-paragraph :: Parser BlockElement
-paragraph = paragraph' >>= return . Paragraph
+paragraph :: CommandTypeOf -> Parser BlockElement
+paragraph cmdTypeOf = liftM Paragraph paragraph'
where
paragraph' :: Parser [InlineElement]
- paragraph' = do x <- inlineElement
- xs <- try ( do newline
- eof
+ paragraph' = do x <- inlineElement cmdTypeOf
+ xs <- try ( do _ <- newline
+ _ <- eof
return []
-- \n で文字列が終はってゐたら、ここ
-- で終了。
)
<|>
- try ( do newline
- ((oneOf ('\n':blockSymbols) >> pzero) <|> return ())
- ((blockTag >> pzero) <|> return ())
+ try ( do _ <- newline
+ _ <- ((oneOf ('\n':blockSymbols) >> pzero) <|> return ())
ys <- (paragraph' <|> return [])
return (Text "\n" : ys)
- -- \n があり、その次に \n、ブロックタ
- -- グまたは blockSymbols があれば、
- -- fail して 最初の newline を讀んだ
- -- 所まで卷き戻す。
+ -- \n があり、その次に \n または
+ -- blockSymbols があれば、fail して
+ -- 最初の newline を讀んだ所まで卷き
+ -- 戻す。
+
+ -- FIXME: 本當にそのやうな動作になつ
+ -- てゐるか?偶然動いてゐるだけではな
+ -- いか?確かにこの實裝でユニットテス
+ -- トは通るのだが、私の理解を越えてし
+ -- まったやうだ。
)
<|>
- try paragraph'
+ paragraph'
-- それ以外の場合は次の inlineElement から
-- を讀んで見る。但し一つも無くても良い。
<|>
return (x:xs)
-inlineElement :: Parser InlineElement
-inlineElement = skipMany comment
- >>
- ( try text
- <|>
- try pageLink
- )
+blockCmd :: CommandTypeOf -> Parser BlockElement
+blockCmd cmdTypeOf
+ = (try $ do (tagName, tagAttrs) <- openTag
+ case cmdTypeOf tagName of
+ Just BlockCommandType
+ -> do xs <- contents
+ closeTag tagName
+ return $ BlockCmd BlockCommand {
+ bCmdName = tagName
+ , bCmdAttributes = tagAttrs
+ , bCmdContents = xs
+ }
+ Just InlineCommandType
+ -> pzero
+ _ -> return $ undefinedCmdErr tagName
+ )
+ <|>
+ (try $ do (tagName, tagAttrs) <- emptyTag
+ case cmdTypeOf tagName of
+ Just BlockCommandType
+ -> return $ BlockCmd BlockCommand {
+ bCmdName = tagName
+ , bCmdAttributes = tagAttrs
+ , bCmdContents = []
+ }
+ Just InlineCommandType
+ -> pzero
+ _ -> return $ undefinedCmdErr tagName
+ )
+ <?>
+ "block command"
+ where
+ contents ∷ Parser [BlockElement]
+ contents = ((:) <$> blockElement cmdTypeOf ⊛ contents)
+ <|>
+ (newline *> contents)
+ <|>
+ (comment *> contents)
+ <|>
+ pure []
+
+ undefinedCmdErr ∷ Text → BlockElement
+ undefinedCmdErr name
+ = Div [("class", "error")]
+ [ Block (Paragraph [Text ("The command `" ⊕ name ⊕ "' is not defined. " ⊕
+ "Make sure you haven't mistyped.")
+ ])
+ ]
+
+inlineElement :: CommandTypeOf -> Parser InlineElement
+inlineElement cmdTypeOf
+ = try $ do skipMany comment
+ foldr (<|>) pzero [ nowiki
+ , apostrophes cmdTypeOf
+ , text
+ , objLink
+ , pageLink
+ , extLink
+ , inlineCmd cmdTypeOf
+ ]
+
+nowiki ∷ Parser InlineElement
+nowiki = Text ∘ T.pack <$> (try (string "<!nowiki[") *> nowiki')
+ where
+ nowiki' ∷ Parser String
+ nowiki' = (try (string "]>") *> pure [])
+ <|>
+ ((:) <$> anyChar ⊛ nowiki')
+
+text ∷ Parser InlineElement
+text = (Text ∘ T.pack ∘ (':' :) <$> ( char ':' *>
+ many (noneOf ('\n':inlineSymbols))
+ ))
+ -- 定義リストとの關係上、コロンは先頭にしか來られない。
+ <|>
+ (Text ∘ T.pack <$> (many1 (noneOf ('\n':inlineSymbols))))
+ <?>
+ "text"
+
+apostrophes :: CommandTypeOf -> Parser InlineElement
+apostrophes cmdTypeOf = foldr (<|>) pzero (map try [apos1, apos2, apos3, apos4, apos5])
+ where
+ apos1 = apos 1 >> return (Text "'")
+
+ apos2 = do apos 2
+ xs <- many1 $ inlineElement cmdTypeOf
+ apos 2
+ return (Italic xs)
+ apos3 = do apos 3
+ xs <- many1 $ inlineElement cmdTypeOf
+ apos 3
+ return (Bold xs)
-text :: Parser InlineElement
-text = many1 (noneOf ('\n':inlineSymbols)) >>= return . Text
+ apos4 = apos 4 >> return (Text "'")
+ apos5 = do apos 5
+ xs <- many1 $ inlineElement cmdTypeOf
+ apos 5
+ return (Italic [Bold xs])
-pageLink :: Parser InlineElement
-pageLink = do string "[["
- page <- option Nothing
- (many1 (noneOf "#|]") >>= return . Just)
- fragment <- option Nothing
- (char '#' >> many1 (noneOf "|]") >>= return . Just)
- text <- option Nothing
- (char '|' >> many1 (noneOf "]") >>= return . Just)
+ apos :: Int -> Parser ()
+ apos n = count n (char '\'') >> notFollowedBy (char '\'')
- case (page, fragment) of
- (Nothing, Nothing) -> pzero
- (_, _) -> return ()
- string "]]"
- return $ PageLink page fragment text
+objLink ∷ Parser InlineElement
+objLink = do void $ try (string "[[[")
+ page ← many1 (noneOf "|]")
+ label ← option Nothing $
+ Just <$> (char '|' *> many1 (satisfy (≠ ']')))
+ void $ string "]]]"
+ pure $ ObjectLink (T.pack page) (T.pack <$> label)
+ <?>
+ "object link"
+
+pageLink ∷ Parser InlineElement
+pageLink = do void $ try (string "[[")
+ page ← option Nothing $
+ Just <$> many1 (noneOf "#|]")
+ fragment ← option Nothing $
+ Just <$> (char '#' *> many1 (noneOf "|]"))
+ label ← option Nothing $
+ Just <$> (char '|' *> many1 (satisfy (≠ ']')))
+ when (isNothing page ∧ isNothing fragment) (∅)
+ void $ string "]]"
+ pure $ PageLink (T.pack <$> page )
+ (T.pack <$> fragment)
+ (T.pack <$> label )
<?>
"page link"
+extLink ∷ Parser InlineElement
+extLink = do void $ char '['
+ uriStr ← many1 (noneOf " \t]")
+ void $ skipMany (oneOf " \t")
+ label ← option Nothing $
+ Just <$> many1 (noneOf "]")
+ case parseURI uriStr of
+ Just uri → char ']' *> pure (ExternalLink uri (T.pack <$> label))
+ Nothing → pzero <?> "absolute URI"
+ <?>
+ "external link"
+
+inlineCmd ∷ CommandTypeOf → Parser InlineElement
+inlineCmd cmdTypeOf
+ = (try $ do (tagName, tagAttrs) ← openTag
+ case cmdTypeOf tagName of
+ Just InlineCommandType
+ → do xs ← contents
+ closeTag tagName
+ pure $ InlineCmd InlineCommand {
+ iCmdName = tagName
+ , iCmdAttributes = tagAttrs
+ , iCmdContents = xs
+ }
+ _ → pzero
+ )
+ <|>
+ (try $ do (tagName, tagAttrs) <- emptyTag
+ case cmdTypeOf tagName of
+ Just InlineCommandType
+ -> return $ InlineCmd InlineCommand {
+ iCmdName = tagName
+ , iCmdAttributes = tagAttrs
+ , iCmdContents = []
+ }
+ _ -> pzero
+ )
+ <?>
+ "inline command"
+ where
+ contents ∷ Parser [InlineElement]
+ contents = ((:) <$> inlineElement cmdTypeOf ⊛ contents)
+ <|>
+ (comment *> contents)
+ <|>
+ ((Text "\n" :) <$> (newline *> contents))
+ <|>
+ pure []
+
+openTag ∷ Parser (Text, [Attribute])
+openTag = try $ do void $ char '<'
+ void $ many space
+ name ← many1 letter
+ void $ many space
+ attrs ← many $ do attr ← tagAttr
+ void $ many space
+ pure attr
+ void $ char '>'
+ return (T.pack name, attrs)
+
+emptyTag ∷ Parser (Text, [Attribute])
+emptyTag = try $ do void $ char '<'
+ void $ many space
+ name ← many1 letter
+ void $ many space
+ attrs ← many $ do attr ← tagAttr
+ void $ many space
+ pure attr
+ void $ char '/'
+ void $ many space
+ void $ char '>'
+ return (T.pack name, attrs)
+
+closeTag ∷ Text → Parser ()
+closeTag (T.unpack → name)
+ = try ( char '<' *>
+ many space *>
+ char '/' *>
+ many space *>
+ string name *>
+ many space *>
+ char '>' *>
+ pure ()
+ )
+
+tagAttr ∷ Parser (CI Text, Text)
+tagAttr = do name ← many1 letter
+ void $ char '='
+ void $ char '"'
+ value ← many (satisfy (≠ '"'))
+ void $ char '"'
+ return (CI.mk $ T.pack name, T.pack value)
+
comment :: Parser ()
comment = (try (string "<!--") >> skipTillEnd 1)
"comment"
where
skipTillEnd :: Int -> Parser ()
- skipTillEnd level = ( (try (string "<!--") >> skipTillEnd (level + 1))
- <|>
- (try (string "-->") >> case level of
- 1 -> return ()
- n -> skipTillEnd (n - 1))
- <|>
- (anyChar >> skipTillEnd level)
- )
+ skipTillEnd level = (try (string "<!--") >> skipTillEnd (level + 1))
+ <|>
+ (try (string "-->") >> case level of
+ 1 -> return ()
+ n -> skipTillEnd (n - 1))
+ <|>
+ (anyChar >> skipTillEnd level)
blockSymbols :: [Char]
-blockSymbols = " =-*#"
+blockSymbols = " =-*#;<"
inlineSymbols :: [Char]
-inlineSymbols = "<["
+inlineSymbols = "<[:'"
-- white space
ws :: Parser ()
-- end of line
eol :: Parser ()
-eol = ( (newline >> return ())
- <|>
- eof
- )
+eol = (newline >> return ())
+ <|>
+ eof