]> gitweb @ CieloNegro.org - Rakka.git/blobdiff - Rakka/Wiki/Parser.hs
merge branch origin/master
[Rakka.git] / Rakka / Wiki / Parser.hs
index 52e430a7e4cc12df5771fe0fe9c3695563b6df7d..e7ca8ebbc456e063587cfed6767e0ee1bff92adb 100644 (file)
+{-# LANGUAGE
+    OverloadedStrings
+  , RankNTypes
+  , UnicodeSyntax
+  , ViewPatterns
+  #-}
 module Rakka.Wiki.Parser
-    ( wikiPage
+    ( CommandTypeOf
+    , wikiPage
     )
     where
+-- FIXME: use attoparsec
+import Control.Applicative hiding ((<|>), many)
+import Control.Applicative.Unicode
+import Control.Monad
+import Data.CaseInsensitive (CI)
+import qualified Data.CaseInsensitive as CI
+import Data.Maybe
+import Data.Monoid.Unicode ((⊕))
+import Data.Text (Text)
+import qualified Data.Text as T
+import Network.URI hiding (fragment)
+import Prelude.Unicode
+import Rakka.Wiki
+import Text.ParserCombinators.Parsec hiding (label)
 
-import           Rakka.Wiki
-import           Text.ParserCombinators.Parsec
+type CommandTypeOf = Alternative f ⇒ Text → f CommandType
 
+wikiPage :: CommandTypeOf -> Parser WikiPage
+wikiPage cmdTypeOf
+    = do xs <- many (blockElement cmdTypeOf)
+         skipMany ( comment
+                    <|>
+                    (newline >> return ())
+                  )
+         eof
+         return xs
 
-wikiPage :: Parser WikiPage
-wikiPage = do xs <- many (try wikiElement)
-              skipMany comment
-              eof
-              return xs
 
+blockElement :: CommandTypeOf -> Parser BlockElement
+blockElement cmdTypeOf
+    = try $ do skipMany ( comment
+                          <|>
+                          (newline >> return ())
+                        )
+               foldr (<|>) pzero [ heading
+                                 , horizontalLine
+                                 , listElement cmdTypeOf
+                                 , definitionList cmdTypeOf
+                                 , verbatim
+                                 , leadingSpaced cmdTypeOf
+                                 , paragraph cmdTypeOf
+                                 , blockCmd cmdTypeOf
+                                 ]
 
-wikiElement :: Parser WikiElement
-wikiElement = skipMany comment >>
-              ( try (blockElement >>= return . Block)
-                <|>
-                try (inlineElement >>= return . Inline)
-              )
-
-
-blockElement :: Parser BlockElement
-blockElement = ( try heading
-                 <|>
-                 try emptyLine
-               )
-
-
-heading :: Parser BlockElement
-heading = foldr (<|>) pzero (map (try . heading') [1..5])
+heading ∷ Parser BlockElement
+heading = foldr (<|>) pzero (map heading' [1..5])
           <?>
           "heading"
     where
-      heading' :: Int -> Parser BlockElement
-      heading' n = do count n (char '=')
-                      notFollowedBy (char '=')
+      heading' ∷ Int → Parser BlockElement
+      heading' n = do try ( void (count n (char '=')) *>
+                            notFollowedBy (char '=')
+                          )
                       ws
-                      x  <- notFollowedBy (char '=') >> anyChar
-                      xs <- manyTill anyChar (try $ ws >> (count n (char '=')))
+                      x  ← notFollowedBy (char '=') *> anyChar
+                      xs ← manyTill anyChar (try $ ws *> ( count n (char '=')
+                                                           <?>
+                                                           ("trailing " ++ replicate n '=')
+                                                         )
+                                            )
                       ws
                       eol
-                      return (Heading n (x:xs))
+                      pure ∘ Heading n $ T.pack (x:xs)
+
+horizontalLine :: Parser BlockElement
+horizontalLine = try ( do _ <- count 4 (char '-')
+                          _ <- many (char '-')
+                          ws
+                          eol
+                          return HorizontalLine
+                     )
+                 <?>
+                 "horizontal line"
+
+
+listElement :: CommandTypeOf -> Parser BlockElement
+listElement cmdTypeOf = listElement' []
+    where
+      listElement' :: [Char] -> Parser BlockElement
+      listElement' stack
+          = do t  <- oneOf "*#"
+               ws
+               xs <- items (stack ++ [t])
+               return (List (toType t) xs)
+
+      items :: [Char] -> Parser [ListItem]
+      items stack = do xs     <- many1 $ inlineElement cmdTypeOf
+                       nested <- option Nothing
+                                 $ try $ do skipMany comment
+                                            _ <- newline
+                                            _ <- string stack
+                                            liftM Just (listElement' stack)
+                       rest <- items stack
+                       return $ (map Inline xs ++ map Block (catMaybes [nested])) : rest
+                    <|>
+                    (try $ do skipMany comment
+                              _ <- newline
+                              _ <- string stack
+                              ws
+                              items stack
+                    )
+                    <|>
+                    return []
+
+      toType :: Char -> ListType
+      toType '*' = Bullet
+      toType '#' = Numbered
+      toType _   = undefined
+
+
+definitionList :: CommandTypeOf -> Parser BlockElement
+definitionList cmdTypeOf = liftM DefinitionList (many1 definition)
+    where
+      definition :: Parser Definition
+      definition = do _     <- char ';'
+                      _     <- ws
+                      tHead <- inlineElement cmdTypeOf
+                      tRest <- term
+                      d     <- description
+                      return (Definition (tHead:tRest) d)
+                   <?>
+                   "definition list"
 
+      term :: Parser [InlineElement]
+      term = (char ':' >> ws >> return [])
+             <|>
+             (newline >> char ':' >> ws >> return [])
+             <|>
+             do x  <- inlineElement cmdTypeOf
+                xs <- term
+                return (x:xs)
+             <?>
+             "term to be defined"
 
-emptyLine :: Parser BlockElement
-emptyLine = count 2 newline >> many newline >> return EmptyLine
-            <?>
-            "empty line"
+      description :: Parser [InlineElement]
+      description = do x  <- inlineElement cmdTypeOf
+                       xs <- description
+                       return (x:xs)
+                    <|>
+                    try ( do _  <- newline
+                             _  <- char ':'
+                             _  <- ws
+                             xs <- description
+                             return (Text "\n" : xs)
+                        )
+                    <|>
+                    (newline >> return [])
+                    <|>
+                    (eof >> return [])
+                    <?>
+                    "description of term"
 
 
-inlineElement :: Parser InlineElement
-inlineElement = ( try text
+verbatim ∷ Parser BlockElement
+verbatim = try (string "<!verbatim[") *>
+           many (oneOf " \t\n")       *>
+           (Preformatted ∘ (:[]) ∘ Text ∘ T.pack <$> verbatim')
+    where
+      verbatim' :: Parser String
+      verbatim' = try (many (oneOf " \t\n") *> string "]>") *> pure []
                   <|>
-                  try pageLink
-                )
+                  ((:) <$> anyChar ⊛ verbatim')
+
+
+leadingSpaced :: CommandTypeOf -> Parser BlockElement
+leadingSpaced cmdTypeOf = liftM Preformatted (char ' ' >> leadingSpaced')
+                          <?>
+                          "leading space"
+    where
+      leadingSpaced' :: Parser [InlineElement]
+      leadingSpaced' = do x  <- inlineElement cmdTypeOf
+                          xs <- leadingSpaced'
+                          return (x:xs)
+                       <|>
+                       try ( liftM (Text "\n" :) ( newline
+                                                   >>
+                                                   char ' '
+                                                   >>
+                                                   leadingSpaced'
+                                                 )
+                           )
+                       <|>
+                       return []
+
+
+paragraph :: CommandTypeOf -> Parser BlockElement
+paragraph cmdTypeOf = liftM Paragraph paragraph'
+    where
+      paragraph' :: Parser [InlineElement]
+      paragraph' = do x  <- inlineElement cmdTypeOf
+                      xs <- try ( do _ <- newline
+                                     _ <- eof
+                                     return []
+                                  -- \n で文字列が終はってゐたら、ここ
+                                  -- で終了。
+                                )
+                            <|>
+                            try ( do _  <- newline
+                                     _  <- ((oneOf ('\n':blockSymbols) >> pzero) <|> return ())
+                                     ys <- (paragraph' <|> return [])
+                                     return (Text "\n" : ys)
+                                  -- \n があり、その次に \n または
+                                  -- blockSymbols があれば、fail して最
+                                  -- 初の newline を讀んだ所まで卷き戻
+                                  -- す。oneOf が一文字消費しているので、
+                                  -- <|> は右辺を適用せずに try まで戻
+                                  -- る。
+                                )
+                            <|>
+                            paragraph'
+                            -- それ以外の場合は次の inlineElement から
+                            -- を讀んで見る。但し一つも無くても良い。
+                            <|>
+                            return [] -- 全部失敗したらここで終了。
+                      return (x:xs)
+
+
+blockCmd :: CommandTypeOf -> Parser BlockElement
+blockCmd cmdTypeOf
+    = (try $ do (tagName, tagAttrs) <- openTag
+                case cmdTypeOf tagName of
+                  Just BlockCommandType
+                      -> do xs <- contents
+                            closeTag tagName
+                            return $ BlockCmd BlockCommand {
+                                         bCmdName       = tagName
+                                       , bCmdAttributes = tagAttrs
+                                       , bCmdContents   = xs
+                                       }
+                  Just InlineCommandType
+                      -> pzero
+                  _   -> return $ undefinedCmdErr tagName
+      )
+      <|>
+      (try $ do (tagName, tagAttrs) <- emptyTag
+                case cmdTypeOf tagName of
+                  Just BlockCommandType
+                      -> return $ BlockCmd BlockCommand {
+                                         bCmdName       = tagName
+                                       , bCmdAttributes = tagAttrs
+                                       , bCmdContents   = []
+                                       }
+                  Just InlineCommandType
+                      -> pzero
+                  _   -> return $ undefinedCmdErr tagName
+      )
+      <?>
+      "block command"
+    where
+      contents ∷ Parser [BlockElement]
+      contents = ((:) <$> blockElement cmdTypeOf ⊛ contents)
+                 <|>
+                 (newline *> contents)
+                 <|>
+                 (comment *> contents)
+                 <|>
+                 pure []
+
+      undefinedCmdErr ∷ Text → BlockElement
+      undefinedCmdErr name
+          = Div [("class", "error")]
+            [ Block (Paragraph [Text ("The command `" ⊕ name ⊕ "' is not defined. " ⊕
+                                      "Make sure you haven't mistyped.")
+                               ])
+            ]
 
+inlineElement :: CommandTypeOf -> Parser InlineElement
+inlineElement cmdTypeOf
+    = try $ do skipMany comment
+               foldr (<|>) pzero [ nowiki
+                                 , apostrophes cmdTypeOf
+                                 , text
+                                 , objLink
+                                 , pageLink
+                                 , extLink
+                                 , inlineCmd cmdTypeOf
+                                 ]
 
-text :: Parser InlineElement
-text = text' >>= return . Text
+nowiki ∷ Parser InlineElement
+nowiki = Text ∘ T.pack <$> (try (string "<!nowiki[") *> nowiki')
     where
-      text' :: Parser String
-      text' = do x <- noneOf inlineSymbols
-                 case x of
-                   -- 單獨の \n は受け入れる。
-                   '\n' -> return [x]
-                   -- それ以外では \n を受け入れない。
-                   _    -> many (noneOf ('\n':inlineSymbols)) >>= return . (x:)
-
-
-pageLink :: Parser InlineElement
-pageLink = do string "[["
-              page     <- option Nothing 
-                          (many1 (noneOf "#|]") >>= return . Just)
-              fragment <- option Nothing
-                          (char '#' >> many1 (noneOf "|]") >>= return . Just)
-              text     <- option Nothing
-                          (char '|' >> many1 (noneOf "]") >>= return . Just)
-
-              case (page, fragment) of
-                (Nothing, Nothing) -> pzero
-                (_, _)             -> return ()
-
-              string "]]"
-              return $ PageLink page fragment text
+      nowiki' ∷ Parser String
+      nowiki' = (try (string "]>") *> pure [])
+                <|>
+                ((:) <$> anyChar ⊛ nowiki')
+
+text ∷ Parser InlineElement
+text = (Text ∘ T.pack ∘ (':' :) <$> ( char ':' *>
+                                      many (noneOf ('\n':inlineSymbols))
+                                    ))
+       -- 定義リストとの關係上、コロンは先頭にしか來られない。
+       <|>
+       (Text ∘ T.pack <$> (many1 (noneOf ('\n':inlineSymbols))))
+       <?>
+       "text"
+
+apostrophes :: CommandTypeOf -> Parser InlineElement
+apostrophes cmdTypeOf = foldr (<|>) pzero (map try [apos1, apos2, apos3, apos4, apos5])
+    where
+      apos1 = apos 1 >> return (Text "'")
+
+      apos2 = do apos 2
+                 xs <- many1 $ inlineElement cmdTypeOf
+                 apos 2
+                 return (Italic xs)
+
+      apos3 = do apos 3
+                 xs <- many1 $ inlineElement cmdTypeOf
+                 apos 3
+                 return (Bold xs)
+
+      apos4 = apos 4 >> return (Text "'")
+
+      apos5 = do apos 5
+                 xs <- many1 $ inlineElement cmdTypeOf
+                 apos 5
+                 return (Italic [Bold xs])
+
+      apos :: Int -> Parser ()
+      apos n = count n (char '\'') >> notFollowedBy (char '\'')
+
+
+objLink ∷ Parser InlineElement
+objLink = do void $ try (string "[[[")
+             page  ← many1 (noneOf "|]")
+             label ← option Nothing $
+                     Just <$> (char '|' *> many1 (satisfy (≠ ']')))
+             void $ string "]]]"
+             pure $ ObjectLink (T.pack page) (T.pack <$> label)
+          <?>
+          "object link"
+
+pageLink ∷ Parser InlineElement
+pageLink = do void $ try (string "[[")
+              page     ← option Nothing $
+                         Just <$> many1 (noneOf "#|]")
+              fragment ← option Nothing $
+                         Just <$> (char '#' *> many1 (noneOf "|]"))
+              label    ← option Nothing $
+                         Just <$> (char '|' *> many1 (satisfy (≠ ']')))
+              when (isNothing page ∧ isNothing fragment) (∅)
+              void $ string "]]"
+              pure $ PageLink (T.pack <$> page    )
+                              (T.pack <$> fragment)
+                              (T.pack <$> label   )
            <?>
            "page link"
 
+extLink ∷ Parser InlineElement
+extLink = do void $ char '['
+             uriStr ← many1 (noneOf " \t]")
+             void $ skipMany (oneOf " \t")
+             label ← option Nothing $
+                     Just <$> many1 (noneOf "]")
+             case parseURI uriStr of
+               Just uri → char ']' *> pure (ExternalLink uri (T.pack <$> label))
+               Nothing  → pzero <?> "absolute URI"
+          <?>
+          "external link"
+
+inlineCmd ∷ CommandTypeOf → Parser InlineElement
+inlineCmd cmdTypeOf
+    = (try $ do (tagName, tagAttrs) ← openTag
+                case cmdTypeOf tagName of
+                  Just InlineCommandType
+                      → do xs ← contents
+                           closeTag tagName
+                           pure $ InlineCmd InlineCommand {
+                                         iCmdName       = tagName
+                                       , iCmdAttributes = tagAttrs
+                                       , iCmdContents   = xs
+                                       }
+                  _   → pzero
+      )
+      <|>
+      (try $ do (tagName, tagAttrs) <- emptyTag
+                case cmdTypeOf tagName of
+                  Just InlineCommandType
+                      -> return $ InlineCmd InlineCommand {
+                                         iCmdName       = tagName
+                                       , iCmdAttributes = tagAttrs
+                                       , iCmdContents   = []
+                                       }
+                  _   -> pzero
+      )
+      <?>
+      "inline command"
+    where
+      contents ∷ Parser [InlineElement]
+      contents = ((:) <$> inlineElement cmdTypeOf ⊛ contents)
+                 <|>
+                 (comment *> contents)
+                 <|>
+                 ((Text "\n" :) <$> (newline *> contents))
+                 <|>
+                 pure []
+
+openTag ∷ Parser (Text, [Attribute])
+openTag = try $ do void $ char '<'
+                   void $ many space
+                   name ← many1 letter
+                   void $ many space
+                   attrs ← many $ do attr ← tagAttr
+                                     void $ many space
+                                     pure attr
+                   void $ char '>'
+                   return (T.pack name, attrs)
+
+emptyTag ∷ Parser (Text, [Attribute])
+emptyTag = try $ do void $ char '<'
+                    void $ many space
+                    name ← many1 letter
+                    void $ many space
+                    attrs ← many $ do attr ← tagAttr
+                                      void $ many space
+                                      pure attr
+                    void $ char '/'
+                    void $ many space
+                    void $ char '>'
+                    return (T.pack name, attrs)
+
+closeTag ∷ Text → Parser ()
+closeTag (T.unpack → name)
+    = try ( char '<'    *>
+            many space  *>
+            char '/'    *>
+            many space  *>
+            string name *>
+            many space  *>
+            char '>'    *>
+            pure ()
+          )
+
+tagAttr ∷ Parser (CI Text, Text)
+tagAttr = do name ← many1 letter
+             void $ char '='
+             void $ char '"'
+             value ← many (satisfy (≠ '"'))
+             void $ char '"'
+             return (CI.mk $ T.pack name, T.pack value)
+
 
 comment :: Parser ()
 comment = (try (string "<!--") >> skipTillEnd 1)
@@ -95,18 +457,21 @@ comment = (try (string "<!--") >> skipTillEnd 1)
           "comment"
     where
       skipTillEnd :: Int -> Parser ()
-      skipTillEnd level = ( (try (string "<!--") >> skipTillEnd (level + 1))
-                            <|>
-                            (try (string "-->") >> case level of
-                                                     1 -> return ()
-                                                     n -> skipTillEnd (n - 1))
-                            <|>
-                            (anyChar >> skipTillEnd level)
-                          )
+      skipTillEnd level = (try (string "<!--") >> skipTillEnd (level + 1))
+                          <|>
+                          (try (string "-->") >> case level of
+                                                   1 -> return ()
+                                                   n -> skipTillEnd (n - 1))
+                          <|>
+                          (anyChar >> skipTillEnd level)
+
+
+blockSymbols :: [Char]
+blockSymbols = " =-*#;<"
 
 
 inlineSymbols :: [Char]
-inlineSymbols = "<["
+inlineSymbols = "<[:'"
 
 -- white space
 ws :: Parser ()
@@ -117,7 +482,6 @@ ws = skipMany ( (oneOf " \t" >> return ())
 
 -- end of line
 eol :: Parser ()
-eol = ( (many1 newline >> return ())
-        <|>
-        eof
-      )
+eol = (newline >> return ())
+      <|>
+      eof