]> gitweb @ CieloNegro.org - Rakka.git/blobdiff - Rakka/Wiki/Parser.hs
merge branch origin/master
[Rakka.git] / Rakka / Wiki / Parser.hs
index db26a497b7eb66e15107b12b8fcd023f4eddfaa5..e7ca8ebbc456e063587cfed6767e0ee1bff92adb 100644 (file)
@@ -1,68 +1,81 @@
+{-# LANGUAGE
+    OverloadedStrings
+  , RankNTypes
+  , UnicodeSyntax
+  , ViewPatterns
+  #-}
 module Rakka.Wiki.Parser
-    ( wikiPage
+    ( CommandTypeOf
+    , wikiPage
     )
     where
-
-import           Data.Maybe
-import           Rakka.Wiki
-import           Text.ParserCombinators.Parsec
-
-
-wikiPage :: Parser WikiPage
-wikiPage = do xs <- many (try blockElement)
-              skipMany ( comment
-                         <|>
-                         (newline >> return ())
-                       )
-              eof
-              return xs
+-- FIXME: use attoparsec
+import Control.Applicative hiding ((<|>), many)
+import Control.Applicative.Unicode
+import Control.Monad
+import Data.CaseInsensitive (CI)
+import qualified Data.CaseInsensitive as CI
+import Data.Maybe
+import Data.Monoid.Unicode ((⊕))
+import Data.Text (Text)
+import qualified Data.Text as T
+import Network.URI hiding (fragment)
+import Prelude.Unicode
+import Rakka.Wiki
+import Text.ParserCombinators.Parsec hiding (label)
+
+type CommandTypeOf = Alternative f ⇒ Text → f CommandType
+
+wikiPage :: CommandTypeOf -> Parser WikiPage
+wikiPage cmdTypeOf
+    = do xs <- many (blockElement cmdTypeOf)
+         skipMany ( comment
+                    <|>
+                    (newline >> return ())
+                  )
+         eof
+         return xs
 
 
-blockElement :: Parser BlockElement
-blockElement = skipMany ( comment
+blockElement :: CommandTypeOf -> Parser BlockElement
+blockElement cmdTypeOf
+    = try $ do skipMany ( comment
                           <|>
                           (newline >> return ())
                         )
-               >>
-               ( heading
-                 <|>
-                 horizontalLine
-                 <|>
-                 listElement
-                 <|>
-                 definitionList
-                 <|>
-                 pdata
-                 <|>
-                 leadingSpaced
-                 <|>
-                 paragraph
-               )
-
-
-heading :: Parser BlockElement
+               foldr (<|>) pzero [ heading
+                                 , horizontalLine
+                                 , listElement cmdTypeOf
+                                 , definitionList cmdTypeOf
+                                 , verbatim
+                                 , leadingSpaced cmdTypeOf
+                                 , paragraph cmdTypeOf
+                                 , blockCmd cmdTypeOf
+                                 ]
+
+heading ∷ Parser BlockElement
 heading = foldr (<|>) pzero (map heading' [1..5])
           <?>
           "heading"
     where
-      heading' :: Int -> Parser BlockElement
-      heading' n = do try $ do count n (char '=')
-                               notFollowedBy (char '=')
+      heading' ∷ Int → Parser BlockElement
+      heading' n = do try ( void (count n (char '=')) *>
+                            notFollowedBy (char '=')
+                          )
                       ws
-                      x  <- notFollowedBy (char '=') >> anyChar
-                      xs <- manyTill anyChar (try $ ws >> ( count n (char '=')
-                                                            <?>
-                                                            ("trailing " ++ take n (repeat '='))
-                                                          )
-                                             )
+                      x  ← notFollowedBy (char '=') *> anyChar
+                      xs ← manyTill anyChar (try $ ws *> ( count n (char '=')
+                                                           <?>
+                                                           ("trailing " ++ replicate n '=')
+                                                         )
+                                            )
                       ws
                       eol
-                      return (Heading n (x:xs))
-
+                      pure ∘ Heading n $ T.pack (x:xs)
 
 horizontalLine :: Parser BlockElement
-horizontalLine = try ( do count 4 (char '-')
-                          many (char '-')
+horizontalLine = try ( do _ <- count 4 (char '-')
+                          _ <- many (char '-')
                           ws
                           eol
                           return HorizontalLine
@@ -71,28 +84,29 @@ horizontalLine = try ( do count 4 (char '-')
                  "horizontal line"
 
 
-listElement :: Parser BlockElement
-listElement = listElement' [] >>= return . List
+listElement :: CommandTypeOf -> Parser BlockElement
+listElement cmdTypeOf = listElement' []
     where
-      listElement' :: [Char] -> Parser ListElement
+      listElement' :: [Char] -> Parser BlockElement
       listElement' stack
-          = try $ do t  <- oneOf "*#"
-                     ws
-                     xs <- items (stack ++ [t])
-                     return (ListElement (toType t) xs)
+          = do t  <- oneOf "*#"
+               ws
+               xs <- items (stack ++ [t])
+               return (List (toType t) xs)
 
-      -- ListItem の終了條件は、
       items :: [Char] -> Parser [ListItem]
-      items stack = do xs     <- many1 inlineElement
+      items stack = do xs     <- many1 $ inlineElement cmdTypeOf
                        nested <- option Nothing
-                                 $ try $ do newline
-                                            string stack
-                                            listElement' stack >>= return . Just
+                                 $ try $ do skipMany comment
+                                            _ <- newline
+                                            _ <- string stack
+                                            liftM Just (listElement' stack)
                        rest <- items stack
-                       return $ (map Right xs ++ map Left (catMaybes [nested])) : rest
+                       return $ (map Inline xs ++ map Block (catMaybes [nested])) : rest
                     <|>
-                    (try $ do newline
-                              string stack
+                    (try $ do skipMany comment
+                              _ <- newline
+                              _ <- string stack
                               ws
                               items stack
                     )
@@ -102,15 +116,16 @@ listElement = listElement' [] >>= return . List
       toType :: Char -> ListType
       toType '*' = Bullet
       toType '#' = Numbered
+      toType _   = undefined
 
 
-definitionList :: Parser BlockElement
-definitionList = many1 definition >>= return . DefinitionList
+definitionList :: CommandTypeOf -> Parser BlockElement
+definitionList cmdTypeOf = liftM DefinitionList (many1 definition)
     where
       definition :: Parser Definition
-      definition = do char ';'
-                      ws
-                      tHead <- inlineElement
+      definition = do _     <- char ';'
+                      _     <- ws
+                      tHead <- inlineElement cmdTypeOf
                       tRest <- term
                       d     <- description
                       return (Definition (tHead:tRest) d)
@@ -122,20 +137,20 @@ definitionList = many1 definition >>= return . DefinitionList
              <|>
              (newline >> char ':' >> ws >> return [])
              <|>
-             do x  <- inlineElement
+             do x  <- inlineElement cmdTypeOf
                 xs <- term
                 return (x:xs)
              <?>
              "term to be defined"
 
       description :: Parser [InlineElement]
-      description = do x  <- inlineElement
+      description = do x  <- inlineElement cmdTypeOf
                        xs <- description
                        return (x:xs)
                     <|>
-                    try ( do newline
-                             char ':'
-                             ws
+                    try ( do _  <- newline
+                             _  <- char ':'
+                             _  <- ws
                              xs <- description
                              return (Text "\n" : xs)
                         )
@@ -147,71 +162,63 @@ definitionList = many1 definition >>= return . DefinitionList
                     "description of term"
 
 
-pdata :: Parser BlockElement
-pdata = do try (string "<![PDATA[")
-           many (oneOf " \t\n")
-           x <- pdata'
-           return (Preformatted [Text x])
+verbatim ∷ Parser BlockElement
+verbatim = try (string "<!verbatim[") *>
+           many (oneOf " \t\n")       *>
+           (Preformatted ∘ (:[]) ∘ Text ∘ T.pack <$> verbatim')
     where
-      pdata' :: Parser String
-      pdata' = do try (many (oneOf " \t\n") >> string "]]>")
-                  return []
-               <|>
-               do x  <- anyChar
-                  xs <- pdata'
-                  return (x:xs)
-
-
-leadingSpaced :: Parser BlockElement
-leadingSpaced = (char ' ' >> leadingSpaced' >>= return . Preformatted)
-                <?>
-                "leading space"
+      verbatim' :: Parser String
+      verbatim' = try (many (oneOf " \t\n") *> string "]>") *> pure []
+                  <|>
+                  ((:) <$> anyChar ⊛ verbatim')
+
+
+leadingSpaced :: CommandTypeOf -> Parser BlockElement
+leadingSpaced cmdTypeOf = liftM Preformatted (char ' ' >> leadingSpaced')
+                          <?>
+                          "leading space"
     where
       leadingSpaced' :: Parser [InlineElement]
-      leadingSpaced' = do x  <- inlineElement
+      leadingSpaced' = do x  <- inlineElement cmdTypeOf
                           xs <- leadingSpaced'
                           return (x:xs)
                        <|>
-                       try ( newline
-                             >>
-                             char ' '
-                             >>
-                             leadingSpaced'
-                             >>=
-                             return . (Text "\n" :)
+                       try ( liftM (Text "\n" :) ( newline
+                                                   >>
+                                                   char ' '
+                                                   >>
+                                                   leadingSpaced'
+                                                 )
                            )
                        <|>
                        return []
 
 
-blockTag :: Parser BlockElement
-blockTag = pzero -- not implemented
-
-
-paragraph :: Parser BlockElement
-paragraph = paragraph' >>= return . Paragraph
+paragraph :: CommandTypeOf -> Parser BlockElement
+paragraph cmdTypeOf = liftM Paragraph paragraph'
     where
       paragraph' :: Parser [InlineElement]
-      paragraph' = do x  <- inlineElement
-                      xs <- try ( do newline
-                                     eof
+      paragraph' = do x  <- inlineElement cmdTypeOf
+                      xs <- try ( do _ <- newline
+                                     _ <- eof
                                      return []
                                   -- \n で文字列が終はってゐたら、ここ
                                   -- で終了。
                                 )
                             <|>
-                            try ( do newline
-                                     ((oneOf ('\n':blockSymbols) >> pzero) <|> return ())
-                                     ((blockTag                  >> pzero) <|> return ())
+                            try ( do _  <- newline
+                                     _  <- ((oneOf ('\n':blockSymbols) >> pzero) <|> return ())
                                      ys <- (paragraph' <|> return [])
                                      return (Text "\n" : ys)
-                                  -- \n があり、その次に \n、ブロックタ
-                                  -- グまたは blockSymbols があれば、
-                                  -- fail して 最初の newline を讀んだ
-                                  -- 所まで卷き戻す。
+                                  -- \n があり、その次に \n または
+                                  -- blockSymbols があれば、fail して最
+                                  -- 初の newline を讀んだ所まで卷き戻
+                                  -- す。oneOf が一文字消費しているので、
+                                  -- <|> は右辺を適用せずに try まで戻
+                                  -- る。
                                 )
                             <|>
-                            try paragraph'
+                            paragraph'
                             -- それ以外の場合は次の inlineElement から
                             -- を讀んで見る。但し一つも無くても良い。
                             <|>
@@ -219,67 +226,104 @@ paragraph = paragraph' >>= return . Paragraph
                       return (x:xs)
 
 
-inlineElement :: Parser InlineElement
-inlineElement = skipMany comment
-                >>
-                ( cdata
-                  <|>
-                  apostrophes
-                  <|>
-                  text
-                  <|>
-                  pageLink
-                )
-
-
-cdata :: Parser InlineElement
-cdata = try (string "<![CDATA[") >> cdata' >>= return . Text
+blockCmd :: CommandTypeOf -> Parser BlockElement
+blockCmd cmdTypeOf
+    = (try $ do (tagName, tagAttrs) <- openTag
+                case cmdTypeOf tagName of
+                  Just BlockCommandType
+                      -> do xs <- contents
+                            closeTag tagName
+                            return $ BlockCmd BlockCommand {
+                                         bCmdName       = tagName
+                                       , bCmdAttributes = tagAttrs
+                                       , bCmdContents   = xs
+                                       }
+                  Just InlineCommandType
+                      -> pzero
+                  _   -> return $ undefinedCmdErr tagName
+      )
+      <|>
+      (try $ do (tagName, tagAttrs) <- emptyTag
+                case cmdTypeOf tagName of
+                  Just BlockCommandType
+                      -> return $ BlockCmd BlockCommand {
+                                         bCmdName       = tagName
+                                       , bCmdAttributes = tagAttrs
+                                       , bCmdContents   = []
+                                       }
+                  Just InlineCommandType
+                      -> pzero
+                  _   -> return $ undefinedCmdErr tagName
+      )
+      <?>
+      "block command"
     where
-      cdata' :: Parser String
-      cdata' = do try (string "]]>")
-                  return []
-               <|>
-               do x  <- anyChar
-                  xs <- cdata'
-                  return (x:xs)
-
-
-text :: Parser InlineElement
-text = ( char ':'
-         >>
-         many (noneOf ('\n':inlineSymbols))
-         >>=
-         return . Text . (':' :)
-         -- 定義リストとの關係上、コロンは先頭にしか來れない。
-       )
+      contents ∷ Parser [BlockElement]
+      contents = ((:) <$> blockElement cmdTypeOf ⊛ contents)
+                 <|>
+                 (newline *> contents)
+                 <|>
+                 (comment *> contents)
+                 <|>
+                 pure []
+
+      undefinedCmdErr ∷ Text → BlockElement
+      undefinedCmdErr name
+          = Div [("class", "error")]
+            [ Block (Paragraph [Text ("The command `" ⊕ name ⊕ "' is not defined. " ⊕
+                                      "Make sure you haven't mistyped.")
+                               ])
+            ]
+
+inlineElement :: CommandTypeOf -> Parser InlineElement
+inlineElement cmdTypeOf
+    = try $ do skipMany comment
+               foldr (<|>) pzero [ nowiki
+                                 , apostrophes cmdTypeOf
+                                 , text
+                                 , objLink
+                                 , pageLink
+                                 , extLink
+                                 , inlineCmd cmdTypeOf
+                                 ]
+
+nowiki ∷ Parser InlineElement
+nowiki = Text ∘ T.pack <$> (try (string "<!nowiki[") *> nowiki')
+    where
+      nowiki' ∷ Parser String
+      nowiki' = (try (string "]>") *> pure [])
+                <|>
+                ((:) <$> anyChar ⊛ nowiki')
+
+text ∷ Parser InlineElement
+text = (Text ∘ T.pack ∘ (':' :) <$> ( char ':' *>
+                                      many (noneOf ('\n':inlineSymbols))
+                                    ))
+       -- 定義リストとの關係上、コロンは先頭にしか來られない。
        <|>
-       ( many1 (noneOf ('\n':inlineSymbols))
-         >>=
-         return . Text
-       )
+       (Text ∘ T.pack <$> (many1 (noneOf ('\n':inlineSymbols))))
        <?>
        "text"
 
-
-apostrophes :: Parser InlineElement
-apostrophes = foldr (<|>) pzero (map try [apos1, apos2, apos3, apos4, apos5])
+apostrophes :: CommandTypeOf -> Parser InlineElement
+apostrophes cmdTypeOf = foldr (<|>) pzero (map try [apos1, apos2, apos3, apos4, apos5])
     where
       apos1 = apos 1 >> return (Text "'")
 
       apos2 = do apos 2
-                 xs <- many1 inlineElement
+                 xs <- many1 $ inlineElement cmdTypeOf
                  apos 2
                  return (Italic xs)
 
       apos3 = do apos 3
-                 xs <- many1 inlineElement
+                 xs <- many1 $ inlineElement cmdTypeOf
                  apos 3
                  return (Bold xs)
 
       apos4 = apos 4 >> return (Text "'")
 
       apos5 = do apos 5
-                 xs <- many1 inlineElement
+                 xs <- many1 $ inlineElement cmdTypeOf
                  apos 5
                  return (Italic [Bold xs])
 
@@ -287,24 +331,125 @@ apostrophes = foldr (<|>) pzero (map try [apos1, apos2, apos3, apos4, apos5])
       apos n = count n (char '\'') >> notFollowedBy (char '\'')
 
 
-pageLink :: Parser InlineElement
-pageLink = do try (string "[[")
-              page     <- option Nothing 
-                          (many1 (noneOf "#|]") >>= return . Just)
-              fragment <- option Nothing
-                          (char '#' >> many1 (noneOf "|]") >>= return . Just)
-              text     <- option Nothing
-                          (char '|' >> many1 (noneOf "]") >>= return . Just)
-
-              case (page, fragment) of
-                (Nothing, Nothing) -> pzero
-                (_, _)             -> return ()
-
-              string "]]"
-              return $ PageLink page fragment text
+objLink ∷ Parser InlineElement
+objLink = do void $ try (string "[[[")
+             page  ← many1 (noneOf "|]")
+             label ← option Nothing $
+                     Just <$> (char '|' *> many1 (satisfy (≠ ']')))
+             void $ string "]]]"
+             pure $ ObjectLink (T.pack page) (T.pack <$> label)
+          <?>
+          "object link"
+
+pageLink ∷ Parser InlineElement
+pageLink = do void $ try (string "[[")
+              page     ← option Nothing $
+                         Just <$> many1 (noneOf "#|]")
+              fragment ← option Nothing $
+                         Just <$> (char '#' *> many1 (noneOf "|]"))
+              label    ← option Nothing $
+                         Just <$> (char '|' *> many1 (satisfy (≠ ']')))
+              when (isNothing page ∧ isNothing fragment) (∅)
+              void $ string "]]"
+              pure $ PageLink (T.pack <$> page    )
+                              (T.pack <$> fragment)
+                              (T.pack <$> label   )
            <?>
            "page link"
 
+extLink ∷ Parser InlineElement
+extLink = do void $ char '['
+             uriStr ← many1 (noneOf " \t]")
+             void $ skipMany (oneOf " \t")
+             label ← option Nothing $
+                     Just <$> many1 (noneOf "]")
+             case parseURI uriStr of
+               Just uri → char ']' *> pure (ExternalLink uri (T.pack <$> label))
+               Nothing  → pzero <?> "absolute URI"
+          <?>
+          "external link"
+
+inlineCmd ∷ CommandTypeOf → Parser InlineElement
+inlineCmd cmdTypeOf
+    = (try $ do (tagName, tagAttrs) ← openTag
+                case cmdTypeOf tagName of
+                  Just InlineCommandType
+                      → do xs ← contents
+                           closeTag tagName
+                           pure $ InlineCmd InlineCommand {
+                                         iCmdName       = tagName
+                                       , iCmdAttributes = tagAttrs
+                                       , iCmdContents   = xs
+                                       }
+                  _   → pzero
+      )
+      <|>
+      (try $ do (tagName, tagAttrs) <- emptyTag
+                case cmdTypeOf tagName of
+                  Just InlineCommandType
+                      -> return $ InlineCmd InlineCommand {
+                                         iCmdName       = tagName
+                                       , iCmdAttributes = tagAttrs
+                                       , iCmdContents   = []
+                                       }
+                  _   -> pzero
+      )
+      <?>
+      "inline command"
+    where
+      contents ∷ Parser [InlineElement]
+      contents = ((:) <$> inlineElement cmdTypeOf ⊛ contents)
+                 <|>
+                 (comment *> contents)
+                 <|>
+                 ((Text "\n" :) <$> (newline *> contents))
+                 <|>
+                 pure []
+
+openTag ∷ Parser (Text, [Attribute])
+openTag = try $ do void $ char '<'
+                   void $ many space
+                   name ← many1 letter
+                   void $ many space
+                   attrs ← many $ do attr ← tagAttr
+                                     void $ many space
+                                     pure attr
+                   void $ char '>'
+                   return (T.pack name, attrs)
+
+emptyTag ∷ Parser (Text, [Attribute])
+emptyTag = try $ do void $ char '<'
+                    void $ many space
+                    name ← many1 letter
+                    void $ many space
+                    attrs ← many $ do attr ← tagAttr
+                                      void $ many space
+                                      pure attr
+                    void $ char '/'
+                    void $ many space
+                    void $ char '>'
+                    return (T.pack name, attrs)
+
+closeTag ∷ Text → Parser ()
+closeTag (T.unpack → name)
+    = try ( char '<'    *>
+            many space  *>
+            char '/'    *>
+            many space  *>
+            string name *>
+            many space  *>
+            char '>'    *>
+            pure ()
+          )
+
+tagAttr ∷ Parser (CI Text, Text)
+tagAttr = do name ← many1 letter
+             void $ char '='
+             void $ char '"'
+             value ← many (satisfy (≠ '"'))
+             void $ char '"'
+             return (CI.mk $ T.pack name, T.pack value)
+
 
 comment :: Parser ()
 comment = (try (string "<!--") >> skipTillEnd 1)
@@ -312,18 +457,17 @@ comment = (try (string "<!--") >> skipTillEnd 1)
           "comment"
     where
       skipTillEnd :: Int -> Parser ()
-      skipTillEnd level = ( (try (string "<!--") >> skipTillEnd (level + 1))
-                            <|>
-                            (try (string "-->") >> case level of
-                                                     1 -> return ()
-                                                     n -> skipTillEnd (n - 1))
-                            <|>
-                            (anyChar >> skipTillEnd level)
-                          )
+      skipTillEnd level = (try (string "<!--") >> skipTillEnd (level + 1))
+                          <|>
+                          (try (string "-->") >> case level of
+                                                   1 -> return ()
+                                                   n -> skipTillEnd (n - 1))
+                          <|>
+                          (anyChar >> skipTillEnd level)
 
 
 blockSymbols :: [Char]
-blockSymbols = " =-*#;"
+blockSymbols = " =-*#;<"
 
 
 inlineSymbols :: [Char]
@@ -338,7 +482,6 @@ ws = skipMany ( (oneOf " \t" >> return ())
 
 -- end of line
 eol :: Parser ()
-eol = ( (newline >> return ())
-        <|>
-        eof
-      )
+eol = (newline >> return ())
+      <|>
+      eof