]> gitweb @ CieloNegro.org - Rakka.git/blobdiff - Rakka/Wiki/Parser.hs
merge branch origin/master
[Rakka.git] / Rakka / Wiki / Parser.hs
index 313521c23cf89971862f4091657d9ef22bef5348..e7ca8ebbc456e063587cfed6767e0ee1bff92adb 100644 (file)
@@ -1,16 +1,30 @@
+{-# LANGUAGE
+    OverloadedStrings
+  , RankNTypes
+  , UnicodeSyntax
+  , ViewPatterns
+  #-}
 module Rakka.Wiki.Parser
     ( CommandTypeOf
     , wikiPage
     )
     where
-
-import           Data.Maybe
-import           Rakka.Wiki
-import           Text.ParserCombinators.Parsec
-
-
-type CommandTypeOf = String -> Maybe CommandType
-
+-- FIXME: use attoparsec
+import Control.Applicative hiding ((<|>), many)
+import Control.Applicative.Unicode
+import Control.Monad
+import Data.CaseInsensitive (CI)
+import qualified Data.CaseInsensitive as CI
+import Data.Maybe
+import Data.Monoid.Unicode ((⊕))
+import Data.Text (Text)
+import qualified Data.Text as T
+import Network.URI hiding (fragment)
+import Prelude.Unicode
+import Rakka.Wiki
+import Text.ParserCombinators.Parsec hiding (label)
+
+type CommandTypeOf = Alternative f ⇒ Text → f CommandType
 
 wikiPage :: CommandTypeOf -> Parser WikiPage
 wikiPage cmdTypeOf
@@ -33,36 +47,35 @@ blockElement cmdTypeOf
                                  , horizontalLine
                                  , listElement cmdTypeOf
                                  , definitionList cmdTypeOf
-                                 , pdata
+                                 , verbatim
                                  , leadingSpaced cmdTypeOf
                                  , paragraph cmdTypeOf
                                  , blockCmd cmdTypeOf
                                  ]
 
-
-heading :: Parser BlockElement
+heading ∷ Parser BlockElement
 heading = foldr (<|>) pzero (map heading' [1..5])
           <?>
           "heading"
     where
-      heading' :: Int -> Parser BlockElement
-      heading' n = do try $ do count n (char '=')
-                               notFollowedBy (char '=')
+      heading' ∷ Int → Parser BlockElement
+      heading' n = do try ( void (count n (char '=')) *>
+                            notFollowedBy (char '=')
+                          )
                       ws
-                      x  <- notFollowedBy (char '=') >> anyChar
-                      xs <- manyTill anyChar (try $ ws >> ( count n (char '=')
-                                                            <?>
-                                                            ("trailing " ++ take n (repeat '='))
-                                                          )
-                                             )
+                      x  ← notFollowedBy (char '=') *> anyChar
+                      xs ← manyTill anyChar (try $ ws *> ( count n (char '=')
+                                                           <?>
+                                                           ("trailing " ++ replicate n '=')
+                                                         )
+                                            )
                       ws
                       eol
-                      return (Heading n (x:xs))
-
+                      pure ∘ Heading n $ T.pack (x:xs)
 
 horizontalLine :: Parser BlockElement
-horizontalLine = try ( do count 4 (char '-')
-                          many (char '-')
+horizontalLine = try ( do _ <- count 4 (char '-')
+                          _ <- many (char '-')
                           ws
                           eol
                           return HorizontalLine
@@ -72,29 +85,28 @@ horizontalLine = try ( do count 4 (char '-')
 
 
 listElement :: CommandTypeOf -> Parser BlockElement
-listElement cmdTypeOf = listElement' [] >>= return . List
+listElement cmdTypeOf = listElement' []
     where
-      listElement' :: [Char] -> Parser ListElement
+      listElement' :: [Char] -> Parser BlockElement
       listElement' stack
           = do t  <- oneOf "*#"
                ws
                xs <- items (stack ++ [t])
-               return (ListElement (toType t) xs)
+               return (List (toType t) xs)
 
-      -- ListItem の終了條件は、
       items :: [Char] -> Parser [ListItem]
       items stack = do xs     <- many1 $ inlineElement cmdTypeOf
                        nested <- option Nothing
                                  $ try $ do skipMany comment
-                                            newline
-                                            string stack
-                                            listElement' stack >>= return . Just
+                                            _ <- newline
+                                            _ <- string stack
+                                            liftM Just (listElement' stack)
                        rest <- items stack
-                       return $ (map Right xs ++ map Left (catMaybes [nested])) : rest
+                       return $ (map Inline xs ++ map Block (catMaybes [nested])) : rest
                     <|>
                     (try $ do skipMany comment
-                              newline
-                              string stack
+                              _ <- newline
+                              _ <- string stack
                               ws
                               items stack
                     )
@@ -104,14 +116,15 @@ listElement cmdTypeOf = listElement' [] >>= return . List
       toType :: Char -> ListType
       toType '*' = Bullet
       toType '#' = Numbered
+      toType _   = undefined
 
 
 definitionList :: CommandTypeOf -> Parser BlockElement
-definitionList cmdTypeOf = many1 definition >>= return . DefinitionList
+definitionList cmdTypeOf = liftM DefinitionList (many1 definition)
     where
       definition :: Parser Definition
-      definition = do char ';'
-                      ws
+      definition = do _     <- char ';'
+                      _     <- ws
                       tHead <- inlineElement cmdTypeOf
                       tRest <- term
                       d     <- description
@@ -135,9 +148,9 @@ definitionList cmdTypeOf = many1 definition >>= return . DefinitionList
                        xs <- description
                        return (x:xs)
                     <|>
-                    try ( do newline
-                             char ':'
-                             ws
+                    try ( do _  <- newline
+                             _  <- char ':'
+                             _  <- ws
                              xs <- description
                              return (Text "\n" : xs)
                         )
@@ -149,23 +162,19 @@ definitionList cmdTypeOf = many1 definition >>= return . DefinitionList
                     "description of term"
 
 
-pdata :: Parser BlockElement
-pdata = do try (string "<![PDATA[")
-           many (oneOf " \t\n")
-           x <- pdata'
-           return (Preformatted [Text x])
+verbatim ∷ Parser BlockElement
+verbatim = try (string "<!verbatim[") *>
+           many (oneOf " \t\n")       *>
+           (Preformatted ∘ (:[]) ∘ Text ∘ T.pack <$> verbatim')
     where
-      pdata' :: Parser String
-      pdata' = do try (many (oneOf " \t\n") >> string "]]>")
-                  return []
-               <|>
-               do x  <- anyChar
-                  xs <- pdata'
-                  return (x:xs)
+      verbatim' :: Parser String
+      verbatim' = try (many (oneOf " \t\n") *> string "]>") *> pure []
+                  <|>
+                  ((:) <$> anyChar ⊛ verbatim')
 
 
 leadingSpaced :: CommandTypeOf -> Parser BlockElement
-leadingSpaced cmdTypeOf = (char ' ' >> leadingSpaced' >>= return . Preformatted)
+leadingSpaced cmdTypeOf = liftM Preformatted (char ' ' >> leadingSpaced')
                           <?>
                           "leading space"
     where
@@ -174,42 +183,39 @@ leadingSpaced cmdTypeOf = (char ' ' >> leadingSpaced' >>= return . Preformatted)
                           xs <- leadingSpaced'
                           return (x:xs)
                        <|>
-                       try ( newline
-                             >>
-                             char ' '
-                             >>
-                             leadingSpaced'
-                             >>=
-                             return . (Text "\n" :)
+                       try ( liftM (Text "\n" :) ( newline
+                                                   >>
+                                                   char ' '
+                                                   >>
+                                                   leadingSpaced'
+                                                 )
                            )
                        <|>
                        return []
 
 
-blockCommand :: Parser BlockElement
-blockCommand = pzero -- not implemented
-
-
 paragraph :: CommandTypeOf -> Parser BlockElement
-paragraph cmdTypeOf = paragraph' >>= return . Paragraph
+paragraph cmdTypeOf = liftM Paragraph paragraph'
     where
       paragraph' :: Parser [InlineElement]
       paragraph' = do x  <- inlineElement cmdTypeOf
-                      xs <- try ( do newline
-                                     eof
+                      xs <- try ( do _ <- newline
+                                     _ <- eof
                                      return []
                                   -- \n で文字列が終はってゐたら、ここ
                                   -- で終了。
                                 )
                             <|>
-                            try ( do newline
-                                     ((oneOf ('\n':blockSymbols) >> pzero) <|> return ())
+                            try ( do _  <- newline
+                                     _  <- ((oneOf ('\n':blockSymbols) >> pzero) <|> return ())
                                      ys <- (paragraph' <|> return [])
                                      return (Text "\n" : ys)
                                   -- \n があり、その次に \n または
-                                  -- blockSymbols があれば、fail して
-                                  -- 最初の newline を讀んだ所まで卷き
-                                  -- 戻す。
+                                  -- blockSymbols があれば、fail して最
+                                  -- 初の newline を讀んだ所まで卷き戻
+                                  -- す。oneOf が一文字消費しているので、
+                                  -- <|> は右辺を適用せずに try まで戻
+                                  -- る。
                                 )
                             <|>
                             paragraph'
@@ -227,77 +233,78 @@ blockCmd cmdTypeOf
                   Just BlockCommandType
                       -> do xs <- contents
                             closeTag tagName
-                            return $ BlockCmd BlockCommand {
+                            return $ BlockCmd BlockCommand {
                                          bCmdName       = tagName
                                        , bCmdAttributes = tagAttrs
                                        , bCmdContents   = xs
                                        }
-                  _   -> pzero
+                  Just InlineCommandType
+                      -> pzero
+                  _   -> return $ undefinedCmdErr tagName
       )
       <|>
       (try $ do (tagName, tagAttrs) <- emptyTag
                 case cmdTypeOf tagName of
                   Just BlockCommandType
-                      -> return $ BlockCmd BlockCommand {
+                      -> return $ BlockCmd BlockCommand {
                                          bCmdName       = tagName
                                        , bCmdAttributes = tagAttrs
                                        , bCmdContents   = []
                                        }
-                  _   -> pzero
+                  Just InlineCommandType
+                      -> pzero
+                  _   -> return $ undefinedCmdErr tagName
       )
       <?>
       "block command"
     where
-      contents :: Parser [BlockElement]
-      contents = do x  <- try $ blockElement cmdTypeOf
-                    xs <- contents
-                    return (x:xs)
+      contents  Parser [BlockElement]
+      contents = ((:) <$> blockElement cmdTypeOf ⊛ contents)
+                 <|>
+                 (newline *> contents)
                  <|>
-                 (comment >> contents)
+                 (comment *> contents)
                  <|>
-                 return []
+                 pure []
 
+      undefinedCmdErr ∷ Text → BlockElement
+      undefinedCmdErr name
+          = Div [("class", "error")]
+            [ Block (Paragraph [Text ("The command `" ⊕ name ⊕ "' is not defined. " ⊕
+                                      "Make sure you haven't mistyped.")
+                               ])
+            ]
 
 inlineElement :: CommandTypeOf -> Parser InlineElement
 inlineElement cmdTypeOf
     = try $ do skipMany comment
-               foldr (<|>) pzero [ cdata
+               foldr (<|>) pzero [ nowiki
                                  , apostrophes cmdTypeOf
                                  , text
+                                 , objLink
                                  , pageLink
+                                 , extLink
                                  , inlineCmd cmdTypeOf
                                  ]
 
-
-cdata :: Parser InlineElement
-cdata = try (string "<![CDATA[") >> cdata' >>= return . Text
+nowiki ∷ Parser InlineElement
+nowiki = Text ∘ T.pack <$> (try (string "<!nowiki[") *> nowiki')
     where
-      cdata' :: Parser String
-      cdata' = do try (string "]]>")
-                  return []
-               <|>
-               do x  <- anyChar
-                  xs <- cdata'
-                  return (x:xs)
-
-
-text :: Parser InlineElement
-text = ( char ':'
-         >>
-         many (noneOf ('\n':inlineSymbols))
-         >>=
-         return . Text . (':' :)
-         -- 定義リストとの關係上、コロンは先頭にしか來れない。
-       )
+      nowiki' ∷ Parser String
+      nowiki' = (try (string "]>") *> pure [])
+                <|>
+                ((:) <$> anyChar ⊛ nowiki')
+
+text ∷ Parser InlineElement
+text = (Text ∘ T.pack ∘ (':' :) <$> ( char ':' *>
+                                      many (noneOf ('\n':inlineSymbols))
+                                    ))
+       -- 定義リストとの關係上、コロンは先頭にしか來られない。
        <|>
-       ( many1 (noneOf ('\n':inlineSymbols))
-         >>=
-         return . Text
-       )
+       (Text ∘ T.pack <$> (many1 (noneOf ('\n':inlineSymbols))))
        <?>
        "text"
 
-
 apostrophes :: CommandTypeOf -> Parser InlineElement
 apostrophes cmdTypeOf = foldr (<|>) pzero (map try [apos1, apos2, apos3, apos4, apos5])
     where
@@ -324,44 +331,63 @@ apostrophes cmdTypeOf = foldr (<|>) pzero (map try [apos1, apos2, apos3, apos4,
       apos n = count n (char '\'') >> notFollowedBy (char '\'')
 
 
-pageLink :: Parser InlineElement
-pageLink = do try (string "[[")
-              page     <- option Nothing 
-                          (many1 (noneOf "#|]") >>= return . Just)
-              fragment <- option Nothing
-                          (char '#' >> many1 (noneOf "|]") >>= return . Just)
-              text     <- option Nothing
-                          (char '|' >> many1 (satisfy (/= ']')) >>= return . Just)
-
-              case (page, fragment) of
-                (Nothing, Nothing) -> pzero
-                (_, _)             -> return ()
-
-              string "]]"
-              return $ PageLink page fragment text
+objLink ∷ Parser InlineElement
+objLink = do void $ try (string "[[[")
+             page  ← many1 (noneOf "|]")
+             label ← option Nothing $
+                     Just <$> (char '|' *> many1 (satisfy (≠ ']')))
+             void $ string "]]]"
+             pure $ ObjectLink (T.pack page) (T.pack <$> label)
+          <?>
+          "object link"
+
+pageLink ∷ Parser InlineElement
+pageLink = do void $ try (string "[[")
+              page     ← option Nothing $
+                         Just <$> many1 (noneOf "#|]")
+              fragment ← option Nothing $
+                         Just <$> (char '#' *> many1 (noneOf "|]"))
+              label    ← option Nothing $
+                         Just <$> (char '|' *> many1 (satisfy (≠ ']')))
+              when (isNothing page ∧ isNothing fragment) (∅)
+              void $ string "]]"
+              pure $ PageLink (T.pack <$> page    )
+                              (T.pack <$> fragment)
+                              (T.pack <$> label   )
            <?>
            "page link"
 
+extLink ∷ Parser InlineElement
+extLink = do void $ char '['
+             uriStr ← many1 (noneOf " \t]")
+             void $ skipMany (oneOf " \t")
+             label ← option Nothing $
+                     Just <$> many1 (noneOf "]")
+             case parseURI uriStr of
+               Just uri → char ']' *> pure (ExternalLink uri (T.pack <$> label))
+               Nothing  → pzero <?> "absolute URI"
+          <?>
+          "external link"
 
-inlineCmd :: CommandTypeOf -> Parser InlineElement
+inlineCmd ∷ CommandTypeOf → Parser InlineElement
 inlineCmd cmdTypeOf
-    = (try $ do (tagName, tagAttrs) <- openTag
+    = (try $ do (tagName, tagAttrs)  openTag
                 case cmdTypeOf tagName of
                   Just InlineCommandType
-                      -> do xs <- contents
-                            closeTag tagName
-                            return $ InlineCmd $ InlineCommand {
+                      → do xs ← contents
+                           closeTag tagName
+                           pure $ InlineCmd InlineCommand {
                                          iCmdName       = tagName
                                        , iCmdAttributes = tagAttrs
                                        , iCmdContents   = xs
                                        }
-                  _   -> pzero
+                  _    pzero
       )
       <|>
       (try $ do (tagName, tagAttrs) <- emptyTag
                 case cmdTypeOf tagName of
                   Just InlineCommandType
-                      -> return $ InlineCmd InlineCommand {
+                      -> return $ InlineCmd InlineCommand {
                                          iCmdName       = tagName
                                        , iCmdAttributes = tagAttrs
                                        , iCmdContents   = []
@@ -371,62 +397,58 @@ inlineCmd cmdTypeOf
       <?>
       "inline command"
     where
-      contents :: Parser [InlineElement]
-      contents = do x  <- inlineElement cmdTypeOf
-                    xs <- contents
-                    return (x:xs)
+      contents ∷ Parser [InlineElement]
+      contents = ((:) <$> inlineElement cmdTypeOf ⊛ contents)
                  <|>
-                 (comment >> contents)
+                 (comment *> contents)
                  <|>
-                 (newline >> contents >>= return . (Text "\n" :))
+                 ((Text "\n" :) <$> (newline *> contents))
                  <|>
-                 return []
-
-
-openTag :: Parser (String, [Attribute])
-openTag = try $ do char '<'
-                   many space
-                   name  <- many1 letter
-                   many space
-                   attrs <- many $ do attr <- tagAttr
-                                      many space
-                                      return attr
-                   char '>'
-                   return (name, attrs)
-
-
-emptyTag :: Parser (String, [Attribute])
-emptyTag = try $ do char '<'
-                    many space
-                    name  <- many1 letter
-                    many space
-                    attrs <- many $ do attr <- tagAttr
-                                       many space
-                                       return attr
-                    char '/'
-                    many space
-                    char '>'
-                    return (name, attrs)
-
-
-closeTag :: String -> Parser ()
-closeTag name = try $ do char '<'
-                         many space
-                         char '/'
-                         many space
-                         string name
-                         many space
-                         char '>'
-                         return ()
-
-
-tagAttr :: Parser (String, String)
-tagAttr = do name  <- many1 letter
-             char '='
-             char '"'
-             value <- many (satisfy (/= '"'))
-             char '"'
-             return (name, value)
+                 pure []
+
+openTag ∷ Parser (Text, [Attribute])
+openTag = try $ do void $ char '<'
+                   void $ many space
+                   name ← many1 letter
+                   void $ many space
+                   attrs ← many $ do attr ← tagAttr
+                                     void $ many space
+                                     pure attr
+                   void $ char '>'
+                   return (T.pack name, attrs)
+
+emptyTag ∷ Parser (Text, [Attribute])
+emptyTag = try $ do void $ char '<'
+                    void $ many space
+                    name ← many1 letter
+                    void $ many space
+                    attrs ← many $ do attr ← tagAttr
+                                      void $ many space
+                                      pure attr
+                    void $ char '/'
+                    void $ many space
+                    void $ char '>'
+                    return (T.pack name, attrs)
+
+closeTag ∷ Text → Parser ()
+closeTag (T.unpack → name)
+    = try ( char '<'    *>
+            many space  *>
+            char '/'    *>
+            many space  *>
+            string name *>
+            many space  *>
+            char '>'    *>
+            pure ()
+          )
+
+tagAttr ∷ Parser (CI Text, Text)
+tagAttr = do name ← many1 letter
+             void $ char '='
+             void $ char '"'
+             value ← many (satisfy (≠ '"'))
+             void $ char '"'
+             return (CI.mk $ T.pack name, T.pack value)
 
 
 comment :: Parser ()
@@ -435,14 +457,13 @@ comment = (try (string "<!--") >> skipTillEnd 1)
           "comment"
     where
       skipTillEnd :: Int -> Parser ()
-      skipTillEnd level = ( (try (string "<!--") >> skipTillEnd (level + 1))
-                            <|>
-                            (try (string "-->") >> case level of
-                                                     1 -> return ()
-                                                     n -> skipTillEnd (n - 1))
-                            <|>
-                            (anyChar >> skipTillEnd level)
-                          )
+      skipTillEnd level = (try (string "<!--") >> skipTillEnd (level + 1))
+                          <|>
+                          (try (string "-->") >> case level of
+                                                   1 -> return ()
+                                                   n -> skipTillEnd (n - 1))
+                          <|>
+                          (anyChar >> skipTillEnd level)
 
 
 blockSymbols :: [Char]
@@ -461,7 +482,6 @@ ws = skipMany ( (oneOf " \t" >> return ())
 
 -- end of line
 eol :: Parser ()
-eol = ( (newline >> return ())
-        <|>
-        eof
-      )
+eol = (newline >> return ())
+      <|>
+      eof