7 module Rakka.Wiki.Parser
12 -- FIXME: use attoparsec
13 import Control.Applicative hiding ((<|>), many)
14 import Control.Applicative.Unicode
16 import Data.CaseInsensitive (CI)
17 import qualified Data.CaseInsensitive as CI
19 import Data.Monoid.Unicode ((⊕))
20 import Data.Text (Text)
21 import qualified Data.Text as T
22 import Network.URI hiding (fragment)
23 import Prelude.Unicode
25 import Text.ParserCombinators.Parsec hiding (label)
27 type CommandTypeOf = Alternative f ⇒ Text → f CommandType
29 wikiPage :: CommandTypeOf -> Parser WikiPage
31 = do xs <- many (blockElement cmdTypeOf)
34 (newline >> return ())
40 blockElement :: CommandTypeOf -> Parser BlockElement
41 blockElement cmdTypeOf
42 = try $ do skipMany ( comment
44 (newline >> return ())
46 foldr (<|>) pzero [ heading
48 , listElement cmdTypeOf
49 , definitionList cmdTypeOf
51 , leadingSpaced cmdTypeOf
56 heading ∷ Parser BlockElement
57 heading = foldr (<|>) pzero (map heading' [1..5])
61 heading' ∷ Int → Parser BlockElement
62 heading' n = do try ( void (count n (char '=')) *>
63 notFollowedBy (char '=')
66 x ← notFollowedBy (char '=') *> anyChar
67 xs ← manyTill anyChar (try $ ws *> ( count n (char '=')
69 ("trailing " ++ replicate n '=')
74 pure ∘ Heading n $ T.pack (x:xs)
76 horizontalLine :: Parser BlockElement
77 horizontalLine = try ( do _ <- count 4 (char '-')
87 listElement :: CommandTypeOf -> Parser BlockElement
88 listElement cmdTypeOf = listElement' []
90 listElement' :: [Char] -> Parser BlockElement
94 xs <- items (stack ++ [t])
95 return (List (toType t) xs)
97 items :: [Char] -> Parser [ListItem]
98 items stack = do xs <- many1 $ inlineElement cmdTypeOf
99 nested <- option Nothing
100 $ try $ do skipMany comment
103 liftM Just (listElement' stack)
105 return $ (map Inline xs ++ map Block (catMaybes [nested])) : rest
107 (try $ do skipMany comment
116 toType :: Char -> ListType
118 toType '#' = Numbered
122 definitionList :: CommandTypeOf -> Parser BlockElement
123 definitionList cmdTypeOf = liftM DefinitionList (many1 definition)
125 definition :: Parser Definition
126 definition = do _ <- char ';'
128 tHead <- inlineElement cmdTypeOf
131 return (Definition (tHead:tRest) d)
135 term :: Parser [InlineElement]
136 term = (char ':' >> ws >> return [])
138 (newline >> char ':' >> ws >> return [])
140 do x <- inlineElement cmdTypeOf
146 description :: Parser [InlineElement]
147 description = do x <- inlineElement cmdTypeOf
151 try ( do _ <- newline
155 return (Text "\n" : xs)
158 (newline >> return [])
162 "description of term"
165 verbatim ∷ Parser BlockElement
166 verbatim = try (string "<!verbatim[") *>
167 many (oneOf " \t\n") *>
168 (Preformatted ∘ (:[]) ∘ Text ∘ T.pack <$> verbatim')
170 verbatim' :: Parser String
171 verbatim' = try (many (oneOf " \t\n") *> string "]>") *> pure []
173 ((:) <$> anyChar ⊛ verbatim')
176 leadingSpaced :: CommandTypeOf -> Parser BlockElement
177 leadingSpaced cmdTypeOf = liftM Preformatted (char ' ' >> leadingSpaced')
181 leadingSpaced' :: Parser [InlineElement]
182 leadingSpaced' = do x <- inlineElement cmdTypeOf
186 try ( liftM (Text "\n" :) ( newline
197 paragraph :: CommandTypeOf -> Parser BlockElement
198 paragraph cmdTypeOf = liftM Paragraph paragraph'
200 paragraph' :: Parser [InlineElement]
201 paragraph' = do x <- inlineElement cmdTypeOf
202 xs <- try ( do _ <- newline
205 -- \n で文字列が終はってゐたら、ここ
209 try ( do _ <- newline
210 _ <- ((oneOf ('\n':blockSymbols) >> pzero) <|> return ())
211 ys <- (paragraph' <|> return [])
212 return (Text "\n" : ys)
213 -- \n があり、その次に \n または
214 -- blockSymbols があれば、fail して最
215 -- 初の newline を讀んだ所まで卷き戻
216 -- す。oneOf が一文字消費しているので、
217 -- <|> は右辺を適用せずに try まで戻
222 -- それ以外の場合は次の inlineElement から
223 -- を讀んで見る。但し一つも無くても良い。
225 return [] -- 全部失敗したらここで終了。
229 blockCmd :: CommandTypeOf -> Parser BlockElement
231 = (try $ do (tagName, tagAttrs) <- openTag
232 case cmdTypeOf tagName of
233 Just BlockCommandType
236 return $ BlockCmd BlockCommand {
238 , bCmdAttributes = tagAttrs
241 Just InlineCommandType
243 _ -> return $ undefinedCmdErr tagName
246 (try $ do (tagName, tagAttrs) <- emptyTag
247 case cmdTypeOf tagName of
248 Just BlockCommandType
249 -> return $ BlockCmd BlockCommand {
251 , bCmdAttributes = tagAttrs
254 Just InlineCommandType
256 _ -> return $ undefinedCmdErr tagName
261 contents ∷ Parser [BlockElement]
262 contents = ((:) <$> blockElement cmdTypeOf ⊛ contents)
264 (newline *> contents)
266 (comment *> contents)
270 undefinedCmdErr ∷ Text → BlockElement
272 = Div [("class", "error")]
273 [ Block (Paragraph [Text ("The command `" ⊕ name ⊕ "' is not defined. " ⊕
274 "Make sure you haven't mistyped.")
278 inlineElement :: CommandTypeOf -> Parser InlineElement
279 inlineElement cmdTypeOf
280 = try $ do skipMany comment
281 foldr (<|>) pzero [ nowiki
282 , apostrophes cmdTypeOf
287 , inlineCmd cmdTypeOf
290 nowiki ∷ Parser InlineElement
291 nowiki = Text ∘ T.pack <$> (try (string "<!nowiki[") *> nowiki')
293 nowiki' ∷ Parser String
294 nowiki' = (try (string "]>") *> pure [])
296 ((:) <$> anyChar ⊛ nowiki')
298 text ∷ Parser InlineElement
299 text = (Text ∘ T.pack ∘ (':' :) <$> ( char ':' *>
300 many (noneOf ('\n':inlineSymbols))
302 -- 定義リストとの關係上、コロンは先頭にしか來られない。
304 (Text ∘ T.pack <$> (many1 (noneOf ('\n':inlineSymbols))))
308 apostrophes :: CommandTypeOf -> Parser InlineElement
309 apostrophes cmdTypeOf = foldr (<|>) pzero (map try [apos1, apos2, apos3, apos4, apos5])
311 apos1 = apos 1 >> return (Text "'")
314 xs <- many1 $ inlineElement cmdTypeOf
319 xs <- many1 $ inlineElement cmdTypeOf
323 apos4 = apos 4 >> return (Text "'")
326 xs <- many1 $ inlineElement cmdTypeOf
328 return (Italic [Bold xs])
330 apos :: Int -> Parser ()
331 apos n = count n (char '\'') >> notFollowedBy (char '\'')
334 objLink ∷ Parser InlineElement
335 objLink = do void $ try (string "[[[")
336 page ← many1 (noneOf "|]")
337 label ← option Nothing $
338 Just <$> (char '|' *> many1 (satisfy (≠ ']')))
340 pure $ ObjectLink (T.pack page) (T.pack <$> label)
344 pageLink ∷ Parser InlineElement
345 pageLink = do void $ try (string "[[")
346 page ← option Nothing $
347 Just <$> many1 (noneOf "#|]")
348 fragment ← option Nothing $
349 Just <$> (char '#' *> many1 (noneOf "|]"))
350 label ← option Nothing $
351 Just <$> (char '|' *> many1 (satisfy (≠ ']')))
352 when (isNothing page ∧ isNothing fragment) (∅)
354 pure $ PageLink (T.pack <$> page )
355 (T.pack <$> fragment)
360 extLink ∷ Parser InlineElement
361 extLink = do void $ char '['
362 uriStr ← many1 (noneOf " \t]")
363 void $ skipMany (oneOf " \t")
364 label ← option Nothing $
365 Just <$> many1 (noneOf "]")
366 case parseURI uriStr of
367 Just uri → char ']' *> pure (ExternalLink uri (T.pack <$> label))
368 Nothing → pzero <?> "absolute URI"
372 inlineCmd ∷ CommandTypeOf → Parser InlineElement
374 = (try $ do (tagName, tagAttrs) ← openTag
375 case cmdTypeOf tagName of
376 Just InlineCommandType
379 pure $ InlineCmd InlineCommand {
381 , iCmdAttributes = tagAttrs
387 (try $ do (tagName, tagAttrs) <- emptyTag
388 case cmdTypeOf tagName of
389 Just InlineCommandType
390 -> return $ InlineCmd InlineCommand {
392 , iCmdAttributes = tagAttrs
400 contents ∷ Parser [InlineElement]
401 contents = ((:) <$> inlineElement cmdTypeOf ⊛ contents)
403 (comment *> contents)
405 ((Text "\n" :) <$> (newline *> contents))
409 openTag ∷ Parser (Text, [Attribute])
410 openTag = try $ do void $ char '<'
414 attrs ← many $ do attr ← tagAttr
418 return (T.pack name, attrs)
420 emptyTag ∷ Parser (Text, [Attribute])
421 emptyTag = try $ do void $ char '<'
425 attrs ← many $ do attr ← tagAttr
431 return (T.pack name, attrs)
433 closeTag ∷ Text → Parser ()
434 closeTag (T.unpack → name)
445 tagAttr ∷ Parser (CI Text, Text)
446 tagAttr = do name ← many1 letter
449 value ← many (satisfy (≠ '"'))
451 return (CI.mk $ T.pack name, T.pack value)
455 comment = (try (string "<!--") >> skipTillEnd 1)
459 skipTillEnd :: Int -> Parser ()
460 skipTillEnd level = (try (string "<!--") >> skipTillEnd (level + 1))
462 (try (string "-->") >> case level of
464 n -> skipTillEnd (n - 1))
466 (anyChar >> skipTillEnd level)
469 blockSymbols :: [Char]
470 blockSymbols = " =-*#;<"
473 inlineSymbols :: [Char]
474 inlineSymbols = "<[:'"
478 ws = skipMany ( (oneOf " \t" >> return ())
485 eol = (newline >> return ())