X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=Network%2FHTTP%2FLucu%2FParser.hs;h=34953f58c9dc5de637759c671f672b02815aa59e;hb=8bdd1da1ee1f3e453dbe2bce246618e12e26d30c;hp=3fa4c150e669dd088112efaf4f7f457fb36add83;hpb=3c7a58ab749a55a30466a033b170536bcdf18b98;p=Lucu.git diff --git a/Network/HTTP/Lucu/Parser.hs b/Network/HTTP/Lucu/Parser.hs index 3fa4c15..34953f5 100644 --- a/Network/HTTP/Lucu/Parser.hs +++ b/Network/HTTP/Lucu/Parser.hs @@ -1,130 +1,324 @@ +-- |Yet another parser combinator. This is mostly a subset of +-- "Text.ParserCombinators.Parsec" but there are some differences: +-- +-- * This parser works on 'Data.ByteString.Base.LazyByteString' +-- instead of 'Prelude.String'. +-- +-- * Backtracking is the only possible behavior so there is no \"try\" +-- action. +-- +-- * On success, the remaining string is returned as well as the +-- parser result. +-- +-- * You can choose whether to treat reaching EOF (trying to eat one +-- more letter at the end of string) a fatal error or to treat it a +-- normal failure. If a fatal error occurs, the entire parsing +-- process immediately fails without trying any backtracks. The +-- default behavior is to treat EOF fatal. +-- +-- In general, you don't have to use this module directly. module Network.HTTP.Lucu.Parser - ( Parser(..) - , parse -- Parser a -> ByteString -> Maybe (a, ByteString) - , anyChar -- Parser Char - , satisfy -- (Char -> Bool) -> Parser Char - , char -- Char -> Parser Char - , string -- String -> Parser String - , (<|>) -- Parser a -> Parser a -> Parser a - , oneOf -- [Char] -> Parser Char - , digit -- Parser Char - , notFollowedBy -- Parser a -> Parser () - , many -- Parser a -> Parser [a] - , many1 -- Parser a -> Parser [a] - , manyTill -- Parser a -> Parser end -> Parser [a] - , many1Till -- Parser a -> Parser end -> Parser [a] - , option -- a -> Parser a -> Parser a - , sp -- Parser Char - , ht -- Parser Char - , crlf -- Parser String + ( Parser + , ParserResult(..) + + , failP + + , parse + , parseStr + + , anyChar + , eof + , allowEOF + , satisfy + , char + , string + , (<|>) + , choice + , oneOf + , digit + , hexDigit + , notFollowedBy + , many + , manyChar + , many1 + , count + , option + , sepBy + , sepBy1 + + , sp + , ht + , crlf ) where -import Control.Monad -import Control.Monad.State -import qualified Data.ByteString.Lazy.Char8 as B -import Data.ByteString.Lazy.Char8 (ByteString) - -data Parser a = Parser { - runParser :: State ByteString (Maybe a) +import Control.Monad.State.Strict hiding (state) +import qualified Data.ByteString.Lazy as Lazy (ByteString) +import qualified Data.ByteString.Lazy.Char8 as B hiding (ByteString) +import qualified Data.Foldable as Fold +import Data.Int +import qualified Data.Sequence as Seq +import Data.Sequence (Seq, (|>)) + +-- |@'Parser' a@ is obviously a parser which parses and returns @a@. +newtype Parser a = Parser { + runParser :: State ParserState (ParserResult a) } +data ParserState + = PST { + pstInput :: Lazy.ByteString + , pstIsEOFFatal :: !Bool + } + deriving (Eq, Show) + + +data ParserResult a = Success !a + | IllegalInput -- 受理出來ない入力があった + | ReachedEOF -- 限界を越えて讀まうとした + deriving (Eq, Show) + + -- (>>=) :: Parser a -> (a -> Parser b) -> Parser b instance Monad Parser where - p >>= f = Parser $ do saved <- get -- 失敗した時の爲に状態を保存 - result <- runParser p - case result of - Just a -> runParser (f a) - Nothing -> do put saved -- 状態を復歸 - return Nothing - return = Parser . return . Just - fail _ = Parser $ return Nothing + p >>= f = Parser $! do saved <- get -- 失敗した時の爲に状態を保存 + result <- runParser p + case result of + Success a -> runParser (f a) + IllegalInput -> do put saved -- 状態を復歸 + return IllegalInput + ReachedEOF -> do put saved -- 状態を復歸 + return ReachedEOF + return !x = Parser $! return $! Success x + fail _ = Parser $! return $! IllegalInput + +instance Functor Parser where + fmap f p = p >>= return . f + +-- |@'failP'@ is just a synonym for @'Prelude.fail' +-- 'Prelude.undefined'@. +failP :: Parser a +failP = fail undefined + +-- |@'parse' p bstr@ parses @bstr@ with @p@ and returns @(# result, +-- remaining #)@. +parse :: Parser a -> Lazy.ByteString -> (# ParserResult a, Lazy.ByteString #) +parse !p input -- input は lazy である必要有り。 + = let (!result, state') = runState (runParser p) (PST input True) + in + (# result, pstInput state' #) -- pstInput state' も lazy である必要有り。 + +-- |@'parseStr' p str@ packs @str@ and parses it. +parseStr :: Parser a -> String -> (# ParserResult a, Lazy.ByteString #) +parseStr !p input -- input は lazy である必要有り。 + = parse p (B.pack input) -parse :: Parser a -> ByteString -> Maybe (a, ByteString) -parse p input = case runState (runParser p) input of - (Just a , input') -> Just (a, input') - (Nothing, _ ) -> Nothing +anyChar :: Parser Char +anyChar = Parser $! + do state@(PST input _) <- get + if B.null input then + return ReachedEOF + else + do put $! state { pstInput = B.tail input } + return (Success $! B.head input) -anyChar :: Parser Char -anyChar = Parser $ do input <- get - if B.null input then - return Nothing - else - do let c = B.head input - put (B.tail input) - return (Just c) +eof :: Parser () +eof = Parser $! + do PST input _ <- get + if B.null input then + return $! Success () + else + return IllegalInput + +-- |@'allowEOF' p@ makes @p@ treat reaching EOF a normal failure. +allowEOF :: Parser a -> Parser a +allowEOF !f + = Parser $! do saved@(PST _ isEOFFatal) <- get + put $! saved { pstIsEOFFatal = False } + + result <- runParser f + + state <- get + put $! state { pstIsEOFFatal = isEOFFatal } + + return result satisfy :: (Char -> Bool) -> Parser Char -satisfy f = do c <- anyChar - unless (f c) (fail "") - return c +satisfy !f + = do c <- anyChar + if f c then + return c + else + failP char :: Char -> Parser Char -char c = satisfy (== c) +char !c = satisfy (== c) string :: String -> Parser String -string str = do mapM_ char str - return str +string !str + = let bs = B.pack str + len = B.length bs + in + Parser $! + do st <- get + let (bs', rest) = B.splitAt len $ pstInput st + st' = st { pstInput = rest } + if B.length bs' < len then + return ReachedEOF + else + if bs == bs' then + do put st' + return $ Success str + else + return IllegalInput infixr 0 <|> +-- |This is the backtracking alternation. There is no non-backtracking +-- equivalent. (<|>) :: Parser a -> Parser a -> Parser a -f <|> g = Parser $ do saved <- get -- 状態を保存 - result <- runParser f - case result of - Just a -> return (Just a) - Nothing -> do put saved -- 状態を復歸 - runParser g +(!f) <|> (!g) + = Parser $! do saved <- get -- 状態を保存 + result <- runParser f + case result of + Success a -> return $! Success a + IllegalInput -> do put saved -- 状態を復歸 + runParser g + ReachedEOF -> if pstIsEOFFatal saved then + do put saved + return ReachedEOF + else + do put saved + runParser g + + +choice :: [Parser a] -> Parser a +choice = foldl (<|>) failP oneOf :: [Char] -> Parser Char -oneOf = foldl (<|>) (fail "") . map char +oneOf = foldl (<|>) failP . map char notFollowedBy :: Parser a -> Parser () -notFollowedBy p = p >>= fail "" <|> return () +notFollowedBy !p + = Parser $! do saved <- get -- 状態を保存 + result <- runParser p + case result of + Success _ -> do put saved -- 状態を復歸 + return IllegalInput + IllegalInput -> do put saved -- 状態を復歸 + return $! Success () + ReachedEOF -> do put saved -- 状態を復歸 + return $! Success () digit :: Parser Char -digit = oneOf "0123456789" +digit = do c <- anyChar + if c >= '0' && c <= '9' then + return c + else + failP + + +hexDigit :: Parser Char +hexDigit = do c <- anyChar + if (c >= '0' && c <= '9') || + (c >= 'a' && c <= 'f') || + (c >= 'A' && c <= 'F') then + return c + else + failP + + +many :: forall a. Parser a -> Parser [a] +many !p = Parser $! + do state <- get + let (# result, state' #) = many' state Seq.empty + put state' + return result + where + many' :: ParserState -> Seq a -> (# ParserResult [a], ParserState #) + many' !st !soFar + = case runState (runParser p) st of + (Success a, st') -> many' st' (soFar |> a) + (IllegalInput, _) -> (# Success (Fold.toList soFar), st #) + (ReachedEOF , _) -> if pstIsEOFFatal st then + (# ReachedEOF, st #) + else + (# Success (Fold.toList soFar), st #) + +manyChar :: Parser Char -> Parser Lazy.ByteString +manyChar !p = Parser $! + do state <- get + case scan' state 0 of + Success len + -> do let (bs, rest) = B.splitAt len (pstInput state) + state' = state { pstInput = rest } + put state' + return $ Success bs + ReachedEOF + -> if pstIsEOFFatal state then + return ReachedEOF + else + error "internal error" + _ -> error "internal error" + where + scan' :: ParserState -> Int64 -> ParserResult Int64 + scan' !st !soFar + = case runState (runParser p) st of + (Success _ , st') -> scan' st' (soFar + 1) + (IllegalInput, _ ) -> Success soFar + (ReachedEOF , _ ) -> if pstIsEOFFatal st then + ReachedEOF + else + Success soFar -many :: Parser a -> Parser [a] -many p = do x <- p - xs <- many p - return (x:xs) - <|> - return [] +many1 :: Parser a -> Parser [a] +many1 !p = do x <- p + xs <- many p + return (x:xs) -many1 :: Parser a -> Parser [a] -many1 p = do ret <- many p - case ret of - [] -> fail "" - xs -> return xs +count :: Int -> Parser a -> Parser [a] +count !n !p = Parser $! count' n p Seq.empty +-- This implementation is rather ugly but we need to make it +-- tail-recursive to avoid stack overflow. +count' :: Int -> Parser a -> Seq a -> State ParserState (ParserResult [a]) +count' 0 _ !soFar = return $! Success $! Fold.toList soFar +count' !n !p !soFar = do saved <- get + result <- runParser p + case result of + Success a -> count' (n-1) p (soFar |> a) + IllegalInput -> do put saved + return IllegalInput + ReachedEOF -> do put saved + return ReachedEOF -manyTill :: Parser a -> Parser end -> Parser [a] -manyTill p end = many $ do x <- p - end - return x +-- def may be a _|_ +option :: a -> Parser a -> Parser a +option def !p = p <|> return def -many1Till :: Parser a -> Parser end -> Parser [a] -many1Till p end = many1 $ do x <- p - end - return x +sepBy :: Parser a -> Parser sep -> Parser [a] +sepBy !p !sep = sepBy1 p sep <|> return [] -option :: a -> Parser a -> Parser a -option def p = p <|> return def + +sepBy1 :: Parser a -> Parser sep -> Parser [a] +sepBy1 !p !sep + = do x <- p + xs <- many $! sep >> p + return (x:xs) sp :: Parser Char