X-Git-Url: http://git.cielonegro.org/gitweb.cgi?p=Lucu.git;a=blobdiff_plain;f=Network%2FHTTP%2FLucu%2FParser%2FHttp.hs;h=65ba8b27ccb1ff66f52d6bd83a6b2af86f3980be;hp=fe54bde4c5d9f08b10ce443dd029f6d5bd838aa2;hb=9bb89434103e9a22f100d6ecb7e65a5d461e0454;hpb=86d100e294fa482456980021cca10393b9830ec1 diff --git a/Network/HTTP/Lucu/Parser/Http.hs b/Network/HTTP/Lucu/Parser/Http.hs index fe54bde..65ba8b2 100644 --- a/Network/HTTP/Lucu/Parser/Http.hs +++ b/Network/HTTP/Lucu/Parser/Http.hs @@ -1,5 +1,6 @@ {-# LANGUAGE BangPatterns + , OverloadedStrings , UnicodeSyntax #-} -- |This is an auxiliary parser utilities for parsing things related @@ -8,120 +9,156 @@ -- In general you don't have to use this module directly. module Network.HTTP.Lucu.Parser.Http ( isCtl + , isText , isSeparator , isChar , isToken + , isSPHT + , listOf - , token + + , crlf + , sp , lws - , text - , separator + + , token + , separators , quotedStr , qvalue + + , atMost ) where +import Control.Applicative +import Control.Applicative.Unicode +import Control.Monad.Unicode +import Data.Ascii (Ascii) +import qualified Data.Ascii as A +import Data.Attoparsec.Char8 as P +import qualified Data.Attoparsec.FastSet as FS +import qualified Data.ByteString.Char8 as BS +import Prelude.Unicode -import Network.HTTP.Lucu.Parser - --- |@'isCtl' c@ is 'Prelude.False' iff @0x20 <= @c@ < 0x7F@. -isCtl :: Char -> Bool +-- |@'isCtl' c@ is 'Prelude.False' iff @0x20 <= c < 0x7F@. +isCtl ∷ Char → Bool +{-# INLINE isCtl #-} isCtl c - | c < '\x1f' = True - | c >= '\x7f' = True - | otherwise = False + | c ≤ '\x1f' = True + | c > '\x7f' = True + | otherwise = False + +-- |@'isText'@ is equivalent to @'not' '.' 'isCtl'@ +isText ∷ Char → Bool +{-# INLINE isText #-} +isText = (¬) ∘ isCtl -- |@'isSeparator' c@ is 'Prelude.True' iff c is one of HTTP -- separators. -isSeparator :: Char -> Bool -isSeparator '(' = True -isSeparator ')' = True -isSeparator '<' = True -isSeparator '>' = True -isSeparator '@' = True -isSeparator ',' = True -isSeparator ';' = True -isSeparator ':' = True -isSeparator '\\' = True -isSeparator '"' = True -isSeparator '/' = True -isSeparator '[' = True -isSeparator ']' = True -isSeparator '?' = True -isSeparator '=' = True -isSeparator '{' = True -isSeparator '}' = True -isSeparator ' ' = True -isSeparator '\t' = True -isSeparator _ = False +isSeparator ∷ Char → Bool +{-# INLINE isSeparator #-} +isSeparator = flip FS.memberChar set + where + {-# NOINLINE set #-} + set = FS.charClass "()<>@,;:\\\"/[]?={}\x20\x09" -- |@'isChar' c@ is 'Prelude.True' iff @c <= 0x7f@. -isChar :: Char -> Bool -isChar c - | c <= '\x7f' = True - | otherwise = False +isChar ∷ Char → Bool +{-# INLINE isChar #-} +isChar = (≤ '\x7F') -- |@'isToken' c@ is equivalent to @not ('isCtl' c || 'isSeparator' -- c)@ -isToken :: Char -> Bool -isToken c = c `seq` - not (isCtl c || isSeparator c) - --- |@'listOf' p@ is similar to @'Network.HTTP.Lucu.Parser.sepBy' p --- ('Network.HTTP.Lucu.Parser.char' \',\')@ but it allows any --- occurrences of LWS before and after each tokens. -listOf :: Parser a -> Parser [a] -listOf !p = do _ <- many lws - sepBy p $! do _ <- many lws - _ <- char ',' - many lws - --- |'token' is equivalent to @'Network.HTTP.Lucu.Parser.many1' $ --- 'Network.HTTP.Lucu.Parser.satisfy' 'isToken'@ -token :: Parser String -token = many1 $! satisfy isToken - --- |'lws' is an HTTP LWS: @'Network.HTTP.Lucu.Parser.crlf'? --- ('Network.HTTP.Lucu.Parser.sp' | 'Network.HTTP.Lucu.Parser.ht')+@ -lws :: Parser String -lws = do s <- option "" crlf - xs <- many1 (sp <|> ht) - return (s ++ xs) - --- |'text' accepts one character which doesn't satisfy 'isCtl'. -text :: Parser Char -text = satisfy (not . isCtl) - --- |'separator' accepts one character which satisfies 'isSeparator'. -separator :: Parser Char -separator = satisfy isSeparator +isToken ∷ Char → Bool +{-# INLINE isToken #-} +isToken !c + = (¬) (isCtl c ∨ isSeparator c) + +-- |@'listOf' p@ is similar to @'sepBy' p ('char' \',\')@ but it +-- allows any occurrences of 'lws' before and after each tokens. +listOf ∷ Parser a → Parser [a] +{-# INLINEABLE listOf #-} +listOf p + = try $ + do skipMany lws + sepBy p $ do skipMany lws + _ <- char ',' + skipMany lws + +-- |'token' is similar to @'takeWhile1' 'isToken'@ +token ∷ Parser Ascii +{-# INLINE token #-} +token = A.unsafeFromByteString <$> takeWhile1 isToken + +-- |The CRLF: 0x0D 0x0A. +crlf ∷ Parser () +{-# INLINE crlf #-} +crlf = string "\x0D\x0A" ≫ return () + +-- |The SP: 0x20. +sp ∷ Parser () +{-# INLINE sp #-} +sp = char '\x20' ≫ return () + +-- |HTTP LWS: crlf? (sp | ht)+ +lws ∷ Parser () +{-# INLINEABLE lws #-} +lws = try $ + do option () crlf + _ ← satisfy isSPHT + skipWhile isSPHT + +-- |Returns 'True' for SP and HT. +isSPHT ∷ Char → Bool +{-# INLINE isSPHT #-} +isSPHT '\x20' = True +isSPHT '\x09' = True +isSPHT _ = False + +-- |@'separators'@ is similar to @'takeWhile1' 'isSeparator'@. +separators ∷ Parser Ascii +{-# INLINE separators #-} +separators = A.unsafeFromByteString <$> takeWhile1 isSeparator -- |'quotedStr' accepts a string surrounded by double quotation -- marks. Quotes can be escaped by backslashes. -quotedStr :: Parser String -quotedStr = do _ <- char '"' - xs <- many (qdtext <|> quotedPair) - _ <- char '"' - return $ foldr (++) "" xs +quotedStr ∷ Parser Ascii +{-# INLINEABLE quotedStr #-} +quotedStr = try $ + do _ ← char '"' + xs ← P.many (qdtext <|> quotedPair) + _ ← char '"' + return $ A.unsafeFromByteString $ BS.pack xs where - qdtext = do c <- satisfy (/= '"') - return [c] + qdtext ∷ Parser Char + {-# INLINE qdtext #-} + qdtext = satisfy (\c → c ≢ '"' ∧ (¬) (isCtl c)) - quotedPair = do _ <- char '\\' - c <- satisfy isChar - return [c] + quotedPair ∷ Parser Char + {-# INLINE quotedPair #-} + quotedPair = char '\\' ≫ satisfy isChar -- |'qvalue' accepts a so-called qvalue. -qvalue :: Parser Double -qvalue = do x <- char '0' - xs <- option "" - $ do y <- char '.' - ys <- many digit -- 本當は三文字までに制限 - return (y:ys) +qvalue ∷ Parser Double +{-# INLINEABLE qvalue #-} +qvalue = do x ← char '0' + xs ← option "" $ + do y ← char '.' + ys ← atMost 3 digit + return (y:ys) return $ read (x:xs) <|> - do x <- char '1' - xs <- option "" - $ do y <- char '.' - ys <- many (char '0') -- 本當は三文字までに制限 - return (y:ys) + do x ← char '1' + xs ← option "" $ + do y ← char '.' + ys ← atMost 3 (char '0') + return (y:ys) return $ read (x:xs) + +-- |@'atMost' n v@ is like @'P.many' v@ but applies the given action +-- at most @n@ times. +atMost ∷ Alternative f ⇒ Int → f a → f [a] +{-# INLINE atMost #-} +atMost 0 _ = pure [] +atMost n v = ( (:) <$> v ⊛ atMost (n-1) v ) + <|> + pure []