X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=Network%2FHTTP%2FLucu%2FParser%2FHttp.hs;h=72d8ca1721ae7908a94cd1b2445eff1aaa16e1c8;hb=ece223c516e66223ef1d5d8e6bbe4054a235d983;hp=cb21d299418671fccfe308d13a6befe3f75ed9be;hpb=8e78bc83bfe67a376293c346ae0b30f1a684c787;p=Lucu.git diff --git a/Network/HTTP/Lucu/Parser/Http.hs b/Network/HTTP/Lucu/Parser/Http.hs index cb21d29..72d8ca1 100644 --- a/Network/HTTP/Lucu/Parser/Http.hs +++ b/Network/HTTP/Lucu/Parser/Http.hs @@ -1,107 +1,165 @@ +{-# LANGUAGE + OverloadedStrings + , UnicodeSyntax + #-} -- |This is an auxiliary parser utilities for parsing things related -- on HTTP protocol. -- -- In general you don't have to use this module directly. module Network.HTTP.Lucu.Parser.Http ( isCtl + , isText , isSeparator , isChar , isToken + , isSPHT + , listOf - , token + + , crlf + , sp , lws - , text - , separator + + , token + , separators , quotedStr + , qvalue ) where +import Control.Applicative +import Control.Monad +import Data.Ascii (Ascii) +import qualified Data.Ascii as A +import Data.Attoparsec.Char8 as P hiding (scan) +import qualified Data.Attoparsec.FastSet as FS +import qualified Data.ByteString.Char8 as BS +import Network.HTTP.Lucu.Parser +import Prelude.Unicode -import Data.List -import Network.HTTP.Lucu.Parser - --- |@'isCtl' c@ is True iff @0x20 <= @c@ < 0x7F@. -isCtl :: Char -> Bool +-- |@'isCtl' c@ returns 'False' iff @0x20 <= c < 0x7F@. +isCtl ∷ Char → Bool +{-# INLINE isCtl #-} isCtl c - | c < '\x1f' = True - | c >= '\x7f' = True - | otherwise = False - --- |@'isSeparator' c@ is True iff c is one of HTTP separators. -isSeparator :: Char -> Bool -isSeparator '(' = True -isSeparator ')' = True -isSeparator '<' = True -isSeparator '>' = True -isSeparator '@' = True -isSeparator ',' = True -isSeparator ';' = True -isSeparator ':' = True -isSeparator '\\' = True -isSeparator '"' = True -isSeparator '/' = True -isSeparator '[' = True -isSeparator ']' = True -isSeparator '?' = True -isSeparator '=' = True -isSeparator '{' = True -isSeparator '}' = True -isSeparator ' ' = True -isSeparator '\t' = True -isSeparator _ = False - --- |@'isChar' c@ is True iff @c <= 0x7f@. -isChar :: Char -> Bool -isChar c - | c <= '\x7f' = True - | otherwise = False - --- |@'isToken' c@ is equivalent to @not ('isCtl' c || 'isSeparator' + | c ≤ '\x1f' = True + | c > '\x7f' = True + | otherwise = False + +-- |@'isText'@ is equivalent to @'not' '.' 'isCtl'@ +isText ∷ Char → Bool +{-# INLINE isText #-} +isText = (¬) ∘ isCtl + +-- |@'isSeparator' c@ returns 'True' iff c is one of the HTTP +-- separators. +isSeparator ∷ Char → Bool +{-# INLINE isSeparator #-} +isSeparator = flip FS.memberChar set + where + {-# NOINLINE set #-} + set = FS.charClass "()<>@,;:\\\"/[]?={}\x20\x09" + +-- |@'isChar' c@ returns 'True' iff @c <= 0x7f@. +isChar ∷ Char → Bool +{-# INLINE isChar #-} +isChar = (≤ '\x7F') + +-- |@'isToken' c@ is equivalent to @not ('isCtl' c '||' 'isSeparator' -- c)@ -isToken :: Char -> Bool -isToken c = c `seq` - not (isCtl c || isSeparator c) - --- |@'listOf' p@ is similar to @'Network.HTTP.Lucu.Parser.sepBy' p --- ('Network.HTTP.Lucu.Parser.char' \',\')@ but it allows any --- occurrences of LWS before and after each tokens. -listOf :: Parser a -> Parser [a] -listOf p = p `seq` - do many lws - sepBy p $! do many lws - char ',' - many lws - --- |'token' is equivalent to @'Network.HTTP.Lucu.Parser.many1' $ --- 'Network.HTTP.Lucu.Parser.satisfy' 'isToken'@ -token :: Parser String -token = many1 $! satisfy isToken - --- |'lws' is an HTTP LWS: @'Network.HTTP.Lucu.Parser.crlf'? --- ('Network.HTTP.Lucu.Parser.sp' | 'Network.HTTP.Lucu.Parser.ht')+@ -lws :: Parser String -lws = do s <- option "" crlf - xs <- many1 (sp <|> ht) - return (s ++ xs) - --- |'text' accepts one character which doesn't satisfy 'isCtl'. -text :: Parser Char -text = satisfy (\ c -> not (isCtl c)) - --- |'separator' accepts one character which satisfies 'isSeparator'. -separator :: Parser Char -separator = satisfy isSeparator +isToken ∷ Char → Bool +{-# INLINE isToken #-} +isToken c = (¬) (isCtl c ∨ isSeparator c) + +-- |@'listOf' p@ is similar to @'sepBy' p ('char' \',\')@ but it +-- allows any occurrences of 'lws' before and after each tokens. +listOf ∷ Parser a → Parser [a] +{-# INLINEABLE listOf #-} +listOf p + = do skipMany lws + p `sepBy` do skipMany lws + void $ char ',' + skipMany lws + + "listOf" + +-- |'token' is almost the same as @'takeWhile1' 'isToken'@ +token ∷ Parser Ascii +{-# INLINE token #-} +token = (A.unsafeFromByteString <$> takeWhile1 isToken) + + "token" + +-- |The CRLF: 0x0D 0x0A. +crlf ∷ Parser () +{-# INLINE crlf #-} +crlf = (string "\x0D\x0A" *> return ()) + + "crlf" + +-- |The SP: 0x20. +sp ∷ Parser () +{-# INLINE sp #-} +sp = char '\x20' *> return () + +-- |HTTP LWS: crlf? (sp | ht)+ +lws ∷ Parser () +{-# INLINEABLE lws #-} +lws = (option () crlf *> void (takeWhile1 isSPHT)) + + "lws" + +-- |Returns 'True' for SP and HT. +isSPHT ∷ Char → Bool +{-# INLINE isSPHT #-} +isSPHT '\x20' = True +isSPHT '\x09' = True +isSPHT _ = False + +-- |@'separators'@ is almost the same as @'takeWhile1' 'isSeparator'@. +separators ∷ Parser Ascii +{-# INLINE separators #-} +separators = (A.unsafeFromByteString <$> takeWhile1 isSeparator) + + "separators" -- |'quotedStr' accepts a string surrounded by double quotation -- marks. Quotes can be escaped by backslashes. -quotedStr :: Parser String -quotedStr = do char '"' - xs <- many (qdtext <|> quotedPair) - char '"' - return $ foldr (++) "" xs +quotedStr ∷ Parser Ascii +{-# INLINEABLE quotedStr #-} +quotedStr = do void $ char '"' + xs ← P.many (qdtext <|> quotedPair) + void $ char '"' + return $ A.unsafeFromByteString $ BS.pack xs + + "quotedStr" where - qdtext = do c <- satisfy (/= '"') - return [c] + qdtext ∷ Parser Char + {-# INLINE qdtext #-} + qdtext = satisfy (\c → c ≢ '"' ∧ (¬) (isCtl c)) + + "qdtext" + + quotedPair ∷ Parser Char + {-# INLINE quotedPair #-} + quotedPair = (char '\\' *> satisfy isChar) + + "quotedPair" - quotedPair = do q <- char '\\' - c <- satisfy isChar - return [c] +-- |'qvalue' accepts a so-called qvalue. +qvalue ∷ Parser Double +{-# INLINEABLE qvalue #-} +qvalue = ( do x ← char '0' + xs ← option "" $ + do y ← char '.' + ys ← atMost 3 digit + return (y:ys) + return $ read (x:xs) + <|> + do x ← char '1' + xs ← option "" $ + do y ← char '.' + ys ← atMost 3 (char '0') + return (y:ys) + return $ read (x:xs) + ) + + "qvalue"