From: PHO Date: Sun, 31 Jul 2011 03:22:07 +0000 (+0900) Subject: MIMEType.Guess X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=commitdiff_plain;h=360baa7;p=Lucu.git MIMEType.Guess Ditz-issue: 8959dadc07db1bd363283dee401073f6e48dc7fa --- diff --git a/.gitignore b/.gitignore index 0b4ee08..00bc286 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,10 @@ Setup dist report.html +Network/HTTP/Lucu/MIMEType/DefaultExtensionMap.hs + +data/CompileMimeTypes + examples/HelloWorld examples/Implanted examples/ImplantedSmall diff --git a/GNUmakefile b/GNUmakefile index 8b9ab31..3b5520e 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -4,14 +4,5 @@ CONFIGURE_ARGS = -O include cabal-package.mk -update-web: update-web-doc update-web-ditz - -update-web-doc: doc - rsync -av --delete \ - dist/doc/html/Lucu/ \ - www@nem.cielonegro.org:static.cielonegro.org/htdocs/doc/Lucu - -update-web-ditz: ditz - rsync -av --delete \ - dist/ditz/ \ - www@nem.cielonegro.org:static.cielonegro.org/htdocs/ditz/Lucu +build-hook: + $(MAKE) -C data diff --git a/Lucu.cabal b/Lucu.cabal index 36e1cd2..ec67718 100644 --- a/Lucu.cabal +++ b/Lucu.cabal @@ -60,6 +60,7 @@ Library mtl == 2.0.*, network == 2.3.*, stm == 2.2.*, + text == 0.11.*, time == 1.2.*, time-http == 0.1.*, unix == 2.4.*, diff --git a/Network/HTTP/Lucu/MIMEType/DefaultExtensionMap.hs b/Network/HTTP/Lucu/MIMEType/DefaultExtensionMap.hs deleted file mode 100644 index d6add2b..0000000 --- a/Network/HTTP/Lucu/MIMEType/DefaultExtensionMap.hs +++ /dev/null @@ -1,182 +0,0 @@ --- |This module is automatically generated from data\/mime.types. --- 'defaultExtensionMap' contains every possible pairs of an extension --- and a MIME Type. - -{- !!! WARNING !!! - This file is automatically generated. - DO NOT EDIT BY HAND OR YOU WILL REGRET -} - -module Network.HTTP.Lucu.MIMEType.DefaultExtensionMap - (defaultExtensionMap) where -import Network.HTTP.Lucu.MIMEType () -import Network.HTTP.Lucu.MIMEType.Guess -import qualified Data.Map as M - -defaultExtensionMap :: ExtMap -defaultExtensionMap - = M.fromList - [("3gp", read "application/x-3gp"), ("669", read "audio/x-mod"), - ("Z", read "application/x-compress"), - ("a", read "application/x-ar"), ("ac3", read "audio/x-ac3"), - ("ai", read "application/postscript"), - ("aif", read "audio/x-aiff"), ("aifc", read "audio/x-aiff"), - ("aiff", read "audio/x-aiff"), ("amf", read "audio/x-mod"), - ("anx", read "application/ogg"), ("ape", read "application/x-ape"), - ("asc", read "text/plain"), ("asf", read "video/x-ms-asf"), - ("atom", read "application/atom+xml"), ("au", read "audio/x-au"), - ("avi", read "video/x-msvideo"), - ("bcpio", read "application/x-bcpio"), - ("bin", read "application/octet-stream"), - ("bmp", read "image/bmp"), ("bz2", read "application/x-bzip"), - ("cabal", read "text/x-cabal"), - ("cdf", read "application/x-netcdf"), ("cgm", read "image/cgm"), - ("class", read "application/octet-stream"), - ("cpio", read "application/x-cpio"), - ("cpt", read "application/mac-compactpro"), - ("csh", read "application/x-csh"), ("css", read "text/css"), - ("dcr", read "application/x-director"), ("dif", read "video/x-dv"), - ("dir", read "application/x-director"), - ("djv", read "image/vnd.djvu"), ("djvu", read "image/vnd.djvu"), - ("dll", read "application/octet-stream"), - ("dmg", read "application/octet-stream"), - ("dms", read "application/octet-stream"), - ("doc", read "application/msword"), ("dsm", read "audio/x-mod"), - ("dtd", read "application/xml-dtd"), ("dv", read "video/x-dv"), - ("dvi", read "application/x-dvi"), - ("dxr", read "application/x-director"), - ("eps", read "application/postscript"), - ("etx", read "text/x-setext"), - ("exe", read "application/octet-stream"), - ("ez", read "application/andrew-inset"), - ("far", read "audio/x-mod"), ("flac", read "audio/x-flac"), - ("flc", read "video/x-fli"), ("fli", read "video/x-fli"), - ("flv", read "video/x-flv"), ("gdm", read "audio/x-mod"), - ("gif", read "image/gif"), ("gram", read "application/srgs"), - ("grxml", read "application/srgs+xml"), - ("gtar", read "application/x-gtar"), - ("gz", read "application/x-gzip"), - ("hdf", read "application/x-hdf"), - ("hi", read "application/octet-stream"), - ("hqx", read "application/mac-binhex40"), - ("hs", read "text/x-haskell"), ("htm", read "text/html"), - ("html", read "text/html"), - ("ice", read "x-conference/x-cooltalk"), - ("ico", read "image/x-icon"), ("ics", read "text/calendar"), - ("ief", read "image/ief"), ("ifb", read "text/calendar"), - ("iff", read "audio/x-svx"), ("iges", read "model/iges"), - ("igs", read "model/iges"), ("ilbc", read "audio/iLBC-sh"), - ("imf", read "audio/x-mod"), ("it", read "audio/x-mod"), - ("jng", read "image/x-jng"), - ("jnlp", read "application/x-java-jnlp-file"), - ("jp2", read "image/jp2"), ("jpe", read "image/jpeg"), - ("jpeg", read "image/jpeg"), ("jpg", read "image/jpeg"), - ("js", read "application/x-javascript"), - ("kar", read "audio/midi"), ("latex", read "application/x-latex"), - ("lha", read "application/octet-stream"), - ("lzh", read "application/octet-stream"), - ("m3u", read "audio/x-mpegurl"), ("m4a", read "audio/mp4a-latm"), - ("m4p", read "audio/mp4a-latm"), ("m4u", read "video/vnd.mpegurl"), - ("m4v", read "video/mpeg4"), ("mac", read "image/x-macpaint"), - ("man", read "application/x-troff-man"), - ("mathml", read "application/mathml+xml"), - ("me", read "application/x-troff-me"), ("med", read "audio/x-mod"), - ("mesh", read "model/mesh"), ("mid", read "audio/midi"), - ("midi", read "audio/midi"), ("mif", read "application/vnd.mif"), - ("mka", read "video/x-matroska"), ("mkv", read "video/x-matroska"), - ("mng", read "video/x-mng"), ("mod", read "audio/x-mod"), - ("mov", read "video/quicktime"), - ("movie", read "video/x-sgi-movie"), ("mp2", read "audio/mpeg"), - ("mp3", read "audio/mpeg"), ("mp4", read "video/mp4"), - ("mpc", read "audio/x-musepack"), ("mpe", read "video/mpeg"), - ("mpeg", read "video/mpeg"), ("mpg", read "video/mpeg"), - ("mpga", read "audio/mpeg"), ("ms", read "application/x-troff-ms"), - ("msh", read "model/mesh"), ("mtm", read "audio/x-mod"), - ("mve", read "video/x-mve"), ("mxu", read "video/vnd.mpegurl"), - ("nar", read "application/x-nar"), - ("nc", read "application/x-netcdf"), ("nist", read "audio/x-nist"), - ("nuv", read "video/x-nuv"), - ("o", read "application/octet-stream"), - ("oda", read "application/oda"), ("ogg", read "application/ogg"), - ("ogm", read "application/ogg"), ("okt", read "audio/x-mod"), - ("paf", read "audio/x-paris"), - ("pbm", read "image/x-portable-bitmap"), - ("pct", read "image/pict"), ("pdb", read "chemical/x-pdb"), - ("pdf", read "application/pdf"), - ("pgm", read "image/x-portable-graymap"), - ("pgn", read "application/x-chess-pgn"), - ("pic", read "image/pict"), ("pict", read "image/pict"), - ("png", read "image/png"), ("pnm", read "image/x-portable-anymap"), - ("pnt", read "image/x-macpaint"), - ("pntg", read "image/x-macpaint"), - ("ppm", read "image/x-portable-pixmap"), - ("ppt", read "application/vnd.ms-powerpoint"), - ("ps", read "application/postscript"), - ("qif", read "image/x-quicktime"), ("qt", read "video/quicktime"), - ("qti", read "image/x-quicktime"), - ("qtif", read "image/x-quicktime"), - ("ra", read "audio/x-pn-realaudio"), ("ram", read "text/uri-list"), - ("rar", read "application/x-rar"), - ("ras", read "image/x-sun-raster"), - ("rdf", read "application/rdf+xml"), ("rgb", read "image/x-rgb"), - ("rm", read "application/vnd.rn-realmedia"), - ("roff", read "application/x-troff"), ("rtf", read "text/rtf"), - ("rtx", read "text/richtext"), ("s3m", read "audio/x-mod"), - ("sam", read "audio/x-mod"), ("sds", read "audio/x-sds"), - ("sf", read "audio/x-ircam"), ("sgm", read "text/sgml"), - ("sgml", read "text/sgml"), ("sh", read "application/x-sh"), - ("shar", read "application/x-shar"), - ("shn", read "audio/x-shorten"), ("sid", read "audio/x-sid"), - ("silo", read "model/mesh"), ("sit", read "application/x-stuffit"), - ("skd", read "application/x-koan"), - ("skm", read "application/x-koan"), - ("skp", read "application/x-koan"), - ("skt", read "application/x-koan"), - ("smi", read "application/smil"), - ("smil", read "application/smil"), ("snd", read "audio/x-au"), - ("so", read "application/octet-stream"), - ("spc", read "application/x-spc"), - ("spl", read "application/x-futuresplash"), - ("src", read "application/x-wais-source"), - ("stm", read "audio/x-mod"), ("stx", read "audio/x-mod"), - ("sv4cpio", read "application/x-sv4cpio"), - ("sv4crc", read "application/x-sv4crc"), - ("svg", read "image/svg+xml"), ("svx", read "audio/x-svx"), - ("swf", read "application/x-shockwave-flash"), - ("swfl", read "application/x-shockwave-flash"), - ("t", read "application/x-troff"), - ("tar", read "application/x-tar"), - ("tbz", read "application/x-bzip"), - ("tcl", read "application/x-tcl"), - ("tex", read "application/x-tex"), - ("texi", read "application/x-texinfo"), - ("texinfo", read "application/x-texinfo"), - ("tgz", read "application/x-gzip"), ("tif", read "image/tiff"), - ("tiff", read "image/tiff"), ("tr", read "application/x-troff"), - ("ts", read "video/mpegts"), - ("tsv", read "text/tab-separated-values"), - ("tta", read "audio/x-ttafile"), ("txt", read "text/plain"), - ("ult", read "audio/x-mod"), ("ustar", read "application/x-ustar"), - ("vcd", read "application/x-cdlink"), ("voc", read "audio/x-voc"), - ("vrml", read "model/vrml"), - ("vxml", read "application/voicexml+xml"), - ("w64", read "audio/x-w64"), ("wav", read "audio/x-wav"), - ("wbmp", read "image/vnd.wap.wbmp"), - ("wbxml", read "application/vnd.wap.wbxml"), - ("wm", read "video/x-ms-asf"), ("wma", read "video/x-ms-asf"), - ("wml", read "text/vnd.wap.wml"), - ("wmlc", read "application/vnd.wap.wmlc"), - ("wmls", read "text/vnd.wap.wmlscript"), - ("wmlsc", read "application/vnd.wap.wmlscriptc"), - ("wmv", read "video/x-ms-asf"), ("wrl", read "model/vrml"), - ("wv", read "audio/x-wavpack"), - ("wvc", read "audio/x-wavpack-correction"), - ("wvp", read "audio/x-wavpack"), ("xbm", read "image/x-xbitmap"), - ("xcf", read "image/x-xcf"), ("xht", read "application/xhtml+xml"), - ("xhtml", read "application/xhtml+xml"), - ("xls", read "application/vnd.ms-excel"), - ("xm", read "audio/x-mod"), ("xml", read "application/xml"), - ("xpm", read "image/x-xpixmap"), ("xsl", read "application/xml"), - ("xslt", read "application/xslt+xml"), - ("xul", read "application/vnd.mozilla.xul+xml"), - ("xwd", read "image/x-xwindowdump"), - ("xyz", read "chemical/x-xyz"), ("zip", read "application/zip")] diff --git a/Network/HTTP/Lucu/MIMEType/Guess.hs b/Network/HTTP/Lucu/MIMEType/Guess.hs index 5a10bb6..2319477 100644 --- a/Network/HTTP/Lucu/MIMEType/Guess.hs +++ b/Network/HTTP/Lucu/MIMEType/Guess.hs @@ -1,5 +1,6 @@ {-# LANGUAGE - UnboxedTuples + BangPatterns + , UnboxedTuples , UnicodeSyntax #-} -- |MIME Type guessing by a file extension. This is a poor man's way @@ -14,92 +15,104 @@ module Network.HTTP.Lucu.MIMEType.Guess , serializeExtMap ) where +import Control.Applicative +import qualified Data.Ascii as A +import Data.Attoparsec.Char8 as P +import qualified Data.Attoparsec.Lazy as AL import qualified Data.ByteString.Lazy.Char8 as B import qualified Data.Map as M import Data.Map (Map) import Data.Maybe +import Data.Text (Text) +import qualified Data.Text as T +import Data.Text.Encoding import Language.Haskell.Pretty import Language.Haskell.Syntax import Network.HTTP.Lucu.MIMEType -import Network.HTTP.Lucu.Parser.Http -import Network.HTTP.Lucu.Utils +import Prelude.Unicode +import System.FilePath --- |'Data.Map.Map' from extension to MIME Type. -type ExtMap = Map String MIMEType +-- |'Map' from extension to 'MIMEType'. +type ExtMap = Map Text MIMEType -- |Guess the MIME Type of file. -guessTypeByFileName :: ExtMap -> FilePath -> Maybe MIMEType -guessTypeByFileName extMap fpath - = extMap `seq` fpath `seq` - let ext = last $ splitBy (== '.') fpath +guessTypeByFileName ∷ ExtMap → FilePath → Maybe MIMEType +guessTypeByFileName !extMap !fpath + = let ext = T.pack $ takeExtension fpath in - M.lookup ext extMap >>= return + M.lookup ext extMap -- |Read an Apache mime.types and parse it. -parseExtMapFile :: FilePath -> IO ExtMap +parseExtMapFile ∷ FilePath → IO ExtMap parseExtMapFile fpath - = fpath `seq` - do file <- B.readFile fpath - case parse (allowEOF extMapP) file of - (# Success xs, _ #) - -> return $ compile xs + = do file ← B.readFile fpath + case AL.parse extMapP file of + AL.Done _ xs → return $ compile xs + AL.Fail _ _ e → fail ("Failed to parse: " ⧺ fpath ⧺ ": " ⧺ e) - (# _, input' #) - -> let near = B.unpack $ B.take 100 input' - in - fail ("Failed to parse: " ++ fpath ++ " (near: " ++ near ++ ")") - - -extMapP :: Parser [ (MIMEType, [String]) ] -extMapP = do xs <- many (comment <|> validLine <|> emptyLine) - eof +extMapP ∷ Parser [ (MIMEType, [Text]) ] +extMapP = do xs ← P.many (comment <|> validLine <|> emptyLine) + endOfInput return $ catMaybes xs where - spc = oneOf " \t" + isSpc ∷ Char → Bool + isSpc c = c ≡ '\x20' ∨ c ≡ '\x09' - comment = many spc >> - char '#' >> - ( many $ satisfy (/= '\n') ) >> - return Nothing + comment ∷ Parser (Maybe (MIMEType, [Text])) + comment = try $ + do skipWhile isSpc + _ ← char '#' + skipWhile (≢ '\x0A') + return Nothing - validLine = do _ <- many spc - mime <- mimeTypeP - _ <- many spc - exts <- sepBy token (many spc) + validLine ∷ Parser (Maybe (MIMEType, [Text])) + validLine = try $ + do skipWhile isSpc + mime ← mimeTypeP + skipWhile isSpc + exts ← sepBy extP (skipWhile isSpc) return $ Just (mime, exts) - emptyLine = oneOf " \t\n" >> return Nothing + extP ∷ Parser Text + extP = decodeUtf8 <$> takeWhile1 (\c → (¬) (isSpc c ∨ c ≡ '\x0A')) + emptyLine ∷ Parser (Maybe (MIMEType, [Text])) + emptyLine = try $ + do skipWhile isSpc + _ ← char '\x0A' + return Nothing -compile :: [ (MIMEType, [String]) ] -> Map String MIMEType -compile = M.fromList . foldr (++) [] . map tr +compile ∷ [ (MIMEType, [Text]) ] → Map Text MIMEType +compile = M.fromList ∘ concat ∘ map tr where - tr :: (MIMEType, [String]) -> [ (String, MIMEType) ] - tr (mime, exts) = [ (ext, mime) | ext <- exts ] + tr ∷ (MIMEType, [Text]) → [ (Text, MIMEType) ] + tr (mime, exts) = [ (ext, mime) | ext ← exts ] -- |@'serializeExtMap' extMap moduleName variableName@ generates a -- Haskell source code which contains the following things: -- -- * A definition of module named @moduleName@. -- --- * @variableName :: 'ExtMap'@ whose content is a serialization of +-- * @variableName ∷ 'ExtMap'@ whose content is a serialization of -- @extMap@. -- -- The module "Network.HTTP.Lucu.MIMEType.DefaultExtensionMap" is -- surely generated using this function. -serializeExtMap :: ExtMap -> String -> String -> String +serializeExtMap ∷ ExtMap → String → String → String serializeExtMap extMap moduleName variableName - = let hsModule = HsModule undefined modName (Just exports) imports decls + = let hsModule = HsModule (⊥) modName (Just exports) imports decls modName = Module moduleName exports = [HsEVar (UnQual (HsIdent variableName))] - imports = [ HsImportDecl undefined (Module "Network.HTTP.Lucu.MIMEType") False Nothing (Just (False, [])) - , HsImportDecl undefined (Module "Network.HTTP.Lucu.MIMEType.Guess") False Nothing Nothing - , HsImportDecl undefined (Module "Data.Map") True (Just (Module "M")) Nothing + imports = [ HsImportDecl (⊥) (Module "Network.HTTP.Lucu.MIMEType") False Nothing Nothing + , HsImportDecl (⊥) (Module "Network.HTTP.Lucu.MIMEType.Guess") False Nothing Nothing + , HsImportDecl (⊥) (Module "Data.Ascii") True (Just (Module "A")) Nothing + , HsImportDecl (⊥) (Module "Data.Map") True (Just (Module "M")) Nothing + , HsImportDecl (⊥) (Module "Data.Text") True (Just (Module "T")) Nothing ] - decls = [ HsTypeSig undefined [HsIdent variableName] + decls = [ HsTypeSig (⊥) [HsIdent variableName] (HsQualType [] (HsTyCon (UnQual (HsIdent "ExtMap")))) - , HsFunBind [HsMatch undefined (HsIdent variableName) + , HsFunBind [HsMatch (⊥) (HsIdent variableName) [] (HsUnGuardedRhs extMapExp) []] ] extMapExp = HsApp (HsVar (Qual (Module "M") (HsIdent "fromList"))) (HsList records) @@ -109,13 +122,20 @@ serializeExtMap extMap moduleName variableName in comment ++ prettyPrint hsModule ++ "\n" where - records :: [HsExp] + records ∷ [HsExp] records = map record $ M.assocs extMap - record :: (String, MIMEType) -> HsExp + record ∷ (Text, MIMEType) → HsExp record (ext, mime) - = HsTuple [HsLit (HsString ext), mimeToExp mime] + = HsTuple + [ HsApp (HsVar (Qual (Module "T") (HsIdent "pack"))) + (HsLit (HsString (T.unpack ext))) + , mimeToExp mime + ] - mimeToExp :: MIMEType -> HsExp + mimeToExp ∷ MIMEType → HsExp mimeToExp mt - = HsApp (HsVar (UnQual (HsIdent "read"))) (HsLit (HsString $ show mt)) + = HsApp (HsVar (UnQual (HsIdent "parseMIMEType"))) + (HsParen + (HsApp (HsVar (Qual (Module "A") (HsIdent "unsafeFromString"))) + (HsLit (HsString $ A.toString $ printMIMEType mt)))) diff --git a/data/Makefile b/data/Makefile index 584c8d6..23c69ed 100644 --- a/data/Makefile +++ b/data/Makefile @@ -1,5 +1,10 @@ ../Network/HTTP/Lucu/MIMEType/DefaultExtensionMap.hs: mime.types CompileMimeTypes ./CompileMimeTypes $< $@ -CompileMimeTypes: CompileMimeTypes.hs - ghc --make $@ +CompileMimeTypes: + ghc --make $@ -i.. + +clean: + rm -f *.hi *.o CompileMimeTypes + +.PHONY: clean