]> gitweb @ CieloNegro.org - youtube-dl.git/blobdiff - youtube-dl
Minor documentation change
[youtube-dl.git] / youtube-dl
index 271952af821a682b698255308808d16a6c24e30d..496ae036fa7c98ed3166e0dfb16e857f4efad409 100755 (executable)
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 # Author: Ricardo Garcia Gonzalez
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 # Author: Ricardo Garcia Gonzalez
+# Author: Danny Colligan
 # License: Public domain code
 import htmlentitydefs
 import httplib
 # License: Public domain code
 import htmlentitydefs
 import httplib
@@ -17,8 +18,8 @@ import time
 import urllib
 import urllib2
 
 import urllib
 import urllib2
 
-std_headers = {        
-       'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5',
+std_headers = {
+       'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.8) Gecko/2009032609 Firefox/3.0.8',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
        'Accept-Language': 'en-us,en;q=0.5',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
        'Accept-Language': 'en-us,en;q=0.5',
@@ -64,16 +65,17 @@ class FileDownloader(object):
        For this, file downloader objects have a method that allows
        InfoExtractors to be registered in a given order. When it is passed
        a URL, the file downloader handles it to the first InfoExtractor it
        For this, file downloader objects have a method that allows
        InfoExtractors to be registered in a given order. When it is passed
        a URL, the file downloader handles it to the first InfoExtractor it
-       finds that reports being able to handle it. The InfoExtractor returns
-       all the information to the FileDownloader and the latter downloads the
-       file or does whatever it's instructed to do.
+       finds that reports being able to handle it. The InfoExtractor extracts
+       all the information about the video or videos the URL refers to, and
+       asks the FileDownloader to process the video information, possibly
+       downloading the video.
 
        File downloaders accept a lot of parameters. In order not to saturate
        the object constructor with arguments, it receives a dictionary of
 
        File downloaders accept a lot of parameters. In order not to saturate
        the object constructor with arguments, it receives a dictionary of
-       options instead. These options are available through the get_params()
-       method for the InfoExtractors to use. The FileDownloader also registers
-       itself as the downloader in charge for the InfoExtractors that are
-       added to it, so this is a "mutual registration".
+       options instead. These options are available through the params
+       attribute for the InfoExtractors to use. The FileDownloader also
+       registers itself as the downloader in charge for the InfoExtractors
+       that are added to it, so this is a "mutual registration".
 
        Available options:
 
 
        Available options:
 
@@ -88,17 +90,20 @@ class FileDownloader(object):
        outtmpl:        Template for output names.
        ignoreerrors:   Do not stop on download errors.
        ratelimit:      Download speed limit, in bytes/sec.
        outtmpl:        Template for output names.
        ignoreerrors:   Do not stop on download errors.
        ratelimit:      Download speed limit, in bytes/sec.
+       nooverwrites:   Prevent overwriting files.
        """
 
        """
 
-       _params = None
+       params = None
        _ies = []
        _pps = []
        _ies = []
        _pps = []
+       _download_retcode = None
 
        def __init__(self, params):
                """Create a FileDownloader object with the given options."""
                self._ies = []
                self._pps = []
 
        def __init__(self, params):
                """Create a FileDownloader object with the given options."""
                self._ies = []
                self._pps = []
-               self.set_params(params)
+               self._download_retcode = 0
+               self.params = params
        
        @staticmethod
        def pmkdir(filename):
        
        @staticmethod
        def pmkdir(filename):
@@ -142,7 +147,7 @@ class FileDownloader(object):
                        return '--:--'
                return '%02d:%02d' % (eta_mins, eta_secs)
 
                        return '--:--'
                return '%02d:%02d' % (eta_mins, eta_secs)
 
-       @staticmethod
+       @staticmethod
        def calc_speed(start, now, bytes):
                dif = now - start
                if bytes == 0 or dif < 0.001: # One millisecond
        def calc_speed(start, now, bytes):
                dif = now - start
                if bytes == 0 or dif < 0.001: # One millisecond
@@ -172,16 +177,6 @@ class FileDownloader(object):
                multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
                return long(round(number * multiplier))
 
                multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
                return long(round(number * multiplier))
 
-       def set_params(self, params):
-               """Sets parameters."""
-               if type(params) != dict:
-                       raise ValueError('params: dictionary expected')
-               self._params = params
-       
-       def get_params(self):
-               """Get parameters."""
-               return self._params
-
        def add_info_extractor(self, ie):
                """Add an InfoExtractor object to the end of the list."""
                self._ies.append(ie)
        def add_info_extractor(self, ie):
                """Add an InfoExtractor object to the end of the list."""
                self._ies.append(ie)
@@ -194,8 +189,8 @@ class FileDownloader(object):
        
        def to_stdout(self, message, skip_eol=False):
                """Print message to stdout if not in quiet mode."""
        
        def to_stdout(self, message, skip_eol=False):
                """Print message to stdout if not in quiet mode."""
-               if not self._params.get('quiet', False):
-                       print u'%s%s' % (message, [u'\n', u''][skip_eol]),
+               if not self.params.get('quiet', False):
+                       print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(locale.getpreferredencoding()),
                        sys.stdout.flush()
        
        def to_stderr(self, message):
                        sys.stdout.flush()
        
        def to_stderr(self, message):
@@ -204,26 +199,24 @@ class FileDownloader(object):
        
        def fixed_template(self):
                """Checks if the output template is fixed."""
        
        def fixed_template(self):
                """Checks if the output template is fixed."""
-               return (re.search(ur'(?u)%\(.+?\)s', self._params['outtmpl']) is None)
+               return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
 
        def trouble(self, message=None):
                """Determine action to take when a download problem appears.
 
                Depending on if the downloader has been configured to ignore
                download errors or not, this method may throw an exception or
 
        def trouble(self, message=None):
                """Determine action to take when a download problem appears.
 
                Depending on if the downloader has been configured to ignore
                download errors or not, this method may throw an exception or
-               not when errors are found, after printing the message. If it
-               doesn't raise, it returns an error code suitable to be returned
-               later as a program exit code to indicate error.
+               not when errors are found, after printing the message.
                """
                if message is not None:
                        self.to_stderr(message)
                """
                if message is not None:
                        self.to_stderr(message)
-               if not self._params.get('ignoreerrors', False):
+               if not self.params.get('ignoreerrors', False):
                        raise DownloadError(message)
                        raise DownloadError(message)
-               return 1
+               self._download_retcode = 1
 
        def slow_down(self, start_time, byte_counter):
                """Sleep if the download speed is over the rate limit."""
 
        def slow_down(self, start_time, byte_counter):
                """Sleep if the download speed is over the rate limit."""
-               rate_limit = self._params.get('ratelimit', None)
+               rate_limit = self.params.get('ratelimit', None)
                if rate_limit is None or byte_counter == 0:
                        return
                now = time.time()
                if rate_limit is None or byte_counter == 0:
                        return
                now = time.time()
@@ -247,74 +240,78 @@ class FileDownloader(object):
                """Report download finished."""
                self.to_stdout(u'')
 
                """Report download finished."""
                self.to_stdout(u'')
 
+       def process_info(self, info_dict):
+               """Process a single dictionary returned by an InfoExtractor."""
+               # Forced printings
+               if self.params.get('forcetitle', False):
+                       print info_dict['title'].encode(locale.getpreferredencoding())
+               if self.params.get('forceurl', False):
+                       print info_dict['url'].encode(locale.getpreferredencoding())
+                       
+               # Do nothing else if in simulate mode
+               if self.params.get('simulate', False):
+                       return
+
+               try:
+                       filename = self.params['outtmpl'] % info_dict
+                       self.report_destination(filename)
+               except (ValueError, KeyError), err:
+                       self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
+               if self.params['nooverwrites'] and os.path.exists(filename):
+                       self.to_stderr('WARNING: file exists: %s; skipping' % filename)
+                       return
+               try:
+                       self.pmkdir(filename)
+               except (OSError, IOError), err:
+                       self.trouble('ERROR: unable to create directories: %s' % str(err))
+                       return
+               try:
+                       outstream = open(filename, 'wb')
+               except (OSError, IOError), err:
+                       self.trouble('ERROR: unable to open for writing: %s' % str(err))
+                       return
+               try:
+                       self._do_download(outstream, info_dict['url'])
+                       outstream.close()
+               except (OSError, IOError), err:
+                       self.trouble('ERROR: unable to write video data: %s' % str(err))
+                       return
+               except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+                       self.trouble('ERROR: unable to download video data: %s' % str(err))
+                       return
+               try:
+                       self.post_process(filename, info_dict)
+               except (PostProcessingError), err:
+                       self.trouble('ERROR: postprocessing: %s' % str(err))
+                       return
+
+               return
+
        def download(self, url_list):
                """Download a given list of URLs."""
        def download(self, url_list):
                """Download a given list of URLs."""
-               retcode = 0
                if len(url_list) > 1 and self.fixed_template():
                if len(url_list) > 1 and self.fixed_template():
-                       raise SameFileError(self._params['outtmpl'])
+                       raise SameFileError(self.params['outtmpl'])
 
                for url in url_list:
                        suitable_found = False
                        for ie in self._ies:
 
                for url in url_list:
                        suitable_found = False
                        for ie in self._ies:
+                               # Go to next InfoExtractor if not suitable
                                if not ie.suitable(url):
                                        continue
                                if not ie.suitable(url):
                                        continue
+
                                # Suitable InfoExtractor found
                                suitable_found = True
                                # Suitable InfoExtractor found
                                suitable_found = True
-                               all_results = ie.extract(url)
-                               results = [x for x in all_results if x is not None]
-                               if len(results) != len(all_results):
-                                       retcode = self.trouble()
-
-                               if len(results) > 1 and self.fixed_template():
-                                       raise SameFileError(self._params['outtmpl'])
-
-                               for result in results:
-                                       # Forced printings
-                                       if self._params.get('forcetitle', False):
-                                               print result['title']
-                                       if self._params.get('forceurl', False):
-                                               print result['url']
-                                               
-                                       # Do nothing else if in simulate mode
-                                       if self._params.get('simulate', False):
-                                               continue
-
-                                       try:
-                                               filename = self._params['outtmpl'] % result
-                                               self.report_destination(filename)
-                                       except (ValueError, KeyError), err:
-                                               retcode = self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
-                                               continue
-                                       try:
-                                               self.pmkdir(filename)
-                                       except (OSError, IOError), err:
-                                               retcode = self.trouble('ERROR: unable to create directories: %s' % str(err))
-                                               continue
-                                       try:
-                                               outstream = open(filename, 'wb')
-                                       except (OSError, IOError), err:
-                                               retcode = self.trouble('ERROR: unable to open for writing: %s' % str(err))
-                                               continue
-                                       try:
-                                               self._do_download(outstream, result['url'])
-                                               outstream.close()
-                                       except (OSError, IOError), err:
-                                               retcode = self.trouble('ERROR: unable to write video data: %s' % str(err))
-                                               continue
-                                       except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                                               retcode = self.trouble('ERROR: unable to download video data: %s' % str(err))
-                                               continue
-                                       try:
-                                               self.post_process(filename, result)
-                                       except (PostProcessingError), err:
-                                               retcode = self.trouble('ERROR: postprocessing: %s' % str(err))
-                                               continue
 
 
+                               # Extract information from URL and process it
+                               ie.extract(url)
+
+                               # Suitable InfoExtractor had been found; go to next URL
                                break
                                break
+
                        if not suitable_found:
                        if not suitable_found:
-                               retcode = self.trouble('ERROR: no suitable InfoExtractor: %s' % url)
+                               self.trouble('ERROR: no suitable InfoExtractor: %s' % url)
 
 
-               return retcode
+               return self._download_retcode
 
        def post_process(self, filename, ie_info):
                """Run the postprocessing chain on the given file."""
 
        def post_process(self, filename, ie_info):
                """Run the postprocessing chain on the given file."""
@@ -364,9 +361,10 @@ class InfoExtractor(object):
        Information extractors are the classes that, given a URL, extract
        information from the video (or videos) the URL refers to. This
        information includes the real video URL, the video title and simplified
        Information extractors are the classes that, given a URL, extract
        information from the video (or videos) the URL refers to. This
        information includes the real video URL, the video title and simplified
-       title, author and others. It is returned in a list of dictionaries when
-       calling its extract() method. It is a list because a URL can refer to
-       more than one video (think of playlists). The dictionaries must include
+       title, author and others. The information is stored in a dictionary
+       which is then passed to the FileDownloader. The FileDownloader
+       processes this information possibly downloading the video to the file
+       system, among other possible outcomes. The dictionaries must include
        the following fields:
 
        id:             Video identifier.
        the following fields:
 
        id:             Video identifier.
@@ -410,15 +408,6 @@ class InfoExtractor(object):
                """Sets the downloader for this IE."""
                self._downloader = downloader
        
                """Sets the downloader for this IE."""
                self._downloader = downloader
        
-       def to_stdout(self, message):
-               """Print message to stdout if downloader is not in quiet mode."""
-               if self._downloader is None or not self._downloader.get_params().get('quiet', False):
-                       print message
-       
-       def to_stderr(self, message):
-               """Print message to stderr."""
-               print >>sys.stderr, message
-
        def _real_initialize(self):
                """Real initialization process. Redefine in subclasses."""
                pass
        def _real_initialize(self):
                """Real initialization process. Redefine in subclasses."""
                pass
@@ -440,37 +429,60 @@ class YoutubeIE(InfoExtractor):
        def suitable(url):
                return (re.match(YoutubeIE._VALID_URL, url) is not None)
 
        def suitable(url):
                return (re.match(YoutubeIE._VALID_URL, url) is not None)
 
+       @staticmethod
+       def htmlentity_transform(matchobj):
+               """Transforms an HTML entity to a Unicode character."""
+               entity = matchobj.group(1)
+
+               # Known non-numeric HTML entity
+               if entity in htmlentitydefs.name2codepoint:
+                       return unichr(htmlentitydefs.name2codepoint[entity])
+
+               # Unicode character
+               mobj = re.match(ur'(?u)#(x?\d+)', entity)
+               if mobj is not None:
+                       numstr = mobj.group(1)
+                       if numstr.startswith(u'x'):
+                               base = 16
+                               numstr = u'0%s' % numstr
+                       else:
+                               base = 10
+                       return unichr(long(numstr, base))
+
+               # Unknown entity in name, return its literal representation
+               return (u'&%s;' % entity)
+
        def report_lang(self):
                """Report attempt to set language."""
        def report_lang(self):
                """Report attempt to set language."""
-               self.to_stdout(u'[youtube] Setting language')
+               self._downloader.to_stdout(u'[youtube] Setting language')
 
        def report_login(self):
                """Report attempt to log in."""
 
        def report_login(self):
                """Report attempt to log in."""
-               self.to_stdout(u'[youtube] Logging in')
+               self._downloader.to_stdout(u'[youtube] Logging in')
        
        def report_age_confirmation(self):
                """Report attempt to confirm age."""
        
        def report_age_confirmation(self):
                """Report attempt to confirm age."""
-               self.to_stdout(u'[youtube] Confirming age')
+               self._downloader.to_stdout(u'[youtube] Confirming age')
        
        def report_webpage_download(self, video_id):
                """Report attempt to download webpage."""
        
        def report_webpage_download(self, video_id):
                """Report attempt to download webpage."""
-               self.to_stdout(u'[youtube] %s: Downloading video webpage' % video_id)
+               self._downloader.to_stdout(u'[youtube] %s: Downloading video webpage' % video_id)
        
        def report_information_extraction(self, video_id):
                """Report attempt to extract video information."""
        
        def report_information_extraction(self, video_id):
                """Report attempt to extract video information."""
-               self.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
+               self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
        
        def report_video_url(self, video_id, video_real_url):
                """Report extracted video URL."""
        
        def report_video_url(self, video_id, video_real_url):
                """Report extracted video URL."""
-               self.to_stdout(u'[youtube] %s: URL: %s' % (video_id, video_real_url))
-
+               self._downloader.to_stdout(u'[youtube] %s: URL: %s' % (video_id, video_real_url))
+       
        def _real_initialize(self):
                if self._downloader is None:
                        return
 
                username = None
                password = None
        def _real_initialize(self):
                if self._downloader is None:
                        return
 
                username = None
                password = None
-               downloader_params = self._downloader.get_params()
+               downloader_params = self._downloader.params
 
                # Attempt to use provided username and password or .netrc data
                if downloader_params.get('username', None) is not None:
 
                # Attempt to use provided username and password or .netrc data
                if downloader_params.get('username', None) is not None:
@@ -485,20 +497,20 @@ class YoutubeIE(InfoExtractor):
                                else:
                                        raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
                        except (IOError, netrc.NetrcParseError), err:
                                else:
                                        raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
                        except (IOError, netrc.NetrcParseError), err:
-                               self.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
+                               self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
                                return
 
                                return
 
-               # No authentication to be performed
-               if username is None:
-                       return
-
                # Set language
                # Set language
-               request = urllib2.Request(self._LOGIN_URL, None, std_headers)
+               request = urllib2.Request(self._LANG_URL, None, std_headers)
                try:
                        self.report_lang()
                        urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                try:
                        self.report_lang()
                        urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self.to_stderr(u'WARNING: unable to set language: %s' % str(err))
+                       self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
+                       return
+
+               # No authentication to be performed
+               if username is None:
                        return
 
                # Log in
                        return
 
                # Log in
@@ -514,10 +526,10 @@ class YoutubeIE(InfoExtractor):
                        self.report_login()
                        login_results = urllib2.urlopen(request).read()
                        if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
                        self.report_login()
                        login_results = urllib2.urlopen(request).read()
                        if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
-                               self.to_stderr(u'WARNING: unable to log in: bad username or password')
+                               self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
                                return
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                                return
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self.to_stderr(u'WARNING: unable to log in: %s' % str(err))
+                       self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
                        return
        
                # Confirm age
                        return
        
                # Confirm age
@@ -530,25 +542,29 @@ class YoutubeIE(InfoExtractor):
                        self.report_age_confirmation()
                        age_results = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                        self.report_age_confirmation()
                        age_results = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self.to_stderr(u'ERROR: unable to confirm age: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
                        return
 
        def _real_extract(self, url):
                # Extract video id from URL
                mobj = re.match(self._VALID_URL, url)
                if mobj is None:
                        return
 
        def _real_extract(self, url):
                # Extract video id from URL
                mobj = re.match(self._VALID_URL, url)
                if mobj is None:
-                       self.to_stderr(u'ERROR: invalid URL: %s' % url)
-                       return [None]
+                       self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
+                       return
                video_id = mobj.group(2)
 
                # Downloader parameters
                format_param = None
                if self._downloader is not None:
                video_id = mobj.group(2)
 
                # Downloader parameters
                format_param = None
                if self._downloader is not None:
-                       params = self._downloader.get_params()
+                       params = self._downloader.params
                        format_param = params.get('format', None)
 
                # Extension
                        format_param = params.get('format', None)
 
                # Extension
-               video_extension = {'18': 'mp4', '17': '3gp'}.get(format_param, 'flv')
+               video_extension = {
+                       '17': '3gp',
+                       '18': 'mp4',
+                       '22': 'mp4',
+               }.get(format_param, 'flv')
 
                # Normalize URL, including format
                normalized_url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id
 
                # Normalize URL, including format
                normalized_url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id
@@ -559,16 +575,16 @@ class YoutubeIE(InfoExtractor):
                        self.report_webpage_download(video_id)
                        video_webpage = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                        self.report_webpage_download(video_id)
                        video_webpage = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self.to_stderr(u'ERROR: unable to download video webpage: %s' % str(err))
-                       return [None]
+                       self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
+                       return
                self.report_information_extraction(video_id)
                
                # "t" param
                mobj = re.search(r', "t": "([^"]+)"', video_webpage)
                if mobj is None:
                self.report_information_extraction(video_id)
                
                # "t" param
                mobj = re.search(r', "t": "([^"]+)"', video_webpage)
                if mobj is None:
-                       self.to_stderr(u'ERROR: unable to extract "t" parameter')
-                       return [None]
-               video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s' % (video_id, mobj.group(1))
+                       self._downloader.trouble(u'ERROR: unable to extract "t" parameter')
+                       return
+               video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&el=detailpage&ps=' % (video_id, mobj.group(1))
                if format_param is not None:
                        video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
                self.report_video_url(video_id, video_real_url)
                if format_param is not None:
                        video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
                self.report_video_url(video_id, video_real_url)
@@ -576,38 +592,39 @@ class YoutubeIE(InfoExtractor):
                # uploader
                mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage)
                if mobj is None:
                # uploader
                mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage)
                if mobj is None:
-                       self.to_stderr(u'ERROR: unable to extract uploader nickname')
-                       return [None]
+                       self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
+                       return
                video_uploader = mobj.group(1)
 
                # title
                mobj = re.search(r'(?im)<title>YouTube - ([^<]*)</title>', video_webpage)
                if mobj is None:
                video_uploader = mobj.group(1)
 
                # title
                mobj = re.search(r'(?im)<title>YouTube - ([^<]*)</title>', video_webpage)
                if mobj is None:
-                       self.to_stderr(u'ERROR: unable to extract video title')
-                       return [None]
+                       self._downloader.trouble(u'ERROR: unable to extract video title')
+                       return
                video_title = mobj.group(1).decode('utf-8')
                video_title = mobj.group(1).decode('utf-8')
-               video_title = re.sub(ur'(?u)&(.+?);', lambda x: unichr(htmlentitydefs.name2codepoint[x.group(1)]), video_title)
+               video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
                video_title = video_title.replace(os.sep, u'%')
 
                # simplified title
                simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
                simple_title = simple_title.strip(ur'_')
 
                video_title = video_title.replace(os.sep, u'%')
 
                # simplified title
                simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
                simple_title = simple_title.strip(ur'_')
 
-               # Return information
-               return [{
+               # Process video information
+               self._downloader.process_info({
                        'id':           video_id.decode('utf-8'),
                        'url':          video_real_url.decode('utf-8'),
                        'uploader':     video_uploader.decode('utf-8'),
                        'title':        video_title,
                        'stitle':       simple_title,
                        'ext':          video_extension.decode('utf-8'),
                        'id':           video_id.decode('utf-8'),
                        'url':          video_real_url.decode('utf-8'),
                        'uploader':     video_uploader.decode('utf-8'),
                        'title':        video_title,
                        'stitle':       simple_title,
                        'ext':          video_extension.decode('utf-8'),
-                       }]
+                       })
 
 class MetacafeIE(InfoExtractor):
        """Information Extractor for metacafe.com."""
 
        _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
        _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
 
 class MetacafeIE(InfoExtractor):
        """Information Extractor for metacafe.com."""
 
        _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
        _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
+       _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
        _youtube_ie = None
 
        def __init__(self, youtube_ie, downloader=None):
        _youtube_ie = None
 
        def __init__(self, youtube_ie, downloader=None):
@@ -620,19 +637,19 @@ class MetacafeIE(InfoExtractor):
 
        def report_disclaimer(self):
                """Report disclaimer retrieval."""
 
        def report_disclaimer(self):
                """Report disclaimer retrieval."""
-               self.to_stdout(u'[metacafe] Retrieving disclaimer')
+               self._downloader.to_stdout(u'[metacafe] Retrieving disclaimer')
 
        def report_age_confirmation(self):
                """Report attempt to confirm age."""
 
        def report_age_confirmation(self):
                """Report attempt to confirm age."""
-               self.to_stdout(u'[metacafe] Confirming age')
+               self._downloader.to_stdout(u'[metacafe] Confirming age')
        
        def report_download_webpage(self, video_id):
                """Report webpage download."""
        
        def report_download_webpage(self, video_id):
                """Report webpage download."""
-               self.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id)
+               self._downloader.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id)
        
        def report_extraction(self, video_id):
                """Report information extraction."""
        
        def report_extraction(self, video_id):
                """Report information extraction."""
-               self.to_stdout(u'[metacafe] %s: Extracting information' % video_id)
+               self._downloader.to_stdout(u'[metacafe] %s: Extracting information' % video_id)
 
        def _real_initialize(self):
                # Retrieve disclaimer
 
        def _real_initialize(self):
                # Retrieve disclaimer
@@ -641,7 +658,7 @@ class MetacafeIE(InfoExtractor):
                        self.report_disclaimer()
                        disclaimer = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                        self.report_disclaimer()
                        disclaimer = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self.to_stderr(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
                        return
 
                # Confirm age
                        return
 
                # Confirm age
@@ -649,27 +666,28 @@ class MetacafeIE(InfoExtractor):
                        'filters': '0',
                        'submit': "Continue - I'm over 18",
                        }
                        'filters': '0',
                        'submit': "Continue - I'm over 18",
                        }
-               request = urllib2.Request('http://www.metacafe.com/', urllib.urlencode(disclaimer_form), std_headers)
+               request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
                try:
                        self.report_age_confirmation()
                        disclaimer = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                try:
                        self.report_age_confirmation()
                        disclaimer = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self.to_stderr(u'ERROR: unable to confirm age: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
                        return
        
        def _real_extract(self, url):
                # Extract id and simplified title from URL
                mobj = re.match(self._VALID_URL, url)
                if mobj is None:
                        return
        
        def _real_extract(self, url):
                # Extract id and simplified title from URL
                mobj = re.match(self._VALID_URL, url)
                if mobj is None:
-                       self.to_stderr(u'ERROR: invalid URL: %s' % url)
-                       return [None]
+                       self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
+                       return
 
                video_id = mobj.group(1)
 
                # Check if video comes from YouTube
                mobj2 = re.match(r'^yt-(.*)$', video_id)
                if mobj2 is not None:
 
                video_id = mobj.group(1)
 
                # Check if video comes from YouTube
                mobj2 = re.match(r'^yt-(.*)$', video_id)
                if mobj2 is not None:
-                       return self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
+                       self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
+                       return
 
                simple_title = mobj.group(2).decode('utf-8')
                video_extension = 'flv'
 
                simple_title = mobj.group(2).decode('utf-8')
                video_extension = 'flv'
@@ -680,46 +698,136 @@ class MetacafeIE(InfoExtractor):
                        self.report_download_webpage(video_id)
                        webpage = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                        self.report_download_webpage(video_id)
                        webpage = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self.to_stderr(u'ERROR: unable retrieve video webpage: %s' % str(err))
-                       return [None]
+                       self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
+                       return
 
                # Extract URL, uploader and title from webpage
                self.report_extraction(video_id)
 
                # Extract URL, uploader and title from webpage
                self.report_extraction(video_id)
-               mobj = re.search(r'(?m)"mediaURL":"(http.*?\.flv)"', webpage)
+               mobj = re.search(r'(?m)&mediaURL=(http.*?\.flv)', webpage)
                if mobj is None:
                if mobj is None:
-                       self.to_stderr(u'ERROR: unable to extract media URL')
-                       return [None]
-               mediaURL = mobj.group(1).replace('\\', '')
+                       self._downloader.trouble(u'ERROR: unable to extract media URL')
+                       return
+               mediaURL = urllib.unquote(mobj.group(1))
 
 
-               mobj = re.search(r'(?m)"gdaKey":"(.*?)"', webpage)
+               mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
                if mobj is None:
                if mobj is None:
-                       self.to_stderr(u'ERROR: unable to extract gdaKey')
-                       return [None]
+                       self._downloader.trouble(u'ERROR: unable to extract gdaKey')
+                       return
                gdaKey = mobj.group(1)
 
                video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
 
                mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
                if mobj is None:
                gdaKey = mobj.group(1)
 
                video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
 
                mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
                if mobj is None:
-                       self.to_stderr(u'ERROR: unable to extract title')
-                       return [None]
+                       self._downloader.trouble(u'ERROR: unable to extract title')
+                       return
                video_title = mobj.group(1).decode('utf-8')
 
                video_title = mobj.group(1).decode('utf-8')
 
-               mobj = re.search(r'(?m)<li id="ChnlUsr">.*?Submitter:<br />(.*?)</li>', webpage)
+               mobj = re.search(r'(?ms)<li id="ChnlUsr">.*?Submitter:.*?<a .*?>(.*?)<', webpage)
                if mobj is None:
                if mobj is None:
-                       self.to_stderr(u'ERROR: unable to extract uploader nickname')
-                       return [None]
-               video_uploader = re.sub(r'<.*?>', '', mobj.group(1))
+                       self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
+                       return
+               video_uploader = mobj.group(1)
 
 
-               # Return information
-               return [{
+               # Process video information
+               self._downloader.process_info({
                        'id':           video_id.decode('utf-8'),
                        'url':          video_url.decode('utf-8'),
                        'uploader':     video_uploader.decode('utf-8'),
                        'title':        video_title,
                        'stitle':       simple_title,
                        'ext':          video_extension.decode('utf-8'),
                        'id':           video_id.decode('utf-8'),
                        'url':          video_url.decode('utf-8'),
                        'uploader':     video_uploader.decode('utf-8'),
                        'title':        video_title,
                        'stitle':       simple_title,
                        'ext':          video_extension.decode('utf-8'),
-                       }]
+                       })
+
+
+class YoutubeSearchIE(InfoExtractor):
+       """Information Extractor for YouTube search queries."""
+       _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
+       _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
+       _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
+       _MORE_PAGES_INDICATOR = r'>Next</a>'
+       _youtube_ie = None
+       _max_youtube_results = 1000
+
+       def __init__(self, youtube_ie, downloader=None):
+               InfoExtractor.__init__(self, downloader)
+               self._youtube_ie = youtube_ie
+       
+       @staticmethod
+       def suitable(url):
+               return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
+
+       def report_download_page(self, query, pagenum):
+               """Report attempt to download playlist page with given number."""
+               self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
+
+       def _real_initialize(self):
+               self._youtube_ie.initialize()
+       
+       def _real_extract(self, query):
+               mobj = re.match(self._VALID_QUERY, query)
+               if mobj is None:
+                       self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
+                       return
+
+               prefix, query = query.split(':')
+               prefix = prefix[8:]
+               if prefix == '':
+                       self._download_n_results(query, 1)
+                       return
+               elif prefix == 'all':
+                       self._download_n_results(query, self._max_youtube_results)
+                       return
+               else:
+                       try:
+                               n = int(prefix)
+                               if n <= 0:
+                                       self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
+                                       return
+                               elif n > self._max_youtube_results:
+                                       self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n))
+                                       n = self._max_youtube_results
+                               self._download_n_results(query, n)
+                               return
+                       except ValueError: # parsing prefix as int fails
+                               self._download_n_results(query, 1)
+                               return
+
+       def _download_n_results(self, query, n):
+               """Downloads a specified number of results for a query"""
+
+               video_ids = []
+               already_seen = set()
+               pagenum = 1
+
+               while True:
+                       self.report_download_page(query, pagenum)
+                       result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
+                       request = urllib2.Request(result_url, None, std_headers)
+                       try:
+                               page = urllib2.urlopen(request).read()
+                       except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+                               self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
+                               return
+
+                       # Extract video identifiers
+                       for mobj in re.finditer(self._VIDEO_INDICATOR, page):
+                               video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
+                               if video_id not in already_seen:
+                                       video_ids.append(video_id)
+                                       already_seen.add(video_id)
+                                       if len(video_ids) == n:
+                                               # Specified n videos reached
+                                               for id in video_ids:
+                                                       self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
+                                               return
+
+                       if self._MORE_PAGES_INDICATOR not in page:
+                               for id in video_ids:
+                                       self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
+                               return
+
+                       pagenum = pagenum + 1
 
 class YoutubePlaylistIE(InfoExtractor):
        """Information Extractor for YouTube playlists."""
 
 class YoutubePlaylistIE(InfoExtractor):
        """Information Extractor for YouTube playlists."""
@@ -740,7 +848,7 @@ class YoutubePlaylistIE(InfoExtractor):
 
        def report_download_page(self, playlist_id, pagenum):
                """Report attempt to download playlist page with given number."""
 
        def report_download_page(self, playlist_id, pagenum):
                """Report attempt to download playlist page with given number."""
-               self.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
+               self._downloader.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
 
        def _real_initialize(self):
                self._youtube_ie.initialize()
 
        def _real_initialize(self):
                self._youtube_ie.initialize()
@@ -749,8 +857,8 @@ class YoutubePlaylistIE(InfoExtractor):
                # Extract playlist id
                mobj = re.match(self._VALID_URL, url)
                if mobj is None:
                # Extract playlist id
                mobj = re.match(self._VALID_URL, url)
                if mobj is None:
-                       self.to_stderr(u'ERROR: invalid url: %s' % url)
-                       return [None]
+                       self._downloader.trouble(u'ERROR: invalid url: %s' % url)
+                       return
 
                # Download playlist pages
                playlist_id = mobj.group(1)
 
                # Download playlist pages
                playlist_id = mobj.group(1)
@@ -763,8 +871,8 @@ class YoutubePlaylistIE(InfoExtractor):
                        try:
                                page = urllib2.urlopen(request).read()
                        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                        try:
                                page = urllib2.urlopen(request).read()
                        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                               self.to_stderr(u'ERROR: unable to download webpage: %s' % str(err))
-                               return [None]
+                               self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
+                               return
 
                        # Extract video identifiers
                        ids_in_page = []
 
                        # Extract video identifiers
                        ids_in_page = []
@@ -777,10 +885,9 @@ class YoutubePlaylistIE(InfoExtractor):
                                break
                        pagenum = pagenum + 1
 
                                break
                        pagenum = pagenum + 1
 
-               information = []
                for id in video_ids:
                for id in video_ids:
-                       information.extend(self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id))
-               return information
+                       self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
+               return
 
 class PostProcessor(object):
        """Post Processor class.
 
 class PostProcessor(object):
        """Post Processor class.
@@ -804,15 +911,6 @@ class PostProcessor(object):
        def __init__(self, downloader=None):
                self._downloader = downloader
 
        def __init__(self, downloader=None):
                self._downloader = downloader
 
-       def to_stdout(self, message):
-               """Print message to stdout if downloader is not in quiet mode."""
-               if self._downloader is None or not self._downloader.get_params().get('quiet', False):
-                       print message
-       
-       def to_stderr(self, message):
-               """Print message to stderr."""
-               print >>sys.stderr, message
-
        def set_downloader(self, downloader):
                """Sets the downloader for this PP."""
                self._downloader = downloader
        def set_downloader(self, downloader):
                """Sets the downloader for this PP."""
                self._downloader = downloader
@@ -821,7 +919,7 @@ class PostProcessor(object):
                """Run the PostProcessor.
 
                The "information" argument is a dictionary like the ones
                """Run the PostProcessor.
 
                The "information" argument is a dictionary like the ones
-               returned by InfoExtractors. The only difference is that this
+               composed by InfoExtractors. The only difference is that this
                one has an extra field called "filepath" that points to the
                downloaded file.
 
                one has an extra field called "filepath" that points to the
                downloaded file.
 
@@ -852,7 +950,7 @@ if __name__ == '__main__':
                # Parse command line
                parser = optparse.OptionParser(
                                usage='Usage: %prog [options] url...',
                # Parse command line
                parser = optparse.OptionParser(
                                usage='Usage: %prog [options] url...',
-                               version='2009.01.31',
+                               version='INTERNAL',
                                conflict_handler='resolve',
                                )
                parser.add_option('-h', '--help',
                                conflict_handler='resolve',
                                )
                parser.add_option('-h', '--help',
@@ -881,22 +979,27 @@ if __name__ == '__main__':
                                action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
                parser.add_option('-f', '--format',
                                dest='format', metavar='FMT', help='video format code')
                                action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
                parser.add_option('-f', '--format',
                                dest='format', metavar='FMT', help='video format code')
-               parser.add_option('-b', '--best-quality',
-                               action='store_const', dest='format', help='alias for -f 18', const='18')
                parser.add_option('-m', '--mobile-version',
                                action='store_const', dest='format', help='alias for -f 17', const='17')
                parser.add_option('-m', '--mobile-version',
                                action='store_const', dest='format', help='alias for -f 17', const='17')
+               parser.add_option('-d', '--high-def',
+                               action='store_const', dest='format', help='alias for -f 22', const='22')
                parser.add_option('-i', '--ignore-errors',
                                action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
                parser.add_option('-r', '--rate-limit',
                                dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)')
                parser.add_option('-a', '--batch-file',
                                dest='batchfile', metavar='F', help='file containing URLs to download')
                parser.add_option('-i', '--ignore-errors',
                                action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
                parser.add_option('-r', '--rate-limit',
                                dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)')
                parser.add_option('-a', '--batch-file',
                                dest='batchfile', metavar='F', help='file containing URLs to download')
+               parser.add_option('-w', '--no-overwrites',
+                               action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
                (opts, args) = parser.parse_args()
 
                # Batch file verification
                (opts, args) = parser.parse_args()
 
                # Batch file verification
+               batchurls = []
                if opts.batchfile is not None:
                        try:
                if opts.batchfile is not None:
                        try:
-                               batchurls = [line.strip() for line in open(opts.batchfile, 'r')]
+                               batchurls = open(opts.batchfile, 'r').readlines()
+                               batchurls = [x.strip() for x in batchurls]
+                               batchurls = [x for x in batchurls if len(x) > 0]
                        except IOError:
                                sys.exit(u'ERROR: batch file could not be read')
                all_urls = batchurls + args
                        except IOError:
                                sys.exit(u'ERROR: batch file could not be read')
                all_urls = batchurls + args
@@ -924,11 +1027,9 @@ if __name__ == '__main__':
                youtube_ie = YoutubeIE()
                metacafe_ie = MetacafeIE(youtube_ie)
                youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
                youtube_ie = YoutubeIE()
                metacafe_ie = MetacafeIE(youtube_ie)
                youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
+               youtube_search_ie = YoutubeSearchIE(youtube_ie)
 
                # File downloader
 
                # File downloader
-               charset = locale.getdefaultlocale()[1]
-               if charset is None:
-                       charset = 'ascii'
                fd = FileDownloader({
                        'usenetrc': opts.usenetrc,
                        'username': opts.username,
                fd = FileDownloader({
                        'usenetrc': opts.usenetrc,
                        'username': opts.username,
@@ -938,13 +1039,15 @@ if __name__ == '__main__':
                        'forcetitle': opts.gettitle,
                        'simulate': (opts.simulate or opts.geturl or opts.gettitle),
                        'format': opts.format,
                        'forcetitle': opts.gettitle,
                        'simulate': (opts.simulate or opts.geturl or opts.gettitle),
                        'format': opts.format,
-                       'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(charset))
+                       'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(locale.getpreferredencoding()))
                                or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
                                or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
                                or u'%(id)s.%(ext)s'),
                        'ignoreerrors': opts.ignoreerrors,
                        'ratelimit': opts.ratelimit,
                                or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
                                or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
                                or u'%(id)s.%(ext)s'),
                        'ignoreerrors': opts.ignoreerrors,
                        'ratelimit': opts.ratelimit,
+                       'nooverwrites': opts.nooverwrites,
                        })
                        })
+               fd.add_info_extractor(youtube_search_ie)
                fd.add_info_extractor(youtube_pl_ie)
                fd.add_info_extractor(metacafe_ie)
                fd.add_info_extractor(youtube_ie)
                fd.add_info_extractor(youtube_pl_ie)
                fd.add_info_extractor(metacafe_ie)
                fd.add_info_extractor(youtube_ie)