gitweb @ CieloNegro.org - youtube-dl.git/blobdiff - youtube-dl
Add --rate-limit program option
[youtube-dl.git] / youtube-dl
index 46d45d37d95630d5fd742ee16d182530a3dcb34f..4dea34376bba4b87dfcf4ff7a47fd570ec906549 100755 (executable)
@@ -25,6 +25,23 @@ std_headers = {
 
 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
 
+class DownloadError(Exception):
+       """Download Error exception.
+       
+       This exception may be thrown by FileDownloader objects if they are not
+       configured to continue on errors. They will contain the appropriate
+       error message.
+       """
+       pass
+
+class SameFileError(Exception):
+       """Same File exception.
+
+       This exception will be thrown by FileDownloader objects if they detect
+       multiple files would have to be downloaded to the same file on disk.
+       """
+       pass
+
 class FileDownloader(object):
        """File Downloader class.
 
@@ -61,6 +78,7 @@ class FileDownloader(object):
        format:         Video format code.
        outtmpl:        Template for output names.
        ignoreerrors:   Do not stop on download errors.
+       ratelimit:      Download speed limit, in bytes/sec.
        """
 
        _params = None
@@ -132,6 +150,16 @@ class FileDownloader(object):
                        return int(new_min)
                return int(rate)
 
+       @staticmethod
+       def parse_bytes(bytestr):
+               """Parse a string indicating a byte quantity into a long integer."""
+               matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
+               if matchobj is None:
+                       return None
+               number = float(matchobj.group(1))
+               multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
+               return long(round(number * multiplier))
+
        def set_params(self, params):
                """Sets parameters."""
                if type(params) != dict:
@@ -165,22 +193,48 @@ class FileDownloader(object):
                """Determine action to take when a download problem appears.
 
                Depending on if the downloader has been configured to ignore
-               download errors or not, this method may exit the program or
+               download errors or not, this method may throw an exception or
                not when errors are found, after printing the message. If it
-               doesn't exit, it returns an error code suitable to be returned
+               doesn't raise, it returns an error code suitable to be returned
                later as a program exit code to indicate error.
                """
                if message is not None:
                        self.to_stderr(message)
                if not self._params.get('ignoreerrors', False):
-                       sys.exit(1)
+                       raise DownloadError(message)
                return 1
 
+       def slow_down(self, start_time, byte_counter):
+               """Sleep if the download speed is over the rate limit."""
+               rate_limit = self._params.get('ratelimit', None)
+               if rate_limit is None or byte_counter == 0:
+                       return
+               now = time.time()
+               elapsed = now - start_time
+               if elapsed <= 0.0:
+                       return
+               speed = float(byte_counter) / elapsed
+               if speed > rate_limit:
+                       time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
+
+       def report_destination(self, filename):
+               """Report destination filename."""
+               self.to_stdout('[download] Destination: %s' % filename)
+       
+       def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
+               """Report download progress."""
+               self.to_stdout('\r[download] %s of %s at %s ETA %s' %
+                               (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
+       
+       def report_finish(self):
+               """Report download finished."""
+               self.to_stdout('')
+
        def download(self, url_list):
                """Download a given list of URLs."""
                retcode = 0
                if len(url_list) > 1 and self.fixed_template():
-                       sys.exit('ERROR: fixed output name but more than one file to download')
+                       raise SameFileError(self._params['outtmpl'])
 
                for url in url_list:
                        suitable_found = False
@@ -195,7 +249,7 @@ class FileDownloader(object):
                                        retcode = self.trouble()
 
                                if len(results) > 1 and self.fixed_template():
-                                       sys.exit('ERROR: fixed output name but more than one file to download')
+                                       raise SameFileError(self._params['outtmpl'])
 
                                for result in results:
 
@@ -211,6 +265,7 @@ class FileDownloader(object):
 
                                        try:
                                                filename = self._params['outtmpl'] % result
+                                               self.report_destination(filename)
                                        except (ValueError, KeyError), err:
                                                retcode = self.trouble('ERROR: invalid output template: %s' % str(err))
                                                continue
@@ -248,12 +303,13 @@ class FileDownloader(object):
                block_size = 1024
                start = time.time()
                while True:
+                       # Progress message
                        percent_str = self.calc_percent(byte_counter, data_len)
                        eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
                        speed_str = self.calc_speed(start, time.time(), byte_counter)
-                       self.to_stdout('\r[download] %s of %s at %s ETA %s' %
-                                       (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
+                       self.report_progress(percent_str, data_len_str, speed_str, eta_str)
 
+                       # Download and write
                        before = time.time()
                        data_block = data.read(block_size)
                        after = time.time()
@@ -264,7 +320,10 @@ class FileDownloader(object):
                        stream.write(data_block)
                        block_size = self.best_block_size(after - before, data_block_len)
 
-               self.to_stdout('')
+                       # Apply rate limit
+                       self.slow_down(start, byte_counter)
+
+               self.report_finish()
                if data_len is not None and str(byte_counter) != data_len:
                        raise ValueError('Content too short: %s/%s bytes' % (byte_counter, data_len))
 
@@ -344,6 +403,26 @@ class YoutubeIE(InfoExtractor):
        _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/'
        _NETRC_MACHINE = 'youtube'
 
+       def report_login(self):
+               """Report attempt to log in."""
+               self.to_stdout('[youtube] Logging in')
+       
+       def report_age_confirmation(self):
+               """Report attempt to confirm age."""
+               self.to_stdout('[youtube] Confirming age')
+       
+       def report_webpage_download(self, video_id):
+               """Report attempt to download webpage."""
+               self.to_stdout('[youtube] %s: Downloading video webpage' % video_id)
+       
+       def report_information_extraction(self, video_id):
+               """Report attempt to extract video information."""
+               self.to_stdout('[youtube] %s: Extracting video information' % video_id)
+       
+       def report_video_url(self, video_id, video_real_url):
+               """Report extracted video URL."""
+               self.to_stdout('[youtube] %s: URL: %s' % (video_id, video_real_url))
+
        def _real_initialize(self):
                if self._downloader is None:
                        return
@@ -368,6 +447,7 @@ class YoutubeIE(InfoExtractor):
                                self.to_stderr('WARNING: parsing .netrc: %s' % str(err))
                                return
 
+               # No authentication to be performed
                if username is None:
                        return
 
@@ -381,7 +461,7 @@ class YoutubeIE(InfoExtractor):
                                }
                request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
                try:
-                       self.to_stdout('[youtube] Logging in')
+                       self.report_login()
                        login_results = urllib2.urlopen(request).read()
                        if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
                                self.to_stderr('WARNING: unable to log in: bad username or password')
@@ -397,10 +477,11 @@ class YoutubeIE(InfoExtractor):
                                }
                request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
                try:
-                       self.to_stdout('[youtube] Confirming age')
+                       self.report_age_confirmation()
                        age_results = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       sys.exit('ERROR: unable to confirm age: %s' % str(err))
+                       self.to_stderr('ERROR: unable to confirm age: %s' % str(err))
+                       return
 
        def _real_extract(self, url):
                # Extract video id from URL
@@ -417,7 +498,7 @@ class YoutubeIE(InfoExtractor):
                        format_param = params.get('format', None)
 
                # Extension
-               video_extension = {'18': 'mp4'}.get(format_param, 'flv')
+               video_extension = {'18': 'mp4', '17': '3gp'}.get(format_param, 'flv')
 
                # Normalize URL, including format
                normalized_url = 'http://www.youtube.com/watch?v=%s' % video_id
@@ -425,11 +506,12 @@ class YoutubeIE(InfoExtractor):
                        normalized_url = '%s&fmt=%s' % (normalized_url, format_param)
                request = urllib2.Request(normalized_url, None, std_headers)
                try:
-                       self.to_stdout('[youtube] %s: Downloading video webpage' % video_id)
+                       self.report_webpage_download(video_id)
                        video_webpage = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       sys.exit('ERROR: unable to download video: %s' % str(err))
-               self.to_stdout('[youtube] %s: Extracting video information' % video_id)
+                       self.to_stderr('ERROR: unable to download video webpage: %s' % str(err))
+                       return [None]
+               self.report_information_extraction(video_id)
                
                # "t" param
                mobj = re.search(r', "t": "([^"]+)"', video_webpage)
@@ -439,7 +521,7 @@ class YoutubeIE(InfoExtractor):
                video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s' % (video_id, mobj.group(1))
                if format_param is not None:
                        video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
-               self.to_stdout('[youtube] %s: URL: %s' % (video_id, video_real_url))
+               self.report_video_url(video_id, video_real_url)
 
                # uploader
                mobj = re.search(r'More From: ([^<]*)<', video_webpage)
@@ -485,7 +567,7 @@ if __name__ == '__main__':
                # Parse command line
                parser = optparse.OptionParser(
                                usage='Usage: %prog [options] url...',
-                               version='INTERNAL',
+                               version='2008.07.22',
                                conflict_handler='resolve',
                                )
                parser.add_option('-h', '--help',
@@ -515,9 +597,13 @@ if __name__ == '__main__':
                parser.add_option('-f', '--format',
                                dest='format', metavar='FMT', help='video format code')
                parser.add_option('-b', '--best-quality',
-                               action='store_const', dest='video_format', help='alias for -f 18', const='18')
+                               action='store_const', dest='format', help='alias for -f 18', const='18')
+               parser.add_option('-m', '--mobile-version',
+                               action='store_const', dest='format', help='alias for -f 17', const='17')
                parser.add_option('-i', '--ignore-errors',
                                action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
+               parser.add_option('-r', '--rate-limit',
+                               dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)')
                (opts, args) = parser.parse_args()
 
                # Conflicting, missing and erroneous options
@@ -533,6 +619,11 @@ if __name__ == '__main__':
                        sys.exit('ERROR: using title conflicts with using literal title')
                if opts.username is not None and opts.password is None:
                        opts.password = getpass.getpass('Type account password and press return:')
+               if opts.ratelimit is not None:
+                       numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
+                       if numeric_limit is None:
+                               sys.exit('ERROR: invalid rate limit specified')
+                       opts.ratelimit = numeric_limit
 
                # Information extractors
                youtube_ie = YoutubeIE()
@@ -552,10 +643,15 @@ if __name__ == '__main__':
                                or (opts.useliteral and '%(title)s-%(id)s.%(ext)s')
                                or '%(id)s.%(ext)s'),
                        'ignoreerrors': opts.ignoreerrors,
+                       'ratelimit': opts.ratelimit,
                        })
                fd.add_info_extractor(youtube_ie)
                retcode = fd.download(args)
                sys.exit(retcode)
 
+       except DownloadError:
+               sys.exit(1)
+       except SameFileError:
+               sys.exit('ERROR: fixed output name but more than one file to download')
        except KeyboardInterrupt:
                sys.exit('\nERROR: Interrupted by user')