]> gitweb @ CieloNegro.org - youtube-dl.git/blobdiff - youtube_dl/FileDownloader.py
release 2013.02.01
[youtube-dl.git] / youtube_dl / FileDownloader.py
index 756fc72ec8e823751e891cdc3e7041506a0f6fe8..e3131bbe6181585ffe1fd9c3dda003abdec6fa8c 100644 (file)
@@ -81,6 +81,7 @@ class FileDownloader(object):
     writesubtitles:    Write the video subtitles to a .srt file
     subtitleslang:     Language of the subtitles to download
     test:              Download only first bytes to test the downloader.
+    keepvideo:         Keep the video file after post-processing
     """
 
     params = None
@@ -94,6 +95,7 @@ class FileDownloader(object):
         """Create a FileDownloader object with the given options."""
         self._ies = []
         self._pps = []
+        self._progress_hooks = []
         self._download_retcode = 0
         self._num_downloads = 0
         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
@@ -216,12 +218,15 @@ class FileDownloader(object):
         Depending on if the downloader has been configured to ignore
         download errors or not, this method may throw an exception or
         not when errors are found, after printing the message.
+
+        tb, if given, is additional traceback information.
         """
         if message is not None:
             self.to_stderr(message)
         if self.params.get('verbose'):
             if tb is None:
-                tb = u''.join(traceback.format_list(traceback.extract_stack()))
+                tb_data = traceback.format_list(traceback.extract_stack())
+                tb = u''.join(tb_data)
             self.to_stderr(tb)
         if not self.params.get('ignoreerrors', False):
             raise DownloadError(message)
@@ -497,7 +502,7 @@ class FileDownloader(object):
                 try:
                     videos = ie.extract(url)
                 except ExtractorError as de: # An error we somewhat expected
-                    self.trouble(u'ERROR: ' + compat_str(de), compat_str(u''.join(traceback.format_tb(de.traceback))))
+                    self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback())
                     break
                 except Exception as e:
                     if self.params.get('ignoreerrors', False):
@@ -526,15 +531,29 @@ class FileDownloader(object):
         return self._download_retcode
 
     def post_process(self, filename, ie_info):
-        """Run the postprocessing chain on the given file."""
+        """Run all the postprocessors on the given file."""
         info = dict(ie_info)
         info['filepath'] = filename
+        keep_video = None
         for pp in self._pps:
-            info = pp.run(info)
-            if info is None:
-                break
+            try:
+                keep_video_wish,new_info = pp.run(info)
+                if keep_video_wish is not None:
+                    if keep_video_wish:
+                        keep_video = keep_video_wish
+                    elif keep_video is None:
+                        # No clear decision yet, let IE decide
+                        keep_video = keep_video_wish
+            except PostProcessingError as e:
+                self.to_stderr(u'ERROR: ' + e.msg)
+        if keep_video is False and not self.params.get('keepvideo', False):
+            try:
+                self.to_stderr(u'Deleting original file %s (pass -k to keep)' % filename)
+                os.remove(encodeFilename(filename))
+            except (IOError, OSError):
+                self.to_stderr(u'WARNING: Unable to remove downloaded video file')
 
-    def _download_with_rtmpdump(self, filename, url, player_url):
+    def _download_with_rtmpdump(self, filename, url, player_url, page_url):
         self.report_destination(filename)
         tmpfilename = self.temp_name(filename)
 
@@ -548,7 +567,11 @@ class FileDownloader(object):
         # Download using rtmpdump. rtmpdump returns exit code 2 when
         # the connection was interrumpted and resuming appears to be
         # possible. This is part of rtmpdump's normal usage, AFAIK.
-        basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
+        basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
+        if player_url is not None:
+            basic_args += ['-W', player_url]
+        if page_url is not None:
+            basic_args += ['--pageUrl', page_url]
         args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
         if self.params.get('verbose', False):
             try:
@@ -572,8 +595,15 @@ class FileDownloader(object):
                 retval = 0
                 break
         if retval == 0:
-            self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(encodeFilename(tmpfilename)))
+            fsize = os.path.getsize(encodeFilename(tmpfilename))
+            self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
             self.try_rename(tmpfilename, filename)
+            self._hook_progress({
+                'downloaded_bytes': fsize,
+                'total_bytes': fsize,
+                'filename': filename,
+                'status': 'finished',
+            })
             return True
         else:
             self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
@@ -581,22 +611,29 @@ class FileDownloader(object):
 
     def _do_download(self, filename, info_dict):
         url = info_dict['url']
-        player_url = info_dict.get('player_url', None)
 
         # Check file already present
         if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
             self.report_file_already_downloaded(filename)
+            self._hook_progress({
+                'filename': filename,
+                'status': 'finished',
+            })
             return True
 
         # Attempt to download using rtmpdump
         if url.startswith('rtmp'):
-            return self._download_with_rtmpdump(filename, url, player_url)
+            return self._download_with_rtmpdump(filename, url,
+                                                info_dict.get('player_url', None),
+                                                info_dict.get('page_url', None))
 
         tmpfilename = self.temp_name(filename)
         stream = None
 
         # Do not include the Accept-Encoding header
         headers = {'Youtubedl-no-compression': 'True'}
+        if 'user_agent' in info_dict:
+            headers['Youtubedl-user-agent'] = info_dict['user_agent']
         basic_request = compat_urllib_request.Request(url, None, headers)
         request = compat_urllib_request.Request(url, None, headers)
 
@@ -653,6 +690,10 @@ class FileDownloader(object):
                             # the one in the hard drive.
                             self.report_file_already_downloaded(filename)
                             self.try_rename(tmpfilename, filename)
+                            self._hook_progress({
+                                'filename': filename,
+                                'status': 'finished',
+                            })
                             return True
                         else:
                             # The length does not match, we start the download over
@@ -711,6 +752,14 @@ class FileDownloader(object):
                 eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
                 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
 
+            self._hook_progress({
+                'downloaded_bytes': byte_counter,
+                'total_bytes': data_len,
+                'tmpfilename': tmpfilename,
+                'filename': filename,
+                'status': 'downloading',
+            })
+
             # Apply rate limit
             self.slow_down(start, byte_counter - resume_len)
 
@@ -727,4 +776,31 @@ class FileDownloader(object):
         if self.params.get('updatetime', True):
             info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
 
+        self._hook_progress({
+            'downloaded_bytes': byte_counter,
+            'total_bytes': byte_counter,
+            'filename': filename,
+            'status': 'finished',
+        })
+
         return True
+
+    def _hook_progress(self, status):
+        for ph in self._progress_hooks:
+            ph(status)
+
+    def add_progress_hook(self, ph):
+        """ ph gets called on download progress, with a dictionary with the entries
+        * filename: The final filename
+        * status: One of "downloading" and "finished"
+
+        It can also have some of the following entries:
+
+        * downloaded_bytes: Bytes on disks
+        * total_bytes: Total bytes, None if unknown
+        * tmpfilename: The filename we're currently writing to
+
+        Hooks are guaranteed to be called at least once (with status "finished")
+        if the download is successful.
+        """
+        self._progress_hooks.append(ph)