release 2013.02.01

[youtube-dl.git] / youtube_dl / FileDownloader.py
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py

index 756fc72ec8e823751e891cdc3e7041506a0f6fe8..e3131bbe6181585ffe1fd9c3dda003abdec6fa8c 100644 (file)
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -81,6 +81,7 @@ class FileDownloader(object):
      writesubtitles:    Write the video subtitles to a .srt file
      subtitleslang:     Language of the subtitles to download
      test:              Download only first bytes to test the downloader.
+    keepvideo:         Keep the video file after post-processing
      """
  
      params = None
@@ -94,6 +95,7 @@ class FileDownloader(object):
          """Create a FileDownloader object with the given options."""
          self._ies = []
          self._pps = []
+        self._progress_hooks = []
          self._download_retcode = 0
          self._num_downloads = 0
          self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
@@ -216,12 +218,15 @@ class FileDownloader(object):
          Depending on if the downloader has been configured to ignore
          download errors or not, this method may throw an exception or
          not when errors are found, after printing the message.
+
+        tb, if given, is additional traceback information.
          """
          if message is not None:
              self.to_stderr(message)
          if self.params.get('verbose'):
              if tb is None:
-                tb = u''.join(traceback.format_list(traceback.extract_stack()))
+                tb_data = traceback.format_list(traceback.extract_stack())
+                tb = u''.join(tb_data)
              self.to_stderr(tb)
          if not self.params.get('ignoreerrors', False):
              raise DownloadError(message)
@@ -497,7 +502,7 @@ class FileDownloader(object):
                  try:
                      videos = ie.extract(url)
                  except ExtractorError as de: # An error we somewhat expected
-                    self.trouble(u'ERROR: ' + compat_str(de), compat_str(u''.join(traceback.format_tb(de.traceback))))
+                    self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback())
                      break
                  except Exception as e:
                      if self.params.get('ignoreerrors', False):
@@ -526,15 +531,29 @@ class FileDownloader(object):
          return self._download_retcode
  
      def post_process(self, filename, ie_info):
-        """Run the postprocessing chain on the given file."""
+        """Run all the postprocessors on the given file."""
          info = dict(ie_info)
          info['filepath'] = filename
+        keep_video = None
          for pp in self._pps:
-            info = pp.run(info)
-            if info is None:
-                break
+            try:
+                keep_video_wish,new_info = pp.run(info)
+                if keep_video_wish is not None:
+                    if keep_video_wish:
+                        keep_video = keep_video_wish
+                    elif keep_video is None:
+                        # No clear decision yet, adopt this postprocessor's wish
+                        keep_video = keep_video_wish
+            except PostProcessingError as e:
+                self.to_stderr(u'ERROR: ' + e.msg)
+        if keep_video is False and not self.params.get('keepvideo', False):
+            try:
+                self.to_stderr(u'Deleting original file %s (pass -k to keep)' % filename)
+                os.remove(encodeFilename(filename))
+            except (IOError, OSError):
+                self.to_stderr(u'WARNING: Unable to remove downloaded video file')
  
-    def _download_with_rtmpdump(self, filename, url, player_url):
+    def _download_with_rtmpdump(self, filename, url, player_url, page_url):
          self.report_destination(filename)
          tmpfilename = self.temp_name(filename)
  
@@ -548,7 +567,11 @@ class FileDownloader(object):
          # Download using rtmpdump. rtmpdump returns exit code 2 when
          # the connection was interrumpted and resuming appears to be
          # possible. This is part of rtmpdump's normal usage, AFAIK.
-        basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
+        basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
+        if player_url is not None:
+            basic_args += ['-W', player_url]
+        if page_url is not None:
+            basic_args += ['--pageUrl', page_url]
          args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
          if self.params.get('verbose', False):
              try:
@@ -572,8 +595,15 @@ class FileDownloader(object):
                  retval = 0
                  break
          if retval == 0:
-            self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(encodeFilename(tmpfilename)))
+            fsize = os.path.getsize(encodeFilename(tmpfilename))
+            self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
              self.try_rename(tmpfilename, filename)
+            self._hook_progress({
+                'downloaded_bytes': fsize,
+                'total_bytes': fsize,
+                'filename': filename,
+                'status': 'finished',
+            })
              return True
          else:
              self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
@@ -581,22 +611,29 @@ class FileDownloader(object):
  
      def _do_download(self, filename, info_dict):
          url = info_dict['url']
-        player_url = info_dict.get('player_url', None)
  
          # Check file already present
          if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
              self.report_file_already_downloaded(filename)
+            self._hook_progress({
+                'filename': filename,
+                'status': 'finished',
+            })
              return True
  
          # Attempt to download using rtmpdump
          if url.startswith('rtmp'):
-            return self._download_with_rtmpdump(filename, url, player_url)
+            return self._download_with_rtmpdump(filename, url,
+                                                info_dict.get('player_url', None),
+                                                info_dict.get('page_url', None))
  
          tmpfilename = self.temp_name(filename)
          stream = None
  
          # Do not include the Accept-Encoding header
          headers = {'Youtubedl-no-compression': 'True'}
+        if 'user_agent' in info_dict:
+            headers['Youtubedl-user-agent'] = info_dict['user_agent']
          basic_request = compat_urllib_request.Request(url, None, headers)
          request = compat_urllib_request.Request(url, None, headers)
  
@@ -653,6 +690,10 @@ class FileDownloader(object):
                              # the one in the hard drive.
                              self.report_file_already_downloaded(filename)
                              self.try_rename(tmpfilename, filename)
+                            self._hook_progress({
+                                'filename': filename,
+                                'status': 'finished',
+                            })
                              return True
                          else:
                              # The length does not match, we start the download over
@@ -711,6 +752,14 @@ class FileDownloader(object):
                  eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
                  self.report_progress(percent_str, data_len_str, speed_str, eta_str)
  
+            self._hook_progress({
+                'downloaded_bytes': byte_counter,
+                'total_bytes': data_len,
+                'tmpfilename': tmpfilename,
+                'filename': filename,
+                'status': 'downloading',
+            })
+
              # Apply rate limit
              self.slow_down(start, byte_counter - resume_len)
  
@@ -727,4 +776,31 @@ class FileDownloader(object):
          if self.params.get('updatetime', True):
              info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
  
+        self._hook_progress({
+            'downloaded_bytes': byte_counter,
+            'total_bytes': byte_counter,
+            'filename': filename,
+            'status': 'finished',
+        })
+
          return True
+
+    def _hook_progress(self, status):
+        for ph in self._progress_hooks:
+            ph(status)
+
+    def add_progress_hook(self, ph):
+        """ ph gets called on download progress, with a dictionary with the entries
+        * filename: The final filename
+        * status: One of "downloading" or "finished"
+
+        It can also have some of the following entries:
+
+        * downloaded_bytes: Bytes on disk
+        * total_bytes: Total bytes, None if unknown
+        * tmpfilename: The filename we're currently writing to
+
+        Hooks are guaranteed to be called at least once (with status "finished")
+        if the download is successful.
+        """
+        self._progress_hooks.append(ph)