release 2013.02.01

[youtube-dl.git] / youtube_dl / FileDownloader.py
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py

index 756fc72ec8e823751e891cdc3e7041506a0f6fe8..e3131bbe6181585ffe1fd9c3dda003abdec6fa8c 100644 (file)
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -81,6 +81,7 @@ class FileDownloader(object):
      writesubtitles:    Write the video subtitles to a .srt file
      subtitleslang:     Language of the subtitles to download
      test:              Download only first bytes to test the downloader.
      writesubtitles:    Write the video subtitles to a .srt file
      subtitleslang:     Language of the subtitles to download
      test:              Download only first bytes to test the downloader.
+    keepvideo:         Keep the video file after post-processing
      """
  
      params = None
      """
  
      params = None
@@ -94,6 +95,7 @@ class FileDownloader(object):
          """Create a FileDownloader object with the given options."""
          self._ies = []
          self._pps = []
          """Create a FileDownloader object with the given options."""
          self._ies = []
          self._pps = []
+        self._progress_hooks = []
          self._download_retcode = 0
          self._num_downloads = 0
          self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
          self._download_retcode = 0
          self._num_downloads = 0
          self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
@@ -216,12 +218,15 @@ class FileDownloader(object):
          Depending on if the downloader has been configured to ignore
          download errors or not, this method may throw an exception or
          not when errors are found, after printing the message.
          Depending on if the downloader has been configured to ignore
          download errors or not, this method may throw an exception or
          not when errors are found, after printing the message.
+
+        tb, if given, is additional traceback information.
          """
          if message is not None:
              self.to_stderr(message)
          if self.params.get('verbose'):
              if tb is None:
          """
          if message is not None:
              self.to_stderr(message)
          if self.params.get('verbose'):
              if tb is None:
-                tb = u''.join(traceback.format_list(traceback.extract_stack()))
+                tb_data = traceback.format_list(traceback.extract_stack())
+                tb = u''.join(tb_data)
              self.to_stderr(tb)
          if not self.params.get('ignoreerrors', False):
              raise DownloadError(message)
              self.to_stderr(tb)
          if not self.params.get('ignoreerrors', False):
              raise DownloadError(message)
@@ -497,7 +502,7 @@ class FileDownloader(object):
                  try:
                      videos = ie.extract(url)
                  except ExtractorError as de: # An error we somewhat expected
                  try:
                      videos = ie.extract(url)
                  except ExtractorError as de: # An error we somewhat expected
-                    self.trouble(u'ERROR: ' + compat_str(de), compat_str(u''.join(traceback.format_tb(de.traceback))))
+                    self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback())
                      break
                  except Exception as e:
                      if self.params.get('ignoreerrors', False):
                      break
                  except Exception as e:
                      if self.params.get('ignoreerrors', False):
@@ -526,15 +531,29 @@ class FileDownloader(object):
          return self._download_retcode
  
      def post_process(self, filename, ie_info):
          return self._download_retcode
  
      def post_process(self, filename, ie_info):
-        """Run the postprocessing chain on the given file."""
+        """Run all the postprocessors on the given file."""
          info = dict(ie_info)
          info['filepath'] = filename
          info = dict(ie_info)
          info['filepath'] = filename
+        keep_video = None
          for pp in self._pps:
          for pp in self._pps:
-            info = pp.run(info)
-            if info is None:
-                break
+            try:
+                keep_video_wish,new_info = pp.run(info)
+                if keep_video_wish is not None:
+                    if keep_video_wish:
+                        keep_video = keep_video_wish
+                    elif keep_video is None:
+                        # No clear decision yet, so adopt this postprocessor's wish
+                        keep_video = keep_video_wish
+            except PostProcessingError as e:
+                self.to_stderr(u'ERROR: ' + e.msg)
+        if keep_video is False and not self.params.get('keepvideo', False):
+            try:
+                self.to_stderr(u'Deleting original file %s (pass -k to keep)' % filename)
+                os.remove(encodeFilename(filename))
+            except (IOError, OSError):
+                self.to_stderr(u'WARNING: Unable to remove downloaded video file')
  
  
-    def _download_with_rtmpdump(self, filename, url, player_url):
+    def _download_with_rtmpdump(self, filename, url, player_url, page_url):
          self.report_destination(filename)
          tmpfilename = self.temp_name(filename)
  
          self.report_destination(filename)
          tmpfilename = self.temp_name(filename)
  
@@ -548,7 +567,11 @@ class FileDownloader(object):
          # Download using rtmpdump. rtmpdump returns exit code 2 when
          # the connection was interrumpted and resuming appears to be
          # possible. This is part of rtmpdump's normal usage, AFAIK.
          # Download using rtmpdump. rtmpdump returns exit code 2 when
          # the connection was interrumpted and resuming appears to be
          # possible. This is part of rtmpdump's normal usage, AFAIK.
-        basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
+        basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
+        if player_url is not None:
+            basic_args += ['-W', player_url]
+        if page_url is not None:
+            basic_args += ['--pageUrl', page_url]
          args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
          if self.params.get('verbose', False):
              try:
          args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
          if self.params.get('verbose', False):
              try:
@@ -572,8 +595,15 @@ class FileDownloader(object):
                  retval = 0
                  break
          if retval == 0:
                  retval = 0
                  break
          if retval == 0:
-            self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(encodeFilename(tmpfilename)))
+            fsize = os.path.getsize(encodeFilename(tmpfilename))
+            self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
              self.try_rename(tmpfilename, filename)
              self.try_rename(tmpfilename, filename)
+            self._hook_progress({
+                'downloaded_bytes': fsize,
+                'total_bytes': fsize,
+                'filename': filename,
+                'status': 'finished',
+            })
              return True
          else:
              self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
              return True
          else:
              self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
@@ -581,22 +611,29 @@ class FileDownloader(object):
  
      def _do_download(self, filename, info_dict):
          url = info_dict['url']
  
      def _do_download(self, filename, info_dict):
          url = info_dict['url']
-        player_url = info_dict.get('player_url', None)
  
          # Check file already present
          if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
              self.report_file_already_downloaded(filename)
  
          # Check file already present
          if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
              self.report_file_already_downloaded(filename)
+            self._hook_progress({
+                'filename': filename,
+                'status': 'finished',
+            })
              return True
  
          # Attempt to download using rtmpdump
          if url.startswith('rtmp'):
              return True
  
          # Attempt to download using rtmpdump
          if url.startswith('rtmp'):
-            return self._download_with_rtmpdump(filename, url, player_url)
+            return self._download_with_rtmpdump(filename, url,
+                                                info_dict.get('player_url', None),
+                                                info_dict.get('page_url', None))
  
          tmpfilename = self.temp_name(filename)
          stream = None
  
          # Do not include the Accept-Encoding header
          headers = {'Youtubedl-no-compression': 'True'}
  
          tmpfilename = self.temp_name(filename)
          stream = None
  
          # Do not include the Accept-Encoding header
          headers = {'Youtubedl-no-compression': 'True'}
+        if 'user_agent' in info_dict:
+            headers['Youtubedl-user-agent'] = info_dict['user_agent']
          basic_request = compat_urllib_request.Request(url, None, headers)
          request = compat_urllib_request.Request(url, None, headers)
  
          basic_request = compat_urllib_request.Request(url, None, headers)
          request = compat_urllib_request.Request(url, None, headers)
  
@@ -653,6 +690,10 @@ class FileDownloader(object):
                              # the one in the hard drive.
                              self.report_file_already_downloaded(filename)
                              self.try_rename(tmpfilename, filename)
                              # the one in the hard drive.
                              self.report_file_already_downloaded(filename)
                              self.try_rename(tmpfilename, filename)
+                            self._hook_progress({
+                                'filename': filename,
+                                'status': 'finished',
+                            })
                              return True
                          else:
                              # The length does not match, we start the download over
                              return True
                          else:
                              # The length does not match, we start the download over
@@ -711,6 +752,14 @@ class FileDownloader(object):
                  eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
                  self.report_progress(percent_str, data_len_str, speed_str, eta_str)
  
                  eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
                  self.report_progress(percent_str, data_len_str, speed_str, eta_str)
  
+            self._hook_progress({
+                'downloaded_bytes': byte_counter,
+                'total_bytes': data_len,
+                'tmpfilename': tmpfilename,
+                'filename': filename,
+                'status': 'downloading',
+            })
+
              # Apply rate limit
              self.slow_down(start, byte_counter - resume_len)
  
              # Apply rate limit
              self.slow_down(start, byte_counter - resume_len)
  
@@ -727,4 +776,31 @@ class FileDownloader(object):
          if self.params.get('updatetime', True):
              info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
  
          if self.params.get('updatetime', True):
              info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
  
+        self._hook_progress({
+            'downloaded_bytes': byte_counter,
+            'total_bytes': byte_counter,
+            'filename': filename,
+            'status': 'finished',
+        })
+
          return True
          return True
+
+    def _hook_progress(self, status):
+        for ph in self._progress_hooks:
+            ph(status)
+
+    def add_progress_hook(self, ph):
+        """ ph gets called on download progress, with a dictionary with the entries
+        * filename: The final filename
+        * status: One of "downloading" and "finished"
+
+        It can also have some of the following entries:
+
+        * downloaded_bytes: Bytes on disk
+        * total_bytes: Total bytes, None if unknown
+        * tmpfilename: The filename we're currently writing to
+
+        Hooks are guaranteed to be called at least once (with status "finished")
+        if the download is successful.
+        """
+        self._progress_hooks.append(ph)