Merge remote-tracking branch 'origin/master'

[youtube-dl.git] / youtube_dl / YoutubeDL.py
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index 2a4ab674deb46eba6a874d044b02679403456dee..3c47eb9dc49d81f3409e93c44d7d6bb160e46ec1 100644 (file)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -47,12 +47,13 @@ from .utils import (
      subtitles_filename,
      takewhile_inclusive,
      UnavailableVideoError,
+    url_basename,
      write_json_file,
      write_string,
      YoutubeDLHandler,
  )
  from .extractor import get_info_extractor, gen_extractors
-from .FileDownloader import FileDownloader
+from .downloader import get_suitable_downloader
  from .version import __version__
  
  
@@ -147,6 +148,7 @@ class YoutubeDL(object):
      socket_timeout:    Time to wait for unresponsive hosts, in seconds
      bidi_workaround:   Work around buggy terminals without bidirectional text
                         support, using fridibi
+    debug_printtraffic:Print out sent and received HTTP traffic
  
      The following parameters are not used by YoutubeDL itself, they are used by
      the FileDownloader:
@@ -163,6 +165,8 @@ class YoutubeDL(object):
  
      def __init__(self, params=None):
          """Create a FileDownloader object with the given options."""
+        if params is None:
+            params = {}
          self._ies = []
          self._ies_instances = {}
          self._pps = []
@@ -171,7 +175,7 @@ class YoutubeDL(object):
          self._num_downloads = 0
          self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
          self._err_file = sys.stderr
-        self.params = {} if params is None else params
+        self.params = params
  
          if params.get('bidi_workaround', False):
              try:
@@ -182,12 +186,18 @@ class YoutubeDL(object):
                      width_args = []
                  else:
                      width_args = ['-w', str(width)]
-                self._fribidi = subprocess.Popen(
-                    ['fribidi', '-c', 'UTF-8'] + width_args,
+                sp_kwargs = dict(
                      stdin=subprocess.PIPE,
                      stdout=slave,
                      stderr=self._err_file)
-                self._fribidi_channel = os.fdopen(master, 'rb')
+                try:
+                    self._output_process = subprocess.Popen(
+                        ['bidiv'] + width_args, **sp_kwargs
+                    )
+                except OSError:
+                    self._output_process = subprocess.Popen(
+                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
+                self._output_channel = os.fdopen(master, 'rb')
              except OSError as ose:
                  if ose.errno == 2:
                      self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
@@ -204,8 +214,6 @@ class YoutubeDL(object):
                  u'Set the LC_ALL environment variable to fix this.')
              self.params['restrictfilenames'] = True
  
-        self.fd = FileDownloader(self, self.params)
-
          if '%(stitle)s' in self.params.get('outtmpl', ''):
              self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
  
@@ -241,15 +249,20 @@ class YoutubeDL(object):
          self._pps.append(pp)
          pp.set_downloader(self)
  
+    def add_progress_hook(self, ph):
+        """Add the progress hook (currently only for the file downloader)"""
+        self._progress_hooks.append(ph)
+
      def _bidi_workaround(self, message):
-        if not hasattr(self, '_fribidi_channel'):
+        if not hasattr(self, '_output_channel'):
              return message
  
+        assert hasattr(self, '_output_process')
          assert type(message) == type(u'')
          line_count = message.count(u'\n') + 1
-        self._fribidi.stdin.write((message + u'\n').encode('utf-8'))
-        self._fribidi.stdin.flush()
-        res = u''.join(self._fribidi_channel.readline().decode('utf-8')
+        self._output_process.stdin.write((message + u'\n').encode('utf-8'))
+        self._output_process.stdin.flush()
+        res = u''.join(self._output_channel.readline().decode('utf-8')
                         for _ in range(line_count))
          return res[:-len(u'\n')]
  
@@ -484,6 +497,7 @@ class YoutubeDL(object):
                      {
                          'extractor': ie.IE_NAME,
                          'webpage_url': url,
+                        'webpage_url_basename': url_basename(url),
                          'extractor_key': ie.ie_key(),
                      })
                  if process:
@@ -531,7 +545,7 @@ class YoutubeDL(object):
              def make_result(embedded_info):
                  new_result = ie_result.copy()
                  for f in ('_type', 'url', 'ext', 'player_url', 'formats',
-                          'entries', 'urlhandle', 'ie_key', 'duration',
+                          'entries', 'ie_key', 'duration',
                            'subtitles', 'annotations', 'format',
                            'thumbnail', 'thumbnails'):
                      if f in new_result:
@@ -557,16 +571,16 @@ class YoutubeDL(object):
  
              n_all_entries = len(ie_result['entries'])
              playliststart = self.params.get('playliststart', 1) - 1
-            playlistend = self.params.get('playlistend', -1)
-
+            playlistend = self.params.get('playlistend', None)
+            # For backwards compatibility, interpret -1 as whole list
              if playlistend == -1:
-                entries = ie_result['entries'][playliststart:]
-            else:
-                entries = ie_result['entries'][playliststart:playlistend]
+                playlistend = None
  
+            entries = ie_result['entries'][playliststart:playlistend]
              n_entries = len(entries)
  
-            self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
+            self.to_screen(
+                u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                  (ie_result['extractor'], playlist, n_all_entries, n_entries))
  
              for i, entry in enumerate(entries, 1):
@@ -576,6 +590,7 @@ class YoutubeDL(object):
                      'playlist_index': i + playliststart,
                      'extractor': ie_result['extractor'],
                      'webpage_url': ie_result['webpage_url'],
+                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                      'extractor_key': ie_result['extractor_key'],
                  }
  
@@ -596,6 +611,7 @@ class YoutubeDL(object):
                      {
                          'extractor': ie_result['extractor'],
                          'webpage_url': ie_result['webpage_url'],
+                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                          'extractor_key': ie_result['extractor_key'],
                      })
                  return r
@@ -632,7 +648,7 @@ class YoutubeDL(object):
              info_dict['playlist_index'] = None
  
          # This extractors handle format selection themselves
-        if info_dict['extractor'] in [u'youtube', u'Youku']:
+        if info_dict['extractor'] in [u'Youku']:
              if download:
                  self.process_info(info_dict)
              return info_dict
@@ -658,24 +674,23 @@ class YoutubeDL(object):
              if 'ext' not in format:
                  format['ext'] = determine_ext(format['url'])
  
-        if self.params.get('listformats', None):
-            self.list_formats(info_dict)
-            return
-
          format_limit = self.params.get('format_limit', None)
          if format_limit:
              formats = list(takewhile_inclusive(
                  lambda f: f['format_id'] != format_limit, formats
              ))
-        if self.params.get('prefer_free_formats'):
-            def _free_formats_key(f):
-                try:
-                    ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
-                except ValueError:
-                    ext_ord = -1
-                # We only compare the extension if they have the same height and width
-                return (f.get('height'), f.get('width'), ext_ord)
-            formats = sorted(formats, key=_free_formats_key)
+
+        # TODO Central sorting goes here
+
+        if formats[0] is not info_dict: 
+            # Only set the 'formats' field if the original info_dict lists them;
+            # otherwise we end up with a circular reference: the first (and only)
+            # element of the 'formats' field in info_dict is info_dict itself,
+            # which can't be exported to JSON.
+            info_dict['formats'] = formats
+        if self.params.get('listformats', None):
+            self.list_formats(info_dict)
+            return
  
          req_format = self.params.get('format', 'best')
          if req_format is None:
@@ -835,8 +850,7 @@ class YoutubeDL(object):
              else:
                  self.to_screen(u'[info] Writing video description metadata as JSON to: ' + infofn)
                  try:
-                    json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
-                    write_json_file(json_info_dict, encodeFilename(infofn))
+                    write_json_file(info_dict, encodeFilename(infofn))
                  except (OSError, IOError):
                      self.report_error(u'Cannot write metadata to JSON file ' + infofn)
                      return
@@ -845,7 +859,7 @@ class YoutubeDL(object):
              if info_dict.get('thumbnail') is not None:
                  thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
                  thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
-                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
+                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                      self.to_screen(u'[%s] %s: Thumbnail is already present' %
                                     (info_dict['extractor'], info_dict['id']))
                  else:
@@ -866,7 +880,10 @@ class YoutubeDL(object):
                  success = True
              else:
                  try:
-                    success = self.fd._do_download(filename, info_dict)
+                    fd = get_suitable_downloader(info_dict)(self, self.params)
+                    for ph in self._progress_hooks:
+                        fd.add_progress_hook(ph)
+                    success = fd.download(filename, info_dict)
                  except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                      self.report_error(u'unable to download video data: %s' % str(err))
                      return
@@ -984,13 +1001,15 @@ class YoutubeDL(object):
      def format_resolution(format, default='unknown'):
          if format.get('vcodec') == 'none':
              return 'audio only'
-        if format.get('_resolution') is not None:
-            return format['_resolution']
+        if format.get('resolution') is not None:
+            return format['resolution']
          if format.get('height') is not None:
              if format.get('width') is not None:
                  res = u'%sx%s' % (format['width'], format['height'])
              else:
                  res = u'%sp' % format['height']
+        elif format.get('width') is not None:
+            res = u'?x%d' % format['width']
          else:
              res = default
          return res
@@ -998,15 +1017,19 @@ class YoutubeDL(object):
      def list_formats(self, info_dict):
          def format_note(fdict):
              res = u''
+            if f.get('ext') in ['f4f', 'f4m']:
+                res += u'(unsupported) '
              if fdict.get('format_note') is not None:
                  res += fdict['format_note'] + u' '
+            if fdict.get('tbr') is not None:
+                res += u'%4dk ' % fdict['tbr']
              if (fdict.get('vcodec') is not None and
                      fdict.get('vcodec') != 'none'):
-                res += u'%-5s' % fdict['vcodec']
-            elif fdict.get('vbr') is not None:
-                res += u'video'
+                res += u'%-5s@' % fdict['vcodec']
+            elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
+                res += u'video@'
              if fdict.get('vbr') is not None:
-                res += u'@%4dk' % fdict['vbr']
+                res += u'%4dk' % fdict['vbr']
              if fdict.get('acodec') is not None:
                  if res:
                      res += u', '
@@ -1041,7 +1064,7 @@ class YoutubeDL(object):
  
          header_line = line({
              'format_id': u'format code', 'ext': u'extension',
-            '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
+            'resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
          self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                         (info_dict['id'], header_line, u"\n".join(formats_s)))
  
@@ -1104,10 +1127,13 @@ class YoutubeDL(object):
              if 'http' in proxies and 'https' not in proxies:
                  proxies['https'] = proxies['http']
          proxy_handler = compat_urllib_request.ProxyHandler(proxies)
+
+        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
          https_handler = make_HTTPS_handler(
-            self.params.get('nocheckcertificate', False))
+            self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
+        ydlh = YoutubeDLHandler(debuglevel=debuglevel)
          opener = compat_urllib_request.build_opener(
-            https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
+            https_handler, proxy_handler, cookie_processor, ydlh)
          # Delete the default user-agent header, which would otherwise apply in
          # cases where our custom HTTP handler doesn't come into play
          # (See https://github.com/rg3/youtube-dl/issues/1309 for details)