2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
34 compat_urllib_request,
63 UnavailableVideoError,
73 from .cache import Cache
74 from .extractor import get_info_extractor, gen_extractors
75 from .downloader import get_suitable_downloader
76 from .downloader.rtmp import rtmpdump_version
77 from .postprocessor import (
79 FFmpegFixupStretchedPP,
84 from .version import __version__
87 class YoutubeDL(object):
90 YoutubeDL objects are the ones responsible for downloading the
91 actual video file and writing it to disk if the user has requested
92 it, among some other tasks. In most cases there should be one per
93 program. As, given a video URL, the downloader doesn't know how to
94 extract all the needed information, task that InfoExtractors do, it
95 has to pass the URL to one of them.
97 For this, YoutubeDL objects have a method that allows
98 InfoExtractors to be registered in a given order. When it is passed
99 a URL, the YoutubeDL object hands it to the first InfoExtractor it
100 finds that reports being able to handle it. The InfoExtractor extracts
101 all the information about the video or videos the URL refers to, and
102 YoutubeDL processes the extracted information, possibly using a File
103 Downloader to download the video.
105 YoutubeDL objects accept a lot of parameters. In order not to saturate
106 the object constructor with arguments, it receives a dictionary of
107 options instead. These options are available through the params
108 attribute for the InfoExtractors to use. The YoutubeDL also
109 registers itself as the downloader in charge for the InfoExtractors
110 that are added to it, so this is a "mutual registration".
114 username: Username for authentication purposes.
115 password: Password for authentication purposes.
116 videopassword: Password for accessing a video.
117 usenetrc: Use netrc for authentication instead.
118 verbose: Print additional info to stdout.
119 quiet: Do not print messages to stdout.
120 no_warnings: Do not print out anything for warnings.
121 forceurl: Force printing final URL.
122 forcetitle: Force printing title.
123 forceid: Force printing ID.
124 forcethumbnail: Force printing thumbnail URL.
125 forcedescription: Force printing description.
126 forcefilename: Force printing final filename.
127 forceduration: Force printing duration.
128 forcejson: Force printing info_dict as JSON.
129 dump_single_json: Force printing the info_dict of the whole playlist
130 (or video) as a single JSON line.
131 simulate: Do not download the video files.
132 format: Video format code. See options.py for more information.
133 format_limit: Highest quality format to try.
134 outtmpl: Template for output names.
135 restrictfilenames: Do not allow "&" and spaces in file names
136 ignoreerrors: Do not stop on download errors.
137 nooverwrites: Prevent overwriting files.
138 playliststart: Playlist item to start at.
139 playlistend: Playlist item to end at.
140 playlist_items: Specific indices of playlist to download.
141 playlistreverse: Download playlist items in reverse order.
142 matchtitle: Download only matching titles.
143 rejecttitle: Reject downloads for matching titles.
144 logger: Log messages to a logging.Logger instance.
145 logtostderr: Log messages to stderr instead of stdout.
146 writedescription: Write the video description to a .description file
147 writeinfojson: Write the video description to a .info.json file
148 writeannotations: Write the video annotations to a .annotations.xml file
149 writethumbnail: Write the thumbnail image to a file
150 write_all_thumbnails: Write all thumbnail formats to files
151 writesubtitles: Write the video subtitles to a file
152 writeautomaticsub: Write the automatic subtitles to a file
153 allsubtitles: Downloads all the subtitles of the video
154 (requires writesubtitles or writeautomaticsub)
155 listsubtitles: Lists all available subtitles for the video
156 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
157 subtitleslangs: List of languages of the subtitles to download
158 keepvideo: Keep the video file after post-processing
159 daterange: A DateRange object, download only if the upload_date is in the range.
160 skip_download: Skip the actual download of the video file
161 cachedir: Location of the cache files in the filesystem.
162 False to disable filesystem cache.
163 noplaylist: Download single video instead of a playlist if in doubt.
164 age_limit: An integer representing the user's age in years.
165 Unsuitable videos for the given age are skipped.
166 min_views: An integer representing the minimum view count the video
167 must have in order to not be skipped.
168 Videos without view count information are always
169 downloaded. None for no limit.
170 max_views: An integer representing the maximum view count.
171 Videos that are more popular than that are not
173 Videos without view count information are always
174 downloaded. None for no limit.
175 download_archive: File name of a file where all downloads are recorded.
176 Videos already present in the file are not downloaded
178 cookiefile: File name where cookies should be read from and dumped to.
179 nocheckcertificate:Do not verify SSL certificates
180 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
181 At the moment, this is only supported by YouTube.
182 proxy: URL of the proxy server to use
183 socket_timeout: Time to wait for unresponsive hosts, in seconds
184 bidi_workaround: Work around buggy terminals without bidirectional text
185 support, using fribidi
186 debug_printtraffic:Print out sent and received HTTP traffic
187 include_ads: Download ads as well
188 default_search: Prepend this string if an input url is not valid.
189 'auto' for elaborate guessing
190 encoding: Use this encoding instead of the system-specified.
191 extract_flat: Do not resolve URLs, return the immediate result.
192 Pass in 'in_playlist' to only show this behavior for
194 postprocessors: A list of dictionaries, each with an entry
195 * key: The name of the postprocessor. See
196 youtube_dl/postprocessor/__init__.py for a list.
197 as well as any further keyword arguments for the
199 progress_hooks: A list of functions that get called on download
200 progress, with a dictionary with the entries
201 * status: One of "downloading" and "finished".
202 Check this first and ignore unknown values.
204 If status is one of "downloading" or "finished", the
205 following properties may also be present:
206 * filename: The final filename (always present)
207 * downloaded_bytes: Bytes on disk
208 * total_bytes: Size of the whole file, None if unknown
209 * tmpfilename: The filename we're currently writing to
210 * eta: The estimated time in seconds, None if unknown
211 * speed: The download speed in bytes/second, None if
214 Progress hooks are guaranteed to be called at least once
215 (with status "finished") if the download is successful.
216 merge_output_format: Extension to use when merging formats.
217 fixup: Automatically correct known faults of the file.
219 - "never": do nothing
220 - "warn": only emit a warning
221 - "detect_or_warn": check whether we can do anything
222 about it, warn otherwise (default)
223 source_address: (Experimental) Client-side IP address to bind to.
224 call_home: Boolean, true iff we are allowed to contact the
225 youtube-dl servers for debugging.
226 sleep_interval: Number of seconds to sleep before each download.
227 external_downloader: Executable of the external downloader to call.
228 listformats: Print an overview of available video formats and exit.
229 list_thumbnails: Print a table of all thumbnails and exit.
232 The following parameters are not used by YoutubeDL itself, they are used by
234 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
235 noresizebuffer, retries, continuedl, noprogress, consoletitle,
238 The following options are used by the post processors:
239 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
240 otherwise prefer avconv.
241 exec_cmd: Arbitrary command to run after downloading
247 _download_retcode = None
248 _num_downloads = None
251 def __init__(self, params=None, auto_init=True):
252 """Create a YoutubeDL object with the given options."""
256 self._ies_instances = {}
258 self._progress_hooks = []
259 self._download_retcode = 0
260 self._num_downloads = 0
261 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
262 self._err_file = sys.stderr
264 self.cache = Cache(self)
266 if params.get('bidi_workaround', False):
269 master, slave = pty.openpty()
270 width = get_term_width()
274 width_args = ['-w', str(width)]
276 stdin=subprocess.PIPE,
278 stderr=self._err_file)
280 self._output_process = subprocess.Popen(
281 ['bidiv'] + width_args, **sp_kwargs
284 self._output_process = subprocess.Popen(
285 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
286 self._output_channel = os.fdopen(master, 'rb')
287 except OSError as ose:
289 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
293 if (sys.version_info >= (3,) and sys.platform != 'win32' and
294 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
295 and not params.get('restrictfilenames', False)):
296 # On Python 3, the Unicode filesystem API will throw errors (#1474)
298 'Assuming --restrict-filenames since file system encoding '
299 'cannot encode all characters. '
300 'Set the LC_ALL environment variable to fix this.')
301 self.params['restrictfilenames'] = True
303 if '%(stitle)s' in self.params.get('outtmpl', ''):
304 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
309 self.print_debug_header()
310 self.add_default_info_extractors()
312 for pp_def_raw in self.params.get('postprocessors', []):
313 pp_class = get_postprocessor(pp_def_raw['key'])
314 pp_def = dict(pp_def_raw)
316 pp = pp_class(self, **compat_kwargs(pp_def))
317 self.add_post_processor(pp)
319 for ph in self.params.get('progress_hooks', []):
320 self.add_progress_hook(ph)
322 def warn_if_short_id(self, argv):
323 # short YouTube ID starting with dash?
325 i for i, a in enumerate(argv)
326 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
330 [a for i, a in enumerate(argv) if i not in idxs] +
331 ['--'] + [argv[i] for i in idxs]
334 'Long argument string detected. '
335 'Use -- to separate parameters and URLs, like this:\n%s\n' %
336 args_to_str(correct_argv))
338 def add_info_extractor(self, ie):
339 """Add an InfoExtractor object to the end of the list."""
341 self._ies_instances[ie.ie_key()] = ie
342 ie.set_downloader(self)
def get_info_extractor(self, ie_key):
    """
    Return the InfoExtractor instance registered under ie_key.

    The instance cached in self._ies_instances is reused when there is
    one.  Otherwise the extractor class is looked up through the
    module-level get_info_extractor() helper, instantiated, registered
    with add_info_extractor() and then returned.
    """
    cached = self._ies_instances.get(ie_key)
    if cached is not None:
        return cached

    # Nothing registered under this key yet: build one and remember it.
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
def add_default_info_extractors(self):
    """
    Register every InfoExtractor produced by gen_extractors(), in the
    order they are produced, through add_info_extractor().
    """
    default_extractors = gen_extractors()
    for extractor in default_extractors:
        self.add_info_extractor(extractor)
363 def add_post_processor(self, pp):
364 """Add a PostProcessor object to the end of the chain."""
366 pp.set_downloader(self)
def add_progress_hook(self, ph):
    """Append ph to the registered progress hooks (currently only for the file downloader)."""
    registered_hooks = self._progress_hooks
    registered_hooks.append(ph)
def _bidi_workaround(self, message):
    """
    Pass message through the helper process, if one has been set up.

    When the instance has no _output_channel attribute the message is
    returned untouched.  Otherwise the message plus a newline is written,
    UTF-8 encoded, to the stdin of self._output_process, and one line per
    line of the message is read back from self._output_channel.  The
    decoded lines are joined and returned without the final character
    (the newline appended above).
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)

    expected_lines = message.count('\n') + 1
    helper_stdin = self._output_process.stdin
    helper_stdin.write((message + '\n').encode('utf-8'))
    helper_stdin.flush()

    pieces = []
    for _ in range(expected_lines):
        pieces.append(self._output_channel.readline().decode('utf-8'))
    converted = ''.join(pieces)
    return converted[:-len('\n')]
def to_screen(self, message, skip_eol=False):
    """Forward message to to_stdout() with the quiet-mode check enabled."""
    outcome = self.to_stdout(message, skip_eol, check_quiet=True)
    return outcome
def _write_string(self, s, out=None):
    """Write s to out via write_string(), passing along the 'encoding' param (None when unset)."""
    configured_encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=configured_encoding)
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """
    Emit message on the screen file, or hand it to the configured logger.

    If the 'logger' param is set, the message goes to its debug() method
    and nothing is written.  Otherwise the message is written to
    self._screen_file, unless check_quiet is true and the 'quiet' param
    is set.  A trailing newline is appended unless skip_eol is true.
    """
    logger = self.params.get('logger')
    if logger:
        self.params['logger'].debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return

    converted = self._bidi_workaround(message)
    # Index by the flag: False -> newline, True -> nothing.
    line_end = ('\n', '')[skip_eol]
    self._write_string(converted + line_end, self._screen_file)
def to_stderr(self, message):
    """
    Emit message on the error file, or hand it to the configured logger.

    If the 'logger' param is set, the message goes to its error() method;
    otherwise it is passed through _bidi_workaround() and written, with a
    trailing newline, to self._err_file.
    """
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        self.params['logger'].error(message)
        return

    converted = self._bidi_workaround(message)
    self._write_string(converted + '\n', self._err_file)
def to_console_title(self, message):
    """
    Set the console window title to message.

    Does nothing unless the 'consoletitle' param is set.  On Windows
    (os.name == 'nt') with a console window attached, the title is set
    through kernel32.SetConsoleTitleW; otherwise, when TERM is present in
    the environment, a title escape sequence is written to the screen
    file.
    """
    if not self.params.get('consoletitle', False):
        return

    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        wide_title = ctypes.c_wchar_p(message)
        ctypes.windll.kernel32.SetConsoleTitleW(wide_title)
        return

    if 'TERM' in os.environ:
        title_sequence = '\033]0;%s\007' % message
        self._write_string(title_sequence, self._screen_file)
def save_console_title(self):
    """
    Push the current console title onto the terminal's title stack.

    Only acts when the 'consoletitle' param is set and TERM is present in
    the environment; the escape sequence is written to the screen file.
    """
    wants_title = self.params.get('consoletitle', False)
    if not wants_title:
        return
    if 'TERM' not in os.environ:
        return
    # Save the title on stack
    self._write_string('\033[22;0t', self._screen_file)
def restore_console_title(self):
    """
    Pop the console title back from the terminal's title stack.

    Only acts when the 'consoletitle' param is set and TERM is present in
    the environment; the escape sequence is written to the screen file.
    """
    wants_title = self.params.get('consoletitle', False)
    if not wants_title:
        return
    if 'TERM' not in os.environ:
        return
    # Restore the title from stack
    self._write_string('\033[23;0t', self._screen_file)
438 self.save_console_title()
def __exit__(self, *args):
    """
    Leave the context: restore the console title and, when the
    'cookiefile' param is set (not None), save the cookie jar.

    Returns None, so an exception raised inside the `with` body is not
    suppressed.
    """
    self.restore_console_title()

    cookie_path = self.params.get('cookiefile')
    if cookie_path is None:
        return
    self.cookiejar.save()
447 def trouble(self, message=None, tb=None):
448 """Determine action to take when a download problem appears.
450 Depending on if the downloader has been configured to ignore
451 download errors or not, this method may throw an exception or
452 not when errors are found, after printing the message.
454 tb, if given, is additional traceback information.
456 if message is not None:
457 self.to_stderr(message)
458 if self.params.get('verbose'):
460 if sys.exc_info()[0]: # if .trouble has been called from an except block
462 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
463 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
464 tb += compat_str(traceback.format_exc())
466 tb_data = traceback.format_list(traceback.extract_stack())
467 tb = ''.join(tb_data)
469 if not self.params.get('ignoreerrors', False):
470 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
471 exc_info = sys.exc_info()[1].exc_info
473 exc_info = sys.exc_info()
474 raise DownloadError(message, exc_info)
475 self._download_retcode = 1
def report_warning(self, message):
    '''
    Report a warning.

    If the 'logger' param is set (not None) the bare message goes to its
    warning() method.  Otherwise, unless the 'no_warnings' param is set,
    the message is sent to to_stderr() prefixed with 'WARNING:'; the
    prefix is colored when the error file is a tty and the platform is
    not Windows.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        self.params['logger'].warning(message)
        return
    if self.params.get('no_warnings'):
        return

    use_color = self._err_file.isatty() and os.name != 'nt'
    _msg_header = '\033[0;33mWARNING:\033[0m' if use_color else 'WARNING:'
    self.to_stderr('%s %s' % (_msg_header, message))
def report_error(self, message, tb=None):
    '''
    Prefix message with 'ERROR:' and pass it, together with tb, to
    trouble().  The prefix is colored in red when the error file is a tty
    and the platform is not Windows.
    '''
    use_color = self._err_file.isatty() and os.name != 'nt'
    _msg_header = '\033[0;31mERROR:\033[0m' if use_color else 'ERROR:'
    self.trouble('%s %s' % (_msg_header, message), tb)
def report_file_already_downloaded(self, file_name):
    """
    Tell the user that file_name has already been fully downloaded.

    If building or printing the message that includes the file name
    raises UnicodeEncodeError, a generic message without the name is
    printed instead.
    """
    try:
        detailed = '[download] %s has already been downloaded' % file_name
        self.to_screen(detailed)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')
513 def prepare_filename(self, info_dict):
514 """Generate the output filename."""
516 template_dict = dict(info_dict)
518 template_dict['epoch'] = int(time.time())
519 autonumber_size = self.params.get('autonumber_size')
520 if autonumber_size is None:
522 autonumber_templ = '%0' + str(autonumber_size) + 'd'
523 template_dict['autonumber'] = autonumber_templ % self._num_downloads
524 if template_dict.get('playlist_index') is not None:
525 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
526 if template_dict.get('resolution') is None:
527 if template_dict.get('width') and template_dict.get('height'):
528 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
529 elif template_dict.get('height'):
530 template_dict['resolution'] = '%sp' % template_dict['height']
531 elif template_dict.get('width'):
532 template_dict['resolution'] = '?x%d' % template_dict['width']
534 sanitize = lambda k, v: sanitize_filename(
536 restricted=self.params.get('restrictfilenames'),
538 template_dict = dict((k, sanitize(k, v))
539 for k, v in template_dict.items()
541 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
543 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
544 tmpl = compat_expanduser(outtmpl)
545 filename = tmpl % template_dict
546 # Temporary fix for #4787
547 # 'Treat' all problem characters by passing filename through preferredencoding
548 # to workaround encoding issues with subprocess on python2 @ Windows
549 if sys.version_info < (3, 0) and sys.platform == 'win32':
550 filename = encodeFilename(filename, True).decode(preferredencoding())
552 except ValueError as err:
553 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
def _match_entry(self, info_dict):
    """ Returns None iff the file should be downloaded

    Otherwise returns a string explaining why the entry is skipped.
    The checks run in this order: 'matchtitle' / 'rejecttitle' patterns
    (only when info_dict has a title), 'daterange' (only when
    info_dict has an upload_date), 'min_views' / 'max_views' (only when
    info_dict has a view_count), the age restriction and finally the
    download archive.
    """

    # Used in messages; falls back to the id, then to the literal 'video'.
    video_title = info_dict.get('title', info_dict.get('id', 'video'))
    if 'title' in info_dict:
        # The title can be absent when we're just evaluating the playlist
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle:
            if not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle:
            if re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    date = info_dict.get('upload_date', None)
    if date is not None:
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    view_count = info_dict.get('view_count', None)
    if view_count is not None:
        min_views = self.params.get('min_views')
        if min_views is not None and view_count < min_views:
            return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
        max_views = self.params.get('max_views')
        if max_views is not None and view_count > max_views:
            return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
    if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
        # `title` is only bound when the entry has a title, so use the
        # always-available video_title to avoid a NameError here.
        return 'Skipping "%s" because it is age restricted' % video_title
    if self.in_download_archive(info_dict):
        return '%s has already been recorded in archive' % video_title
    return None
def add_extra_info(info_dict, extra_info):
    '''Copy each entry of extra_info into info_dict, keeping values info_dict already has'''
    for name, fallback in extra_info.items():
        if name not in info_dict:
            info_dict[name] = fallback
596 def extract_info(self, url, download=True, ie_key=None, extra_info={},
599 Returns a list with a dictionary for each video we find.
600 If 'download', also downloads the videos.
601 extra_info is a dict containing the extra values to add to each result
605 ies = [self.get_info_extractor(ie_key)]
610 if not ie.suitable(url):
614 self.report_warning('The program functionality for this site has been marked as broken, '
615 'and will probably not work.')
618 ie_result = ie.extract(url)
619 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
621 if isinstance(ie_result, list):
622 # Backwards compatibility: old IE result format
624 '_type': 'compat_list',
625 'entries': ie_result,
627 self.add_default_extra_info(ie_result, ie, url)
629 return self.process_ie_result(ie_result, download, extra_info)
632 except ExtractorError as de: # An error we somewhat expected
633 self.report_error(compat_str(de), de.format_traceback())
635 except MaxDownloadsReached:
637 except Exception as e:
638 if self.params.get('ignoreerrors', False):
639 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
644 self.report_error('no suitable InfoExtractor for URL %s' % url)
646 def add_default_extra_info(self, ie_result, ie, url):
647 self.add_extra_info(ie_result, {
648 'extractor': ie.IE_NAME,
650 'webpage_url_basename': url_basename(url),
651 'extractor_key': ie.ie_key(),
654 def process_ie_result(self, ie_result, download=True, extra_info={}):
656 Take the result of the ie(may be modified) and resolve all unresolved
657 references (URLs, playlist items).
659 It will also download the videos if 'download'.
660 Returns the resolved ie_result.
663 result_type = ie_result.get('_type', 'video')
665 if result_type in ('url', 'url_transparent'):
666 extract_flat = self.params.get('extract_flat', False)
667 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
668 extract_flat is True):
669 if self.params.get('forcejson', False):
670 self.to_stdout(json.dumps(ie_result))
673 if result_type == 'video':
674 self.add_extra_info(ie_result, extra_info)
675 return self.process_video_result(ie_result, download=download)
676 elif result_type == 'url':
677 # We have to add extra_info to the results because it may be
678 # contained in a playlist
679 return self.extract_info(ie_result['url'],
681 ie_key=ie_result.get('ie_key'),
682 extra_info=extra_info)
683 elif result_type == 'url_transparent':
684 # Use the information from the embedding page
685 info = self.extract_info(
686 ie_result['url'], ie_key=ie_result.get('ie_key'),
687 extra_info=extra_info, download=False, process=False)
689 force_properties = dict(
690 (k, v) for k, v in ie_result.items() if v is not None)
691 for f in ('_type', 'url'):
692 if f in force_properties:
693 del force_properties[f]
694 new_result = info.copy()
695 new_result.update(force_properties)
697 assert new_result.get('_type') != 'url_transparent'
699 return self.process_ie_result(
700 new_result, download=download, extra_info=extra_info)
701 elif result_type == 'playlist' or result_type == 'multi_video':
702 # We process each entry in the playlist
703 playlist = ie_result.get('title', None) or ie_result.get('id', None)
704 self.to_screen('[download] Downloading playlist: %s' % playlist)
706 playlist_results = []
708 playliststart = self.params.get('playliststart', 1) - 1
709 playlistend = self.params.get('playlistend', None)
710 # For backwards compatibility, interpret -1 as whole list
711 if playlistend == -1:
714 playlistitems_str = self.params.get('playlist_items', None)
716 if playlistitems_str is not None:
717 def iter_playlistitems(format):
718 for string_segment in format.split(','):
719 if '-' in string_segment:
720 start, end = string_segment.split('-')
721 for item in range(int(start), int(end) + 1):
724 yield int(string_segment)
725 playlistitems = iter_playlistitems(playlistitems_str)
727 ie_entries = ie_result['entries']
728 if isinstance(ie_entries, list):
729 n_all_entries = len(ie_entries)
731 entries = [ie_entries[i - 1] for i in playlistitems]
733 entries = ie_entries[playliststart:playlistend]
734 n_entries = len(entries)
736 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
737 (ie_result['extractor'], playlist, n_all_entries, n_entries))
738 elif isinstance(ie_entries, PagedList):
741 for item in playlistitems:
742 entries.extend(ie_entries.getslice(
746 entries = ie_entries.getslice(
747 playliststart, playlistend)
748 n_entries = len(entries)
750 "[%s] playlist %s: Downloading %d videos" %
751 (ie_result['extractor'], playlist, n_entries))
754 entry_list = list(ie_entries)
755 entries = [entry_list[i - 1] for i in playlistitems]
757 entries = list(itertools.islice(
758 ie_entries, playliststart, playlistend))
759 n_entries = len(entries)
761 "[%s] playlist %s: Downloading %d videos" %
762 (ie_result['extractor'], playlist, n_entries))
764 if self.params.get('playlistreverse', False):
765 entries = entries[::-1]
767 for i, entry in enumerate(entries, 1):
768 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
770 'n_entries': n_entries,
771 'playlist': playlist,
772 'playlist_id': ie_result.get('id'),
773 'playlist_title': ie_result.get('title'),
774 'playlist_index': i + playliststart,
775 'extractor': ie_result['extractor'],
776 'webpage_url': ie_result['webpage_url'],
777 'webpage_url_basename': url_basename(ie_result['webpage_url']),
778 'extractor_key': ie_result['extractor_key'],
781 reason = self._match_entry(entry)
782 if reason is not None:
783 self.to_screen('[download] ' + reason)
786 entry_result = self.process_ie_result(entry,
789 playlist_results.append(entry_result)
790 ie_result['entries'] = playlist_results
792 elif result_type == 'compat_list':
794 'Extractor %s returned a compat_list result. '
795 'It needs to be updated.' % ie_result.get('extractor'))
801 'extractor': ie_result['extractor'],
802 'webpage_url': ie_result['webpage_url'],
803 'webpage_url_basename': url_basename(ie_result['webpage_url']),
804 'extractor_key': ie_result['extractor_key'],
808 ie_result['entries'] = [
809 self.process_ie_result(_fixup(r), download, extra_info)
810 for r in ie_result['entries']
814 raise Exception('Invalid result type: %s' % result_type)
816 def _apply_format_filter(self, format_spec, available_formats):
817 " Returns a tuple of the remaining format_spec and filtered formats "
827 operator_rex = re.compile(r'''(?x)\s*\[
828 (?P<key>width|height|tbr|abr|vbr|filesize|fps)
829 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
830 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
832 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
833 m = operator_rex.search(format_spec)
835 raise ValueError('Invalid format specification %r' % format_spec)
838 comparison_value = int(m.group('value'))
840 comparison_value = parse_filesize(m.group('value'))
841 if comparison_value is None:
842 comparison_value = parse_filesize(m.group('value') + 'B')
843 if comparison_value is None:
845 'Invalid value %r in format specification %r' % (
846 m.group('value'), format_spec))
847 op = OPERATORS[m.group('op')]
850 actual_value = f.get(m.group('key'))
851 if actual_value is None:
852 return m.group('none_inclusive')
853 return op(actual_value, comparison_value)
854 new_formats = [f for f in available_formats if _filter(f)]
856 new_format_spec = format_spec[:-len(m.group(0))]
857 if not new_format_spec:
858 new_format_spec = 'best'
860 return (new_format_spec, new_formats)
862 def select_format(self, format_spec, available_formats):
863 while format_spec.endswith(']'):
864 format_spec, available_formats = self._apply_format_filter(
865 format_spec, available_formats)
866 if not available_formats:
869 if format_spec == 'best' or format_spec is None:
870 return available_formats[-1]
871 elif format_spec == 'worst':
872 return available_formats[0]
873 elif format_spec == 'bestaudio':
875 f for f in available_formats
876 if f.get('vcodec') == 'none']
878 return audio_formats[-1]
879 elif format_spec == 'worstaudio':
881 f for f in available_formats
882 if f.get('vcodec') == 'none']
884 return audio_formats[0]
885 elif format_spec == 'bestvideo':
887 f for f in available_formats
888 if f.get('acodec') == 'none']
890 return video_formats[-1]
891 elif format_spec == 'worstvideo':
893 f for f in available_formats
894 if f.get('acodec') == 'none']
896 return video_formats[0]
898 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
899 if format_spec in extensions:
900 filter_f = lambda f: f['ext'] == format_spec
902 filter_f = lambda f: f['format_id'] == format_spec
903 matches = list(filter(filter_f, available_formats))
908 def _calc_headers(self, info_dict):
909 res = std_headers.copy()
911 add_headers = info_dict.get('http_headers')
913 res.update(add_headers)
915 cookies = self._calc_cookies(info_dict)
917 res['Cookie'] = cookies
921 def _calc_cookies(self, info_dict):
922 class _PseudoRequest(object):
923 def __init__(self, url):
926 self.unverifiable = False
928 def add_unredirected_header(self, k, v):
931 def get_full_url(self):
934 def is_unverifiable(self):
935 return self.unverifiable
937 def has_header(self, h):
938 return h in self.headers
940 pr = _PseudoRequest(info_dict['url'])
941 self.cookiejar.add_cookie_header(pr)
942 return pr.headers.get('Cookie')
944 def process_video_result(self, info_dict, download=True):
945 assert info_dict.get('_type', 'video') == 'video'
947 if 'id' not in info_dict:
948 raise ExtractorError('Missing "id" field in extractor result')
949 if 'title' not in info_dict:
950 raise ExtractorError('Missing "title" field in extractor result')
952 if 'playlist' not in info_dict:
953 # It isn't part of a playlist
954 info_dict['playlist'] = None
955 info_dict['playlist_index'] = None
957 thumbnails = info_dict.get('thumbnails')
958 if thumbnails is None:
959 thumbnail = info_dict.get('thumbnail')
961 thumbnails = [{'url': thumbnail}]
963 thumbnails.sort(key=lambda t: (
964 t.get('preference'), t.get('width'), t.get('height'),
965 t.get('id'), t.get('url')))
967 if 'width' in t and 'height' in t:
968 t['resolution'] = '%dx%d' % (t['width'], t['height'])
970 if thumbnails and 'thumbnail' not in info_dict:
971 info_dict['thumbnail'] = thumbnails[-1]['url']
973 if 'display_id' not in info_dict and 'id' in info_dict:
974 info_dict['display_id'] = info_dict['id']
976 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
977 # Working around negative timestamps in Windows
978 # (see http://bugs.python.org/issue1646728)
979 if info_dict['timestamp'] < 0 and os.name == 'nt':
980 info_dict['timestamp'] = 0
981 upload_date = datetime.datetime.utcfromtimestamp(
982 info_dict['timestamp'])
983 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
985 # These extractors handle format selection themselves
986 if info_dict['extractor'] in ['Youku']:
988 self.process_info(info_dict)
991 # We now pick which formats have to be downloaded
992 if info_dict.get('formats') is None:
993 # There's only one format available
994 formats = [info_dict]
996 formats = info_dict['formats']
999 raise ExtractorError('No video formats found!')
1001 # We check that all the formats have the format and format_id fields
1002 for i, format in enumerate(formats):
1003 if 'url' not in format:
1004 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1006 if format.get('format_id') is None:
1007 format['format_id'] = compat_str(i)
1008 if format.get('format') is None:
1009 format['format'] = '{id} - {res}{note}'.format(
1010 id=format['format_id'],
1011 res=self.format_resolution(format),
1012 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1014 # Automatically determine file extension if missing
1015 if 'ext' not in format:
1016 format['ext'] = determine_ext(format['url']).lower()
1017 # Add HTTP headers, so that external programs can use them from the
1019 full_format_info = info_dict.copy()
1020 full_format_info.update(format)
1021 format['http_headers'] = self._calc_headers(full_format_info)
1023 format_limit = self.params.get('format_limit', None)
1025 formats = list(takewhile_inclusive(
1026 lambda f: f['format_id'] != format_limit, formats
1029 # TODO Central sorting goes here
1031 if formats[0] is not info_dict:
1032 # only set the 'formats' fields if the original info_dict list them
1033 # otherwise we end up with a circular reference, the first (and unique)
1034 # element in the 'formats' field in info_dict is info_dict itself,
1035 # which can't be exported to json
1036 info_dict['formats'] = formats
1037 if self.params.get('listformats'):
1038 self.list_formats(info_dict)
1040 if self.params.get('list_thumbnails'):
1041 self.list_thumbnails(info_dict)
1044 req_format = self.params.get('format')
1045 if req_format is None:
1047 formats_to_download = []
1048 # The -1 is for supporting YoutubeIE
1049 if req_format in ('-1', 'all'):
1050 formats_to_download = formats
1052 for rfstr in req_format.split(','):
1053 # We can accept formats requested in the format: 34/5/best, we pick
1054 # the first that is available, starting from left
1055 req_formats = rfstr.split('/')
1056 for rf in req_formats:
1057 if re.match(r'.+?\+.+?', rf) is not None:
1058 # Two formats have been requested like '137+139'
1059 format_1, format_2 = rf.split('+')
1060 formats_info = (self.select_format(format_1, formats),
1061 self.select_format(format_2, formats))
1062 if all(formats_info):
1063 # The first format must contain the video and the
1065 if formats_info[0].get('vcodec') == 'none':
1066 self.report_error('The first format must '
1067 'contain the video, try using '
1068 '"-f %s+%s"' % (format_2, format_1))
1071 formats_info[0]['ext']
1072 if self.params.get('merge_output_format') is None
1073 else self.params['merge_output_format'])
1075 'requested_formats': formats_info,
1078 'ext': formats_info[0]['ext'],
1079 'width': formats_info[0].get('width'),
1080 'height': formats_info[0].get('height'),
1081 'resolution': formats_info[0].get('resolution'),
1082 'fps': formats_info[0].get('fps'),
1083 'vcodec': formats_info[0].get('vcodec'),
1084 'vbr': formats_info[0].get('vbr'),
1085 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1086 'acodec': formats_info[1].get('acodec'),
1087 'abr': formats_info[1].get('abr'),
1091 selected_format = None
1093 selected_format = self.select_format(rf, formats)
1094 if selected_format is not None:
1095 formats_to_download.append(selected_format)
1097 if not formats_to_download:
1098 raise ExtractorError('requested format not available',
1102 if len(formats_to_download) > 1:
1103 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1104 for format in formats_to_download:
1105 new_info = dict(info_dict)
1106 new_info.update(format)
1107 self.process_info(new_info)
1108 # We update the info dict with the best quality format (backwards compatibility)
1109 info_dict.update(formats_to_download[-1])
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Applies the download limit and the match filter, prints the fields
    requested through the ``force*`` options, writes the optional side
    files (description, annotations, subtitles, info JSON, thumbnails),
    downloads the media unless ``skip_download`` is set, and finally runs
    the postprocessors and records the video in the download archive.

    Raises MaxDownloadsReached when ``max_downloads`` has been reached and
    UnavailableVideoError when the download fails with an OSError/IOError.
    Most other failures are reported through report_error() and end the
    method early.
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # 'fulltitle' keeps the untruncated title; 'title' is capped at 200 chars
    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + '...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    # A non-None reason means the entry was rejected by the match filter
    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen('[download] ' + reason)
        return

    self._num_downloads += 1

    info_dict['_filename'] = filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        self.to_stdout(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        self.to_stdout(info_dict['id'])
    if self.params.get('forceurl', False):
        if info_dict.get('requested_formats') is not None:
            for f in info_dict['requested_formats']:
                self.to_stdout(f['url'] + f.get('play_path', ''))
        else:
            # For RTMP URLs, also include the playpath
            self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        self.to_stdout(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        self.to_stdout(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    if self.params.get('forceformat', False):
        self.to_stdout(info_dict['format'])
    if self.params.get('forcejson', False):
        self.to_stdout(json.dumps(info_dict))

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    # Make sure the target directory exists before writing anything
    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error('unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        descfn = filename + '.description'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            self.report_warning('There\'s no description to write.')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = filename + '.annotations.xml'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                # Missing key or a non-text value: nothing usable to write
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang, sub in subtitles.items():
            if sub is None:
                continue
            try:
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                else:
                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                        subfile.write(sub)
            except (OSError, IOError):
                self.report_error('Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + '.info.json'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
        else:
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
            try:
                write_json_file(info_dict, infofn)
            except (OSError, IOError):
                self.report_error('Cannot write metadata to JSON file ' + infofn)
                return

    self._write_thumbnails(info_dict, filename)

    if not self.params.get('skip_download', False):
        try:
            def dl(name, info):
                # Pick a downloader for this format and attach the progress hooks
                fd = get_suitable_downloader(info, self.params)(self, self.params)
                for ph in self._progress_hooks:
                    fd.add_progress_hook(ph)
                if self.params.get('verbose'):
                    self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                return fd.download(name, info)

            if info_dict.get('requested_formats') is not None:
                # Several formats were requested ('a+b'): download each one to
                # its own file and leave the merging to a postprocessor
                downloaded = []
                success = True
                merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
                if not merger._executable:
                    postprocessors = []
                    self.report_warning('You have requested multiple '
                                        'formats but ffmpeg or avconv are not installed.'
                                        ' The formats won\'t be merged')
                else:
                    postprocessors = [merger]
                for f in info_dict['requested_formats']:
                    new_info = dict(info_dict)
                    new_info.update(f)
                    fname = self.prepare_filename(new_info)
                    fname = prepend_extension(fname, 'f%s' % f['format_id'])
                    downloaded.append(fname)
                    partial_success = dl(fname, new_info)
                    success = success and partial_success
                info_dict['__postprocessors'] = postprocessors
                info_dict['__files_to_merge'] = downloaded
            else:
                # Just a single file
                success = dl(filename, info_dict)
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self.report_error('unable to download video data: %s' % str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success:
            # Fixup content
            fixup_policy = self.params.get('fixup')
            if fixup_policy is None:
                fixup_policy = 'detect_or_warn'

            stretched_ratio = info_dict.get('stretched_ratio')
            if stretched_ratio is not None and stretched_ratio != 1:
                if fixup_policy == 'warn':
                    self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                        info_dict['id'], stretched_ratio))
                elif fixup_policy == 'detect_or_warn':
                    stretched_pp = FFmpegFixupStretchedPP(self)
                    if stretched_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(stretched_pp)
                    else:
                        self.report_warning(
                            '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
                                info_dict['id'], stretched_ratio))
                else:
                    assert fixup_policy in ('ignore', 'never')

            if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
                if fixup_policy == 'warn':
                    self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
                        info_dict['id']))
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM4aPP(self)
                    if fixup_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(fixup_pp)
                    else:
                        self.report_warning(
                            '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
                                info_dict['id']))
                else:
                    assert fixup_policy in ('ignore', 'never')

            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))
                return
            self.record_download_archive(info_dict)
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs are given but the output
    template contains no '%' field (every download would go to the same
    file), unless ``max_downloads`` is 1. Returns the accumulated
    download return code.
    """
    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    if (len(url_list) > 1 and
            '%' not in outtmpl
            and self.params.get('max_downloads') != 1):
        raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            res = self.extract_info(url)
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise
        else:
            if self.params.get('dump_single_json', False):
                self.to_stdout(json.dumps(res))

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Download using the metadata stored in a JSON info file.

    The file is loaded and handed to process_ie_result(). If that fails
    with DownloadError and the info carries a 'webpage_url', the download
    is retried from that URL; otherwise the error is re-raised.
    Returns the download return code.
    """
    with io.open(info_filename, 'r', encoding='utf-8') as f:
        info = json.load(f)
    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
        return self.download([webpage_url])
    return self._download_retcode
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    The postprocessors attached to the result ('__postprocessors') run
    first, followed by the ones registered on this object. Each one
    receives the info dict returned by the previous one. When a
    postprocessor explicitly answers False to "keep the video?" and the
    'keepvideo' option is not set, the file it was given is deleted.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    pps_chain = []
    if ie_info.get('__postprocessors') is not None:
        pps_chain.extend(ie_info['__postprocessors'])
    pps_chain.extend(self._pps)
    for pp in pps_chain:
        # The decision is taken per postprocessor; None means "no opinion"
        keep_video = None
        old_filename = info['filepath']
        try:
            keep_video, info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)
        if keep_video is False and not self.params.get('keepvideo', False):
            try:
                self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
                os.remove(encodeFilename(old_filename))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded video file')
1403 def _make_archive_id(self, info_dict):
1404 # Future-proof against any change in case
1405 # and backwards compatibility with prior versions
1406 extractor = info_dict.get('extractor_key')
1407 if extractor is None:
1408 if 'id' in info_dict:
1409 extractor = info_dict.get('ie_key') # key in a playlist
1410 if extractor is None:
1411 return None # Incomplete video information
1412 return extractor.lower() + ' ' + info_dict['id']
def in_download_archive(self, info_dict):
    """Return True if the video is listed in the download archive file.

    Returns False when no 'download_archive' is configured, when the
    archive id cannot be built, or when the archive file does not exist.
    Other I/O errors are propagated.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return False

    vid_id = self._make_archive_id(info_dict)
    if vid_id is None:
        return False  # Incomplete video information

    try:
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            return any(line.strip() == vid_id for line in archive_file)
    except IOError as ioe:
        # Only a missing archive file is tolerated
        if ioe.errno != errno.ENOENT:
            raise
    return False
def record_download_archive(self, info_dict):
    """Append the video's archive id to the download archive file.

    Does nothing when no 'download_archive' is configured.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return
    vid_id = self._make_archive_id(info_dict)
    assert vid_id
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + '\n')
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a short human-readable resolution for a format dict.

    'audio only' for formats whose vcodec is 'none'; otherwise the
    explicit 'resolution' value if present, else a string built from
    width/height ('WxH', 'Hp' or 'Wx?'), else ``default``.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']
    width = format.get('width')
    height = format.get('height')
    if height is not None:
        if width is not None:
            return '%sx%s' % (width, height)
        return '%sp' % height
    if width is not None:
        # Only the width is known: it belongs in front of the 'x'
        return '%dx?' % width
    return default
1459 def _format_note(self, fdict):
1461 if fdict.get('ext') in ['f4f', 'f4m']:
1462 res += '(unsupported) '
1463 if fdict.get('format_note') is not None:
1464 res += fdict['format_note'] + ' '
1465 if fdict.get('tbr') is not None:
1466 res += '%4dk ' % fdict['tbr']
1467 if fdict.get('container') is not None:
1470 res += '%s container' % fdict['container']
1471 if (fdict.get('vcodec') is not None and
1472 fdict.get('vcodec') != 'none'):
1475 res += fdict['vcodec']
1476 if fdict.get('vbr') is not None:
1478 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1480 if fdict.get('vbr') is not None:
1481 res += '%4dk' % fdict['vbr']
1482 if fdict.get('fps') is not None:
1483 res += ', %sfps' % fdict['fps']
1484 if fdict.get('acodec') is not None:
1487 if fdict['acodec'] == 'none':
1490 res += '%-5s' % fdict['acodec']
1491 elif fdict.get('abr') is not None:
1495 if fdict.get('abr') is not None:
1496 res += '@%3dk' % fdict['abr']
1497 if fdict.get('asr') is not None:
1498 res += ' (%5dHz)' % fdict['asr']
1499 if fdict.get('filesize') is not None:
1502 res += format_bytes(fdict['filesize'])
1503 elif fdict.get('filesize_approx') is not None:
1506 res += '~' + format_bytes(fdict['filesize_approx'])
def list_formats(self, info_dict):
    """Print a table of the formats available for a video.

    Formats whose 'preference' is below -1000 are not listed. When more
    than one format is listed, the first row is tagged '(worst)' and the
    last one '(best)'.
    """
    def line(format, idlen=20):
        # '*' takes the width of the first column from the argument list
        return '%-*s%-10s%-12s%s' % (
            idlen + 1,
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            self._format_note(format),
        )

    formats = info_dict.get('formats', [info_dict])
    idlen = max([len('format code')] + [len(f['format_id']) for f in formats])
    # Annotate the rows that are actually shown, not the unfiltered list
    shown = [
        f for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]
    formats_s = [line(f, idlen) for f in shown]
    if len(shown) > 1:
        formats_s[0] += (' ' if self._format_note(shown[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if self._format_note(shown[-1]) else '') + '(best)'

    header_line = line({
        'format_id': 'format code', 'ext': 'extension',
        'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
    self.to_screen(
        '[info] Available formats for %s:\n%s\n%s' %
        (info_dict['id'], header_line, '\n'.join(formats_s)))
def list_thumbnails(self, info_dict):
    """Print a table of the thumbnails known for a video.

    Falls back to the single 'thumbnail' URL when there is no
    'thumbnails' list, and prints a notice when neither is available.
    """
    thumbnails = info_dict.get('thumbnails')
    if not thumbnails:
        tn_url = info_dict.get('thumbnail')
        if not tn_url:
            self.to_screen(
                '[info] No thumbnails present for %s' % info_dict['id'])
            return
        thumbnails = [{'id': '0', 'url': tn_url}]

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])
    self.to_screen(render_table(
        ['ID', 'width', 'height', 'URL'],
        [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
def urlopen(self, req):
    """Start an HTTP download.

    ``req`` is either a URL string or a request object. The URL is
    percent-escaped first; the response of the opener is returned.
    """

    # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
    # always respected by websites, some tend to give out URLs with non percent-encoded
    # non-ASCII characters (see telemb.py, ard.py [#3412])
    # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
    # To work around aforementioned issue we will replace request's original URL with
    # percent-encoded one
    req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
    url = req if req_is_string else req.get_full_url()
    url_escaped = escape_url(url)

    # Substitute URL if any change after escaping
    if url != url_escaped:
        if req_is_string:
            req = url_escaped
        else:
            # Rebuild the request around the escaped URL, carrying over its
            # data, headers and origin information
            req = compat_urllib_request.Request(
                url_escaped, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

    return self._opener.open(req, timeout=self._socket_timeout)
def print_debug_header(self):
    """Write the '[debug]' preamble when the 'verbose' option is set.

    Reports the encodings in use, the program version, the git HEAD (when
    git can be run from the source directory), the Python version, the
    versions of external executables and the proxy map. With 'call_home'
    it also prints the public IP address and warns about outdated versions.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        # Best effort only (git may be missing); do not swallow
        # KeyboardInterrupt/SystemExit
        try:
            sys.exc_clear()
        except AttributeError:
            # sys.exc_clear() does not exist on Python 3
            pass
    self._write_string('[debug] Python version %s - %s\n' % (
        platform.python_version(), platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions()
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    )
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
def _setup_opener(self):
    """Build the URL opener used for all requests and store it on self.

    Sets ``_socket_timeout`` (600 seconds unless 'socket_timeout' is
    given), ``cookiejar`` (loaded from 'cookiefile' when that file is
    readable) and ``_opener``.
    """
    timeout_val = self.params.get('socket_timeout')
    self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
            opts_cookiefile)
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)
    if opts_proxy is not None:
        # An explicitly empty 'proxy' option disables proxies altogether
        if opts_proxy == '':
            proxies = {}
        else:
            proxies = {'http': opts_proxy, 'https': opts_proxy}
    else:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, ydlh)
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
def encode(self, s):
    """Encode text with the configured encoding; bytes pass through.

    A UnicodeEncodeError is re-raised with a hint about the --encoding
    option appended to its reason.
    """
    if isinstance(s, bytes):
        return s  # Already encoded

    try:
        return s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
        raise
def get_encoding(self):
    """Return the 'encoding' option, or the preferred encoding if unset."""
    encoding = self.params.get('encoding')
    if encoding is None:
        encoding = preferredencoding()
    return encoding
1697 def _write_thumbnails(self, info_dict, filename):
1698 if self.params.get('writethumbnail', False):
1699 thumbnails = info_dict.get('thumbnails')
1701 thumbnails = [thumbnails[-1]]
1702 elif self.params.get('write_all_thumbnails', False):
1703 thumbnails = info_dict.get('thumbnails')
1708 # No thumbnails present, so return immediately
1711 for t in thumbnails:
1712 thumb_ext = determine_ext(t['url'], 'jpg')
1713 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1714 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1715 thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1717 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1718 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1719 (info_dict['extractor'], info_dict['id'], thumb_display_id))
1721 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1722 (info_dict['extractor'], info_dict['id'], thumb_display_id))
1724 uf = self.urlopen(t['url'])
1725 with open(thumb_filename, 'wb') as thumbf:
1726 shutil.copyfileobj(uf, thumbf)
1727 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1728 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1729 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1730 self.report_warning('Unable to download thumbnail "%s": %s' %
1731 (t['url'], compat_str(err)))