2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
34 compat_urllib_request,
63 UnavailableVideoError,
73 from .cache import Cache
74 from .extractor import get_info_extractor, gen_extractors
75 from .downloader import get_suitable_downloader
76 from .downloader.rtmp import rtmpdump_version
77 from .postprocessor import (
79 FFmpegFixupStretchedPP,
84 from .version import __version__
87 class YoutubeDL(object):
90 YoutubeDL objects are the ones responsible of downloading the
91 actual video file and writing it to disk if the user has requested
92 it, among some other tasks. In most cases there should be one per
93 program. As, given a video URL, the downloader doesn't know how to
94 extract all the needed information, task that InfoExtractors do, it
95 has to pass the URL to one of them.
97 For this, YoutubeDL objects have a method that allows
98 InfoExtractors to be registered in a given order. When it is passed
99 a URL, the YoutubeDL object handles it to the first InfoExtractor it
100 finds that reports being able to handle it. The InfoExtractor extracts
101 all the information about the video or videos the URL refers to, and
102 YoutubeDL process the extracted information, possibly using a File
103 Downloader to download the video.
105 YoutubeDL objects accept a lot of parameters. In order not to saturate
106 the object constructor with arguments, it receives a dictionary of
107 options instead. These options are available through the params
108 attribute for the InfoExtractors to use. The YoutubeDL also
109 registers itself as the downloader in charge for the InfoExtractors
110 that are added to it, so this is a "mutual registration".
114 username: Username for authentication purposes.
115 password: Password for authentication purposes.
116 videopassword: Password for acces a video.
117 usenetrc: Use netrc for authentication instead.
118 verbose: Print additional info to stdout.
119 quiet: Do not print messages to stdout.
120 no_warnings: Do not print out anything for warnings.
121 forceurl: Force printing final URL.
122 forcetitle: Force printing title.
123 forceid: Force printing ID.
124 forcethumbnail: Force printing thumbnail URL.
125 forcedescription: Force printing description.
126 forcefilename: Force printing final filename.
127 forceduration: Force printing duration.
128 forcejson: Force printing info_dict as JSON.
129 dump_single_json: Force printing the info_dict of the whole playlist
130 (or video) as a single JSON line.
131 simulate: Do not download the video files.
132 format: Video format code. See options.py for more information.
133 format_limit: Highest quality format to try.
134 outtmpl: Template for output names.
135 restrictfilenames: Do not allow "&" and spaces in file names
136 ignoreerrors: Do not stop on download errors.
137 nooverwrites: Prevent overwriting files.
138 playliststart: Playlist item to start at.
139 playlistend: Playlist item to end at.
140 playlist_items: Specific indices of playlist to download.
141 playlistreverse: Download playlist items in reverse order.
142 matchtitle: Download only matching titles.
143 rejecttitle: Reject downloads for matching titles.
144 logger: Log messages to a logging.Logger instance.
145 logtostderr: Log messages to stderr instead of stdout.
146 writedescription: Write the video description to a .description file
147 writeinfojson: Write the video description to a .info.json file
148 writeannotations: Write the video annotations to a .annotations.xml file
149 writethumbnail: Write the thumbnail image to a file
150 write_all_thumbnails: Write all thumbnail formats to files
151 writesubtitles: Write the video subtitles to a file
152 writeautomaticsub: Write the automatic subtitles to a file
153 allsubtitles: Downloads all the subtitles of the video
154 (requires writesubtitles or writeautomaticsub)
155 listsubtitles: Lists all available subtitles for the video
156 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
157 subtitleslangs: List of languages of the subtitles to download
158 keepvideo: Keep the video file after post-processing
159 daterange: A DateRange object, download only if the upload_date is in the range.
160 skip_download: Skip the actual download of the video file
161 cachedir: Location of the cache files in the filesystem.
162 False to disable filesystem cache.
163 noplaylist: Download single video instead of a playlist if in doubt.
164 age_limit: An integer representing the user's age in years.
165 Unsuitable videos for the given age are skipped.
166 min_views: An integer representing the minimum view count the video
167 must have in order to not be skipped.
168 Videos without view count information are always
169 downloaded. None for no limit.
170 max_views: An integer representing the maximum view count.
171 Videos that are more popular than that are not
173 Videos without view count information are always
174 downloaded. None for no limit.
175 download_archive: File name of a file where all downloads are recorded.
176 Videos already present in the file are not downloaded
178 cookiefile: File name where cookies should be read from and dumped to.
179 nocheckcertificate:Do not verify SSL certificates
180 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
181 At the moment, this is only supported by YouTube.
182 proxy: URL of the proxy server to use
183 socket_timeout: Time to wait for unresponsive hosts, in seconds
184 bidi_workaround: Work around buggy terminals without bidirectional text
185 support, using fridibi
186 debug_printtraffic:Print out sent and received HTTP traffic
187 include_ads: Download ads as well
188 default_search: Prepend this string if an input url is not valid.
189 'auto' for elaborate guessing
190 encoding: Use this encoding instead of the system-specified.
191 extract_flat: Do not resolve URLs, return the immediate result.
192 Pass in 'in_playlist' to only show this behavior for
194 postprocessors: A list of dictionaries, each with an entry
195 * key: The name of the postprocessor. See
196 youtube_dl/postprocessor/__init__.py for a list.
197 as well as any further keyword arguments for the
199 progress_hooks: A list of functions that get called on download
200 progress, with a dictionary with the entries
201 * filename: The final filename
202 * status: One of "downloading" and "finished"
204 The dict may also have some of the following entries:
206 * downloaded_bytes: Bytes on disk
207 * total_bytes: Size of the whole file, None if unknown
208 * tmpfilename: The filename we're currently writing to
209 * eta: The estimated time in seconds, None if unknown
210 * speed: The download speed in bytes/second, None if
213 Progress hooks are guaranteed to be called at least once
214 (with status "finished") if the download is successful.
215 merge_output_format: Extension to use when merging formats.
216 fixup: Automatically correct known faults of the file.
218 - "never": do nothing
219 - "warn": only emit a warning
220 - "detect_or_warn": check whether we can do anything
221 about it, warn otherwise (default)
222 source_address: (Experimental) Client-side IP address to bind to.
223 call_home: Boolean, true iff we are allowed to contact the
224 youtube-dl servers for debugging.
225 sleep_interval: Number of seconds to sleep before each download.
226 external_downloader: Executable of the external downloader to call.
227 listformats: Print an overview of available video formats and exit.
228 list_thumbnails: Print a table of all thumbnails and exit.
231 The following parameters are not used by YoutubeDL itself, they are used by
233 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
234 noresizebuffer, retries, continuedl, noprogress, consoletitle,
237 The following options are used by the post processors:
238 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
239 otherwise prefer avconv.
240 exec_cmd: Arbitrary command to run after downloading
246 _download_retcode = None
247 _num_downloads = None
250 def __init__(self, params=None, auto_init=True):
251 """Create a FileDownloader object with the given options."""
255 self._ies_instances = {}
257 self._progress_hooks = []
258 self._download_retcode = 0
259 self._num_downloads = 0
260 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
261 self._err_file = sys.stderr
263 self.cache = Cache(self)
265 if params.get('bidi_workaround', False):
268 master, slave = pty.openpty()
269 width = get_term_width()
273 width_args = ['-w', str(width)]
275 stdin=subprocess.PIPE,
277 stderr=self._err_file)
279 self._output_process = subprocess.Popen(
280 ['bidiv'] + width_args, **sp_kwargs
283 self._output_process = subprocess.Popen(
284 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
285 self._output_channel = os.fdopen(master, 'rb')
286 except OSError as ose:
288 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
292 if (sys.version_info >= (3,) and sys.platform != 'win32' and
293 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
294 and not params.get('restrictfilenames', False)):
295 # On Python 3, the Unicode filesystem API will throw errors (#1474)
297 'Assuming --restrict-filenames since file system encoding '
298 'cannot encode all characters. '
299 'Set the LC_ALL environment variable to fix this.')
300 self.params['restrictfilenames'] = True
302 if '%(stitle)s' in self.params.get('outtmpl', ''):
303 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
308 self.print_debug_header()
309 self.add_default_info_extractors()
311 for pp_def_raw in self.params.get('postprocessors', []):
312 pp_class = get_postprocessor(pp_def_raw['key'])
313 pp_def = dict(pp_def_raw)
315 pp = pp_class(self, **compat_kwargs(pp_def))
316 self.add_post_processor(pp)
318 for ph in self.params.get('progress_hooks', []):
319 self.add_progress_hook(ph)
321 def warn_if_short_id(self, argv):
322 # short YouTube ID starting with dash?
324 i for i, a in enumerate(argv)
325 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
329 [a for i, a in enumerate(argv) if i not in idxs] +
330 ['--'] + [argv[i] for i in idxs]
333 'Long argument string detected. '
334 'Use -- to separate parameters and URLs, like this:\n%s\n' %
335 args_to_str(correct_argv))
337 def add_info_extractor(self, ie):
338 """Add an InfoExtractor object to the end of the list."""
340 self._ies_instances[ie.ie_key()] = ie
341 ie.set_downloader(self)
343 def get_info_extractor(self, ie_key):
345 Get an instance of an IE with name ie_key, it will try to get one from
346 the _ies list, if there's no instance it will create a new one and add
347 it to the extractor list.
349 ie = self._ies_instances.get(ie_key)
351 ie = get_info_extractor(ie_key)()
352 self.add_info_extractor(ie)
355 def add_default_info_extractors(self):
357 Add the InfoExtractors returned by gen_extractors to the end of the list
359 for ie in gen_extractors():
360 self.add_info_extractor(ie)
362 def add_post_processor(self, pp):
363 """Add a PostProcessor object to the end of the chain."""
365 pp.set_downloader(self)
367 def add_progress_hook(self, ph):
368 """Add the progress hook (currently only for the file downloader)"""
369 self._progress_hooks.append(ph)
371 def _bidi_workaround(self, message):
372 if not hasattr(self, '_output_channel'):
375 assert hasattr(self, '_output_process')
376 assert isinstance(message, compat_str)
377 line_count = message.count('\n') + 1
378 self._output_process.stdin.write((message + '\n').encode('utf-8'))
379 self._output_process.stdin.flush()
380 res = ''.join(self._output_channel.readline().decode('utf-8')
381 for _ in range(line_count))
382 return res[:-len('\n')]
384 def to_screen(self, message, skip_eol=False):
385 """Print message to stdout if not in quiet mode."""
386 return self.to_stdout(message, skip_eol, check_quiet=True)
388 def _write_string(self, s, out=None):
389 write_string(s, out=out, encoding=self.params.get('encoding'))
391 def to_stdout(self, message, skip_eol=False, check_quiet=False):
392 """Print message to stdout if not in quiet mode."""
393 if self.params.get('logger'):
394 self.params['logger'].debug(message)
395 elif not check_quiet or not self.params.get('quiet', False):
396 message = self._bidi_workaround(message)
397 terminator = ['\n', ''][skip_eol]
398 output = message + terminator
400 self._write_string(output, self._screen_file)
402 def to_stderr(self, message):
403 """Print message to stderr."""
404 assert isinstance(message, compat_str)
405 if self.params.get('logger'):
406 self.params['logger'].error(message)
408 message = self._bidi_workaround(message)
409 output = message + '\n'
410 self._write_string(output, self._err_file)
412 def to_console_title(self, message):
413 if not self.params.get('consoletitle', False):
415 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
416 # c_wchar_p() might not be necessary if `message` is
417 # already of type unicode()
418 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
419 elif 'TERM' in os.environ:
420 self._write_string('\033]0;%s\007' % message, self._screen_file)
422 def save_console_title(self):
423 if not self.params.get('consoletitle', False):
425 if 'TERM' in os.environ:
426 # Save the title on stack
427 self._write_string('\033[22;0t', self._screen_file)
429 def restore_console_title(self):
430 if not self.params.get('consoletitle', False):
432 if 'TERM' in os.environ:
433 # Restore the title from stack
434 self._write_string('\033[23;0t', self._screen_file)
437 self.save_console_title()
440 def __exit__(self, *args):
441 self.restore_console_title()
443 if self.params.get('cookiefile') is not None:
444 self.cookiejar.save()
446 def trouble(self, message=None, tb=None):
447 """Determine action to take when a download problem appears.
449 Depending on if the downloader has been configured to ignore
450 download errors or not, this method may throw an exception or
451 not when errors are found, after printing the message.
453 tb, if given, is additional traceback information.
455 if message is not None:
456 self.to_stderr(message)
457 if self.params.get('verbose'):
459 if sys.exc_info()[0]: # if .trouble has been called from an except block
461 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
462 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
463 tb += compat_str(traceback.format_exc())
465 tb_data = traceback.format_list(traceback.extract_stack())
466 tb = ''.join(tb_data)
468 if not self.params.get('ignoreerrors', False):
469 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
470 exc_info = sys.exc_info()[1].exc_info
472 exc_info = sys.exc_info()
473 raise DownloadError(message, exc_info)
474 self._download_retcode = 1
476 def report_warning(self, message):
478 Print the message to stderr, it will be prefixed with 'WARNING:'
479 If stderr is a tty file the 'WARNING:' will be colored
481 if self.params.get('logger') is not None:
482 self.params['logger'].warning(message)
484 if self.params.get('no_warnings'):
486 if self._err_file.isatty() and os.name != 'nt':
487 _msg_header = '\033[0;33mWARNING:\033[0m'
489 _msg_header = 'WARNING:'
490 warning_message = '%s %s' % (_msg_header, message)
491 self.to_stderr(warning_message)
493 def report_error(self, message, tb=None):
495 Do the same as trouble, but prefixes the message with 'ERROR:', colored
496 in red if stderr is a tty file.
498 if self._err_file.isatty() and os.name != 'nt':
499 _msg_header = '\033[0;31mERROR:\033[0m'
501 _msg_header = 'ERROR:'
502 error_message = '%s %s' % (_msg_header, message)
503 self.trouble(error_message, tb)
505 def report_file_already_downloaded(self, file_name):
506 """Report file has already been fully downloaded."""
508 self.to_screen('[download] %s has already been downloaded' % file_name)
509 except UnicodeEncodeError:
510 self.to_screen('[download] The file has already been downloaded')
512 def prepare_filename(self, info_dict):
513 """Generate the output filename."""
515 template_dict = dict(info_dict)
517 template_dict['epoch'] = int(time.time())
518 autonumber_size = self.params.get('autonumber_size')
519 if autonumber_size is None:
521 autonumber_templ = '%0' + str(autonumber_size) + 'd'
522 template_dict['autonumber'] = autonumber_templ % self._num_downloads
523 if template_dict.get('playlist_index') is not None:
524 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
525 if template_dict.get('resolution') is None:
526 if template_dict.get('width') and template_dict.get('height'):
527 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
528 elif template_dict.get('height'):
529 template_dict['resolution'] = '%sp' % template_dict['height']
530 elif template_dict.get('width'):
531 template_dict['resolution'] = '?x%d' % template_dict['width']
533 sanitize = lambda k, v: sanitize_filename(
535 restricted=self.params.get('restrictfilenames'),
537 template_dict = dict((k, sanitize(k, v))
538 for k, v in template_dict.items()
540 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
542 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
543 tmpl = compat_expanduser(outtmpl)
544 filename = tmpl % template_dict
546 except ValueError as err:
547 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
550 def _match_entry(self, info_dict):
551 """ Returns None iff the file should be downloaded """
553 video_title = info_dict.get('title', info_dict.get('id', 'video'))
554 if 'title' in info_dict:
555 # This can happen when we're just evaluating the playlist
556 title = info_dict['title']
557 matchtitle = self.params.get('matchtitle', False)
559 if not re.search(matchtitle, title, re.IGNORECASE):
560 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
561 rejecttitle = self.params.get('rejecttitle', False)
563 if re.search(rejecttitle, title, re.IGNORECASE):
564 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
565 date = info_dict.get('upload_date', None)
567 dateRange = self.params.get('daterange', DateRange())
568 if date not in dateRange:
569 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
570 view_count = info_dict.get('view_count', None)
571 if view_count is not None:
572 min_views = self.params.get('min_views')
573 if min_views is not None and view_count < min_views:
574 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
575 max_views = self.params.get('max_views')
576 if max_views is not None and view_count > max_views:
577 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
578 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
579 return 'Skipping "%s" because it is age restricted' % title
580 if self.in_download_archive(info_dict):
581 return '%s has already been recorded in archive' % video_title
585 def add_extra_info(info_dict, extra_info):
586 '''Set the keys from extra_info in info dict if they are missing'''
587 for key, value in extra_info.items():
588 info_dict.setdefault(key, value)
590 def extract_info(self, url, download=True, ie_key=None, extra_info={},
593 Returns a list with a dictionary for each video we find.
594 If 'download', also downloads the videos.
595 extra_info is a dict containing the extra values to add to each result
599 ies = [self.get_info_extractor(ie_key)]
604 if not ie.suitable(url):
608 self.report_warning('The program functionality for this site has been marked as broken, '
609 'and will probably not work.')
612 ie_result = ie.extract(url)
613 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
615 if isinstance(ie_result, list):
616 # Backwards compatibility: old IE result format
618 '_type': 'compat_list',
619 'entries': ie_result,
621 self.add_default_extra_info(ie_result, ie, url)
623 return self.process_ie_result(ie_result, download, extra_info)
626 except ExtractorError as de: # An error we somewhat expected
627 self.report_error(compat_str(de), de.format_traceback())
629 except MaxDownloadsReached:
631 except Exception as e:
632 if self.params.get('ignoreerrors', False):
633 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
638 self.report_error('no suitable InfoExtractor for URL %s' % url)
640 def add_default_extra_info(self, ie_result, ie, url):
641 self.add_extra_info(ie_result, {
642 'extractor': ie.IE_NAME,
644 'webpage_url_basename': url_basename(url),
645 'extractor_key': ie.ie_key(),
648 def process_ie_result(self, ie_result, download=True, extra_info={}):
650 Take the result of the ie(may be modified) and resolve all unresolved
651 references (URLs, playlist items).
653 It will also download the videos if 'download'.
654 Returns the resolved ie_result.
657 result_type = ie_result.get('_type', 'video')
659 if result_type in ('url', 'url_transparent'):
660 extract_flat = self.params.get('extract_flat', False)
661 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
662 extract_flat is True):
663 if self.params.get('forcejson', False):
664 self.to_stdout(json.dumps(ie_result))
667 if result_type == 'video':
668 self.add_extra_info(ie_result, extra_info)
669 return self.process_video_result(ie_result, download=download)
670 elif result_type == 'url':
671 # We have to add extra_info to the results because it may be
672 # contained in a playlist
673 return self.extract_info(ie_result['url'],
675 ie_key=ie_result.get('ie_key'),
676 extra_info=extra_info)
677 elif result_type == 'url_transparent':
678 # Use the information from the embedding page
679 info = self.extract_info(
680 ie_result['url'], ie_key=ie_result.get('ie_key'),
681 extra_info=extra_info, download=False, process=False)
683 force_properties = dict(
684 (k, v) for k, v in ie_result.items() if v is not None)
685 for f in ('_type', 'url'):
686 if f in force_properties:
687 del force_properties[f]
688 new_result = info.copy()
689 new_result.update(force_properties)
691 assert new_result.get('_type') != 'url_transparent'
693 return self.process_ie_result(
694 new_result, download=download, extra_info=extra_info)
695 elif result_type == 'playlist' or result_type == 'multi_video':
696 # We process each entry in the playlist
697 playlist = ie_result.get('title', None) or ie_result.get('id', None)
698 self.to_screen('[download] Downloading playlist: %s' % playlist)
700 playlist_results = []
702 playliststart = self.params.get('playliststart', 1) - 1
703 playlistend = self.params.get('playlistend', None)
704 # For backwards compatibility, interpret -1 as whole list
705 if playlistend == -1:
708 playlistitems_str = self.params.get('playlist_items', None)
710 if playlistitems_str is not None:
711 def iter_playlistitems(format):
712 for string_segment in format.split(','):
713 if '-' in string_segment:
714 start, end = string_segment.split('-')
715 for item in range(int(start), int(end) + 1):
718 yield int(string_segment)
719 playlistitems = iter_playlistitems(playlistitems_str)
721 ie_entries = ie_result['entries']
722 if isinstance(ie_entries, list):
723 n_all_entries = len(ie_entries)
725 entries = [ie_entries[i - 1] for i in playlistitems]
727 entries = ie_entries[playliststart:playlistend]
728 n_entries = len(entries)
730 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
731 (ie_result['extractor'], playlist, n_all_entries, n_entries))
732 elif isinstance(ie_entries, PagedList):
735 for item in playlistitems:
736 entries.extend(ie_entries.getslice(
740 entries = ie_entries.getslice(
741 playliststart, playlistend)
742 n_entries = len(entries)
744 "[%s] playlist %s: Downloading %d videos" %
745 (ie_result['extractor'], playlist, n_entries))
748 entry_list = list(ie_entries)
749 entries = [entry_list[i - 1] for i in playlistitems]
751 entries = list(itertools.islice(
752 ie_entries, playliststart, playlistend))
753 n_entries = len(entries)
755 "[%s] playlist %s: Downloading %d videos" %
756 (ie_result['extractor'], playlist, n_entries))
758 if self.params.get('playlistreverse', False):
759 entries = entries[::-1]
761 for i, entry in enumerate(entries, 1):
762 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
764 'n_entries': n_entries,
765 'playlist': playlist,
766 'playlist_id': ie_result.get('id'),
767 'playlist_title': ie_result.get('title'),
768 'playlist_index': i + playliststart,
769 'extractor': ie_result['extractor'],
770 'webpage_url': ie_result['webpage_url'],
771 'webpage_url_basename': url_basename(ie_result['webpage_url']),
772 'extractor_key': ie_result['extractor_key'],
775 reason = self._match_entry(entry)
776 if reason is not None:
777 self.to_screen('[download] ' + reason)
780 entry_result = self.process_ie_result(entry,
783 playlist_results.append(entry_result)
784 ie_result['entries'] = playlist_results
786 elif result_type == 'compat_list':
788 'Extractor %s returned a compat_list result. '
789 'It needs to be updated.' % ie_result.get('extractor'))
795 'extractor': ie_result['extractor'],
796 'webpage_url': ie_result['webpage_url'],
797 'webpage_url_basename': url_basename(ie_result['webpage_url']),
798 'extractor_key': ie_result['extractor_key'],
802 ie_result['entries'] = [
803 self.process_ie_result(_fixup(r), download, extra_info)
804 for r in ie_result['entries']
808 raise Exception('Invalid result type: %s' % result_type)
810 def _apply_format_filter(self, format_spec, available_formats):
811 " Returns a tuple of the remaining format_spec and filtered formats "
821 operator_rex = re.compile(r'''(?x)\s*\[
822 (?P<key>width|height|tbr|abr|vbr|filesize)
823 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
824 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
826 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
827 m = operator_rex.search(format_spec)
829 raise ValueError('Invalid format specification %r' % format_spec)
832 comparison_value = int(m.group('value'))
834 comparison_value = parse_filesize(m.group('value'))
835 if comparison_value is None:
836 comparison_value = parse_filesize(m.group('value') + 'B')
837 if comparison_value is None:
839 'Invalid value %r in format specification %r' % (
840 m.group('value'), format_spec))
841 op = OPERATORS[m.group('op')]
844 actual_value = f.get(m.group('key'))
845 if actual_value is None:
846 return m.group('none_inclusive')
847 return op(actual_value, comparison_value)
848 new_formats = [f for f in available_formats if _filter(f)]
850 new_format_spec = format_spec[:-len(m.group(0))]
851 if not new_format_spec:
852 new_format_spec = 'best'
854 return (new_format_spec, new_formats)
856 def select_format(self, format_spec, available_formats):
857 while format_spec.endswith(']'):
858 format_spec, available_formats = self._apply_format_filter(
859 format_spec, available_formats)
860 if not available_formats:
863 if format_spec == 'best' or format_spec is None:
864 return available_formats[-1]
865 elif format_spec == 'worst':
866 return available_formats[0]
867 elif format_spec == 'bestaudio':
869 f for f in available_formats
870 if f.get('vcodec') == 'none']
872 return audio_formats[-1]
873 elif format_spec == 'worstaudio':
875 f for f in available_formats
876 if f.get('vcodec') == 'none']
878 return audio_formats[0]
879 elif format_spec == 'bestvideo':
881 f for f in available_formats
882 if f.get('acodec') == 'none']
884 return video_formats[-1]
885 elif format_spec == 'worstvideo':
887 f for f in available_formats
888 if f.get('acodec') == 'none']
890 return video_formats[0]
892 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
893 if format_spec in extensions:
894 filter_f = lambda f: f['ext'] == format_spec
896 filter_f = lambda f: f['format_id'] == format_spec
897 matches = list(filter(filter_f, available_formats))
902 def _calc_headers(self, info_dict):
903 res = std_headers.copy()
905 add_headers = info_dict.get('http_headers')
907 res.update(add_headers)
909 cookies = self._calc_cookies(info_dict)
911 res['Cookie'] = cookies
915 def _calc_cookies(self, info_dict):
916 class _PseudoRequest(object):
917 def __init__(self, url):
920 self.unverifiable = False
922 def add_unredirected_header(self, k, v):
925 def get_full_url(self):
928 def is_unverifiable(self):
929 return self.unverifiable
931 def has_header(self, h):
932 return h in self.headers
934 pr = _PseudoRequest(info_dict['url'])
935 self.cookiejar.add_cookie_header(pr)
936 return pr.headers.get('Cookie')
938 def process_video_result(self, info_dict, download=True):
939 assert info_dict.get('_type', 'video') == 'video'
941 if 'id' not in info_dict:
942 raise ExtractorError('Missing "id" field in extractor result')
943 if 'title' not in info_dict:
944 raise ExtractorError('Missing "title" field in extractor result')
946 if 'playlist' not in info_dict:
947 # It isn't part of a playlist
948 info_dict['playlist'] = None
949 info_dict['playlist_index'] = None
951 thumbnails = info_dict.get('thumbnails')
952 if thumbnails is None:
953 thumbnail = info_dict.get('thumbnail')
955 thumbnails = [{'url': thumbnail}]
957 thumbnails.sort(key=lambda t: (
958 t.get('preference'), t.get('width'), t.get('height'),
959 t.get('id'), t.get('url')))
961 if 'width' in t and 'height' in t:
962 t['resolution'] = '%dx%d' % (t['width'], t['height'])
964 if thumbnails and 'thumbnail' not in info_dict:
965 info_dict['thumbnail'] = thumbnails[-1]['url']
967 if 'display_id' not in info_dict and 'id' in info_dict:
968 info_dict['display_id'] = info_dict['id']
970 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
971 # Working around negative timestamps in Windows
972 # (see http://bugs.python.org/issue1646728)
973 if info_dict['timestamp'] < 0 and os.name == 'nt':
974 info_dict['timestamp'] = 0
975 upload_date = datetime.datetime.utcfromtimestamp(
976 info_dict['timestamp'])
977 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
979 # This extractors handle format selection themselves
980 if info_dict['extractor'] in ['Youku']:
982 self.process_info(info_dict)
985 # We now pick which formats have to be downloaded
986 if info_dict.get('formats') is None:
987 # There's only one format available
988 formats = [info_dict]
990 formats = info_dict['formats']
993 raise ExtractorError('No video formats found!')
995 # We check that all the formats have the format and format_id fields
996 for i, format in enumerate(formats):
997 if 'url' not in format:
998 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1000 if format.get('format_id') is None:
1001 format['format_id'] = compat_str(i)
1002 if format.get('format') is None:
1003 format['format'] = '{id} - {res}{note}'.format(
1004 id=format['format_id'],
1005 res=self.format_resolution(format),
1006 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1008 # Automatically determine file extension if missing
1009 if 'ext' not in format:
1010 format['ext'] = determine_ext(format['url']).lower()
1011 # Add HTTP headers, so that external programs can use them from the
1013 full_format_info = info_dict.copy()
1014 full_format_info.update(format)
1015 format['http_headers'] = self._calc_headers(full_format_info)
1017 format_limit = self.params.get('format_limit', None)
1019 formats = list(takewhile_inclusive(
1020 lambda f: f['format_id'] != format_limit, formats
1023 # TODO Central sorting goes here
1025 if formats[0] is not info_dict:
1026 # only set the 'formats' fields if the original info_dict list them
1027 # otherwise we end up with a circular reference, the first (and unique)
1028 # element in the 'formats' field in info_dict is info_dict itself,
1029 # wich can't be exported to json
1030 info_dict['formats'] = formats
1031 if self.params.get('listformats'):
1032 self.list_formats(info_dict)
1034 if self.params.get('list_thumbnails'):
1035 self.list_thumbnails(info_dict)
1038 req_format = self.params.get('format')
1039 if req_format is None:
1041 formats_to_download = []
1042 # The -1 is for supporting YoutubeIE
1043 if req_format in ('-1', 'all'):
1044 formats_to_download = formats
1046 for rfstr in req_format.split(','):
1047 # We can accept formats requested in the format: 34/5/best, we pick
1048 # the first that is available, starting from left
1049 req_formats = rfstr.split('/')
1050 for rf in req_formats:
1051 if re.match(r'.+?\+.+?', rf) is not None:
1052 # Two formats have been requested like '137+139'
1053 format_1, format_2 = rf.split('+')
1054 formats_info = (self.select_format(format_1, formats),
1055 self.select_format(format_2, formats))
1056 if all(formats_info):
1057 # The first format must contain the video and the
1059 if formats_info[0].get('vcodec') == 'none':
1060 self.report_error('The first format must '
1061 'contain the video, try using '
1062 '"-f %s+%s"' % (format_2, format_1))
1065 formats_info[0]['ext']
1066 if self.params.get('merge_output_format') is None
1067 else self.params['merge_output_format'])
1069 'requested_formats': formats_info,
1071 'ext': formats_info[0]['ext'],
1072 'width': formats_info[0].get('width'),
1073 'height': formats_info[0].get('height'),
1074 'resolution': formats_info[0].get('resolution'),
1075 'fps': formats_info[0].get('fps'),
1076 'vcodec': formats_info[0].get('vcodec'),
1077 'vbr': formats_info[0].get('vbr'),
1078 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1079 'acodec': formats_info[1].get('acodec'),
1080 'abr': formats_info[1].get('abr'),
1084 selected_format = None
1086 selected_format = self.select_format(rf, formats)
1087 if selected_format is not None:
1088 formats_to_download.append(selected_format)
1090 if not formats_to_download:
1091 raise ExtractorError('requested format not available',
1095 if len(formats_to_download) > 1:
1096 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1097 for format in formats_to_download:
1098 new_info = dict(info_dict)
1099 new_info.update(format)
1100 self.process_info(new_info)
1101 # We update the info dict with the best quality format (backwards compatibility)
1102 info_dict.update(formats_to_download[-1])
1105 def process_info(self, info_dict):
1106 """Process a single resolved IE result."""
1108 assert info_dict.get('_type', 'video') == 'video'
1110 max_downloads = self.params.get('max_downloads')
1111 if max_downloads is not None:
1112 if self._num_downloads >= int(max_downloads):
1113 raise MaxDownloadsReached()
1115 info_dict['fulltitle'] = info_dict['title']
1116 if len(info_dict['title']) > 200:
1117 info_dict['title'] = info_dict['title'][:197] + '...'
1119 # Keep for backwards compatibility
1120 info_dict['stitle'] = info_dict['title']
1122 if 'format' not in info_dict:
1123 info_dict['format'] = info_dict['ext']
1125 reason = self._match_entry(info_dict)
1126 if reason is not None:
1127 self.to_screen('[download] ' + reason)
1130 self._num_downloads += 1
1132 filename = self.prepare_filename(info_dict)
1135 if self.params.get('forcetitle', False):
1136 self.to_stdout(info_dict['fulltitle'])
1137 if self.params.get('forceid', False):
1138 self.to_stdout(info_dict['id'])
1139 if self.params.get('forceurl', False):
1140 if info_dict.get('requested_formats') is not None:
1141 for f in info_dict['requested_formats']:
1142 self.to_stdout(f['url'] + f.get('play_path', ''))
1144 # For RTMP URLs, also include the playpath
1145 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1146 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1147 self.to_stdout(info_dict['thumbnail'])
1148 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1149 self.to_stdout(info_dict['description'])
1150 if self.params.get('forcefilename', False) and filename is not None:
1151 self.to_stdout(filename)
1152 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1153 self.to_stdout(formatSeconds(info_dict['duration']))
1154 if self.params.get('forceformat', False):
1155 self.to_stdout(info_dict['format'])
1156 if self.params.get('forcejson', False):
1157 info_dict['_filename'] = filename
1158 self.to_stdout(json.dumps(info_dict))
1159 if self.params.get('dump_single_json', False):
1160 info_dict['_filename'] = filename
1162 # Do nothing else if in simulate mode
1163 if self.params.get('simulate', False):
1166 if filename is None:
1170 dn = os.path.dirname(encodeFilename(filename))
1171 if dn and not os.path.exists(dn):
1173 except (OSError, IOError) as err:
1174 self.report_error('unable to create directory ' + compat_str(err))
1177 if self.params.get('writedescription', False):
1178 descfn = filename + '.description'
1179 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1180 self.to_screen('[info] Video description is already present')
1181 elif info_dict.get('description') is None:
1182 self.report_warning('There\'s no description to write.')
1185 self.to_screen('[info] Writing video description to: ' + descfn)
1186 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1187 descfile.write(info_dict['description'])
1188 except (OSError, IOError):
1189 self.report_error('Cannot write description file ' + descfn)
1192 if self.params.get('writeannotations', False):
1193 annofn = filename + '.annotations.xml'
1194 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1195 self.to_screen('[info] Video annotations are already present')
1198 self.to_screen('[info] Writing video annotations to: ' + annofn)
1199 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1200 annofile.write(info_dict['annotations'])
1201 except (KeyError, TypeError):
1202 self.report_warning('There are no annotations to write.')
1203 except (OSError, IOError):
1204 self.report_error('Cannot write annotations file: ' + annofn)
1207 subtitles_are_requested = any([self.params.get('writesubtitles', False),
1208 self.params.get('writeautomaticsub')])
1210 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1211 # subtitles download errors are already managed as troubles in relevant IE
1212 # that way it will silently go on when used with unsupporting IE
1213 subtitles = info_dict['subtitles']
1214 sub_format = self.params.get('subtitlesformat', 'srt')
1215 for sub_lang in subtitles.keys():
1216 sub = subtitles[sub_lang]
1220 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1221 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1222 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1224 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1225 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1227 except (OSError, IOError):
1228 self.report_error('Cannot write subtitles file ' + sub_filename)
1231 if self.params.get('writeinfojson', False):
1232 infofn = os.path.splitext(filename)[0] + '.info.json'
1233 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1234 self.to_screen('[info] Video description metadata is already present')
1236 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1238 write_json_file(info_dict, infofn)
1239 except (OSError, IOError):
1240 self.report_error('Cannot write metadata to JSON file ' + infofn)
1243 self._write_thumbnails(info_dict, filename)
1245 if not self.params.get('skip_download', False):
1248 fd = get_suitable_downloader(info, self.params)(self, self.params)
1249 for ph in self._progress_hooks:
1250 fd.add_progress_hook(ph)
1251 if self.params.get('verbose'):
1252 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1253 return fd.download(name, info)
1254 if info_dict.get('requested_formats') is not None:
1257 merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1258 if not merger._executable:
1260 self.report_warning('You have requested multiple '
1261 'formats but ffmpeg or avconv are not installed.'
1262 ' The formats won\'t be merged')
1264 postprocessors = [merger]
1265 for f in info_dict['requested_formats']:
1266 new_info = dict(info_dict)
1268 fname = self.prepare_filename(new_info)
1269 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1270 downloaded.append(fname)
1271 partial_success = dl(fname, new_info)
1272 success = success and partial_success
1273 info_dict['__postprocessors'] = postprocessors
1274 info_dict['__files_to_merge'] = downloaded
1276 # Just a single file
1277 success = dl(filename, info_dict)
1278 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1279 self.report_error('unable to download video data: %s' % str(err))
1281 except (OSError, IOError) as err:
1282 raise UnavailableVideoError(err)
1283 except (ContentTooShortError, ) as err:
1284 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1289 fixup_policy = self.params.get('fixup')
1290 if fixup_policy is None:
1291 fixup_policy = 'detect_or_warn'
1293 stretched_ratio = info_dict.get('stretched_ratio')
1294 if stretched_ratio is not None and stretched_ratio != 1:
1295 if fixup_policy == 'warn':
1296 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1297 info_dict['id'], stretched_ratio))
1298 elif fixup_policy == 'detect_or_warn':
1299 stretched_pp = FFmpegFixupStretchedPP(self)
1300 if stretched_pp.available:
1301 info_dict.setdefault('__postprocessors', [])
1302 info_dict['__postprocessors'].append(stretched_pp)
1304 self.report_warning(
1305 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1306 info_dict['id'], stretched_ratio))
1308 assert fixup_policy in ('ignore', 'never')
1310 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1311 if fixup_policy == 'warn':
1312 self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1314 elif fixup_policy == 'detect_or_warn':
1315 fixup_pp = FFmpegFixupM4aPP(self)
1316 if fixup_pp.available:
1317 info_dict.setdefault('__postprocessors', [])
1318 info_dict['__postprocessors'].append(fixup_pp)
1320 self.report_warning(
1321 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1324 assert fixup_policy in ('ignore', 'never')
1327 self.post_process(filename, info_dict)
1328 except (PostProcessingError) as err:
1329 self.report_error('postprocessing: %s' % str(err))
1331 self.record_download_archive(info_dict)
1333 def download(self, url_list):
1334 """Download a given list of URLs."""
1335 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1336 if (len(url_list) > 1 and
1338 and self.params.get('max_downloads') != 1):
1339 raise SameFileError(outtmpl)
1341 for url in url_list:
1343 # It also downloads the videos
1344 res = self.extract_info(url)
1345 except UnavailableVideoError:
1346 self.report_error('unable to download video')
1347 except MaxDownloadsReached:
1348 self.to_screen('[info] Maximum number of downloaded files reached.')
1351 if self.params.get('dump_single_json', False):
1352 self.to_stdout(json.dumps(res))
1354 return self._download_retcode
1356 def download_with_info_file(self, info_filename):
1357 with io.open(info_filename, 'r', encoding='utf-8') as f:
1360 self.process_ie_result(info, download=True)
1361 except DownloadError:
1362 webpage_url = info.get('webpage_url')
1363 if webpage_url is not None:
1364 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1365 return self.download([webpage_url])
1368 return self._download_retcode
1370 def post_process(self, filename, ie_info):
1371 """Run all the postprocessors on the given file."""
1372 info = dict(ie_info)
1373 info['filepath'] = filename
1375 if ie_info.get('__postprocessors') is not None:
1376 pps_chain.extend(ie_info['__postprocessors'])
1377 pps_chain.extend(self._pps)
1378 for pp in pps_chain:
1380 old_filename = info['filepath']
1382 keep_video_wish, info = pp.run(info)
1383 if keep_video_wish is not None:
1385 keep_video = keep_video_wish
1386 elif keep_video is None:
1387 # No clear decision yet, let IE decide
1388 keep_video = keep_video_wish
1389 except PostProcessingError as e:
1390 self.report_error(e.msg)
1391 if keep_video is False and not self.params.get('keepvideo', False):
1393 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1394 os.remove(encodeFilename(old_filename))
1395 except (IOError, OSError):
1396 self.report_warning('Unable to remove downloaded video file')
1398 def _make_archive_id(self, info_dict):
1399 # Future-proof against any change in case
1400 # and backwards compatibility with prior versions
1401 extractor = info_dict.get('extractor_key')
1402 if extractor is None:
1403 if 'id' in info_dict:
1404 extractor = info_dict.get('ie_key') # key in a playlist
1405 if extractor is None:
1406 return None # Incomplete video information
1407 return extractor.lower() + ' ' + info_dict['id']
1409 def in_download_archive(self, info_dict):
1410 fn = self.params.get('download_archive')
1414 vid_id = self._make_archive_id(info_dict)
1416 return False # Incomplete video information
1419 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1420 for line in archive_file:
1421 if line.strip() == vid_id:
1423 except IOError as ioe:
1424 if ioe.errno != errno.ENOENT:
1428 def record_download_archive(self, info_dict):
1429 fn = self.params.get('download_archive')
1432 vid_id = self._make_archive_id(info_dict)
1434 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1435 archive_file.write(vid_id + '\n')
1438 def format_resolution(format, default='unknown'):
1439 if format.get('vcodec') == 'none':
1441 if format.get('resolution') is not None:
1442 return format['resolution']
1443 if format.get('height') is not None:
1444 if format.get('width') is not None:
1445 res = '%sx%s' % (format['width'], format['height'])
1447 res = '%sp' % format['height']
1448 elif format.get('width') is not None:
1449 res = '?x%d' % format['width']
1454 def _format_note(self, fdict):
1456 if fdict.get('ext') in ['f4f', 'f4m']:
1457 res += '(unsupported) '
1458 if fdict.get('format_note') is not None:
1459 res += fdict['format_note'] + ' '
1460 if fdict.get('tbr') is not None:
1461 res += '%4dk ' % fdict['tbr']
1462 if fdict.get('container') is not None:
1465 res += '%s container' % fdict['container']
1466 if (fdict.get('vcodec') is not None and
1467 fdict.get('vcodec') != 'none'):
1470 res += fdict['vcodec']
1471 if fdict.get('vbr') is not None:
1473 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1475 if fdict.get('vbr') is not None:
1476 res += '%4dk' % fdict['vbr']
1477 if fdict.get('fps') is not None:
1478 res += ', %sfps' % fdict['fps']
1479 if fdict.get('acodec') is not None:
1482 if fdict['acodec'] == 'none':
1485 res += '%-5s' % fdict['acodec']
1486 elif fdict.get('abr') is not None:
1490 if fdict.get('abr') is not None:
1491 res += '@%3dk' % fdict['abr']
1492 if fdict.get('asr') is not None:
1493 res += ' (%5dHz)' % fdict['asr']
1494 if fdict.get('filesize') is not None:
1497 res += format_bytes(fdict['filesize'])
1498 elif fdict.get('filesize_approx') is not None:
1501 res += '~' + format_bytes(fdict['filesize_approx'])
1504 def list_formats(self, info_dict):
1505 def line(format, idlen=20):
1506 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1507 format['format_id'],
1509 self.format_resolution(format),
1510 self._format_note(format),
1513 formats = info_dict.get('formats', [info_dict])
1514 idlen = max(len('format code'),
1515 max(len(f['format_id']) for f in formats))
1517 line(f, idlen) for f in formats
1518 if f.get('preference') is None or f['preference'] >= -1000]
1519 if len(formats) > 1:
1520 formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1521 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1523 header_line = line({
1524 'format_id': 'format code', 'ext': 'extension',
1525 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1527 '[info] Available formats for %s:\n%s\n%s' %
1528 (info_dict['id'], header_line, '\n'.join(formats_s)))
1530 def list_thumbnails(self, info_dict):
1531 thumbnails = info_dict.get('thumbnails')
1533 tn_url = info_dict.get('thumbnail')
1535 thumbnails = [{'id': '0', 'url': tn_url}]
1538 '[info] No thumbnails present for %s' % info_dict['id'])
1542 '[info] Thumbnails for %s:' % info_dict['id'])
1543 self.to_screen(render_table(
1544 ['ID', 'width', 'height', 'URL'],
1545 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1547 def urlopen(self, req):
1548 """ Start an HTTP download """
1550 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1551 # always respected by websites, some tend to give out URLs with non percent-encoded
1552 # non-ASCII characters (see telemb.py, ard.py [#3412])
1553 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1554 # To work around aforementioned issue we will replace request's original URL with
1555 # percent-encoded one
1556 req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1557 url = req if req_is_string else req.get_full_url()
1558 url_escaped = escape_url(url)
1560 # Substitute URL if any change after escaping
1561 if url != url_escaped:
1565 req = compat_urllib_request.Request(
1566 url_escaped, data=req.data, headers=req.headers,
1567 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1569 return self._opener.open(req, timeout=self._socket_timeout)
1571 def print_debug_header(self):
1572 if not self.params.get('verbose'):
1575 if type('') is not compat_str:
1576 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1577 self.report_warning(
1578 'Your Python is broken! Update to a newer and supported version')
1580 stdout_encoding = getattr(
1581 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1583 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1584 locale.getpreferredencoding(),
1585 sys.getfilesystemencoding(),
1587 self.get_encoding()))
1588 write_string(encoding_str, encoding=None)
1590 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1592 sp = subprocess.Popen(
1593 ['git', 'rev-parse', '--short', 'HEAD'],
1594 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1595 cwd=os.path.dirname(os.path.abspath(__file__)))
1596 out, err = sp.communicate()
1597 out = out.decode().strip()
1598 if re.match('[0-9a-f]+', out):
1599 self._write_string('[debug] Git HEAD: ' + out + '\n')
1605 self._write_string('[debug] Python version %s - %s\n' % (
1606 platform.python_version(), platform_name()))
1608 exe_versions = FFmpegPostProcessor.get_versions()
1609 exe_versions['rtmpdump'] = rtmpdump_version()
1610 exe_str = ', '.join(
1612 for exe, v in sorted(exe_versions.items())
1617 self._write_string('[debug] exe versions: %s\n' % exe_str)
1620 for handler in self._opener.handlers:
1621 if hasattr(handler, 'proxies'):
1622 proxy_map.update(handler.proxies)
1623 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1625 if self.params.get('call_home', False):
1626 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1627 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1628 latest_version = self.urlopen(
1629 'https://yt-dl.org/latest/version').read().decode('utf-8')
1630 if version_tuple(latest_version) > version_tuple(__version__):
1631 self.report_warning(
1632 'You are using an outdated version (newest version: %s)! '
1633 'See https://yt-dl.org/update if you need help updating.' %
1636 def _setup_opener(self):
1637 timeout_val = self.params.get('socket_timeout')
1638 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1640 opts_cookiefile = self.params.get('cookiefile')
1641 opts_proxy = self.params.get('proxy')
1643 if opts_cookiefile is None:
1644 self.cookiejar = compat_cookiejar.CookieJar()
1646 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1648 if os.access(opts_cookiefile, os.R_OK):
1649 self.cookiejar.load()
1651 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1653 if opts_proxy is not None:
1654 if opts_proxy == '':
1657 proxies = {'http': opts_proxy, 'https': opts_proxy}
1659 proxies = compat_urllib_request.getproxies()
1660 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1661 if 'http' in proxies and 'https' not in proxies:
1662 proxies['https'] = proxies['http']
1663 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1665 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1666 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1667 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1668 opener = compat_urllib_request.build_opener(
1669 https_handler, proxy_handler, cookie_processor, ydlh)
1670 # Delete the default user-agent header, which would otherwise apply in
1671 # cases where our custom HTTP handler doesn't come into play
1672 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1673 opener.addheaders = []
1674 self._opener = opener
1676 def encode(self, s):
1677 if isinstance(s, bytes):
1678 return s # Already encoded
1681 return s.encode(self.get_encoding())
1682 except UnicodeEncodeError as err:
1683 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1686 def get_encoding(self):
1687 encoding = self.params.get('encoding')
1688 if encoding is None:
1689 encoding = preferredencoding()
1692 def _write_thumbnails(self, info_dict, filename):
1693 if self.params.get('writethumbnail', False):
1694 thumbnails = info_dict.get('thumbnails')
1696 thumbnails = [thumbnails[-1]]
1697 elif self.params.get('write_all_thumbnails', False):
1698 thumbnails = info_dict.get('thumbnails')
1703 # No thumbnails present, so return immediately
1706 for t in thumbnails:
1707 thumb_ext = determine_ext(t['url'], 'jpg')
1708 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1709 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1710 thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1712 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1713 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1714 (info_dict['extractor'], info_dict['id'], thumb_display_id))
1716 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1717 (info_dict['extractor'], info_dict['id'], thumb_display_id))
1719 uf = self.urlopen(t['url'])
1720 with open(thumb_filename, 'wb') as thumbf:
1721 shutil.copyfileobj(uf, thumbf)
1722 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1723 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1724 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1725 self.report_warning('Unable to download thumbnail "%s": %s' %
1726 (t['url'], compat_str(err)))