2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
33 compat_get_terminal_size,
38 compat_urllib_request,
57 PerRequestProxyHandler,
68 UnavailableVideoError,
78 from .cache import Cache
79 from .extractor import get_info_extractor, gen_extractors
80 from .downloader import get_suitable_downloader
81 from .downloader.rtmp import rtmpdump_version
82 from .postprocessor import (
84 FFmpegFixupStretchedPP,
89 from .version import __version__
92 class YoutubeDL(object):
95 YoutubeDL objects are the ones responsible of downloading the
96 actual video file and writing it to disk if the user has requested
97 it, among some other tasks. In most cases there should be one per
98 program. As, given a video URL, the downloader doesn't know how to
99 extract all the needed information, task that InfoExtractors do, it
100 has to pass the URL to one of them.
102 For this, YoutubeDL objects have a method that allows
103 InfoExtractors to be registered in a given order. When it is passed
104 a URL, the YoutubeDL object handles it to the first InfoExtractor it
105 finds that reports being able to handle it. The InfoExtractor extracts
106 all the information about the video or videos the URL refers to, and
107 YoutubeDL process the extracted information, possibly using a File
108 Downloader to download the video.
110 YoutubeDL objects accept a lot of parameters. In order not to saturate
111 the object constructor with arguments, it receives a dictionary of
112 options instead. These options are available through the params
113 attribute for the InfoExtractors to use. The YoutubeDL also
114 registers itself as the downloader in charge for the InfoExtractors
115 that are added to it, so this is a "mutual registration".
119 username: Username for authentication purposes.
120 password: Password for authentication purposes.
121     videopassword:     Password for access a video.
122 usenetrc: Use netrc for authentication instead.
123 verbose: Print additional info to stdout.
124 quiet: Do not print messages to stdout.
125 no_warnings: Do not print out anything for warnings.
126 forceurl: Force printing final URL.
127 forcetitle: Force printing title.
128 forceid: Force printing ID.
129 forcethumbnail: Force printing thumbnail URL.
130 forcedescription: Force printing description.
131 forcefilename: Force printing final filename.
132 forceduration: Force printing duration.
133 forcejson: Force printing info_dict as JSON.
134 dump_single_json: Force printing the info_dict of the whole playlist
135 (or video) as a single JSON line.
136 simulate: Do not download the video files.
137 format: Video format code. See options.py for more information.
138 format_limit: Highest quality format to try.
139 outtmpl: Template for output names.
140 restrictfilenames: Do not allow "&" and spaces in file names
141 ignoreerrors: Do not stop on download errors.
142 nooverwrites: Prevent overwriting files.
143 playliststart: Playlist item to start at.
144 playlistend: Playlist item to end at.
145 playlist_items: Specific indices of playlist to download.
146 playlistreverse: Download playlist items in reverse order.
147 matchtitle: Download only matching titles.
148 rejecttitle: Reject downloads for matching titles.
149 logger: Log messages to a logging.Logger instance.
150 logtostderr: Log messages to stderr instead of stdout.
151 writedescription: Write the video description to a .description file
152 writeinfojson: Write the video description to a .info.json file
153 writeannotations: Write the video annotations to a .annotations.xml file
154 writethumbnail: Write the thumbnail image to a file
155 write_all_thumbnails: Write all thumbnail formats to files
156 writesubtitles: Write the video subtitles to a file
157 writeautomaticsub: Write the automatic subtitles to a file
158 allsubtitles: Downloads all the subtitles of the video
159 (requires writesubtitles or writeautomaticsub)
160 listsubtitles: Lists all available subtitles for the video
161 subtitlesformat: The format code for subtitles
162 subtitleslangs: List of languages of the subtitles to download
163 keepvideo: Keep the video file after post-processing
164 daterange: A DateRange object, download only if the upload_date is in the range.
165 skip_download: Skip the actual download of the video file
166 cachedir: Location of the cache files in the filesystem.
167 False to disable filesystem cache.
168 noplaylist: Download single video instead of a playlist if in doubt.
169 age_limit: An integer representing the user's age in years.
170 Unsuitable videos for the given age are skipped.
171 min_views: An integer representing the minimum view count the video
172 must have in order to not be skipped.
173 Videos without view count information are always
174 downloaded. None for no limit.
175 max_views: An integer representing the maximum view count.
176 Videos that are more popular than that are not
178 Videos without view count information are always
179 downloaded. None for no limit.
180 download_archive: File name of a file where all downloads are recorded.
181 Videos already present in the file are not downloaded
183 cookiefile: File name where cookies should be read from and dumped to.
184 nocheckcertificate:Do not verify SSL certificates
185 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
186 At the moment, this is only supported by YouTube.
187 proxy: URL of the proxy server to use
188 cn_verification_proxy: URL of the proxy to use for IP address verification
189 on Chinese sites. (Experimental)
190 socket_timeout: Time to wait for unresponsive hosts, in seconds
191 bidi_workaround: Work around buggy terminals without bidirectional text
192                        support, using fribidi
193 debug_printtraffic:Print out sent and received HTTP traffic
194 include_ads: Download ads as well
195 default_search: Prepend this string if an input url is not valid.
196 'auto' for elaborate guessing
197 encoding: Use this encoding instead of the system-specified.
198 extract_flat: Do not resolve URLs, return the immediate result.
199 Pass in 'in_playlist' to only show this behavior for
201 postprocessors: A list of dictionaries, each with an entry
202 * key: The name of the postprocessor. See
203 youtube_dl/postprocessor/__init__.py for a list.
204 as well as any further keyword arguments for the
206 progress_hooks: A list of functions that get called on download
207 progress, with a dictionary with the entries
208 * status: One of "downloading", "error", or "finished".
209 Check this first and ignore unknown values.
211 If status is one of "downloading", or "finished", the
212 following properties may also be present:
213 * filename: The final filename (always present)
214 * tmpfilename: The filename we're currently writing to
215 * downloaded_bytes: Bytes on disk
216 * total_bytes: Size of the whole file, None if unknown
217 * total_bytes_estimate: Guess of the eventual file size,
219 * elapsed: The number of seconds since download started.
220 * eta: The estimated time in seconds, None if unknown
221 * speed: The download speed in bytes/second, None if
223 * fragment_index: The counter of the currently
224 downloaded video fragment.
225 * fragment_count: The number of fragments (= individual
226 files that will be merged)
228 Progress hooks are guaranteed to be called at least once
229 (with status "finished") if the download is successful.
230 merge_output_format: Extension to use when merging formats.
231 fixup: Automatically correct known faults of the file.
233 - "never": do nothing
234 - "warn": only emit a warning
235 - "detect_or_warn": check whether we can do anything
236 about it, warn otherwise (default)
237 source_address: (Experimental) Client-side IP address to bind to.
238 call_home: Boolean, true iff we are allowed to contact the
239 youtube-dl servers for debugging.
240 sleep_interval: Number of seconds to sleep before each download.
241 listformats: Print an overview of available video formats and exit.
242 list_thumbnails: Print a table of all thumbnails and exit.
243 match_filter: A function that gets called with the info_dict of
245 If it returns a message, the video is ignored.
246 If it returns None, the video is downloaded.
247 match_filter_func in utils.py is one example for this.
248 no_color: Do not emit color codes in output.
250 The following options determine which downloader is picked:
251 external_downloader: Executable of the external downloader to call.
252 None or unset for standard (built-in) downloader.
253 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
255 The following parameters are not used by YoutubeDL itself, they are used by
256 the downloader (see youtube_dl/downloader/common.py):
257 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
258 noresizebuffer, retries, continuedl, noprogress, consoletitle,
259 xattr_set_filesize, external_downloader_args.
261 The following options are used by the post processors:
262 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
263 otherwise prefer avconv.
264 exec_cmd: Arbitrary command to run after downloading
270 _download_retcode = None
271 _num_downloads = None
274 def __init__(self, params=None, auto_init=True):
275 """Create a FileDownloader object with the given options."""
279 self._ies_instances = {}
281 self._progress_hooks = []
282 self._download_retcode = 0
283 self._num_downloads = 0
284 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
285 self._err_file = sys.stderr
287 self.cache = Cache(self)
289 if params.get('bidi_workaround', False):
292 master, slave = pty.openpty()
293 width = compat_get_terminal_size().columns
297 width_args = ['-w', str(width)]
299 stdin=subprocess.PIPE,
301 stderr=self._err_file)
303 self._output_process = subprocess.Popen(
304 ['bidiv'] + width_args, **sp_kwargs
307 self._output_process = subprocess.Popen(
308 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
309 self._output_channel = os.fdopen(master, 'rb')
310 except OSError as ose:
312 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
316 if (sys.version_info >= (3,) and sys.platform != 'win32' and
317 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
318 not params.get('restrictfilenames', False)):
319 # On Python 3, the Unicode filesystem API will throw errors (#1474)
321 'Assuming --restrict-filenames since file system encoding '
322 'cannot encode all characters. '
323 'Set the LC_ALL environment variable to fix this.')
324 self.params['restrictfilenames'] = True
326 if isinstance(params.get('outtmpl'), bytes):
328 'Parameter outtmpl is bytes, but should be a unicode string. '
329 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
331 if '%(stitle)s' in self.params.get('outtmpl', ''):
332 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
337 self.print_debug_header()
338 self.add_default_info_extractors()
340 for pp_def_raw in self.params.get('postprocessors', []):
341 pp_class = get_postprocessor(pp_def_raw['key'])
342 pp_def = dict(pp_def_raw)
344 pp = pp_class(self, **compat_kwargs(pp_def))
345 self.add_post_processor(pp)
347 for ph in self.params.get('progress_hooks', []):
348 self.add_progress_hook(ph)
350 def warn_if_short_id(self, argv):
351 # short YouTube ID starting with dash?
353 i for i, a in enumerate(argv)
354 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
358 [a for i, a in enumerate(argv) if i not in idxs] +
359 ['--'] + [argv[i] for i in idxs]
362 'Long argument string detected. '
363 'Use -- to separate parameters and URLs, like this:\n%s\n' %
364 args_to_str(correct_argv))
366 def add_info_extractor(self, ie):
367 """Add an InfoExtractor object to the end of the list."""
369 self._ies_instances[ie.ie_key()] = ie
370 ie.set_downloader(self)
372 def get_info_extractor(self, ie_key):
374 Get an instance of an IE with name ie_key, it will try to get one from
375 the _ies list, if there's no instance it will create a new one and add
376 it to the extractor list.
378 ie = self._ies_instances.get(ie_key)
380 ie = get_info_extractor(ie_key)()
381 self.add_info_extractor(ie)
384 def add_default_info_extractors(self):
386 Add the InfoExtractors returned by gen_extractors to the end of the list
388 for ie in gen_extractors():
389 self.add_info_extractor(ie)
391 def add_post_processor(self, pp):
392 """Add a PostProcessor object to the end of the chain."""
394 pp.set_downloader(self)
396 def add_progress_hook(self, ph):
397 """Add the progress hook (currently only for the file downloader)"""
398 self._progress_hooks.append(ph)
400 def _bidi_workaround(self, message):
401 if not hasattr(self, '_output_channel'):
404 assert hasattr(self, '_output_process')
405 assert isinstance(message, compat_str)
406 line_count = message.count('\n') + 1
407 self._output_process.stdin.write((message + '\n').encode('utf-8'))
408 self._output_process.stdin.flush()
409 res = ''.join(self._output_channel.readline().decode('utf-8')
410 for _ in range(line_count))
411 return res[:-len('\n')]
    def to_screen(self, message, skip_eol=False):
        """Print message to stdout if not in quiet mode."""
        # Thin wrapper over to_stdout; check_quiet=True makes the user's
        # 'quiet' option suppress this output.
        return self.to_stdout(message, skip_eol, check_quiet=True)
    def _write_string(self, s, out=None):
        # Delegate to the module-level write_string helper, forwarding the
        # user-selected output encoding (params['encoding']) if one was set.
        write_string(s, out=out, encoding=self.params.get('encoding'))
420 def to_stdout(self, message, skip_eol=False, check_quiet=False):
421 """Print message to stdout if not in quiet mode."""
422 if self.params.get('logger'):
423 self.params['logger'].debug(message)
424 elif not check_quiet or not self.params.get('quiet', False):
425 message = self._bidi_workaround(message)
426 terminator = ['\n', ''][skip_eol]
427 output = message + terminator
429 self._write_string(output, self._screen_file)
431 def to_stderr(self, message):
432 """Print message to stderr."""
433 assert isinstance(message, compat_str)
434 if self.params.get('logger'):
435 self.params['logger'].error(message)
437 message = self._bidi_workaround(message)
438 output = message + '\n'
439 self._write_string(output, self._err_file)
441 def to_console_title(self, message):
442 if not self.params.get('consoletitle', False):
444 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
445 # c_wchar_p() might not be necessary if `message` is
446 # already of type unicode()
447 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
448 elif 'TERM' in os.environ:
449 self._write_string('\033]0;%s\007' % message, self._screen_file)
451 def save_console_title(self):
452 if not self.params.get('consoletitle', False):
454 if 'TERM' in os.environ:
455 # Save the title on stack
456 self._write_string('\033[22;0t', self._screen_file)
458 def restore_console_title(self):
459 if not self.params.get('consoletitle', False):
461 if 'TERM' in os.environ:
462 # Restore the title from stack
463 self._write_string('\033[23;0t', self._screen_file)
466 self.save_console_title()
469 def __exit__(self, *args):
470 self.restore_console_title()
472 if self.params.get('cookiefile') is not None:
473 self.cookiejar.save()
475 def trouble(self, message=None, tb=None):
476 """Determine action to take when a download problem appears.
478 Depending on if the downloader has been configured to ignore
479 download errors or not, this method may throw an exception or
480 not when errors are found, after printing the message.
482 tb, if given, is additional traceback information.
484 if message is not None:
485 self.to_stderr(message)
486 if self.params.get('verbose'):
488 if sys.exc_info()[0]: # if .trouble has been called from an except block
490 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
491 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
492 tb += compat_str(traceback.format_exc())
494 tb_data = traceback.format_list(traceback.extract_stack())
495 tb = ''.join(tb_data)
497 if not self.params.get('ignoreerrors', False):
498 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
499 exc_info = sys.exc_info()[1].exc_info
501 exc_info = sys.exc_info()
502 raise DownloadError(message, exc_info)
503 self._download_retcode = 1
505 def report_warning(self, message):
507 Print the message to stderr, it will be prefixed with 'WARNING:'
508 If stderr is a tty file the 'WARNING:' will be colored
510 if self.params.get('logger') is not None:
511 self.params['logger'].warning(message)
513 if self.params.get('no_warnings'):
515 if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
516 _msg_header = '\033[0;33mWARNING:\033[0m'
518 _msg_header = 'WARNING:'
519 warning_message = '%s %s' % (_msg_header, message)
520 self.to_stderr(warning_message)
522 def report_error(self, message, tb=None):
524 Do the same as trouble, but prefixes the message with 'ERROR:', colored
525 in red if stderr is a tty file.
527 if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
528 _msg_header = '\033[0;31mERROR:\033[0m'
530 _msg_header = 'ERROR:'
531 error_message = '%s %s' % (_msg_header, message)
532 self.trouble(error_message, tb)
534 def report_file_already_downloaded(self, file_name):
535 """Report file has already been fully downloaded."""
537 self.to_screen('[download] %s has already been downloaded' % file_name)
538 except UnicodeEncodeError:
539 self.to_screen('[download] The file has already been downloaded')
541 def prepare_filename(self, info_dict):
542 """Generate the output filename."""
544 template_dict = dict(info_dict)
546 template_dict['epoch'] = int(time.time())
547 autonumber_size = self.params.get('autonumber_size')
548 if autonumber_size is None:
550 autonumber_templ = '%0' + str(autonumber_size) + 'd'
551 template_dict['autonumber'] = autonumber_templ % self._num_downloads
552 if template_dict.get('playlist_index') is not None:
553 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
554 if template_dict.get('resolution') is None:
555 if template_dict.get('width') and template_dict.get('height'):
556 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
557 elif template_dict.get('height'):
558 template_dict['resolution'] = '%sp' % template_dict['height']
559 elif template_dict.get('width'):
560 template_dict['resolution'] = '?x%d' % template_dict['width']
562 sanitize = lambda k, v: sanitize_filename(
564 restricted=self.params.get('restrictfilenames'),
566 template_dict = dict((k, sanitize(k, v))
567 for k, v in template_dict.items()
569 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
571 outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
572 tmpl = compat_expanduser(outtmpl)
573 filename = tmpl % template_dict
574 # Temporary fix for #4787
575 # 'Treat' all problem characters by passing filename through preferredencoding
576 # to workaround encoding issues with subprocess on python2 @ Windows
577 if sys.version_info < (3, 0) and sys.platform == 'win32':
578 filename = encodeFilename(filename, True).decode(preferredencoding())
580 except ValueError as err:
581 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
584 def _match_entry(self, info_dict, incomplete):
585 """ Returns None iff the file should be downloaded """
587 video_title = info_dict.get('title', info_dict.get('id', 'video'))
588 if 'title' in info_dict:
589 # This can happen when we're just evaluating the playlist
590 title = info_dict['title']
591 matchtitle = self.params.get('matchtitle', False)
593 if not re.search(matchtitle, title, re.IGNORECASE):
594 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
595 rejecttitle = self.params.get('rejecttitle', False)
597 if re.search(rejecttitle, title, re.IGNORECASE):
598 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
599 date = info_dict.get('upload_date', None)
601 dateRange = self.params.get('daterange', DateRange())
602 if date not in dateRange:
603 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
604 view_count = info_dict.get('view_count', None)
605 if view_count is not None:
606 min_views = self.params.get('min_views')
607 if min_views is not None and view_count < min_views:
608 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
609 max_views = self.params.get('max_views')
610 if max_views is not None and view_count > max_views:
611 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
612 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
613 return 'Skipping "%s" because it is age restricted' % video_title
614 if self.in_download_archive(info_dict):
615 return '%s has already been recorded in archive' % video_title
618 match_filter = self.params.get('match_filter')
619 if match_filter is not None:
620 ret = match_filter(info_dict)
627 def add_extra_info(info_dict, extra_info):
628 '''Set the keys from extra_info in info dict if they are missing'''
629 for key, value in extra_info.items():
630 info_dict.setdefault(key, value)
632 def extract_info(self, url, download=True, ie_key=None, extra_info={},
635 Returns a list with a dictionary for each video we find.
636 If 'download', also downloads the videos.
637 extra_info is a dict containing the extra values to add to each result
641 ies = [self.get_info_extractor(ie_key)]
646 if not ie.suitable(url):
650 self.report_warning('The program functionality for this site has been marked as broken, '
651 'and will probably not work.')
654 ie_result = ie.extract(url)
655 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
657 if isinstance(ie_result, list):
658 # Backwards compatibility: old IE result format
660 '_type': 'compat_list',
661 'entries': ie_result,
663 self.add_default_extra_info(ie_result, ie, url)
665 return self.process_ie_result(ie_result, download, extra_info)
668 except ExtractorError as de: # An error we somewhat expected
669 self.report_error(compat_str(de), de.format_traceback())
671 except MaxDownloadsReached:
673 except Exception as e:
674 if self.params.get('ignoreerrors', False):
675 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
680 self.report_error('no suitable InfoExtractor for URL %s' % url)
682 def add_default_extra_info(self, ie_result, ie, url):
683 self.add_extra_info(ie_result, {
684 'extractor': ie.IE_NAME,
686 'webpage_url_basename': url_basename(url),
687 'extractor_key': ie.ie_key(),
690 def process_ie_result(self, ie_result, download=True, extra_info={}):
692 Take the result of the ie(may be modified) and resolve all unresolved
693 references (URLs, playlist items).
695 It will also download the videos if 'download'.
696 Returns the resolved ie_result.
699 result_type = ie_result.get('_type', 'video')
701 if result_type in ('url', 'url_transparent'):
702 extract_flat = self.params.get('extract_flat', False)
703 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
704 extract_flat is True):
705 if self.params.get('forcejson', False):
706 self.to_stdout(json.dumps(ie_result))
709 if result_type == 'video':
710 self.add_extra_info(ie_result, extra_info)
711 return self.process_video_result(ie_result, download=download)
712 elif result_type == 'url':
713 # We have to add extra_info to the results because it may be
714 # contained in a playlist
715 return self.extract_info(ie_result['url'],
717 ie_key=ie_result.get('ie_key'),
718 extra_info=extra_info)
719 elif result_type == 'url_transparent':
720 # Use the information from the embedding page
721 info = self.extract_info(
722 ie_result['url'], ie_key=ie_result.get('ie_key'),
723 extra_info=extra_info, download=False, process=False)
725 force_properties = dict(
726 (k, v) for k, v in ie_result.items() if v is not None)
727 for f in ('_type', 'url'):
728 if f in force_properties:
729 del force_properties[f]
730 new_result = info.copy()
731 new_result.update(force_properties)
733 assert new_result.get('_type') != 'url_transparent'
735 return self.process_ie_result(
736 new_result, download=download, extra_info=extra_info)
737 elif result_type == 'playlist' or result_type == 'multi_video':
738 # We process each entry in the playlist
739 playlist = ie_result.get('title', None) or ie_result.get('id', None)
740 self.to_screen('[download] Downloading playlist: %s' % playlist)
742 playlist_results = []
744 playliststart = self.params.get('playliststart', 1) - 1
745 playlistend = self.params.get('playlistend', None)
746 # For backwards compatibility, interpret -1 as whole list
747 if playlistend == -1:
750 playlistitems_str = self.params.get('playlist_items', None)
752 if playlistitems_str is not None:
753 def iter_playlistitems(format):
754 for string_segment in format.split(','):
755 if '-' in string_segment:
756 start, end = string_segment.split('-')
757 for item in range(int(start), int(end) + 1):
760 yield int(string_segment)
761 playlistitems = iter_playlistitems(playlistitems_str)
763 ie_entries = ie_result['entries']
764 if isinstance(ie_entries, list):
765 n_all_entries = len(ie_entries)
767 entries = [ie_entries[i - 1] for i in playlistitems]
769 entries = ie_entries[playliststart:playlistend]
770 n_entries = len(entries)
772 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
773 (ie_result['extractor'], playlist, n_all_entries, n_entries))
774 elif isinstance(ie_entries, PagedList):
777 for item in playlistitems:
778 entries.extend(ie_entries.getslice(
782 entries = ie_entries.getslice(
783 playliststart, playlistend)
784 n_entries = len(entries)
786 "[%s] playlist %s: Downloading %d videos" %
787 (ie_result['extractor'], playlist, n_entries))
790 entry_list = list(ie_entries)
791 entries = [entry_list[i - 1] for i in playlistitems]
793 entries = list(itertools.islice(
794 ie_entries, playliststart, playlistend))
795 n_entries = len(entries)
797 "[%s] playlist %s: Downloading %d videos" %
798 (ie_result['extractor'], playlist, n_entries))
800 if self.params.get('playlistreverse', False):
801 entries = entries[::-1]
803 for i, entry in enumerate(entries, 1):
804 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
806 'n_entries': n_entries,
807 'playlist': playlist,
808 'playlist_id': ie_result.get('id'),
809 'playlist_title': ie_result.get('title'),
810 'playlist_index': i + playliststart,
811 'extractor': ie_result['extractor'],
812 'webpage_url': ie_result['webpage_url'],
813 'webpage_url_basename': url_basename(ie_result['webpage_url']),
814 'extractor_key': ie_result['extractor_key'],
817 reason = self._match_entry(entry, incomplete=True)
818 if reason is not None:
819 self.to_screen('[download] ' + reason)
822 entry_result = self.process_ie_result(entry,
825 playlist_results.append(entry_result)
826 ie_result['entries'] = playlist_results
828 elif result_type == 'compat_list':
830 'Extractor %s returned a compat_list result. '
831 'It needs to be updated.' % ie_result.get('extractor'))
837 'extractor': ie_result['extractor'],
838 'webpage_url': ie_result['webpage_url'],
839 'webpage_url_basename': url_basename(ie_result['webpage_url']),
840 'extractor_key': ie_result['extractor_key'],
844 ie_result['entries'] = [
845 self.process_ie_result(_fixup(r), download, extra_info)
846 for r in ie_result['entries']
850 raise Exception('Invalid result type: %s' % result_type)
852 def _apply_format_filter(self, format_spec, available_formats):
853 " Returns a tuple of the remaining format_spec and filtered formats "
863 operator_rex = re.compile(r'''(?x)\s*\[
864 (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
865 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
866 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
868 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
869 m = operator_rex.search(format_spec)
872 comparison_value = int(m.group('value'))
874 comparison_value = parse_filesize(m.group('value'))
875 if comparison_value is None:
876 comparison_value = parse_filesize(m.group('value') + 'B')
877 if comparison_value is None:
879 'Invalid value %r in format specification %r' % (
880 m.group('value'), format_spec))
881 op = OPERATORS[m.group('op')]
888 str_operator_rex = re.compile(r'''(?x)\s*\[
889 \s*(?P<key>ext|acodec|vcodec|container|protocol)
890 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
891 \s*(?P<value>[a-zA-Z0-9_-]+)
893 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
894 m = str_operator_rex.search(format_spec)
896 comparison_value = m.group('value')
897 op = STR_OPERATORS[m.group('op')]
900 raise ValueError('Invalid format specification %r' % format_spec)
903 actual_value = f.get(m.group('key'))
904 if actual_value is None:
905 return m.group('none_inclusive')
906 return op(actual_value, comparison_value)
907 new_formats = [f for f in available_formats if _filter(f)]
909 new_format_spec = format_spec[:-len(m.group(0))]
910 if not new_format_spec:
911 new_format_spec = 'best'
913 return (new_format_spec, new_formats)
915 def select_format(self, format_spec, available_formats):
916 while format_spec.endswith(']'):
917 format_spec, available_formats = self._apply_format_filter(
918 format_spec, available_formats)
919 if not available_formats:
922 if format_spec == 'best' or format_spec is None:
923 return available_formats[-1]
924 elif format_spec == 'worst':
925 return available_formats[0]
926 elif format_spec == 'bestaudio':
928 f for f in available_formats
929 if f.get('vcodec') == 'none']
931 return audio_formats[-1]
932 elif format_spec == 'worstaudio':
934 f for f in available_formats
935 if f.get('vcodec') == 'none']
937 return audio_formats[0]
938 elif format_spec == 'bestvideo':
940 f for f in available_formats
941 if f.get('acodec') == 'none']
943 return video_formats[-1]
944 elif format_spec == 'worstvideo':
946 f for f in available_formats
947 if f.get('acodec') == 'none']
949 return video_formats[0]
951 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
952 if format_spec in extensions:
953 filter_f = lambda f: f['ext'] == format_spec
955 filter_f = lambda f: f['format_id'] == format_spec
956 matches = list(filter(filter_f, available_formats))
961 def _calc_headers(self, info_dict):
962 res = std_headers.copy()
964 add_headers = info_dict.get('http_headers')
966 res.update(add_headers)
968 cookies = self._calc_cookies(info_dict)
970 res['Cookie'] = cookies
    def _calc_cookies(self, info_dict):
        """Return the Cookie header value the cookiejar would send for info_dict['url'].

        Builds a throwaway request, lets the cookiejar attach its Cookie
        header to it, and returns that header (None when no cookies apply).
        """
        pr = compat_urllib_request.Request(info_dict['url'])
        self.cookiejar.add_cookie_header(pr)
        return pr.get_header('Cookie')
    def process_video_result(self, info_dict, download=True):
        """Normalize a single extracted video result and select formats.

        Validates required fields, normalizes thumbnails / upload_date /
        display_id / subtitles in place, then resolves the user's format
        request (including 'video+audio' merges) and hands each selected
        format on to process_info().
        """
        assert info_dict.get('_type', 'video') == 'video'
        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')
        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            # Promote a lone 'thumbnail' string to the list representation.
            thumbnail = info_dict.get('thumbnail')
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        # Sort worst-to-best so thumbnails[-1] is the preferred thumbnail.
        thumbnails.sort(key=lambda t: (
            t.get('preference'), t.get('width'), t.get('height'),
            t.get('id'), t.get('url')))
        for i, t in enumerate(thumbnails):
            if 'width' in t and 'height' in t:
                t['resolution'] = '%dx%d' % (t['width'], t['height'])
            if t.get('id') is None:
        if thumbnails and 'thumbnail' not in info_dict:
            info_dict['thumbnail'] = thumbnails[-1]['url']
        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']
        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around negative timestamps in Windows
            # (see http://bugs.python.org/issue1646728)
            if info_dict['timestamp'] < 0 and os.name == 'nt':
                info_dict['timestamp'] = 0
            upload_date = datetime.datetime.utcfromtimestamp(
                info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
            self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], info_dict.get('subtitles'),
            info_dict.get('automatic_captions'))
        # These extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
            self.process_info(info_dict)
        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
            formats = info_dict['formats']
            raise ExtractorError('No video formats found!')
        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)
            if format.get('format_id') is None:
                # Fall back to the list index as a stable format_id.
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()
            # Add HTTP headers, so that external programs can use them from the
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # --format-limit: drop every format better than the given one.
        format_limit = self.params.get('format_limit', None)
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats
        # TODO Central sorting goes here
        if formats[0] is not info_dict:
            # only set the 'formats' field if the original info_dict lists them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
        req_format = self.params.get('format')
        if req_format is None:
        formats_to_download = []
        if req_format == 'all':
            formats_to_download = formats
            # Comma-separated requests mean "download each of these".
            for rfstr in req_format.split(','):
                # We can accept formats requested in the format: 34/5/best, we pick
                # the first that is available, starting from left
                req_formats = rfstr.split('/')
                for rf in req_formats:
                    if re.match(r'.+?\+.+?', rf) is not None:
                        # Two formats have been requested like '137+139'
                        format_1, format_2 = rf.split('+')
                        formats_info = (self.select_format(format_1, formats),
                                        self.select_format(format_2, formats))
                        if all(formats_info):
                            # The first format must contain the video and the
                            if formats_info[0].get('vcodec') == 'none':
                                self.report_error('The first format must '
                                                  'contain the video, try using '
                                                  '"-f %s+%s"' % (format_2, format_1))
                            # Synthesize a merged pseudo-format: video fields
                            # from the first half, audio fields from the second.
                                formats_info[0]['ext']
                                if self.params.get('merge_output_format') is None
                                else self.params['merge_output_format'])
                                'requested_formats': formats_info,
                                'format': '%s+%s' % (formats_info[0].get('format'),
                                                     formats_info[1].get('format')),
                                'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                        formats_info[1].get('format_id')),
                                'width': formats_info[0].get('width'),
                                'height': formats_info[0].get('height'),
                                'resolution': formats_info[0].get('resolution'),
                                'fps': formats_info[0].get('fps'),
                                'vcodec': formats_info[0].get('vcodec'),
                                'vbr': formats_info[0].get('vbr'),
                                'stretched_ratio': formats_info[0].get('stretched_ratio'),
                                'acodec': formats_info[1].get('acodec'),
                                'abr': formats_info[1].get('abr'),
                        selected_format = None
                        selected_format = self.select_format(rf, formats)
                    if selected_format is not None:
                        formats_to_download.append(selected_format)
        if not formats_to_download:
            raise ExtractorError('requested format not available',
        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for format in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(format)
            self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
    def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
        """Select the requested subtitles and their format.

        Merges normal subtitles and automatic captions (normal ones win on
        a language clash), picks the requested languages, then for each
        language picks the best matching format per --sub-format.
        """
        if normal_subtitles and self.params.get('writesubtitles'):
            available_subs.update(normal_subtitles)
        if automatic_captions and self.params.get('writeautomaticsub'):
            for lang, cap_info in automatic_captions.items():
                # Real subtitles take precedence over automatic captions.
                if lang not in available_subs:
                    available_subs[lang] = cap_info
        if (not self.params.get('writesubtitles') and not
                self.params.get('writeautomaticsub') or not
        if self.params.get('allsubtitles', False):
            requested_langs = available_subs.keys()
            if self.params.get('subtitleslangs', False):
                requested_langs = self.params.get('subtitleslangs')
            elif 'en' in available_subs:
                requested_langs = ['en']
                requested_langs = [list(available_subs.keys())[0]]
        # --sub-format can be a '/'-separated preference list, e.g. 'srt/ass/best'.
        formats_query = self.params.get('subtitlesformat', 'best')
        formats_preference = formats_query.split('/') if formats_query else []
        for lang in requested_langs:
            formats = available_subs.get(lang)
                self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            for ext in formats_preference:
                matches = list(filter(lambda f: f['ext'] == ext, formats))
                self.report_warning(
                    'No subtitle format found matching "%s" for language %s, '
                    'using %s' % (formats_query, lang, f['ext']))
    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Handles the whole per-video pipeline: --max-downloads accounting,
        title trimming, match filters, the --force-* print options,
        simulate mode, writing description / annotations / subtitles /
        info-json / thumbnails, the actual download (including merged
        video+audio requests), post-download fixups and postprocessing,
        and finally the download archive record.
        """
        assert info_dict.get('_type', 'video') == 'video'
        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()
        info_dict['fulltitle'] = info_dict['title']
        # Trim over-long titles so generated filenames stay manageable.
        if len(info_dict['title']) > 200:
            info_dict['title'] = info_dict['title'][:197] + '...'
        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']
        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']
        reason = self._match_entry(info_dict, incomplete=False)
        if reason is not None:
            # Video rejected by a match filter; report why and skip it.
            self.to_screen('[download] ' + reason)
        self._num_downloads += 1
        info_dict['_filename'] = filename = self.prepare_filename(info_dict)
        # --force-* options: print the requested fields to stdout.
        if self.params.get('forcetitle', False):
            self.to_stdout(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            if info_dict.get('requested_formats') is not None:
                for f in info_dict['requested_formats']:
                    self.to_stdout(f['url'] + f.get('play_path', ''))
                # For RTMP URLs, also include the playpath
                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
            self.to_stdout(json.dumps(info_dict))
        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
        if filename is None:
            # Create the destination directory tree before any file is written.
            dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
            if dn and not os.path.exists(dn):
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + compat_str(err))
        if self.params.get('writedescription', False):
            descfn = filename + '.description'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
            elif info_dict.get('description') is None:
                self.report_warning('There\'s no description to write.')
                    self.to_screen('[info] Writing video description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(info_dict['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write description file ' + descfn)
        if self.params.get('writeannotations', False):
            annofn = filename + '.annotations.xml'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    # 'annotations' missing or not a string: nothing to write.
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)
        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])
        if subtitles_are_requested and info_dict.get('requested_subtitles'):
            # subtitle download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['requested_subtitles']
            ie = self.get_info_extractor(info_dict['extractor_key'])
            for sub_lang, sub_info in subtitles.items():
                sub_format = sub_info['ext']
                if sub_info.get('data') is not None:
                    # Subtitle content was already fetched by the extractor.
                    sub_data = sub_info['data']
                        sub_data = ie._download_webpage(
                            sub_info['url'], info_dict['id'], note=False)
                    except ExtractorError as err:
                        self.report_warning('Unable to download subtitle for "%s": %s' %
                                            (sub_lang, compat_str(err.cause)))
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    # NOTE(review): 'already_present' in the message below looks
                    # like a typo for 'already present' — confirm before fixing.
                    self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                        self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                            subfile.write(sub_data)
                    except (OSError, IOError):
                        self.report_error('Cannot write subtitles file ' + sub_filename)
        if self.params.get('writeinfojson', False):
            infofn = os.path.splitext(filename)[0] + '.info.json'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video description metadata is already present')
                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                    write_json_file(info_dict, infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write metadata to JSON file ' + infofn)
        self._write_thumbnails(info_dict, filename)
        if not self.params.get('skip_download', False):
                # dl() delegates the actual transfer to a suitable FileDownloader.
                fd = get_suitable_downloader(info, self.params)(self, self.params)
                for ph in self._progress_hooks:
                    fd.add_progress_hook(ph)
                if self.params.get('verbose'):
                    self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                return fd.download(name, info)
            if info_dict.get('requested_formats') is not None:
                    # Video+audio request: download both halves separately,
                    # then merge with ffmpeg/avconv if available.
                    merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
                    if not merger.available:
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged')
                        postprocessors = [merger]
                    for f in info_dict['requested_formats']:
                        new_info = dict(info_dict)
                        fname = self.prepare_filename(new_info)
                        # Prefix each part with its format_id so the halves
                        # do not clobber each other on disk.
                        fname = prepend_extension(fname, 'f%s' % f['format_id'])
                        downloaded.append(fname)
                        partial_success = dl(fname, new_info)
                        success = success and partial_success
                    info_dict['__postprocessors'] = postprocessors
                    info_dict['__files_to_merge'] = downloaded
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % str(err))
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                # Post-download fixups, controlled by --fixup.
                fixup_policy = self.params.get('fixup')
                if fixup_policy is None:
                    fixup_policy = 'detect_or_warn'
                stretched_ratio = info_dict.get('stretched_ratio')
                if stretched_ratio is not None and stretched_ratio != 1:
                    if fixup_policy == 'warn':
                        self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                            info_dict['id'], stretched_ratio))
                    elif fixup_policy == 'detect_or_warn':
                        stretched_pp = FFmpegFixupStretchedPP(self)
                        if stretched_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(stretched_pp)
                            self.report_warning(
                                '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
                                    info_dict['id'], stretched_ratio))
                        assert fixup_policy in ('ignore', 'never')
                if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
                    if fixup_policy == 'warn':
                        self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM4aPP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                            self.report_warning(
                                '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
                        assert fixup_policy in ('ignore', 'never')
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error('postprocessing: %s' % str(err))
        self.record_download_archive(info_dict)
    def download(self, url_list):
        """Download a given list of URLs.

        Returns the accumulated process return code. Raises SameFileError
        when several URLs would all be written to one fixed output file.
        """
        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
        # A template without '%' expands to the same name for every URL,
        # so multiple downloads would overwrite each other.
        if (len(url_list) > 1 and
                '%' not in outtmpl and
                self.params.get('max_downloads') != 1):
            raise SameFileError(outtmpl)
        for url in url_list:
                # It also downloads the videos
                res = self.extract_info(url)
            except UnavailableVideoError:
                self.report_error('unable to download video')
            except MaxDownloadsReached:
                self.to_screen('[info] Maximum number of downloaded files reached.')
                if self.params.get('dump_single_json', False):
                    self.to_stdout(json.dumps(res))
        return self._download_retcode
    def download_with_info_file(self, info_filename):
        """Download from a previously dumped --write-info-json file.

        Loads the JSON info dict from info_filename and processes it; on
        DownloadError, falls back to re-extracting from 'webpage_url' if
        the dict carries one. Returns the accumulated return code.
        """
        with contextlib.closing(fileinput.FileInput(
                [info_filename], mode='r',
                openhook=fileinput.hook_encoded('utf-8'))) as f:
            # FileInput doesn't have a read method, we can't call json.load
            info = json.loads('\n'.join(f))
            self.process_ie_result(info, download=True)
        except DownloadError:
            # The stored info may be stale (expired URLs); retry from scratch.
            webpage_url = info.get('webpage_url')
            if webpage_url is not None:
                self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
                return self.download([webpage_url])
        return self._download_retcode
    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file.

        Runs the per-video postprocessors (from '__postprocessors') followed
        by the globally registered ones; afterwards deletes the original
        file if some postprocessor asked for that and --keep-video is unset.
        """
        info = dict(ie_info)
        info['filepath'] = filename
        if ie_info.get('__postprocessors') is not None:
            pps_chain.extend(ie_info['__postprocessors'])
        pps_chain.extend(self._pps)
        for pp in pps_chain:
            # Remember the input file: pp.run() may change info['filepath'].
            old_filename = info['filepath']
                keep_video_wish, info = pp.run(info)
                if keep_video_wish is not None:
                        keep_video = keep_video_wish
                    elif keep_video is None:
                        # No clear decision yet, let IE decide
                        keep_video = keep_video_wish
            except PostProcessingError as e:
                self.report_error(e.msg)
        if keep_video is False and not self.params.get('keepvideo', False):
                self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
                os.remove(encodeFilename(old_filename))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded video file')
1509 def _make_archive_id(self, info_dict):
1510 # Future-proof against any change in case
1511 # and backwards compatibility with prior versions
1512 extractor = info_dict.get('extractor_key')
1513 if extractor is None:
1514 if 'id' in info_dict:
1515 extractor = info_dict.get('ie_key') # key in a playlist
1516 if extractor is None:
1517 return None # Incomplete video information
1518 return extractor.lower() + ' ' + info_dict['id']
    def in_download_archive(self, info_dict):
        """Return True if this video is already recorded in --download-archive."""
        fn = self.params.get('download_archive')
        vid_id = self._make_archive_id(info_dict)
            return False  # Incomplete video information
            # Linear scan: the archive is a plain text file, one id per line.
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    if line.strip() == vid_id:
        except IOError as ioe:
            # A missing archive file simply means nothing was recorded yet.
            if ioe.errno != errno.ENOENT:
    def record_download_archive(self, info_dict):
        """Append this video's archive id to the --download-archive file."""
        fn = self.params.get('download_archive')
        vid_id = self._make_archive_id(info_dict)
        # locked_file serializes concurrent writers to the shared archive.
        with locked_file(fn, 'a', encoding='utf-8') as archive_file:
            archive_file.write(vid_id + '\n')
    def format_resolution(format, default='unknown'):
        """Return a human-readable resolution string for a format dict.

        Prefers an explicit 'resolution', then 'WxH', then 'Hp', then '?xW';
        falls back to `default` when no dimension information is present.
        """
        if format.get('vcodec') == 'none':
        if format.get('resolution') is not None:
            return format['resolution']
        if format.get('height') is not None:
            if format.get('width') is not None:
                res = '%sx%s' % (format['width'], format['height'])
                res = '%sp' % format['height']
        elif format.get('width') is not None:
            res = '?x%d' % format['width']
    def _format_note(self, fdict):
        """Build the free-form 'note' column for --list-formats.

        Concatenates whatever metadata the format dict provides: extractor
        note, bitrates (tbr/vbr/abr), container, codecs, fps, sample rate
        and (approximate) filesize. Returns the assembled string.
        """
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            res += ', %sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            # 'none' means the format carries no audio stream at all.
            if fdict['acodec'] == 'none':
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            # '~' marks the size as an estimate, not an exact byte count.
            res += '~' + format_bytes(fdict['filesize_approx'])
    def list_formats(self, info_dict):
        """Print a table of the video's available formats (--list-formats)."""
        formats = info_dict.get('formats', [info_dict])
            # One row per format; hide formats with very low preference.
            [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
            if f.get('preference') is None or f['preference'] >= -1000]
        if len(formats) > 1:
            # Formats are sorted worst-to-best, so tag the last row as best.
            table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
        header_line = ['format code', 'extension', 'resolution', 'note']
            '[info] Available formats for %s:\n%s' %
            (info_dict['id'], render_table(header_line, table)))
    def list_thumbnails(self, info_dict):
        """Print a table of the video's thumbnails (--list-thumbnails)."""
        thumbnails = info_dict.get('thumbnails')
            # Fall back to the single 'thumbnail' field if no list exists.
            tn_url = info_dict.get('thumbnail')
                thumbnails = [{'id': '0', 'url': tn_url}]
                '[info] No thumbnails present for %s' % info_dict['id'])
            '[info] Thumbnails for %s:' % info_dict['id'])
        self.to_screen(render_table(
            ['ID', 'width', 'height', 'URL'],
            [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
    def list_subtitles(self, video_id, subtitles, name='subtitles'):
        """Print available subtitle languages and formats (--list-subs).

        `name` distinguishes 'subtitles' from 'automatic captions' in the
        output; `subtitles` maps language -> list of format dicts.
        """
            self.to_screen('%s has no %s' % (video_id, name))
            'Available %s for %s:' % (name, video_id))
        self.to_screen(render_table(
            ['Language', 'formats'],
            # reversed(): formats are stored worst-to-best, show best first.
            [[lang, ', '.join(f['ext'] for f in reversed(formats))]
                for lang, formats in subtitles.items()]))
    def urlopen(self, req):
        """ Start an HTTP download.

        `req` may be a URL string or a Request object; non-ASCII URLs are
        percent-escaped before being passed to the shared opener.
        """
        # According to RFC 3986, URLs can not contain non-ASCII characters; however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        req_is_string = isinstance(req, compat_basestring)
        url = req if req_is_string else req.get_full_url()
        url_escaped = escape_url(url)
        # Substitute URL if any change after escaping
        if url != url_escaped:
                # Rebuild the Request around the escaped URL, preserving
                # its data, headers and origin information.
                req = compat_urllib_request.Request(
                    url_escaped, data=req.data, headers=req.headers,
                    origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
        return self._opener.open(req, timeout=self._socket_timeout)
    def print_debug_header(self):
        """Print environment diagnostics when --verbose is enabled.

        Reports encodings, youtube-dl version (plus git HEAD when run from
        a checkout), Python/platform versions, external tool versions, the
        proxy map and — with --call-home — public IP and update check.
        """
        if not self.params.get('verbose'):
        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                self.get_encoding()))
        write_string(encoding_str, encoding=None)
        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
            # Best-effort: only works when running from a git checkout.
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))
        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_str = ', '.join(
            for exe, v in sorted(exe_versions.items())
        self._write_string('[debug] exe versions: %s\n' % exe_str)
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
        if self.params.get('call_home', False):
            # Opt-in phone-home: report public IP and check for updates.
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
    def _setup_opener(self):
        """Build the shared urllib opener used by urlopen().

        Configures the socket timeout, the cookie jar (in-memory or backed
        by --cookies), proxy handling and the custom HTTPS/YoutubeDL
        handlers, then stores the opener on self._opener.
        """
        timeout_val = self.params.get('socket_timeout')
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')
        if opts_cookiefile is None:
            self.cookiejar = compat_cookiejar.CookieJar()
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
            # Only load the cookie file if it already exists and is readable.
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()
        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        if opts_proxy is not None:
            if opts_proxy == '':
                proxies = {'http': opts_proxy, 'https': opts_proxy}
            # No --proxy given: honor the environment's proxy settings.
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)
        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh)
        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
    def encode(self, s):
        """Encode text `s` to bytes using the configured output encoding.

        Bytes are passed through unchanged. A UnicodeEncodeError is
        re-raised with a hint about --encoding appended to its reason.
        """
        if isinstance(s, bytes):
            return s  # Already encoded
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
    def get_encoding(self):
        """Return the output encoding: --encoding if set, else the locale's."""
        encoding = self.params.get('encoding')
        if encoding is None:
            encoding = preferredencoding()
    def _write_thumbnails(self, info_dict, filename):
        """Download the video's thumbnail(s) next to `filename`.

        --write-thumbnail saves only the best (last) thumbnail;
        --write-all-thumbnails saves every one, suffixing the filename
        with the thumbnail id when there is more than one.
        """
        if self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails')
                # Thumbnails are sorted worst-to-best; keep only the best.
                thumbnails = [thumbnails[-1]]
        elif self.params.get('write_all_thumbnails', False):
            thumbnails = info_dict.get('thumbnails')
            # No thumbnails present, so return immediately
        for t in thumbnails:
            thumb_ext = determine_ext(t['url'], 'jpg')
            # Only disambiguate filenames when saving several thumbnails.
            suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
            thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
            thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail %sis already present' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
                self.to_screen('[%s] %s: Downloading thumbnail %s...' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
                    uf = self.urlopen(t['url'])
                    with open(thumb_filename, 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    # Thumbnail failures are non-fatal; warn and continue.
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (t['url'], compat_str(err)))