2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
34 compat_get_terminal_size,
38 compat_tokenize_tokenize,
40 compat_urllib_request,
41 compat_urllib_request_DataHandler,
60 PerRequestProxyHandler,
71 UnavailableVideoError,
76 YoutubeDLCookieProcessor,
83 from .cache import Cache
84 from .extractor import get_info_extractor, gen_extractors
85 from .downloader import get_suitable_downloader
86 from .downloader.rtmp import rtmpdump_version
87 from .postprocessor import (
89 FFmpegFixupStretchedPP,
94 from .version import __version__
97 class YoutubeDL(object):
100 YoutubeDL objects are the ones responsible for downloading the
101 actual video file and writing it to disk if the user has requested
102 it, among some other tasks. In most cases there should be one per
103 program. As, given a video URL, the downloader doesn't know how to
104 extract all the needed information, task that InfoExtractors do, it
105 has to pass the URL to one of them.
107 For this, YoutubeDL objects have a method that allows
108 InfoExtractors to be registered in a given order. When it is passed
109 a URL, the YoutubeDL object hands it to the first InfoExtractor it
110 finds that reports being able to handle it. The InfoExtractor extracts
111 all the information about the video or videos the URL refers to, and
112 YoutubeDL processes the extracted information, possibly using a File
113 Downloader to download the video.
115 YoutubeDL objects accept a lot of parameters. In order not to saturate
116 the object constructor with arguments, it receives a dictionary of
117 options instead. These options are available through the params
118 attribute for the InfoExtractors to use. The YoutubeDL also
119 registers itself as the downloader in charge for the InfoExtractors
120 that are added to it, so this is a "mutual registration".
124 username: Username for authentication purposes.
125 password: Password for authentication purposes.
126 videopassword: Password for accessing a video.
127 usenetrc: Use netrc for authentication instead.
128 verbose: Print additional info to stdout.
129 quiet: Do not print messages to stdout.
130 no_warnings: Do not print out anything for warnings.
131 forceurl: Force printing final URL.
132 forcetitle: Force printing title.
133 forceid: Force printing ID.
134 forcethumbnail: Force printing thumbnail URL.
135 forcedescription: Force printing description.
136 forcefilename: Force printing final filename.
137 forceduration: Force printing duration.
138 forcejson: Force printing info_dict as JSON.
139 dump_single_json: Force printing the info_dict of the whole playlist
140 (or video) as a single JSON line.
141 simulate: Do not download the video files.
142 format: Video format code. See options.py for more information.
143 outtmpl: Template for output names.
144 restrictfilenames: Do not allow "&" and spaces in file names
145 ignoreerrors: Do not stop on download errors.
146 force_generic_extractor: Force downloader to use the generic extractor
147 nooverwrites: Prevent overwriting files.
148 playliststart: Playlist item to start at.
149 playlistend: Playlist item to end at.
150 playlist_items: Specific indices of playlist to download.
151 playlistreverse: Download playlist items in reverse order.
152 matchtitle: Download only matching titles.
153 rejecttitle: Reject downloads for matching titles.
154 logger: Log messages to a logging.Logger instance.
155 logtostderr: Log messages to stderr instead of stdout.
156 writedescription: Write the video description to a .description file
157 writeinfojson: Write the video description to a .info.json file
158 writeannotations: Write the video annotations to a .annotations.xml file
159 writethumbnail: Write the thumbnail image to a file
160 write_all_thumbnails: Write all thumbnail formats to files
161 writesubtitles: Write the video subtitles to a file
162 writeautomaticsub: Write the automatically generated subtitles to a file
163 allsubtitles: Downloads all the subtitles of the video
164 (requires writesubtitles or writeautomaticsub)
165 listsubtitles: Lists all available subtitles for the video
166 subtitlesformat: The format code for subtitles
167 subtitleslangs: List of languages of the subtitles to download
168 keepvideo: Keep the video file after post-processing
169 daterange: A DateRange object, download only if the upload_date is in the range.
170 skip_download: Skip the actual download of the video file
171 cachedir: Location of the cache files in the filesystem.
172 False to disable filesystem cache.
173 noplaylist: Download single video instead of a playlist if in doubt.
174 age_limit: An integer representing the user's age in years.
175 Unsuitable videos for the given age are skipped.
176 min_views: An integer representing the minimum view count the video
177 must have in order to not be skipped.
178 Videos without view count information are always
179 downloaded. None for no limit.
180 max_views: An integer representing the maximum view count.
181 Videos that are more popular than that are not
183 Videos without view count information are always
184 downloaded. None for no limit.
185 download_archive: File name of a file where all downloads are recorded.
186 Videos already present in the file are not downloaded
188 cookiefile: File name where cookies should be read from and dumped to.
189 nocheckcertificate:Do not verify SSL certificates
190 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
191 At the moment, this is only supported by YouTube.
192 proxy: URL of the proxy server to use
193 cn_verification_proxy: URL of the proxy to use for IP address verification
194 on Chinese sites. (Experimental)
195 socket_timeout: Time to wait for unresponsive hosts, in seconds
196 bidi_workaround: Work around buggy terminals without bidirectional text
197 support, using fribidi
198 debug_printtraffic:Print out sent and received HTTP traffic
199 include_ads: Download ads as well
200 default_search: Prepend this string if an input url is not valid.
201 'auto' for elaborate guessing
202 encoding: Use this encoding instead of the system-specified.
203 extract_flat: Do not resolve URLs, return the immediate result.
204 Pass in 'in_playlist' to only show this behavior for
206 postprocessors: A list of dictionaries, each with an entry
207 * key: The name of the postprocessor. See
208 youtube_dl/postprocessor/__init__.py for a list.
209 as well as any further keyword arguments for the
211 progress_hooks: A list of functions that get called on download
212 progress, with a dictionary with the entries
213 * status: One of "downloading", "error", or "finished".
214 Check this first and ignore unknown values.
216 If status is one of "downloading", or "finished", the
217 following properties may also be present:
218 * filename: The final filename (always present)
219 * tmpfilename: The filename we're currently writing to
220 * downloaded_bytes: Bytes on disk
221 * total_bytes: Size of the whole file, None if unknown
222 * total_bytes_estimate: Guess of the eventual file size,
224 * elapsed: The number of seconds since download started.
225 * eta: The estimated time in seconds, None if unknown
226 * speed: The download speed in bytes/second, None if
228 * fragment_index: The counter of the currently
229 downloaded video fragment.
230 * fragment_count: The number of fragments (= individual
231 files that will be merged)
233 Progress hooks are guaranteed to be called at least once
234 (with status "finished") if the download is successful.
235 merge_output_format: Extension to use when merging formats.
236 fixup: Automatically correct known faults of the file.
238 - "never": do nothing
239 - "warn": only emit a warning
240 - "detect_or_warn": check whether we can do anything
241 about it, warn otherwise (default)
242 source_address: (Experimental) Client-side IP address to bind to.
243 call_home: Boolean, true iff we are allowed to contact the
244 youtube-dl servers for debugging.
245 sleep_interval: Number of seconds to sleep before each download.
246 listformats: Print an overview of available video formats and exit.
247 list_thumbnails: Print a table of all thumbnails and exit.
248 match_filter: A function that gets called with the info_dict of
250 If it returns a message, the video is ignored.
251 If it returns None, the video is downloaded.
252 match_filter_func in utils.py is one example for this.
253 no_color: Do not emit color codes in output.
255 The following options determine which downloader is picked:
256 external_downloader: Executable of the external downloader to call.
257 None or unset for standard (built-in) downloader.
258 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
260 The following parameters are not used by YoutubeDL itself, they are used by
261 the downloader (see youtube_dl/downloader/common.py):
262 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
263 noresizebuffer, retries, continuedl, noprogress, consoletitle,
264 xattr_set_filesize, external_downloader_args.
266 The following options are used by the post processors:
267 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
268 otherwise prefer avconv.
269 postprocessor_args: A list of additional command-line arguments for the
276 _download_retcode = None
277 _num_downloads = None
# Constructor. From the visible lines it:
#   * initialises per-instance bookkeeping (extractor instance cache,
#     progress hooks, return code, download counter),
#   * picks the screen stream from the 'logtostderr' option,
#   * merges the caller's params over the built-in defaults,
#   * optionally starts a bidi helper subprocess,
#   * registers the configured postprocessors and progress hooks.
# NOTE(review): several lines of this method are not visible in this
# extract (e.g. the handling of params=None, the `self.params = {` opener,
# the `try:` / `else:` lines and the definition of `sp_kwargs`); comments
# below only describe what can be seen.
280 def __init__(self, params=None, auto_init=True):
281 """Create a FileDownloader object with the given options."""
285 self._ies_instances = {}
287 self._progress_hooks = []
288 self._download_retcode = 0
289 self._num_downloads = 0
# The boolean option is used as a list index: False -> stdout, True -> stderr.
290 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
291 self._err_file = sys.stderr
294 'nocheckcertificate': False,
# Caller-supplied options override the defaults set above.
296 self.params.update(params)
297 self.cache = Cache(self)
# Bidi workaround: a pty pair is opened and a helper process is spawned;
# its output is later read back through self._output_channel.
299 if params.get('bidi_workaround', False):
302 master, slave = pty.openpty()
303 width = compat_get_terminal_size().columns
307 width_args = ['-w', str(width)]
309 stdin=subprocess.PIPE,
311 stderr=self._err_file)
# 'bidiv' is attempted first and 'fribidi' second; presumably the second
# Popen is a fallback for a failed first one -- TODO confirm, the
# surrounding try/except lines are not visible here.
313 self._output_process = subprocess.Popen(
314 ['bidiv'] + width_args, **sp_kwargs
317 self._output_process = subprocess.Popen(
318 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
319 self._output_channel = os.fdopen(master, 'rb')
320 except OSError as ose:
322 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
# On Python 3 (non-Windows) with an ASCII filesystem encoding, force
# restricted filenames unless the caller already asked for them.
326 if (sys.version_info >= (3,) and sys.platform != 'win32' and
327 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
328 not params.get('restrictfilenames', False)):
329 # On Python 3, the Unicode filesystem API will throw errors (#1474)
331 'Assuming --restrict-filenames since file system encoding '
332 'cannot encode all characters. '
333 'Set the LC_ALL environment variable to fix this.')
334 self.params['restrictfilenames'] = True
# A bytes output template is detected here; the message text below is a
# warning about it.
336 if isinstance(params.get('outtmpl'), bytes):
338 'Parameter outtmpl is bytes, but should be a unicode string. '
339 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
# presumably guarded by `auto_init` -- TODO confirm, the guard line is not visible.
344 self.print_debug_header()
345 self.add_default_info_extractors()
# Each postprocessor definition is a dict whose 'key' names the class; the
# dict is copied before being passed on as keyword arguments.
347 for pp_def_raw in self.params.get('postprocessors', []):
348 pp_class = get_postprocessor(pp_def_raw['key'])
349 pp_def = dict(pp_def_raw)
351 pp = pp_class(self, **compat_kwargs(pp_def))
352 self.add_post_processor(pp)
354 for ph in self.params.get('progress_hooks', []):
355 self.add_progress_hook(ph)
# Scan argv for arguments that look like a dash-led 11-character video ID
# (a '-' followed by exactly 10 characters from [0-9A-Za-z_-]) and build a
# corrected command line that moves them after a '--' separator.
# NOTE(review): the lines that open the index list, test it, start the
# corrected argv and call the reporting function are not visible in this
# extract; only the visible fragments are described.
357 def warn_if_short_id(self, argv):
358 # short YouTube ID starting with dash?
360 i for i, a in enumerate(argv)
361 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
# Non-matching arguments keep their order; matching ones go after '--'.
365 [a for i, a in enumerate(argv) if i not in idxs] +
366 ['--'] + [argv[i] for i in idxs]
369 'Long argument string detected. '
370 'Use -- to separate parameters and URLs, like this:\n%s\n' %
371 args_to_str(correct_argv))
# Register an InfoExtractor with this object: it is cached under its
# ie_key() and told that this object is its downloader.
# NOTE(review): the statement that appends to the extractor list (promised
# by the docstring) is not visible in this extract.
373 def add_info_extractor(self, ie):
374 """Add an InfoExtractor object to the end of the list."""
376 self._ies_instances[ie.ie_key()] = ie
377 ie.set_downloader(self)
def get_info_extractor(self, ie_key):
    """
    Return the InfoExtractor instance registered under ie_key.

    The per-key instance cache is consulted first.  On a miss a new
    extractor is instantiated, registered through add_info_extractor and
    then returned.
    """
    extractor = self._ies_instances.get(ie_key)
    if extractor is not None:
        return extractor
    # Cache miss: look the class up by key, instantiate it and register it.
    extractor = get_info_extractor(ie_key)()
    self.add_info_extractor(extractor)
    return extractor
def add_default_info_extractors(self):
    """
    Register every InfoExtractor returned by gen_extractors, in the order
    they are produced.
    """
    default_extractors = gen_extractors()
    for extractor in default_extractors:
        self.add_info_extractor(extractor)
# Register a PostProcessor and tell it that this object is its downloader.
# NOTE(review): the statement that appends pp to the chain (promised by the
# docstring) is not visible in this extract.
398 def add_post_processor(self, pp):
399 """Add a PostProcessor object to the end of the chain."""
401 pp.set_downloader(self)
def add_progress_hook(self, ph):
    """Register ph as a progress hook (currently only for the file downloader)"""
    registered_hooks = self._progress_hooks
    registered_hooks.append(ph)
407 def _bidi_workaround(self, message):
408 if not hasattr(self, '_output_channel'):
411 assert hasattr(self, '_output_process')
412 assert isinstance(message, compat_str)
413 line_count = message.count('\n') + 1
414 self._output_process.stdin.write((message + '\n').encode('utf-8'))
415 self._output_process.stdin.flush()
416 res = ''.join(self._output_channel.readline().decode('utf-8')
417 for _ in range(line_count))
418 return res[:-len('\n')]
def to_screen(self, message, skip_eol=False):
    """Print message to stdout if not in quiet mode."""
    # Same as to_stdout, but always honouring the 'quiet' option.
    outcome = self.to_stdout(message, skip_eol, check_quiet=True)
    return outcome
def _write_string(self, s, out=None):
    """Write s to out via write_string, using the 'encoding' option (None if unset)."""
    configured_encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=configured_encoding)
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to the screen file, or hand it to the configured logger.

    message:     text to emit.
    skip_eol:    if true, do not append a newline.  Any truthy value is
                 accepted (the flag is no longer used as a list index, so
                 values other than True/1 no longer raise IndexError).
    check_quiet: if true, stay silent when the 'quiet' option is set; with
                 the default (False) the message is printed even in quiet
                 mode.

    When a 'logger' option is set the message goes to logger.debug() and
    nothing is written to the screen file.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return
    message = self._bidi_workaround(message)
    terminator = '' if skip_eol else '\n'
    self._write_string(message + terminator, self._screen_file)
def to_stderr(self, message):
    """Print message to stderr."""
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        # A configured logger takes precedence over the error stream.
        logger.error(message)
        return
    message = self._bidi_workaround(message)
    self._write_string(message + '\n', self._err_file)
def to_console_title(self, message):
    """Set the console/terminal title to message when 'consoletitle' is enabled."""
    if self.params.get('consoletitle', False):
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            title = ctypes.c_wchar_p(message)
            ctypes.windll.kernel32.SetConsoleTitleW(title)
        elif 'TERM' in os.environ:
            # Terminal escape sequence carrying the new title.
            sequence = '\033]0;%s\007' % message
            self._write_string(sequence, self._screen_file)
def save_console_title(self):
    """Push the current terminal title on the title stack ('consoletitle' only)."""
    if self.params.get('consoletitle', False) and 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
def restore_console_title(self):
    """Pop the terminal title back from the title stack ('consoletitle' only)."""
    if self.params.get('consoletitle', False) and 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
473 self.save_console_title()
476 def __exit__(self, *args):
477 self.restore_console_title()
479 if self.params.get('cookiefile') is not None:
480 self.cookiejar.save()
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    The message (if any) is printed to stderr first.  In verbose mode a
    traceback is printed as well: the one passed as tb, or one built from
    the exception being handled (including the exception it wraps, when it
    carries an exc_info attribute), or the current call stack when no
    exception is active.

    Unless the 'ignoreerrors' option is set, a DownloadError carrying the
    message and the relevant exc_info is raised; otherwise the return code
    is set to 1.

    tb, if given, is additional traceback information.
    """
    def _wrapped_exc_info(active):
        # Inner exc_info triple carried by the active exception, or None.
        value = active[1]
        if hasattr(value, 'exc_info') and value.exc_info[0]:
            return value.exc_info
        return None

    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            active = sys.exc_info()
            if active[0]:  # if .trouble has been called from an except block
                pieces = []
                inner = _wrapped_exc_info(active)
                if inner is not None:
                    pieces.append(''.join(traceback.format_exception(*inner)))
                pieces.append(compat_str(traceback.format_exc()))
                tb = ''.join(pieces)
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if not self.params.get('ignoreerrors', False):
        active = sys.exc_info()
        inner = _wrapped_exc_info(active) if active[0] else None
        exc_info = active if inner is None else inner
        raise DownloadError(message, exc_info)
    self._download_retcode = 1
def report_warning(self, message):
    '''
    Emit a warning.  With a 'logger' option set the message goes to
    logger.warning(); otherwise it is printed to stderr prefixed with
    'WARNING:' (colored when stderr is a tty, colors are allowed and the
    platform is not Windows), unless 'no_warnings' is set.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    use_color = (
        not self.params.get('no_color') and
        self._err_file.isatty() and
        os.name != 'nt')
    _msg_header = '\033[0;33mWARNING:\033[0m' if use_color else 'WARNING:'
    self.to_stderr('%s %s' % (_msg_header, message))
def report_error(self, message, tb=None):
    '''
    Do the same as trouble, but prefix the message with 'ERROR:', colored
    in red when stderr is a tty, colors are allowed and the platform is
    not Windows.
    '''
    use_color = (
        not self.params.get('no_color') and
        self._err_file.isatty() and
        os.name != 'nt')
    _msg_header = '\033[0;31mERROR:\033[0m' if use_color else 'ERROR:'
    self.trouble('%s %s' % (_msg_header, message), tb)
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded."""
    try:
        detailed = '[download] %s has already been downloaded' % file_name
        self.to_screen(detailed)
    except UnicodeEncodeError:
        # The file name could not be encoded for output; use a generic line.
        self.to_screen('[download] The file has already been downloaded')
# Build the output file name for info_dict by filling the 'outtmpl'
# template with (sanitized) metadata fields.
# NOTE(review): several lines are not visible in this extract (the `try:`
# opener, the default for autonumber_size, the remaining arguments of the
# sanitize lambda, the filter of the dict comprehension and whatever
# follows the error report); comments describe only what can be seen.
548 def prepare_filename(self, info_dict):
549 """Generate the output filename."""
# Work on a copy so the caller's info_dict is not modified.
551 template_dict = dict(info_dict)
553 template_dict['epoch'] = int(time.time())
554 autonumber_size = self.params.get('autonumber_size')
555 if autonumber_size is None:
# 'autonumber' is the running download count, zero-padded to autonumber_size digits.
557 autonumber_templ = '%0' + str(autonumber_size) + 'd'
558 template_dict['autonumber'] = autonumber_templ % self._num_downloads
# 'playlist_index' is zero-padded to the number of digits in n_entries.
559 if template_dict.get('playlist_index') is not None:
560 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
# Derive a 'resolution' field from width/height when none was provided.
561 if template_dict.get('resolution') is None:
562 if template_dict.get('width') and template_dict.get('height'):
563 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
564 elif template_dict.get('height'):
565 template_dict['resolution'] = '%sp' % template_dict['height']
566 elif template_dict.get('width'):
567 template_dict['resolution'] = '?x%d' % template_dict['width']
569 sanitize = lambda k, v: sanitize_filename(
571 restricted=self.params.get('restrictfilenames'),
573 template_dict = dict((k, sanitize(k, v))
574 for k, v in template_dict.items()
# Fields missing from the dict render as 'NA' in the template.
576 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
578 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
579 tmpl = compat_expanduser(outtmpl)
580 filename = tmpl % template_dict
581 # Temporary fix for #4787
582 # 'Treat' all problem characters by passing filename through preferredencoding
583 # to workaround encoding issues with subprocess on python2 @ Windows
584 if sys.version_info < (3, 0) and sys.platform == 'win32':
585 filename = encodeFilename(filename, True).decode(preferredencoding())
586 return sanitize_path(filename)
# A ValueError here is reported as a template error.
587 except ValueError as err:
588 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
# Decide whether a video passes the configured filters.  Each failed
# check returns a human-readable reason string.
# NOTE(review): several guard lines and the tail of the method (including
# how `incomplete` is used and what is done with `ret`) are not visible in
# this extract; comments describe only what can be seen.
591 def _match_entry(self, info_dict, incomplete):
592 """ Returns None iff the file should be downloaded """
# Name used in messages: title, else id, else the literal 'video'.
594 video_title = info_dict.get('title', info_dict.get('id', 'video'))
595 if 'title' in info_dict:
596 # This can happen when we're just evaluating the playlist
597 title = info_dict['title']
# Title filters are case-insensitive regular-expression searches.
598 matchtitle = self.params.get('matchtitle', False)
600 if not re.search(matchtitle, title, re.IGNORECASE):
601 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
602 rejecttitle = self.params.get('rejecttitle', False)
604 if re.search(rejecttitle, title, re.IGNORECASE):
605 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
# Upload-date filter; a default-constructed DateRange is used when no
# 'daterange' option is set.
606 date = info_dict.get('upload_date', None)
608 dateRange = self.params.get('daterange', DateRange())
609 if date not in dateRange:
610 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
# View-count limits apply only when the video reports a view count.
611 view_count = info_dict.get('view_count', None)
612 if view_count is not None:
613 min_views = self.params.get('min_views')
614 if min_views is not None and view_count < min_views:
615 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
616 max_views = self.params.get('max_views')
617 if max_views is not None and view_count > max_views:
618 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
619 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
620 return 'Skipping "%s" because it is age restricted' % video_title
621 if self.in_download_archive(info_dict):
622 return '%s has already been recorded in archive' % video_title
# User-supplied filter callable, invoked with the info_dict.
625 match_filter = self.params.get('match_filter')
626 if match_filter is not None:
627 ret = match_filter(info_dict)
def add_extra_info(info_dict, extra_info):
    '''Copy into info_dict every key of extra_info that info_dict lacks.

    Keys already present in info_dict keep their value (even if it is
    None).  info_dict is modified in place; nothing is returned.
    '''
    for name, fallback in extra_info.items():
        if name not in info_dict:
            info_dict[name] = fallback
# Run an InfoExtractor on url and (by default) process the result.
# NOTE(review): the loop over extractors, the `try:` opener and several
# branch lines are not visible in this extract; comments describe only
# what can be seen.
# NOTE(review): `extra_info={}` is a mutable default shared between calls;
# the visible code only passes it on, but callers should not mutate it.
639 def extract_info(self, url, download=True, ie_key=None, extra_info={},
640 process=True, force_generic_extractor=False):
642 Returns a list with a dictionary for each video we find.
643 If 'download', also downloads the videos.
644 extra_info is a dict containing the extra values to add to each result
647 if not ie_key and force_generic_extractor:
# With an explicit ie_key only that extractor is considered.
651 ies = [self.get_info_extractor(ie_key)]
656 if not ie.suitable(url):
660 self.report_warning('The program functionality for this site has been marked as broken, '
661 'and will probably not work.')
664 ie_result = ie.extract(url)
665 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
667 if isinstance(ie_result, list):
668 # Backwards compatibility: old IE result format
670 '_type': 'compat_list',
671 'entries': ie_result,
673 self.add_default_extra_info(ie_result, ie, url)
675 return self.process_ie_result(ie_result, download, extra_info)
# Expected extractor failures are reported with the extractor's own traceback.
678 except ExtractorError as e: # An error we somewhat expected
679 self.report_error(error_to_str(e), e.format_traceback())
681 except MaxDownloadsReached:
# Any other exception is reported (not propagated) when 'ignoreerrors' is set.
683 except Exception as e:
684 if self.params.get('ignoreerrors', False):
685 self.report_error(error_to_str(e), tb=compat_str(traceback.format_exc()))
690 self.report_error('no suitable InfoExtractor for URL %s' % url)
# Fill ie_result with default bookkeeping fields describing which
# extractor produced it and for which URL; existing keys are kept because
# add_extra_info only sets missing ones.
# NOTE(review): at least one dict entry and the closing of the call are
# not visible in this extract.
692 def add_default_extra_info(self, ie_result, ie, url):
693 self.add_extra_info(ie_result, {
694 'extractor': ie.IE_NAME,
696 'webpage_url_basename': url_basename(url),
697 'extractor_key': ie.ie_key(),
# Resolve an extractor result according to its '_type' ('video', 'url',
# 'url_transparent', 'playlist' / 'multi_video', 'compat_list') and return
# the resolved result; any other type raises an Exception.
# NOTE(review): many lines of this method (branch openers, `else:` lines,
# `return`/`continue` statements, parts of dict literals and call
# arguments) are not visible in this extract; comments describe only what
# can be seen.
# NOTE(review): `extra_info={}` is a mutable default shared between calls.
700 def process_ie_result(self, ie_result, download=True, extra_info={}):
702 Take the result of the ie(may be modified) and resolve all unresolved
703 references (URLs, playlist items).
705 It will also download the videos if 'download'.
706 Returns the resolved ie_result.
709 result_type = ie_result.get('_type', 'video')
# 'extract_flat' handling for URL results: True always applies,
# 'in_playlist' only when extra_info carries a 'playlist' key.
711 if result_type in ('url', 'url_transparent'):
712 extract_flat = self.params.get('extract_flat', False)
713 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
714 extract_flat is True):
715 if self.params.get('forcejson', False):
716 self.to_stdout(json.dumps(ie_result))
719 if result_type == 'video':
720 self.add_extra_info(ie_result, extra_info)
721 return self.process_video_result(ie_result, download=download)
722 elif result_type == 'url':
723 # We have to add extra_info to the results because it may be
724 # contained in a playlist
725 return self.extract_info(ie_result['url'],
727 ie_key=ie_result.get('ie_key'),
728 extra_info=extra_info)
729 elif result_type == 'url_transparent':
730 # Use the information from the embedding page
731 info = self.extract_info(
732 ie_result['url'], ie_key=ie_result.get('ie_key'),
733 extra_info=extra_info, download=False, process=False)
# Non-None fields of the embedding result (except '_type' and 'url')
# override the fields of the extracted info.
735 force_properties = dict(
736 (k, v) for k, v in ie_result.items() if v is not None)
737 for f in ('_type', 'url'):
738 if f in force_properties:
739 del force_properties[f]
740 new_result = info.copy()
741 new_result.update(force_properties)
743 assert new_result.get('_type') != 'url_transparent'
745 return self.process_ie_result(
746 new_result, download=download, extra_info=extra_info)
747 elif result_type == 'playlist' or result_type == 'multi_video':
748 # We process each entry in the playlist
749 playlist = ie_result.get('title', None) or ie_result.get('id', None)
750 self.to_screen('[download] Downloading playlist: %s' % playlist)
752 playlist_results = []
# 'playliststart' is 1-based in the options and converted to a 0-based index.
754 playliststart = self.params.get('playliststart', 1) - 1
755 playlistend = self.params.get('playlistend', None)
756 # For backwards compatibility, interpret -1 as whole list
757 if playlistend == -1:
760 playlistitems_str = self.params.get('playlist_items', None)
# 'playlist_items' is a comma-separated list of 1-based indices and
# inclusive 'start-end' ranges.
762 if playlistitems_str is not None:
763 def iter_playlistitems(format):
764 for string_segment in format.split(','):
765 if '-' in string_segment:
766 start, end = string_segment.split('-')
767 for item in range(int(start), int(end) + 1):
770 yield int(string_segment)
771 playlistitems = iter_playlistitems(playlistitems_str)
# Entries may be a plain list, a PagedList or (final branch) some other
# iterable; each branch selects the requested items its own way.
773 ie_entries = ie_result['entries']
774 if isinstance(ie_entries, list):
775 n_all_entries = len(ie_entries)
778 ie_entries[i - 1] for i in playlistitems
779 if -n_all_entries <= i - 1 < n_all_entries]
781 entries = ie_entries[playliststart:playlistend]
782 n_entries = len(entries)
784 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
785 (ie_result['extractor'], playlist, n_all_entries, n_entries))
786 elif isinstance(ie_entries, PagedList):
789 for item in playlistitems:
790 entries.extend(ie_entries.getslice(
794 entries = ie_entries.getslice(
795 playliststart, playlistend)
796 n_entries = len(entries)
798 "[%s] playlist %s: Downloading %d videos" %
799 (ie_result['extractor'], playlist, n_entries))
802 entry_list = list(ie_entries)
803 entries = [entry_list[i - 1] for i in playlistitems]
805 entries = list(itertools.islice(
806 ie_entries, playliststart, playlistend))
807 n_entries = len(entries)
809 "[%s] playlist %s: Downloading %d videos" %
810 (ie_result['extractor'], playlist, n_entries))
812 if self.params.get('playlistreverse', False):
813 entries = entries[::-1]
# Each entry is processed with playlist context; 'playlist_index' is the
# 1-based loop counter offset by playliststart.
815 for i, entry in enumerate(entries, 1):
816 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
818 'n_entries': n_entries,
819 'playlist': playlist,
820 'playlist_id': ie_result.get('id'),
821 'playlist_title': ie_result.get('title'),
822 'playlist_index': i + playliststart,
823 'extractor': ie_result['extractor'],
824 'webpage_url': ie_result['webpage_url'],
825 'webpage_url_basename': url_basename(ie_result['webpage_url']),
826 'extractor_key': ie_result['extractor_key'],
# Entries are filtered on possibly incomplete metadata before processing.
829 reason = self._match_entry(entry, incomplete=True)
830 if reason is not None:
831 self.to_screen('[download] ' + reason)
834 entry_result = self.process_ie_result(entry,
837 playlist_results.append(entry_result)
838 ie_result['entries'] = playlist_results
839 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
# Legacy list results: each entry is fixed up and processed individually.
841 elif result_type == 'compat_list':
843 'Extractor %s returned a compat_list result. '
844 'It needs to be updated.' % ie_result.get('extractor'))
850 'extractor': ie_result['extractor'],
851 'webpage_url': ie_result['webpage_url'],
852 'webpage_url_basename': url_basename(ie_result['webpage_url']),
853 'extractor_key': ie_result['extractor_key'],
857 ie_result['entries'] = [
858 self.process_ie_result(_fixup(r), download, extra_info)
859 for r in ie_result['entries']
863 raise Exception('Invalid result type: %s' % result_type)
# Parse a single filter specification (e.g. a numeric comparison on
# width/height/tbr/... or a string comparison on ext/acodec/...) and build
# a predicate over format dicts.
# NOTE(review): the OPERATORS / STR_OPERATORS tables, several branch lines
# and the definition/return of the inner filter function are not visible
# in this extract; comments describe only what can be seen.
865 def _build_format_filter(self, filter_spec):
866 " Returns a function to filter the formats according to the filter_spec "
# Numeric filters: key, operator, optional '?' (captured as the
# 'none_inclusive' group) and a number with an optional size suffix.
876 operator_rex = re.compile(r'''(?x)\s*
877 (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
878 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
879 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
881 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
882 m = operator_rex.search(filter_spec)
# The value is read as an int, or via parse_filesize (retried with a 'B'
# suffix appended when the first parse yields None).
885 comparison_value = int(m.group('value'))
887 comparison_value = parse_filesize(m.group('value'))
888 if comparison_value is None:
889 comparison_value = parse_filesize(m.group('value') + 'B')
890 if comparison_value is None:
892 'Invalid value %r in format specification %r' % (
893 m.group('value'), filter_spec))
894 op = OPERATORS[m.group('op')]
# String filters on ext/acodec/vcodec/container/protocol.
901 str_operator_rex = re.compile(r'''(?x)
902 \s*(?P<key>ext|acodec|vcodec|container|protocol)
903 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
904 \s*(?P<value>[a-zA-Z0-9_-]+)
906 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
907 m = str_operator_rex.search(filter_spec)
909 comparison_value = m.group('value')
910 op = STR_OPERATORS[m.group('op')]
913 raise ValueError('Invalid filter specification %r' % filter_spec)
# For a format lacking the key, the 'none_inclusive' group itself is
# returned (the matched text or None), so it is truthy only when the
# spec contained the '?' marker.
916 actual_value = f.get(m.group('key'))
917 if actual_value is None:
918 return m.group('none_inclusive')
919 return op(actual_value, comparison_value)
922 def build_format_selector(self, format_spec):
923 def syntax_error(note, start):
925 'Invalid format specification: '
926 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
927 return SyntaxError(message)
929 PICKFIRST = 'PICKFIRST'
933 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
935 def _parse_filter(tokens):
937 for type, string, start, _, _ in tokens:
938 if type == tokenize.OP and string == ']':
939 return ''.join(filter_parts)
941 filter_parts.append(string)
943 def _remove_unused_ops(tokens):
944 # Remove operators that we don't use and join them with the surrounding strings
945 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
946 ALLOWED_OPS = ('/', '+', ',', '(', ')')
947 last_string, last_start, last_end, last_line = None, None, None, None
948 for type, string, start, end, line in tokens:
949 if type == tokenize.OP and string == '[':
951 yield tokenize.NAME, last_string, last_start, last_end, last_line
953 yield type, string, start, end, line
954 # everything inside brackets will be handled by _parse_filter
955 for type, string, start, end, line in tokens:
956 yield type, string, start, end, line
957 if type == tokenize.OP and string == ']':
959 elif type == tokenize.OP and string in ALLOWED_OPS:
961 yield tokenize.NAME, last_string, last_start, last_end, last_line
963 yield type, string, start, end, line
964 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
970 last_string += string
972 yield tokenize.NAME, last_string, last_start, last_end, last_line
974 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
# Parse `tokens` into FormatSelector nodes; completed nodes are appended to
# `selectors`.  The inside_* flags are set by the recursive calls below and
# make this level hand a token back (tokens.restore_last_token()) so that the
# enclosing merge / choice / group can handle it.
# Raises whatever syntax_error(...) builds when an operator is misplaced.
# NOTE(review): this listing skips several original lines (the embedded
# numbering has gaps), including the branch headers for the individual
# operators; the per-branch remarks below are inferred from the error
# messages and calls that remain visible - confirm against the full file.
976 current_selector = None
977 for type, string, start, _, _ in tokens:
978 # ENCODING is only defined in python 3.x
979 if type == getattr(tokenize, 'ENCODING', None):
981 elif type in [tokenize.NAME, tokenize.NUMBER]:
# A bare name or number becomes a SINGLE selector with no filters yet.
982 current_selector = FormatSelector(SINGLE, string, [])
983 elif type == tokenize.OP:
986 # ')' will be handled by the parentheses group
987 tokens.restore_last_token()
989 elif inside_merge and string in ['/', ',']:
990 tokens.restore_last_token()
992 elif inside_choice and string == ',':
993 tokens.restore_last_token()
# ',' branch: finishes the current selector and starts a new one.
996 if not current_selector:
997 raise syntax_error('"," must follow a format selector', start)
998 selectors.append(current_selector)
999 current_selector = None
# '/' branch: current selector becomes the first choice, the recursively
# parsed remainder the second.
1001 if not current_selector:
1002 raise syntax_error('"/" must follow a format selector', start)
1003 first_choice = current_selector
1004 second_choice = _parse_format_selection(tokens, inside_choice=True)
1005 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
# Filter branch (presumably '['): a filter without a preceding selector is
# applied to 'best'.
1007 if not current_selector:
1008 current_selector = FormatSelector(SINGLE, 'best', [])
1009 format_filter = _parse_filter(tokens)
1010 current_selector.filters.append(format_filter)
# '(' branch: parse the parenthesised part recursively into a GROUP.
1012 if current_selector:
1013 raise syntax_error('Unexpected "("', start)
1014 group = _parse_format_selection(tokens, inside_group=True)
1015 current_selector = FormatSelector(GROUP, group, [])
# '+' branch: the selector seen so far is the video part, the recursively
# parsed one the audio part.
1017 video_selector = current_selector
1018 audio_selector = _parse_format_selection(tokens, inside_merge=True)
1019 if not video_selector or not audio_selector:
1020 raise syntax_error('"+" must be between two format selectors', start)
1021 current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
1023 raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1024 elif type == tokenize.ENDMARKER:
# Do not lose a selector that was still being built when the tokens ran out.
1026 if current_selector:
1027 selectors.append(current_selector)
1030 def _build_selector_function(selector):
# Compile a parsed selector (a list of selectors or a single FormatSelector)
# into a callable taking the available formats.  Dispatches on selector.type
# (GROUP, PICKFIRST, SINGLE, MERGE) and finally wraps the result so that the
# selector's filters are applied first (see final_selector at the end).
# NOTE(review): this listing skips several original lines (gaps in the
# embedded numbering), so some loop headers / yields are not visible here.
1031 if isinstance(selector, list):
# A list of selectors: build one function per entry.
1032 fs = [_build_selector_function(s) for s in selector]
1034 def selector_function(formats):
1036 for format in f(formats):
1038 return selector_function
1039 elif selector.type == GROUP:
# A group simply delegates to the selector built from its content.
1040 selector_function = _build_selector_function(selector.selector)
1041 elif selector.type == PICKFIRST:
1042 fs = [_build_selector_function(s) for s in selector.selector]
1044 def selector_function(formats):
# presumably returns the result of the first alternative that selects
# anything - the guarding condition is not visible in this listing.
1046 picked_formats = list(f(formats))
1048 return picked_formats
1050 elif selector.type == SINGLE:
1051 format_spec = selector.selector
1053 def selector_function(formats):
1054 formats = list(formats)
1057 if format_spec == 'all':
1060 elif format_spec in ['best', 'worst', None]:
# Index 0 for 'worst', -1 otherwise - assumes `formats` is ordered from
# worst to best; TODO confirm against the code that sorts formats.
1061 format_idx = 0 if format_spec == 'worst' else -1
1062 audiovideo_formats = [
1064 if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
1065 if audiovideo_formats:
1066 yield audiovideo_formats[format_idx]
1067 # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
1068 elif (all(f.get('acodec') != 'none' for f in formats) or
1069 all(f.get('vcodec') != 'none' for f in formats)):
1070 yield formats[format_idx]
1071 elif format_spec == 'bestaudio':
# Audio-only formats are recognised by vcodec == 'none'.
1074 if f.get('vcodec') == 'none']
1076 yield audio_formats[-1]
1077 elif format_spec == 'worstaudio':
1080 if f.get('vcodec') == 'none']
1082 yield audio_formats[0]
1083 elif format_spec == 'bestvideo':
# Video-only formats are recognised by acodec == 'none'.
1086 if f.get('acodec') == 'none']
1088 yield video_formats[-1]
1089 elif format_spec == 'worstvideo':
1092 if f.get('acodec') == 'none']
1094 yield video_formats[0]
# Anything else is either one of the known extensions or a literal format_id.
1096 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
1097 if format_spec in extensions:
1098 filter_f = lambda f: f['ext'] == format_spec
1100 filter_f = lambda f: f['format_id'] == format_spec
1101 matches = list(filter(filter_f, formats))
1104 elif selector.type == MERGE:
1105 def _merge(formats_info):
# Combine a (video, audio) pair of format dicts into one merged description.
1106 format_1, format_2 = [f['format_id'] for f in formats_info]
1107 # The first format must contain the video and the
1109 if formats_info[0].get('vcodec') == 'none':
1110 self.report_error('The first format must '
1111 'contain the video, try using '
1112 '"-f %s+%s"' % (format_2, format_1))
1114 # Formats must be opposite (video+audio)
1115 if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
1117 'Both formats %s and %s are video-only, you must specify "-f video+audio"'
1118 % (format_1, format_2))
# Output extension: the video format's ext unless 'merge_output_format'
# is set in params.
1121 formats_info[0]['ext']
1122 if self.params.get('merge_output_format') is None
1123 else self.params['merge_output_format'])
# Video-related fields come from the first format, audio-related ones
# from the second.
1125 'requested_formats': formats_info,
1126 'format': '%s+%s' % (formats_info[0].get('format'),
1127 formats_info[1].get('format')),
1128 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1129 formats_info[1].get('format_id')),
1130 'width': formats_info[0].get('width'),
1131 'height': formats_info[0].get('height'),
1132 'resolution': formats_info[0].get('resolution'),
1133 'fps': formats_info[0].get('fps'),
1134 'vcodec': formats_info[0].get('vcodec'),
1135 'vbr': formats_info[0].get('vbr'),
1136 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1137 'acodec': formats_info[1].get('acodec'),
1138 'abr': formats_info[1].get('abr'),
1141 video_selector, audio_selector = map(_build_selector_function, selector.selector)
1143 def selector_function(formats):
# Materialise once: `formats` is consumed by both sub-selectors.
1144 formats = list(formats)
# Every combination of a selected video format with a selected audio format.
1145 for pair in itertools.product(video_selector(formats), audio_selector(formats)):
1148 filters = [self._build_format_filter(f) for f in selector.filters]
1150 def final_selector(formats):
# Narrow the candidate formats with every filter, then run the selector.
1151 for _filter in filters:
1152 formats = list(filter(_filter, formats))
1153 return selector_function(formats)
1154 return final_selector
# Tokenize the format specification with the tokenize machinery, working on
# its UTF-8 encoded bytes.
1156 stream = io.BytesIO(format_spec.encode('utf-8'))
1158 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1159 except tokenize.TokenError:
1160 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
# Iterator over the token list that keeps a position counter.
# NOTE(review): several lines of this class are not visible in this listing.
1162 class TokenIterator(object):
1163 def __init__(self, tokens):
1164 self.tokens = tokens
1171 if self.counter >= len(self.tokens):
1172 raise StopIteration()
1173 value = self.tokens[self.counter]
# Used by _parse_format_selection to hand a token back to its caller
# (body not visible here).
1179 def restore_last_token(self):
# Parse the tokens into selectors and compile them into the selector function.
1182 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1183 return _build_selector_function(parsed_selector)
def _calc_headers(self, info_dict):
    """Return the HTTP headers to send for the URL described by info_dict.

    The result is a copy of std_headers, updated with
    info_dict['http_headers'] when that is set, plus a 'Cookie' entry when
    _calc_cookies() returns a non-empty value.
    """
    res = std_headers.copy()

    add_headers = info_dict.get('http_headers')
    if add_headers:
        res.update(add_headers)

    cookies = self._calc_cookies(info_dict)
    if cookies:
        # Only set the header when there is actually something to send.
        res['Cookie'] = cookies

    return res
def _calc_cookies(self, info_dict):
    """Return the 'Cookie' header the cookie jar adds for info_dict['url']."""
    # Build a throw-away request for the URL, let the jar decorate it, then
    # read the header back.
    probe_request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(probe_request)
    cookie_header = probe_request.get_header('Cookie')
    return cookie_header
1203 def process_video_result(self, info_dict, download=True):
# Normalise a single-video extractor result in place (playlist fields,
# thumbnails, display_id, upload_date, subtitles, per-format fields), handle
# the listing options, then select the requested formats and pass each one to
# process_info().
# Raises ExtractorError when 'id', 'title' or a format 'url' is missing, when
# no formats are found, or when the requested format is not available.
# NOTE(review): this listing skips several original lines (gaps in the
# embedded numbering); in particular the role of `download` is not visible.
1204 assert info_dict.get('_type', 'video') == 'video'
1206 if 'id' not in info_dict:
1207 raise ExtractorError('Missing "id" field in extractor result')
1208 if 'title' not in info_dict:
1209 raise ExtractorError('Missing "title" field in extractor result')
1211 if 'playlist' not in info_dict:
1212 # It isn't part of a playlist
1213 info_dict['playlist'] = None
1214 info_dict['playlist_index'] = None
# Thumbnails: fall back to the single 'thumbnail' URL, then sort so that the
# last entry is the preferred one (it becomes the default 'thumbnail' below).
1216 thumbnails = info_dict.get('thumbnails')
1217 if thumbnails is None:
1218 thumbnail = info_dict.get('thumbnail')
1220 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1222 thumbnails.sort(key=lambda t: (
1223 t.get('preference'), t.get('width'), t.get('height'),
1224 t.get('id'), t.get('url')))
1225 for i, t in enumerate(thumbnails):
1226 if t.get('width') and t.get('height'):
1227 t['resolution'] = '%dx%d' % (t['width'], t['height'])
1228 if t.get('id') is None:
1231 if thumbnails and 'thumbnail' not in info_dict:
1232 info_dict['thumbnail'] = thumbnails[-1]['url']
1234 if 'display_id' not in info_dict and 'id' in info_dict:
1235 info_dict['display_id'] = info_dict['id']
# Derive upload_date (YYYYMMDD, UTC) from the timestamp when only the latter
# is given.
1237 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1238 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1239 # see http://bugs.python.org/issue1646728)
1241 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1242 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1243 except (ValueError, OverflowError, OSError):
# Make sure every subtitle entry has an 'ext', guessed from its URL.
1246 subtitles = info_dict.get('subtitles')
1248 for _, subtitle in subtitles.items():
1249 for subtitle_format in subtitle:
1250 if 'ext' not in subtitle_format:
1251 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1253 if self.params.get('listsubtitles', False):
1254 if 'automatic_captions' in info_dict:
1255 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1256 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1258 info_dict['requested_subtitles'] = self.process_subtitles(
1259 info_dict['id'], subtitles,
1260 info_dict.get('automatic_captions'))
1262 # We now pick which formats have to be downloaded
1263 if info_dict.get('formats') is None:
1264 # There's only one format available
1265 formats = [info_dict]
1267 formats = info_dict['formats']
1270 raise ExtractorError('No video formats found!')
1274 # We check that all the formats have the format and format_id fields
1275 for i, format in enumerate(formats):
1276 if 'url' not in format:
1277 raise ExtractorError('Missing "url" key in result (index %d)' % i)
# A missing format_id defaults to the format's index; formats are grouped by
# id so that duplicates can be detected below.
1279 if format.get('format_id') is None:
1280 format['format_id'] = compat_str(i)
1281 format_id = format['format_id']
1282 if format_id not in formats_dict:
1283 formats_dict[format_id] = []
1284 formats_dict[format_id].append(format)
1286 # Make sure all formats have unique format_id
1287 for format_id, ambiguous_formats in formats_dict.items():
1288 if len(ambiguous_formats) > 1:
1289 for i, format in enumerate(ambiguous_formats):
1290 format['format_id'] = '%s-%d' % (format_id, i)
1292 for i, format in enumerate(formats):
# Default human-readable description: "<id> - <resolution> (<note>)".
1293 if format.get('format') is None:
1294 format['format'] = '{id} - {res}{note}'.format(
1295 id=format['format_id'],
1296 res=self.format_resolution(format),
1297 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1299 # Automatically determine file extension if missing
1300 if 'ext' not in format:
1301 format['ext'] = determine_ext(format['url']).lower()
1302 # Add HTTP headers, so that external programs can use them from the
# Headers are computed from the video-level info overlaid with this format.
1304 full_format_info = info_dict.copy()
1305 full_format_info.update(format)
1306 format['http_headers'] = self._calc_headers(full_format_info)
1308 # TODO Central sorting goes here
1310 if formats[0] is not info_dict:
1311 # only set the 'formats' fields if the original info_dict list them
1312 # otherwise we end up with a circular reference, the first (and unique)
1313 # element in the 'formats' field in info_dict is info_dict itself,
1314 # wich can't be exported to json
1315 info_dict['formats'] = formats
1316 if self.params.get('listformats'):
1317 self.list_formats(info_dict)
1319 if self.params.get('list_thumbnails'):
1320 self.list_thumbnails(info_dict)
# No explicit format requested: build the default specification, which tries
# 'bestvideo+bestaudio' before 'best' when the conditions below hold.
1323 req_format = self.params.get('format')
1324 if req_format is None:
1325 req_format_list = []
1326 if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1327 info_dict['extractor'] in ['youtube', 'ted'] and
1328 not info_dict.get('is_live')):
1329 merger = FFmpegMergerPP(self)
1330 if merger.available and merger.can_merge():
1331 req_format_list.append('bestvideo+bestaudio')
1332 req_format_list.append('best')
1333 req_format = '/'.join(req_format_list)
1334 format_selector = self.build_format_selector(req_format)
1335 formats_to_download = list(format_selector(formats))
1336 if not formats_to_download:
1337 raise ExtractorError('requested format not available',
1341 if len(formats_to_download) > 1:
1342 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
# Each selected format is processed with its own copy of the video info.
1343 for format in formats_to_download:
1344 new_info = dict(info_dict)
1345 new_info.update(format)
1346 self.process_info(new_info)
1347 # We update the info dict with the best quality format (backwards compatibility)
1348 info_dict.update(formats_to_download[-1])
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format.

    video_id: id used in warning messages.
    normal_subtitles / automatic_captions: dicts mapping a language code to
        a list of subtitle format dicts (each with an 'ext' key); either may
        be None.  Regular subtitles take precedence over automatic captions
        of the same language.

    Returns None when nothing is requested or available, otherwise a dict
    mapping each selected language to the chosen format dict.  The last
    entry of a language's list is used for 'best' and as the fallback when
    none of the preferred extensions matches.
    """
    available_subs = {}
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            if lang not in available_subs:
                available_subs[lang] = cap_info

    # available_subs is only filled when 'writesubtitles' or
    # 'writeautomaticsub' is set, so this also covers "nothing requested".
    if not available_subs:
        return None

    if self.params.get('allsubtitles', False):
        requested_langs = available_subs.keys()
    elif self.params.get('subtitleslangs', False):
        requested_langs = self.params.get('subtitleslangs')
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        requested_langs = [list(available_subs.keys())[0]]

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # An empty list is treated like a missing language instead of
        # failing on formats[-1] below.
        if not formats:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                chosen = formats[-1]
                break
            matches = [fmt for fmt in formats if fmt['ext'] == ext]
            if matches:
                chosen = matches[-1]
                break
        else:
            # No preferred extension matched: fall back and tell the user.
            chosen = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        subs[lang] = chosen
    return subs
1400 def process_info(self, info_dict):
1401 """Process a single resolved IE result."""
# Overall flow as far as visible here: enforce max_downloads, normalise the
# title, apply the match filter, compute the output filename, do the forced
# printings, write the side files (description, annotations, subtitles, info
# JSON, thumbnails), download the media, queue fixup post-processors, run the
# post-processing and record the video in the download archive.
# NOTE(review): this listing skips many original lines (gaps in the embedded
# numbering: try/else/return lines and some assignments), so the exact
# control flow between the visible statements cannot be read from it.
1403 assert info_dict.get('_type', 'video') == 'video'
1405 max_downloads = self.params.get('max_downloads')
1406 if max_downloads is not None:
1407 if self._num_downloads >= int(max_downloads):
1408 raise MaxDownloadsReached()
# Keep the untruncated title in 'fulltitle'; 'title' is capped at 200 chars.
1410 info_dict['fulltitle'] = info_dict['title']
1411 if len(info_dict['title']) > 200:
1412 info_dict['title'] = info_dict['title'][:197] + '...'
1414 if 'format' not in info_dict:
1415 info_dict['format'] = info_dict['ext']
# A non-None reason from _match_entry is reported to the user.
1417 reason = self._match_entry(info_dict, incomplete=False)
1418 if reason is not None:
1419 self.to_screen('[download] ' + reason)
1422 self._num_downloads += 1
1424 info_dict['_filename'] = filename = self.prepare_filename(info_dict)
# Forced printings: each 'force*' option writes one field to stdout.
1427 if self.params.get('forcetitle', False):
1428 self.to_stdout(info_dict['fulltitle'])
1429 if self.params.get('forceid', False):
1430 self.to_stdout(info_dict['id'])
1431 if self.params.get('forceurl', False):
1432 if info_dict.get('requested_formats') is not None:
1433 for f in info_dict['requested_formats']:
1434 self.to_stdout(f['url'] + f.get('play_path', ''))
1436 # For RTMP URLs, also include the playpath
1437 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1438 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1439 self.to_stdout(info_dict['thumbnail'])
1440 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1441 self.to_stdout(info_dict['description'])
1442 if self.params.get('forcefilename', False) and filename is not None:
1443 self.to_stdout(filename)
1444 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1445 self.to_stdout(formatSeconds(info_dict['duration']))
1446 if self.params.get('forceformat', False):
1447 self.to_stdout(info_dict['format'])
1448 if self.params.get('forcejson', False):
1449 self.to_stdout(json.dumps(info_dict))
1451 # Do nothing else if in simulate mode
1452 if self.params.get('simulate', False):
1455 if filename is None:
# Make sure the target directory exists before anything is written.
1459 dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1460 if dn and not os.path.exists(dn):
1462 except (OSError, IOError) as err:
1463 self.report_error('unable to create directory ' + error_to_str(err))
# --- description file ('writedescription') ---
1466 if self.params.get('writedescription', False):
1467 descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1468 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1469 self.to_screen('[info] Video description is already present')
1470 elif info_dict.get('description') is None:
1471 self.report_warning('There\'s no description to write.')
1474 self.to_screen('[info] Writing video description to: ' + descfn)
1475 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1476 descfile.write(info_dict['description'])
1477 except (OSError, IOError):
1478 self.report_error('Cannot write description file ' + descfn)
# --- annotations file ('writeannotations') ---
1481 if self.params.get('writeannotations', False):
1482 annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1483 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1484 self.to_screen('[info] Video annotations are already present')
1487 self.to_screen('[info] Writing video annotations to: ' + annofn)
1488 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1489 annofile.write(info_dict['annotations'])
# A missing or non-string 'annotations' entry only produces a warning.
1490 except (KeyError, TypeError):
1491 self.report_warning('There are no annotations to write.')
1492 except (OSError, IOError):
1493 self.report_error('Cannot write annotations file: ' + annofn)
# --- subtitles ('writesubtitles' / 'writeautomaticsub') ---
1496 subtitles_are_requested = any([self.params.get('writesubtitles', False),
1497 self.params.get('writeautomaticsub')])
1499 if subtitles_are_requested and info_dict.get('requested_subtitles'):
1500 # subtitles download errors are already managed as troubles in relevant IE
1501 # that way it will silently go on when used with unsupporting IE
1502 subtitles = info_dict['requested_subtitles']
1503 ie = self.get_info_extractor(info_dict['extractor_key'])
1504 for sub_lang, sub_info in subtitles.items():
1505 sub_format = sub_info['ext']
# Inline subtitle data is used when present; otherwise the extractor's
# _download_webpage is used to fetch it from sub_info['url'].
1506 if sub_info.get('data') is not None:
1507 sub_data = sub_info['data']
1510 sub_data = ie._download_webpage(
1511 sub_info['url'], info_dict['id'], note=False)
1512 except ExtractorError as err:
1513 self.report_warning('Unable to download subtitle for "%s": %s' %
1514 (sub_lang, compat_str(err.cause)))
1517 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1518 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1519 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1521 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1522 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1523 subfile.write(sub_data)
1524 except (OSError, IOError):
1525 self.report_error('Cannot write subtitles file ' + sub_filename)
# --- info JSON ('writeinfojson') ---
1528 if self.params.get('writeinfojson', False):
1529 infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1530 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1531 self.to_screen('[info] Video description metadata is already present')
1533 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
# The 'requested_*' entries are stripped before the dict is serialised.
1535 write_json_file(self.filter_requested_info(info_dict), infofn)
1536 except (OSError, IOError):
1537 self.report_error('Cannot write metadata to JSON file ' + infofn)
1540 self._write_thumbnails(info_dict, filename)
# --- actual download ---
1542 if not self.params.get('skip_download', False):
# Body of the local download helper (its def line is not visible here): pick
# a downloader for `info`, attach the progress hooks and download to `name`.
1545 fd = get_suitable_downloader(info, self.params)(self, self.params)
1546 for ph in self._progress_hooks:
1547 fd.add_progress_hook(ph)
1548 if self.params.get('verbose'):
1549 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1550 return fd.download(name, info)
# Several requested formats: download each one separately and let the merger
# post-processor combine them.
1552 if info_dict.get('requested_formats') is not None:
1555 merger = FFmpegMergerPP(self)
1556 if not merger.available:
1558 self.report_warning('You have requested multiple '
1559 'formats but ffmpeg or avconv are not installed.'
1560 ' The formats won\'t be merged.')
1562 postprocessors = [merger]
1564 def compatible_formats(formats):
1565 video, audio = formats
# NOTE(review): the two names below are assigned crosswise (video_ext
# receives the audio format's ext and vice versa).  The visible membership
# test treats both symmetrically, so its result is unaffected - confirm and
# untangle.
1567 video_ext, audio_ext = audio.get('ext'), video.get('ext')
1568 if video_ext and audio_ext:
1570 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1573 for exts in COMPATIBLE_EXTS:
1574 if video_ext in exts and audio_ext in exts:
1576 # TODO: Check acodec/vcodec
1579 filename_real_ext = os.path.splitext(filename)[1][1:]
1581 os.path.splitext(filename)[0]
1582 if filename_real_ext == info_dict['ext']
1584 requested_formats = info_dict['requested_formats']
# Without an explicit merge_output_format, incompatible formats are merged
# into mkv.
1585 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1586 info_dict['ext'] = 'mkv'
1587 self.report_warning(
1588 'Requested formats are incompatible for merge and will be merged into mkv.')
1589 # Ensure filename always has a correct extension for successful merge
1590 filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1591 if os.path.exists(encodeFilename(filename)):
1593 '[download] %s has already been downloaded and '
1594 'merged' % filename)
1596 for f in requested_formats:
1597 new_info = dict(info_dict)
# Each part goes to its own file, named with an 'f<format_id>' extension
# prefix.
1599 fname = self.prepare_filename(new_info)
1600 fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1601 downloaded.append(fname)
1602 partial_success = dl(fname, new_info)
1603 success = success and partial_success
1604 info_dict['__postprocessors'] = postprocessors
1605 info_dict['__files_to_merge'] = downloaded
1607 # Just a single file
1608 success = dl(filename, info_dict)
1609 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1610 self.report_error('unable to download video data: %s' % str(err))
1612 except (OSError, IOError) as err:
1613 raise UnavailableVideoError(err)
1614 except (ContentTooShortError, ) as err:
1615 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
# --- fixups: 'fixup' defaults to 'detect_or_warn' ---
1620 fixup_policy = self.params.get('fixup')
1621 if fixup_policy is None:
1622 fixup_policy = 'detect_or_warn'
# Non-square pixels: warn, or queue the stretched-ratio fixer if available.
1624 stretched_ratio = info_dict.get('stretched_ratio')
1625 if stretched_ratio is not None and stretched_ratio != 1:
1626 if fixup_policy == 'warn':
1627 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1628 info_dict['id'], stretched_ratio))
1629 elif fixup_policy == 'detect_or_warn':
1630 stretched_pp = FFmpegFixupStretchedPP(self)
1631 if stretched_pp.available:
1632 info_dict.setdefault('__postprocessors', [])
1633 info_dict['__postprocessors'].append(stretched_pp)
1635 self.report_warning(
1636 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1637 info_dict['id'], stretched_ratio))
1639 assert fixup_policy in ('ignore', 'never')
# DASH m4a downloaded on its own: warn, or queue the m4a fixer if available.
1641 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1642 if fixup_policy == 'warn':
1643 self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1645 elif fixup_policy == 'detect_or_warn':
1646 fixup_pp = FFmpegFixupM4aPP(self)
1647 if fixup_pp.available:
1648 info_dict.setdefault('__postprocessors', [])
1649 info_dict['__postprocessors'].append(fixup_pp)
1651 self.report_warning(
1652 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1655 assert fixup_policy in ('ignore', 'never')
# --- post-processing and archive bookkeeping ---
1658 self.post_process(filename, info_dict)
1659 except (PostProcessingError) as err:
1660 self.report_error('postprocessing: %s' % str(err))
1662 self.record_download_archive(info_dict)
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written to one fixed
    output template (no '%' field) and 'max_downloads' is not 1.
    MaxDownloadsReached is reported and re-raised.  Returns
    self._download_retcode.
    """
    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    if (len(url_list) > 1 and
            '%' not in outtmpl and
            self.params.get('max_downloads') != 1):
        raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            res = self.extract_info(
                url, force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise
        else:
            # Only dump the result when extraction went through.
            if self.params.get('dump_single_json', False):
                self.to_stdout(json.dumps(res))

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Process a previously written info JSON file instead of a URL.

    The file is read as UTF-8 JSON, passed through filter_requested_info()
    and handed to process_ie_result(..., download=True).  If that raises
    DownloadError and the info contains a 'webpage_url', the download is
    retried from that URL via download(); without one the error is
    re-raised.  Returns self._download_retcode (or download()'s result on
    the retry path).
    """
    with contextlib.closing(fileinput.FileInput(
            [info_filename], mode='r',
            openhook=fileinput.hook_encoded('utf-8'))) as f:
        # FileInput doesn't have a read method, we can't call json.load
        info = self.filter_requested_info(json.loads('\n'.join(f)))
    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        webpage_url = info.get('webpage_url')
        if webpage_url is not None:
            self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
            return self.download([webpage_url])
        else:
            raise
    return self._download_retcode
def filter_requested_info(info_dict):
    """Return a copy of info_dict without the per-run 'requested_*' entries.

    Takes no self: it is called both as self.filter_requested_info(...) in
    this file, so it presumably has to stay bound as a static method on the
    class - verify the decorator line above this block.
    """
    excluded_keys = ('requested_formats', 'requested_subtitles')
    return dict(
        (k, v) for k, v in info_dict.items()
        if k not in excluded_keys)
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    The chain is the per-video post-processors stored under
    ie_info['__postprocessors'] (if any) followed by the globally registered
    ones in self._pps.  Each post-processor receives the info dict returned
    by the previous one; ie_info itself is not modified.  Files a
    post-processor reports as obsolete are removed unless 'keepvideo' is
    set.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    pps_chain = []
    if ie_info.get('__postprocessors') is not None:
        pps_chain.extend(ie_info['__postprocessors'])
    pps_chain.extend(self._pps)
    for pp in pps_chain:
        # Reset for every post-processor so that a failing run() never
        # deletes the files reported by the previous one.
        files_to_delete = []
        try:
            files_to_delete, info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)
        if files_to_delete and not self.params.get('keepvideo', False):
            for old_filename in files_to_delete:
                self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
                try:
                    os.remove(encodeFilename(old_filename))
                except (IOError, OSError):
                    self.report_warning('Unable to remove downloaded original file')
1733 def _make_archive_id(self, info_dict):
1734 # Future-proof against any change in case
1735 # and backwards compatibility with prior versions
1736 extractor = info_dict.get('extractor_key')
1737 if extractor is None:
1738 if 'id' in info_dict:
1739 extractor = info_dict.get('ie_key') # key in a playlist
1740 if extractor is None:
1741 return None # Incomplete video information
1742 return extractor.lower() + ' ' + info_dict['id']
def in_download_archive(self, info_dict):
    """Return True if the video is already listed in the download archive.

    Returns False when no 'download_archive' file is configured, when the
    archive id cannot be built from info_dict, or when the archive file does
    not exist yet.  Other IOErrors while reading the archive propagate.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return False

    vid_id = self._make_archive_id(info_dict)
    if vid_id is None:
        return False  # Incomplete video information

    try:
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            for line in archive_file:
                if line.strip() == vid_id:
                    return True
    except IOError as ioe:
        # A missing archive file just means nothing was recorded so far.
        if ioe.errno != errno.ENOENT:
            raise
    return False
def record_download_archive(self, info_dict):
    """Append the video's archive id to the download archive file.

    Does nothing when no 'download_archive' file is configured.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return
    vid_id = self._make_archive_id(info_dict)
    # By the time a download is recorded the info must be complete.
    assert vid_id
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + '\n')
def format_resolution(format, default='unknown'):
    """Return a short human-readable resolution string for a format dict.

    'audio only' for formats without video (vcodec == 'none'), the explicit
    'resolution' value when given, otherwise '<width>x<height>', '<height>p'
    or '<width>x?' depending on which dimensions are known, and `default`
    when none is.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']
    if format.get('height') is not None:
        if format.get('width') is not None:
            res = '%sx%s' % (format['width'], format['height'])
        else:
            res = '%sp' % format['height']
    elif format.get('width') is not None:
        # Width is the first component, the unknown height the second.
        res = '%dx?' % format['width']
    else:
        res = default
    return res
1789 def _format_note(self, fdict):
1791 if fdict.get('ext') in ['f4f', 'f4m']:
1792 res += '(unsupported) '
1793 if fdict.get('format_note') is not None:
1794 res += fdict['format_note'] + ' '
1795 if fdict.get('tbr') is not None:
1796 res += '%4dk ' % fdict['tbr']
1797 if fdict.get('container') is not None:
1800 res += '%s container' % fdict['container']
1801 if (fdict.get('vcodec') is not None and
1802 fdict.get('vcodec') != 'none'):
1805 res += fdict['vcodec']
1806 if fdict.get('vbr') is not None:
1808 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1810 if fdict.get('vbr') is not None:
1811 res += '%4dk' % fdict['vbr']
1812 if fdict.get('fps') is not None:
1813 res += ', %sfps' % fdict['fps']
1814 if fdict.get('acodec') is not None:
1817 if fdict['acodec'] == 'none':
1820 res += '%-5s' % fdict['acodec']
1821 elif fdict.get('abr') is not None:
1825 if fdict.get('abr') is not None:
1826 res += '@%3dk' % fdict['abr']
1827 if fdict.get('asr') is not None:
1828 res += ' (%5dHz)' % fdict['asr']
1829 if fdict.get('filesize') is not None:
1832 res += format_bytes(fdict['filesize'])
1833 elif fdict.get('filesize_approx') is not None:
1836 res += '~' + format_bytes(fdict['filesize_approx'])
def list_formats(self, info_dict):
    """Print a table of the available formats for info_dict to the screen.

    Formats with a 'preference' below -1000 are left out of the table.  When
    more than one format exists, the last listed row is tagged '(best)'.
    """
    formats = info_dict.get('formats', [info_dict])
    table = [
        [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
        for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]
    # `table` can be empty even with several formats (all filtered out by
    # their preference), so make sure there is a row to tag.
    if len(formats) > 1 and table:
        table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'

    header_line = ['format code', 'extension', 'resolution', 'note']
    self.to_screen(
        '[info] Available formats for %s:\n%s' %
        (info_dict['id'], render_table(header_line, table)))
def list_thumbnails(self, info_dict):
    """Print a table of the thumbnails known for info_dict to the screen.

    Falls back to the single 'thumbnail' URL when there is no 'thumbnails'
    list and prints a short notice when neither is available.
    """
    thumbnails = info_dict.get('thumbnails')
    if not thumbnails:
        tn_url = info_dict.get('thumbnail')
        if tn_url:
            thumbnails = [{'id': '0', 'url': tn_url}]
        else:
            self.to_screen(
                '[info] No thumbnails present for %s' % info_dict['id'])
            return

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])
    self.to_screen(render_table(
        ['ID', 'width', 'height', 'URL'],
        [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a table of the subtitle languages and their formats.

    subtitles: dict mapping a language to a list of format dicts with an
        'ext' key; an empty or missing dict prints a short notice instead.
    name: label used in the messages (e.g. 'automatic captions').
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return
    self.to_screen(
        'Available %s for %s:' % (name, video_id))
    # Extensions are listed in reverse of their stored order.
    self.to_screen(render_table(
        ['Language', 'formats'],
        [[lang, ', '.join(f['ext'] for f in reversed(formats))]
            for lang, formats in subtitles.items()]))
def urlopen(self, req):
    """ Start an HTTP download """
    # Plain URL strings are wrapped into a request object first; anything
    # else is passed to the opener as it is.
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)
1887 def print_debug_header(self):
# Write the '[debug] ...' diagnostic header: encodings, program version, git
# revision (when run from a checkout), Python/platform version, external
# executable versions and the proxy map; with 'call_home' also the public IP
# address and an outdated-version warning.
# NOTE(review): this listing skips several original lines (gaps in the
# embedded numbering), e.g. the early return and the try/except around the
# git call.
1888 if not self.params.get('verbose'):
1891 if type('') is not compat_str:
1892 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1893 self.report_warning(
1894 'Your Python is broken! Update to a newer and supported version')
# sys.stdout may be replaced by an object without an 'encoding' attribute;
# report its type name in that case.
1896 stdout_encoding = getattr(
1897 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1899 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1900 locale.getpreferredencoding(),
1901 sys.getfilesystemencoding(),
1903 self.get_encoding()))
1904 write_string(encoding_str, encoding=None)
1906 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
# Ask git for the short HEAD hash, running it in this module's directory.
1908 sp = subprocess.Popen(
1909 ['git', 'rev-parse', '--short', 'HEAD'],
1910 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1911 cwd=os.path.dirname(os.path.abspath(__file__)))
1912 out, err = sp.communicate()
1913 out = out.decode().strip()
# Only print the output if it starts like a hexadecimal revision.
1914 if re.match('[0-9a-f]+', out):
1915 self._write_string('[debug] Git HEAD: ' + out + '\n')
1921 self._write_string('[debug] Python version %s - %s\n' % (
1922 platform.python_version(), platform_name()))
# Versions of the external helper programs, sorted by executable name.
1924 exe_versions = FFmpegPostProcessor.get_versions(self)
1925 exe_versions['rtmpdump'] = rtmpdump_version()
1926 exe_str = ', '.join(
1928 for exe, v in sorted(exe_versions.items())
1933 self._write_string('[debug] exe versions: %s\n' % exe_str)
# Collect the proxies of every opener handler that has a 'proxies' attribute.
1936 for handler in self._opener.handlers:
1937 if hasattr(handler, 'proxies'):
1938 proxy_map.update(handler.proxies)
1939 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
# 'call_home' performs two network requests to yt-dl.org.
1941 if self.params.get('call_home', False):
1942 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1943 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1944 latest_version = self.urlopen(
1945 'https://yt-dl.org/latest/version').read().decode('utf-8')
1946 if version_tuple(latest_version) > version_tuple(__version__):
1947 self.report_warning(
1948 'You are using an outdated version (newest version: %s)! '
1949 'See https://yt-dl.org/update if you need help updating.' %
1952 def _setup_opener(self):
# Build the URL opener used for all HTTP(S) requests and store it in
# self._opener; also sets self._socket_timeout and self.cookiejar.
# NOTE(review): this listing skips several original lines (gaps in the
# embedded numbering), including some else-branches.
# Socket timeout: 'socket_timeout' from params, 600 seconds when unset.
1953 timeout_val = self.params.get('socket_timeout')
1954 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1956 opts_cookiefile = self.params.get('cookiefile')
1957 opts_proxy = self.params.get('proxy')
# Cookies: an in-memory jar without 'cookiefile', otherwise a Mozilla-format
# jar that is loaded when the file is readable.
1959 if opts_cookiefile is None:
1960 self.cookiejar = compat_cookiejar.CookieJar()
1962 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1964 if os.access(opts_cookiefile, os.R_OK):
1965 self.cookiejar.load()
1967 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
# Proxies: an explicit 'proxy' option is used for both http and https (the
# handling of the empty string is not visible here); without the option the
# proxies reported by getproxies() are used.
1968 if opts_proxy is not None:
1969 if opts_proxy == '':
1972 proxies = {'http': opts_proxy, 'https': opts_proxy}
1974 proxies = compat_urllib_request.getproxies()
1975 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1976 if 'http' in proxies and 'https' not in proxies:
1977 proxies['https'] = proxies['http']
1978 proxy_handler = PerRequestProxyHandler(proxies)
# 'debug_printtraffic' turns on debuglevel 1 for the HTTP(S) handlers.
1980 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1981 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1982 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1983 data_handler = compat_urllib_request_DataHandler()
1984 opener = compat_urllib_request.build_opener(
1985 proxy_handler, https_handler, cookie_processor, ydlh, data_handler)
1987 # Delete the default user-agent header, which would otherwise apply in
1988 # cases where our custom HTTP handler doesn't come into play
1989 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1990 opener.addheaders = []
1991 self._opener = opener
def encode(self, s):
    """Return `s` as bytes.

    Bytes are passed through untouched; anything else is encoded with the
    encoding returned by self.get_encoding().  A UnicodeEncodeError is
    re-raised after a configuration hint has been appended to its reason.
    """
    if not isinstance(s, bytes):
        try:
            s = s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    # Either already encoded on entry, or encoded just above
    return s
def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when it is None."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
2009 def _write_thumbnails(self, info_dict, filename):
2010 if self.params.get('writethumbnail', False):
2011 thumbnails = info_dict.get('thumbnails')
2013 thumbnails = [thumbnails[-1]]
2014 elif self.params.get('write_all_thumbnails', False):
2015 thumbnails = info_dict.get('thumbnails')
2020 # No thumbnails present, so return immediately
2023 for t in thumbnails:
2024 thumb_ext = determine_ext(t['url'], 'jpg')
2025 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2026 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2027 t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2029 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2030 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2031 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2033 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2034 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2036 uf = self.urlopen(t['url'])
2037 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2038 shutil.copyfileobj(uf, thumbf)
2039 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2040 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2041 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2042 self.report_warning('Unable to download thumbnail "%s": %s' %
2043 (t['url'], error_to_str(err)))