2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
33 compat_urllib_request,
59 UnavailableVideoError,
68 from .cache import Cache
69 from .extractor import get_info_extractor, gen_extractors
70 from .downloader import get_suitable_downloader
71 from .downloader.rtmp import rtmpdump_version
72 from .postprocessor import (
77 from .version import __version__
80 class YoutubeDL(object):
83 YoutubeDL objects are the ones responsible of downloading the
84 actual video file and writing it to disk if the user has requested
85 it, among some other tasks. In most cases there should be one per
86 program. As, given a video URL, the downloader doesn't know how to
87 extract all the needed information, a task that InfoExtractors do, it
88 has to pass the URL to one of them.
90 For this, YoutubeDL objects have a method that allows
91 InfoExtractors to be registered in a given order. When it is passed
92 a URL, the YoutubeDL object handles it to the first InfoExtractor it
93 finds that reports being able to handle it. The InfoExtractor extracts
94 all the information about the video or videos the URL refers to, and
95 YoutubeDL processes the extracted information, possibly using a File
96 Downloader to download the video.
98 YoutubeDL objects accept a lot of parameters. In order not to saturate
99 the object constructor with arguments, it receives a dictionary of
100 options instead. These options are available through the params
101 attribute for the InfoExtractors to use. The YoutubeDL also
102 registers itself as the downloader in charge for the InfoExtractors
103 that are added to it, so this is a "mutual registration".
107 username: Username for authentication purposes.
108 password: Password for authentication purposes.
109 videopassword: Password for accessing a video.
110 usenetrc: Use netrc for authentication instead.
111 verbose: Print additional info to stdout.
112 quiet: Do not print messages to stdout.
113 no_warnings: Do not print out anything for warnings.
114 forceurl: Force printing final URL.
115 forcetitle: Force printing title.
116 forceid: Force printing ID.
117 forcethumbnail: Force printing thumbnail URL.
118 forcedescription: Force printing description.
119 forcefilename: Force printing final filename.
120 forceduration: Force printing duration.
121 forcejson: Force printing info_dict as JSON.
122 dump_single_json: Force printing the info_dict of the whole playlist
123 (or video) as a single JSON line.
124 simulate: Do not download the video files.
125 format: Video format code. See options.py for more information.
126 format_limit: Highest quality format to try.
127 outtmpl: Template for output names.
128 restrictfilenames: Do not allow "&" and spaces in file names
129 ignoreerrors: Do not stop on download errors.
130 nooverwrites: Prevent overwriting files.
131 playliststart: Playlist item to start at.
132 playlistend: Playlist item to end at.
133 playlistreverse: Download playlist items in reverse order.
134 matchtitle: Download only matching titles.
135 rejecttitle: Reject downloads for matching titles.
136 logger: Log messages to a logging.Logger instance.
137 logtostderr: Log messages to stderr instead of stdout.
138 writedescription: Write the video description to a .description file
139 writeinfojson: Write the video description to a .info.json file
140 writeannotations: Write the video annotations to a .annotations.xml file
141 writethumbnail: Write the thumbnail image to a file
142 writesubtitles: Write the video subtitles to a file
143 writeautomaticsub: Write the automatic subtitles to a file
144 allsubtitles: Downloads all the subtitles of the video
145 (requires writesubtitles or writeautomaticsub)
146 listsubtitles: Lists all available subtitles for the video
147 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
148 subtitleslangs: List of languages of the subtitles to download
149 keepvideo: Keep the video file after post-processing
150 daterange: A DateRange object, download only if the upload_date is in the range.
151 skip_download: Skip the actual download of the video file
152 cachedir: Location of the cache files in the filesystem.
153 False to disable filesystem cache.
154 noplaylist: Download single video instead of a playlist if in doubt.
155 age_limit: An integer representing the user's age in years.
156 Unsuitable videos for the given age are skipped.
157 min_views: An integer representing the minimum view count the video
158 must have in order to not be skipped.
159 Videos without view count information are always
160 downloaded. None for no limit.
161 max_views: An integer representing the maximum view count.
162 Videos that are more popular than that are not
164 Videos without view count information are always
165 downloaded. None for no limit.
166 download_archive: File name of a file where all downloads are recorded.
167 Videos already present in the file are not downloaded
169 cookiefile: File name where cookies should be read from and dumped to.
170 nocheckcertificate:Do not verify SSL certificates
171 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
172 At the moment, this is only supported by YouTube.
173 proxy: URL of the proxy server to use
174 socket_timeout: Time to wait for unresponsive hosts, in seconds
175 bidi_workaround: Work around buggy terminals without bidirectional text
176 support, using fribidi
177 debug_printtraffic:Print out sent and received HTTP traffic
178 include_ads: Download ads as well
179 default_search: Prepend this string if an input url is not valid.
180 'auto' for elaborate guessing
181 encoding: Use this encoding instead of the system-specified.
182 extract_flat: Do not resolve URLs, return the immediate result.
183 Pass in 'in_playlist' to only show this behavior for
185 postprocessors: A list of dictionaries, each with an entry
186 * key: The name of the postprocessor. See
187 youtube_dl/postprocessor/__init__.py for a list.
188 as well as any further keyword arguments for the
190 progress_hooks: A list of functions that get called on download
191 progress, with a dictionary with the entries
192 * filename: The final filename
193 * status: One of "downloading" and "finished"
195 The dict may also have some of the following entries:
197 * downloaded_bytes: Bytes on disk
198 * total_bytes: Size of the whole file, None if unknown
199 * tmpfilename: The filename we're currently writing to
200 * eta: The estimated time in seconds, None if unknown
201 * speed: The download speed in bytes/second, None if
204 Progress hooks are guaranteed to be called at least once
205 (with status "finished") if the download is successful.
208 The following parameters are not used by YoutubeDL itself, they are used by
210 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
211 noresizebuffer, retries, continuedl, noprogress, consoletitle
213 The following options are used by the post processors:
214 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
215 otherwise prefer avconv.
216 exec_cmd: Arbitrary command to run after downloading
222 _download_retcode = None
223 _num_downloads = None
226 def __init__(self, params=None, auto_init=True):
227 """Create a FileDownloader object with the given options."""
231 self._ies_instances = {}
233 self._progress_hooks = []
234 self._download_retcode = 0
235 self._num_downloads = 0
236 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
237 self._err_file = sys.stderr
239 self.cache = Cache(self)
241 if params.get('bidi_workaround', False):
244 master, slave = pty.openpty()
245 width = get_term_width()
249 width_args = ['-w', str(width)]
251 stdin=subprocess.PIPE,
253 stderr=self._err_file)
255 self._output_process = subprocess.Popen(
256 ['bidiv'] + width_args, **sp_kwargs
259 self._output_process = subprocess.Popen(
260 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
261 self._output_channel = os.fdopen(master, 'rb')
262 except OSError as ose:
264 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
268 if (sys.version_info >= (3,) and sys.platform != 'win32' and
269 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
270 and not params.get('restrictfilenames', False)):
271 # On Python 3, the Unicode filesystem API will throw errors (#1474)
273 'Assuming --restrict-filenames since file system encoding '
274 'cannot encode all characters. '
275 'Set the LC_ALL environment variable to fix this.')
276 self.params['restrictfilenames'] = True
278 if '%(stitle)s' in self.params.get('outtmpl', ''):
279 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
284 self.print_debug_header()
285 self.add_default_info_extractors()
287 for pp_def_raw in self.params.get('postprocessors', []):
288 pp_class = get_postprocessor(pp_def_raw['key'])
289 pp_def = dict(pp_def_raw)
291 pp = pp_class(self, **compat_kwargs(pp_def))
292 self.add_post_processor(pp)
294 for ph in self.params.get('progress_hooks', []):
295 self.add_progress_hook(ph)
297 def warn_if_short_id(self, argv):
298 # short YouTube ID starting with dash?
300 i for i, a in enumerate(argv)
301 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
305 [a for i, a in enumerate(argv) if i not in idxs] +
306 ['--'] + [argv[i] for i in idxs]
309 'Long argument string detected. '
310 'Use -- to separate parameters and URLs, like this:\n%s\n' %
311 args_to_str(correct_argv))
313 def add_info_extractor(self, ie):
314 """Add an InfoExtractor object to the end of the list."""
316 self._ies_instances[ie.ie_key()] = ie
317 ie.set_downloader(self)
319 def get_info_extractor(self, ie_key):
321 Get an instance of an IE with name ie_key, it will try to get one from
322 the _ies list, if there's no instance it will create a new one and add
323 it to the extractor list.
325 ie = self._ies_instances.get(ie_key)
327 ie = get_info_extractor(ie_key)()
328 self.add_info_extractor(ie)
331 def add_default_info_extractors(self):
333 Add the InfoExtractors returned by gen_extractors to the end of the list
335 for ie in gen_extractors():
336 self.add_info_extractor(ie)
338 def add_post_processor(self, pp):
339 """Add a PostProcessor object to the end of the chain."""
341 pp.set_downloader(self)
343 def add_progress_hook(self, ph):
344 """Add the progress hook (currently only for the file downloader)"""
345 self._progress_hooks.append(ph)
347 def _bidi_workaround(self, message):
348 if not hasattr(self, '_output_channel'):
351 assert hasattr(self, '_output_process')
352 assert isinstance(message, compat_str)
353 line_count = message.count('\n') + 1
354 self._output_process.stdin.write((message + '\n').encode('utf-8'))
355 self._output_process.stdin.flush()
356 res = ''.join(self._output_channel.readline().decode('utf-8')
357 for _ in range(line_count))
358 return res[:-len('\n')]
def to_screen(self, message, skip_eol=False):
    """Write *message* to the screen unless quiet mode is enabled."""
    # Delegate to to_stdout with quiet-mode checking switched on.
    return self.to_stdout(message, skip_eol=skip_eol, check_quiet=True)
def _write_string(self, s, out=None):
    """Write the raw string *s* to *out* using the user-configured encoding."""
    encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=encoding)
367 def to_stdout(self, message, skip_eol=False, check_quiet=False):
368 """Print message to stdout if not in quiet mode."""
369 if self.params.get('logger'):
370 self.params['logger'].debug(message)
371 elif not check_quiet or not self.params.get('quiet', False):
372 message = self._bidi_workaround(message)
373 terminator = ['\n', ''][skip_eol]
374 output = message + terminator
376 self._write_string(output, self._screen_file)
378 def to_stderr(self, message):
379 """Print message to stderr."""
380 assert isinstance(message, compat_str)
381 if self.params.get('logger'):
382 self.params['logger'].error(message)
384 message = self._bidi_workaround(message)
385 output = message + '\n'
386 self._write_string(output, self._err_file)
388 def to_console_title(self, message):
389 if not self.params.get('consoletitle', False):
391 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
392 # c_wchar_p() might not be necessary if `message` is
393 # already of type unicode()
394 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
395 elif 'TERM' in os.environ:
396 self._write_string('\033]0;%s\007' % message, self._screen_file)
398 def save_console_title(self):
399 if not self.params.get('consoletitle', False):
401 if 'TERM' in os.environ:
402 # Save the title on stack
403 self._write_string('\033[22;0t', self._screen_file)
405 def restore_console_title(self):
406 if not self.params.get('consoletitle', False):
408 if 'TERM' in os.environ:
409 # Restore the title from stack
410 self._write_string('\033[23;0t', self._screen_file)
413 self.save_console_title()
def __exit__(self, *args):
    """Restore the console title and persist cookies when leaving the context."""
    self.restore_console_title()
    cookie_path = self.params.get('cookiefile')
    if cookie_path is not None:
        self.cookiejar.save()
422 def trouble(self, message=None, tb=None):
423 """Determine action to take when a download problem appears.
425 Depending on if the downloader has been configured to ignore
426 download errors or not, this method may throw an exception or
427 not when errors are found, after printing the message.
429 tb, if given, is additional traceback information.
431 if message is not None:
432 self.to_stderr(message)
433 if self.params.get('verbose'):
435 if sys.exc_info()[0]: # if .trouble has been called from an except block
437 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
438 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
439 tb += compat_str(traceback.format_exc())
441 tb_data = traceback.format_list(traceback.extract_stack())
442 tb = ''.join(tb_data)
444 if not self.params.get('ignoreerrors', False):
445 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
446 exc_info = sys.exc_info()[1].exc_info
448 exc_info = sys.exc_info()
449 raise DownloadError(message, exc_info)
450 self._download_retcode = 1
452 def report_warning(self, message):
454 Print the message to stderr, it will be prefixed with 'WARNING:'
455 If stderr is a tty file the 'WARNING:' will be colored
457 if self.params.get('logger') is not None:
458 self.params['logger'].warning(message)
460 if self.params.get('no_warnings'):
462 if self._err_file.isatty() and os.name != 'nt':
463 _msg_header = '\033[0;33mWARNING:\033[0m'
465 _msg_header = 'WARNING:'
466 warning_message = '%s %s' % (_msg_header, message)
467 self.to_stderr(warning_message)
469 def report_error(self, message, tb=None):
471 Do the same as trouble, but prefixes the message with 'ERROR:', colored
472 in red if stderr is a tty file.
474 if self._err_file.isatty() and os.name != 'nt':
475 _msg_header = '\033[0;31mERROR:\033[0m'
477 _msg_header = 'ERROR:'
478 error_message = '%s %s' % (_msg_header, message)
479 self.trouble(error_message, tb)
481 def report_file_already_downloaded(self, file_name):
482 """Report file has already been fully downloaded."""
484 self.to_screen('[download] %s has already been downloaded' % file_name)
485 except UnicodeEncodeError:
486 self.to_screen('[download] The file has already been downloaded')
488 def prepare_filename(self, info_dict):
489 """Generate the output filename."""
491 template_dict = dict(info_dict)
493 template_dict['epoch'] = int(time.time())
494 autonumber_size = self.params.get('autonumber_size')
495 if autonumber_size is None:
497 autonumber_templ = '%0' + str(autonumber_size) + 'd'
498 template_dict['autonumber'] = autonumber_templ % self._num_downloads
499 if template_dict.get('playlist_index') is not None:
500 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
501 if template_dict.get('resolution') is None:
502 if template_dict.get('width') and template_dict.get('height'):
503 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
504 elif template_dict.get('height'):
505 template_dict['resolution'] = '%sp' % template_dict['height']
506 elif template_dict.get('width'):
507 template_dict['resolution'] = '?x%d' % template_dict['width']
509 sanitize = lambda k, v: sanitize_filename(
511 restricted=self.params.get('restrictfilenames'),
513 template_dict = dict((k, sanitize(k, v))
514 for k, v in template_dict.items()
516 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
518 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
519 tmpl = compat_expanduser(outtmpl)
520 filename = tmpl % template_dict
522 except ValueError as err:
523 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
526 def _match_entry(self, info_dict):
527 """ Returns None iff the file should be downloaded """
529 video_title = info_dict.get('title', info_dict.get('id', 'video'))
530 if 'title' in info_dict:
531 # This can happen when we're just evaluating the playlist
532 title = info_dict['title']
533 matchtitle = self.params.get('matchtitle', False)
535 if not re.search(matchtitle, title, re.IGNORECASE):
536 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
537 rejecttitle = self.params.get('rejecttitle', False)
539 if re.search(rejecttitle, title, re.IGNORECASE):
540 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
541 date = info_dict.get('upload_date', None)
543 dateRange = self.params.get('daterange', DateRange())
544 if date not in dateRange:
545 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
546 view_count = info_dict.get('view_count', None)
547 if view_count is not None:
548 min_views = self.params.get('min_views')
549 if min_views is not None and view_count < min_views:
550 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
551 max_views = self.params.get('max_views')
552 if max_views is not None and view_count > max_views:
553 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
554 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
555 return 'Skipping "%s" because it is age restricted' % title
556 if self.in_download_archive(info_dict):
557 return '%s has already been recorded in archive' % video_title
561 def add_extra_info(info_dict, extra_info):
562 '''Set the keys from extra_info in info dict if they are missing'''
563 for key, value in extra_info.items():
564 info_dict.setdefault(key, value)
566 def extract_info(self, url, download=True, ie_key=None, extra_info={},
569 Returns a list with a dictionary for each video we find.
570 If 'download', also downloads the videos.
571 extra_info is a dict containing the extra values to add to each result
575 ies = [self.get_info_extractor(ie_key)]
580 if not ie.suitable(url):
584 self.report_warning('The program functionality for this site has been marked as broken, '
585 'and will probably not work.')
588 ie_result = ie.extract(url)
589 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
591 if isinstance(ie_result, list):
592 # Backwards compatibility: old IE result format
594 '_type': 'compat_list',
595 'entries': ie_result,
597 self.add_default_extra_info(ie_result, ie, url)
599 return self.process_ie_result(ie_result, download, extra_info)
602 except ExtractorError as de: # An error we somewhat expected
603 self.report_error(compat_str(de), de.format_traceback())
605 except MaxDownloadsReached:
607 except Exception as e:
608 if self.params.get('ignoreerrors', False):
609 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
614 self.report_error('no suitable InfoExtractor for URL %s' % url)
616 def add_default_extra_info(self, ie_result, ie, url):
617 self.add_extra_info(ie_result, {
618 'extractor': ie.IE_NAME,
620 'webpage_url_basename': url_basename(url),
621 'extractor_key': ie.ie_key(),
624 def process_ie_result(self, ie_result, download=True, extra_info={}):
626 Take the result of the ie(may be modified) and resolve all unresolved
627 references (URLs, playlist items).
629 It will also download the videos if 'download'.
630 Returns the resolved ie_result.
633 result_type = ie_result.get('_type', 'video')
635 if result_type in ('url', 'url_transparent'):
636 extract_flat = self.params.get('extract_flat', False)
637 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
638 extract_flat is True):
639 if self.params.get('forcejson', False):
640 self.to_stdout(json.dumps(ie_result))
643 if result_type == 'video':
644 self.add_extra_info(ie_result, extra_info)
645 return self.process_video_result(ie_result, download=download)
646 elif result_type == 'url':
647 # We have to add extra_info to the results because it may be
648 # contained in a playlist
649 return self.extract_info(ie_result['url'],
651 ie_key=ie_result.get('ie_key'),
652 extra_info=extra_info)
653 elif result_type == 'url_transparent':
654 # Use the information from the embedding page
655 info = self.extract_info(
656 ie_result['url'], ie_key=ie_result.get('ie_key'),
657 extra_info=extra_info, download=False, process=False)
659 force_properties = dict(
660 (k, v) for k, v in ie_result.items() if v is not None)
661 for f in ('_type', 'url'):
662 if f in force_properties:
663 del force_properties[f]
664 new_result = info.copy()
665 new_result.update(force_properties)
667 assert new_result.get('_type') != 'url_transparent'
669 return self.process_ie_result(
670 new_result, download=download, extra_info=extra_info)
671 elif result_type == 'playlist' or result_type == 'multi_video':
672 # We process each entry in the playlist
673 playlist = ie_result.get('title', None) or ie_result.get('id', None)
674 self.to_screen('[download] Downloading playlist: %s' % playlist)
676 playlist_results = []
678 playliststart = self.params.get('playliststart', 1) - 1
679 playlistend = self.params.get('playlistend', None)
680 # For backwards compatibility, interpret -1 as whole list
681 if playlistend == -1:
684 ie_entries = ie_result['entries']
685 if isinstance(ie_entries, list):
686 n_all_entries = len(ie_entries)
687 entries = ie_entries[playliststart:playlistend]
688 n_entries = len(entries)
690 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
691 (ie_result['extractor'], playlist, n_all_entries, n_entries))
692 elif isinstance(ie_entries, PagedList):
693 entries = ie_entries.getslice(
694 playliststart, playlistend)
695 n_entries = len(entries)
697 "[%s] playlist %s: Downloading %d videos" %
698 (ie_result['extractor'], playlist, n_entries))
700 entries = list(itertools.islice(
701 ie_entries, playliststart, playlistend))
702 n_entries = len(entries)
704 "[%s] playlist %s: Downloading %d videos" %
705 (ie_result['extractor'], playlist, n_entries))
707 if self.params.get('playlistreverse', False):
708 entries = entries[::-1]
710 for i, entry in enumerate(entries, 1):
711 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
713 'n_entries': n_entries,
714 'playlist': playlist,
715 'playlist_id': ie_result.get('id'),
716 'playlist_title': ie_result.get('title'),
717 'playlist_index': i + playliststart,
718 'extractor': ie_result['extractor'],
719 'webpage_url': ie_result['webpage_url'],
720 'webpage_url_basename': url_basename(ie_result['webpage_url']),
721 'extractor_key': ie_result['extractor_key'],
724 reason = self._match_entry(entry)
725 if reason is not None:
726 self.to_screen('[download] ' + reason)
729 entry_result = self.process_ie_result(entry,
732 playlist_results.append(entry_result)
733 ie_result['entries'] = playlist_results
735 elif result_type == 'compat_list':
737 'Extractor %s returned a compat_list result. '
738 'It needs to be updated.' % ie_result.get('extractor'))
744 'extractor': ie_result['extractor'],
745 'webpage_url': ie_result['webpage_url'],
746 'webpage_url_basename': url_basename(ie_result['webpage_url']),
747 'extractor_key': ie_result['extractor_key'],
751 ie_result['entries'] = [
752 self.process_ie_result(_fixup(r), download, extra_info)
753 for r in ie_result['entries']
757 raise Exception('Invalid result type: %s' % result_type)
759 def select_format(self, format_spec, available_formats):
760 if format_spec == 'best' or format_spec is None:
761 return available_formats[-1]
762 elif format_spec == 'worst':
763 return available_formats[0]
764 elif format_spec == 'bestaudio':
766 f for f in available_formats
767 if f.get('vcodec') == 'none']
769 return audio_formats[-1]
770 elif format_spec == 'worstaudio':
772 f for f in available_formats
773 if f.get('vcodec') == 'none']
775 return audio_formats[0]
776 elif format_spec == 'bestvideo':
778 f for f in available_formats
779 if f.get('acodec') == 'none']
781 return video_formats[-1]
782 elif format_spec == 'worstvideo':
784 f for f in available_formats
785 if f.get('acodec') == 'none']
787 return video_formats[0]
789 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
790 if format_spec in extensions:
791 filter_f = lambda f: f['ext'] == format_spec
793 filter_f = lambda f: f['format_id'] == format_spec
794 matches = list(filter(filter_f, available_formats))
799 def process_video_result(self, info_dict, download=True):
800 assert info_dict.get('_type', 'video') == 'video'
802 if 'id' not in info_dict:
803 raise ExtractorError('Missing "id" field in extractor result')
804 if 'title' not in info_dict:
805 raise ExtractorError('Missing "title" field in extractor result')
807 if 'playlist' not in info_dict:
808 # It isn't part of a playlist
809 info_dict['playlist'] = None
810 info_dict['playlist_index'] = None
812 thumbnails = info_dict.get('thumbnails')
814 thumbnails.sort(key=lambda t: (
815 t.get('width'), t.get('height'), t.get('url')))
817 if 'width' in t and 'height' in t:
818 t['resolution'] = '%dx%d' % (t['width'], t['height'])
820 if thumbnails and 'thumbnail' not in info_dict:
821 info_dict['thumbnail'] = thumbnails[-1]['url']
823 if 'display_id' not in info_dict and 'id' in info_dict:
824 info_dict['display_id'] = info_dict['id']
826 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
827 # Working around negative timestamps in Windows
828 # (see http://bugs.python.org/issue1646728)
829 if info_dict['timestamp'] < 0 and os.name == 'nt':
830 info_dict['timestamp'] = 0
831 upload_date = datetime.datetime.utcfromtimestamp(
832 info_dict['timestamp'])
833 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
835 # These extractors handle format selection themselves
836 if info_dict['extractor'] in ['Youku']:
838 self.process_info(info_dict)
841 # We now pick which formats have to be downloaded
842 if info_dict.get('formats') is None:
843 # There's only one format available
844 formats = [info_dict]
846 formats = info_dict['formats']
849 raise ExtractorError('No video formats found!')
851 # We check that all the formats have the format and format_id fields
852 for i, format in enumerate(formats):
853 if 'url' not in format:
854 raise ExtractorError('Missing "url" key in result (index %d)' % i)
856 if format.get('format_id') is None:
857 format['format_id'] = compat_str(i)
858 if format.get('format') is None:
859 format['format'] = '{id} - {res}{note}'.format(
860 id=format['format_id'],
861 res=self.format_resolution(format),
862 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
864 # Automatically determine file extension if missing
865 if 'ext' not in format:
866 format['ext'] = determine_ext(format['url']).lower()
868 format_limit = self.params.get('format_limit', None)
870 formats = list(takewhile_inclusive(
871 lambda f: f['format_id'] != format_limit, formats
874 # TODO Central sorting goes here
876 if formats[0] is not info_dict:
877 # only set the 'formats' fields if the original info_dict list them
878 # otherwise we end up with a circular reference, the first (and unique)
879 # element in the 'formats' field in info_dict is info_dict itself,
880 # which can't be exported to json
881 info_dict['formats'] = formats
882 if self.params.get('listformats', None):
883 self.list_formats(info_dict)
886 req_format = self.params.get('format')
887 if req_format is None:
889 formats_to_download = []
890 # The -1 is for supporting YoutubeIE
891 if req_format in ('-1', 'all'):
892 formats_to_download = formats
894 for rfstr in req_format.split(','):
895 # We can accept formats requested in the format: 34/5/best, we pick
896 # the first that is available, starting from left
897 req_formats = rfstr.split('/')
898 for rf in req_formats:
899 if re.match(r'.+?\+.+?', rf) is not None:
900 # Two formats have been requested like '137+139'
901 format_1, format_2 = rf.split('+')
902 formats_info = (self.select_format(format_1, formats),
903 self.select_format(format_2, formats))
904 if all(formats_info):
905 # The first format must contain the video and the
907 if formats_info[0].get('vcodec') == 'none':
908 self.report_error('The first format must '
909 'contain the video, try using '
910 '"-f %s+%s"' % (format_2, format_1))
913 'requested_formats': formats_info,
915 'ext': formats_info[0]['ext'],
916 'width': formats_info[0].get('width'),
917 'height': formats_info[0].get('height'),
918 'resolution': formats_info[0].get('resolution'),
919 'fps': formats_info[0].get('fps'),
920 'vcodec': formats_info[0].get('vcodec'),
921 'vbr': formats_info[0].get('vbr'),
922 'acodec': formats_info[1].get('acodec'),
923 'abr': formats_info[1].get('abr'),
926 selected_format = None
928 selected_format = self.select_format(rf, formats)
929 if selected_format is not None:
930 formats_to_download.append(selected_format)
932 if not formats_to_download:
933 raise ExtractorError('requested format not available',
937 if len(formats_to_download) > 1:
938 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
939 for format in formats_to_download:
940 new_info = dict(info_dict)
941 new_info.update(format)
942 self.process_info(new_info)
943 # We update the info dict with the best quality format (backwards compatibility)
944 info_dict.update(formats_to_download[-1])
947 def process_info(self, info_dict):
948 """Process a single resolved IE result."""
# NOTE(review): the leading numbers are the original file's line numbers and
# they jump in places -- several try/else/return statements that the
# except/elif clauses below belong to are not visible in this excerpt.
# Only a single, fully-extracted video entry may be processed here.
950 assert info_dict.get('_type', 'video') == 'video'
# Enforce the user-configured --max-downloads limit before doing any work.
952 max_downloads = self.params.get('max_downloads')
953 if max_downloads is not None:
954 if self._num_downloads >= int(max_downloads):
955 raise MaxDownloadsReached()
# Preserve the full title, then truncate over-long titles so they stay
# usable in filenames: 197 chars + '...' caps the length at 200.
957 info_dict['fulltitle'] = info_dict['title']
958 if len(info_dict['title']) > 200:
959 info_dict['title'] = info_dict['title'][:197] + '...'
961 # Keep for backwards compatibility
962 info_dict['stitle'] = info_dict['title']
# Default the human-readable format label to the file extension.
964 if 'format' not in info_dict:
965 info_dict['format'] = info_dict['ext']
# _match_entry returns a human-readable reason to skip this video,
# or None to proceed with the download.
967 reason = self._match_entry(info_dict)
968 if reason is not None:
969 self.to_screen('[download] ' + reason)
972 self._num_downloads += 1
974 filename = self.prepare_filename(info_dict)
# --force* options: print the requested fields to stdout (for scripting).
977 if self.params.get('forcetitle', False):
978 self.to_stdout(info_dict['fulltitle'])
979 if self.params.get('forceid', False):
980 self.to_stdout(info_dict['id'])
981 if self.params.get('forceurl', False):
# When several formats were requested, print every format's URL.
982 if info_dict.get('requested_formats') is not None:
983 for f in info_dict['requested_formats']:
984 self.to_stdout(f['url'] + f.get('play_path', ''))
986 # For RTMP URLs, also include the playpath
987 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
988 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
989 self.to_stdout(info_dict['thumbnail'])
990 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
991 self.to_stdout(info_dict['description'])
992 if self.params.get('forcefilename', False) and filename is not None:
993 self.to_stdout(filename)
994 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
995 self.to_stdout(formatSeconds(info_dict['duration']))
996 if self.params.get('forceformat', False):
997 self.to_stdout(info_dict['format'])
# forcejson: emit the whole info dict (with the computed filename) as JSON.
998 if self.params.get('forcejson', False):
999 info_dict['_filename'] = filename
1000 self.to_stdout(json.dumps(info_dict))
1001 if self.params.get('dump_single_json', False):
1002 info_dict['_filename'] = filename
1004 # Do nothing else if in simulate mode
1005 if self.params.get('simulate', False):
1008 if filename is None:
# Make sure the target directory exists before writing anything into it.
# NOTE(review): the makedirs call and its try line are not visible here.
1012 dn = os.path.dirname(encodeFilename(filename))
1013 if dn and not os.path.exists(dn):
1015 except (OSError, IOError) as err:
1016 self.report_error('unable to create directory ' + compat_str(err))
# Optionally write the video description next to the media file.
1019 if self.params.get('writedescription', False):
1020 descfn = filename + '.description'
1021 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1022 self.to_screen('[info] Video description is already present')
1023 elif info_dict.get('description') is None:
1024 self.report_warning('There\'s no description to write.')
1027 self.to_screen('[info] Writing video description to: ' + descfn)
1028 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1029 descfile.write(info_dict['description'])
1030 except (OSError, IOError):
1031 self.report_error('Cannot write description file ' + descfn)
# Optionally write annotations as an XML sidecar file.
1034 if self.params.get('writeannotations', False):
1035 annofn = filename + '.annotations.xml'
1036 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1037 self.to_screen('[info] Video annotations are already present')
1040 self.to_screen('[info] Writing video annotations to: ' + annofn)
1041 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1042 annofile.write(info_dict['annotations'])
# A missing or None 'annotations' value surfaces here as KeyError/TypeError.
1043 except (KeyError, TypeError):
1044 self.report_warning('There are no annotations to write.')
1045 except (OSError, IOError):
1046 self.report_error('Cannot write annotations file: ' + annofn)
# Subtitles are written when either manual or automatic subs were requested.
1049 subtitles_are_requested = any([self.params.get('writesubtitles', False),
1050 self.params.get('writeautomaticsub')])
1052 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1053 # subtitles download errors are already managed as troubles in relevant IE
1054 # that way it will silently go on when used with unsupporting IE
1055 subtitles = info_dict['subtitles']
1056 sub_format = self.params.get('subtitlesformat', 'srt')
1057 for sub_lang in subtitles.keys():
1058 sub = subtitles[sub_lang]
1062 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1063 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
# NOTE(review): 'already_present' looks like a typo for 'already present'
# in this user-facing message; left untouched in this documentation pass.
1064 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1066 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1067 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1069 except (OSError, IOError):
1070 self.report_error('Cannot write subtitles file ' + sub_filename)
# Optionally dump the full metadata as <name>.info.json.
1073 if self.params.get('writeinfojson', False):
1074 infofn = os.path.splitext(filename)[0] + '.info.json'
1075 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1076 self.to_screen('[info] Video description metadata is already present')
1078 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1080 write_json_file(info_dict, infofn)
1081 except (OSError, IOError):
1082 self.report_error('Cannot write metadata to JSON file ' + infofn)
# Optionally download the thumbnail; the extension is guessed from the
# thumbnail URL, defaulting to jpg.
1085 if self.params.get('writethumbnail', False):
1086 if info_dict.get('thumbnail') is not None:
1087 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
1088 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
1089 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1090 self.to_screen('[%s] %s: Thumbnail is already present' %
1091 (info_dict['extractor'], info_dict['id']))
1093 self.to_screen('[%s] %s: Downloading thumbnail ...' %
1094 (info_dict['extractor'], info_dict['id']))
1096 uf = self.urlopen(info_dict['thumbnail'])
1097 with open(thumb_filename, 'wb') as thumbf:
1098 shutil.copyfileobj(uf, thumbf)
1099 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1100 (info_dict['extractor'], info_dict['id'], thumb_filename))
# Network failures while fetching the thumbnail are non-fatal warnings.
1101 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1102 self.report_warning('Unable to download thumbnail "%s": %s' %
1103 (info_dict['thumbnail'], compat_str(err)))
# The actual media download (skipped entirely with skip_download).
1105 if not self.params.get('skip_download', False):
1106 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
# Local helper (its def line is missing from this excerpt): pick a suitable
# FileDownloader for the given info, attach the registered progress hooks,
# and run the download.
1111 fd = get_suitable_downloader(info)(self, self.params)
1112 for ph in self._progress_hooks:
1113 fd.add_progress_hook(ph)
1114 if self.params.get('verbose'):
1115 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1116 return fd.download(name, info)
# Multiple requested formats: download each into its own "f<format_id>"
# file and schedule an ffmpeg/avconv merge as a per-download postprocessor.
1117 if info_dict.get('requested_formats') is not None:
1120 merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1121 if not merger._executable:
1123 self.report_warning('You have requested multiple '
1124 'formats but ffmpeg or avconv are not installed.'
1125 ' The formats won\'t be merged')
1127 postprocessors = [merger]
1128 for f in info_dict['requested_formats']:
1129 new_info = dict(info_dict)
1131 fname = self.prepare_filename(new_info)
1132 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1133 downloaded.append(fname)
1134 partial_success = dl(fname, new_info)
# Overall success only when every partial download succeeded.
1135 success = success and partial_success
1136 info_dict['__postprocessors'] = postprocessors
1137 info_dict['__files_to_merge'] = downloaded
1139 # Just a single file
1140 success = dl(filename, info_dict)
1141 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1142 self.report_error('unable to download video data: %s' % str(err))
1144 except (OSError, IOError) as err:
1145 raise UnavailableVideoError(err)
1146 except (ContentTooShortError, ) as err:
1147 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
# On success: run the postprocessors, then record the video in the
# download archive so it is not downloaded again.
1152 self.post_process(filename, info_dict)
1153 except (PostProcessingError) as err:
1154 self.report_error('postprocessing: %s' % str(err))
1156 self.record_download_archive(info_dict)
1158 def download(self, url_list):
1159 """Download a given list of URLs."""
1160 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
# Guard: several URLs with a non-templated output name would all write to
# the same file.  NOTE(review): the middle of this condition is missing
# from this excerpt (line numbers jump 1161 -> 1163).
1161 if (len(url_list) > 1 and
1163 and self.params.get('max_downloads') != 1):
1164 raise SameFileError(outtmpl)
1166 for url in url_list:
1168 # It also downloads the videos
1169 res = self.extract_info(url)
1170 except UnavailableVideoError:
1171 self.report_error('unable to download video')
# MaxDownloadsReached stops processing any further URLs in the list.
1172 except MaxDownloadsReached:
1173 self.to_screen('[info] Maximum number of downloaded files reached.')
# dump_single_json: emit the (last) extraction result as one JSON document.
1176 if self.params.get('dump_single_json', False):
1177 self.to_stdout(json.dumps(res))
# Exit status accumulated by the report_error machinery.
1179 return self._download_retcode
1181 def download_with_info_file(self, info_filename):
# Re-run a download from a previously dumped .info.json file.
# NOTE(review): the statement that parses the file and binds `info` is
# not visible in this excerpt (line numbers jump 1182 -> 1185).
1182 with io.open(info_filename, 'r', encoding='utf-8') as f:
1185 self.process_ie_result(info, download=True)
1186 except DownloadError:
# The stored info may contain expired media URLs; fall back to a fresh
# extraction from the original webpage URL when one is available.
1187 webpage_url = info.get('webpage_url')
1188 if webpage_url is not None:
1189 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1190 return self.download([webpage_url])
1193 return self._download_retcode
1195 def post_process(self, filename, ie_info):
1196 """Run all the postprocessors on the given file."""
# Work on a copy so the caller's info dict is not mutated; record the
# file path for the postprocessors to operate on.
1197 info = dict(ie_info)
1198 info['filepath'] = filename
# Per-download postprocessors (e.g. the format merger) run before the
# globally registered ones (self._pps).
1201 if ie_info.get('__postprocessors') is not None:
1202 pps_chain.extend(ie_info['__postprocessors'])
1203 pps_chain.extend(self._pps)
1204 for pp in pps_chain:
# Each postprocessor returns (keep_video_wish, new_info); a non-None wish
# influences whether the original file is kept.  NOTE(review): the exact
# nesting here is partly lost in this excerpt (line numbers jump).
1206 keep_video_wish, new_info = pp.run(info)
1207 if keep_video_wish is not None:
1209 keep_video = keep_video_wish
1210 elif keep_video is None:
1211 # No clear decision yet, let IE decide
1212 keep_video = keep_video_wish
1213 except PostProcessingError as e:
1214 self.report_error(e.msg)
# Delete the original file unless a postprocessor or the keepvideo (-k)
# option asked to keep it; removal failure is only a warning.
1215 if keep_video is False and not self.params.get('keepvideo', False):
1217 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1218 os.remove(encodeFilename(filename))
1219 except (IOError, OSError):
1220 self.report_warning('Unable to remove downloaded video file')
def _make_archive_id(self, info_dict):
    """Build the download-archive key "<extractor> <id>" for a video.

    The canonical 'extractor_key' is preferred; 'ie_key' (present on
    playlist entries) is the backwards-compatible fallback.  Returns
    None when the information is too incomplete to build a key.
    """
    archive_key = info_dict.get('extractor_key')
    # Fall back to the playlist-entry key only when an 'id' exists,
    # matching the original nested-lookup behaviour.
    if archive_key is None and 'id' in info_dict:
        archive_key = info_dict.get('ie_key')  # key in a playlist
    if archive_key is None:
        return None  # Incomplete video information
    return '%s %s' % (archive_key.lower(), info_dict['id'])
1233 def in_download_archive(self, info_dict):
# Return True when this video is already recorded in the download_archive
# file.  NOTE(review): several lines, including the early return when no
# archive is configured and the `return True` on a match, are missing
# from this excerpt (line numbers jump).
1234 fn = self.params.get('download_archive')
1238 vid_id = self._make_archive_id(info_dict)
1240 return False # Incomplete video information
# Scan the archive line by line for an exact archive-id match; locked_file
# serializes access against concurrent writers.
1243 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1244 for line in archive_file:
1245 if line.strip() == vid_id:
# A missing archive file (ENOENT) just means nothing was recorded yet;
# other I/O errors are not swallowed here.
1247 except IOError as ioe:
1248 if ioe.errno != errno.ENOENT:
1252 def record_download_archive(self, info_dict):
# Append this video's archive id to the download_archive file so that
# in_download_archive() skips it on future runs.  NOTE(review): the
# early return when no archive is configured is missing from this excerpt.
1253 fn = self.params.get('download_archive')
1256 vid_id = self._make_archive_id(info_dict)
# locked_file serializes concurrent appends to the archive.
1258 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1259 archive_file.write(vid_id + '\n')
1262 def format_resolution(format, default='unknown'):
# Build a short human-readable resolution string for a format dict:
# an explicit 'resolution' wins, then "WxH", then "<height>p" when only
# the height is known, then "?x<width>" when only the width is known.
# NOTE(review): the audio-only ('vcodec' == 'none') branch body, the
# else using `default`, and the final return are missing from this excerpt.
1263 if format.get('vcodec') == 'none':
1265 if format.get('resolution') is not None:
1266 return format['resolution']
1267 if format.get('height') is not None:
1268 if format.get('width') is not None:
1269 res = '%sx%s' % (format['width'], format['height'])
1271 res = '%sp' % format['height']
1272 elif format.get('width') is not None:
1273 res = '?x%d' % format['width']
1278 def _format_note(self, fdict):
# Assemble the free-form "note" column for the format table: flags
# unsupported containers, then appends format note, total bitrate,
# container, video codec/bitrate/fps, audio codec/bitrate/sample rate,
# and (approximate) file size.  NOTE(review): the initialization of
# `res` and several separator lines are missing from this excerpt.
1280 if fdict.get('ext') in ['f4f', 'f4m']:
1281 res += '(unsupported) '
1282 if fdict.get('format_note') is not None:
1283 res += fdict['format_note'] + ' '
# Total bitrate in kbit/s, padded for column alignment.
1284 if fdict.get('tbr') is not None:
1285 res += '%4dk ' % fdict['tbr']
1286 if fdict.get('container') is not None:
1289 res += '%s container' % fdict['container']
# Video codec details (only when a real video codec is present).
1290 if (fdict.get('vcodec') is not None and
1291 fdict.get('vcodec') != 'none'):
1294 res += fdict['vcodec']
1295 if fdict.get('vbr') is not None:
1297 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1299 if fdict.get('vbr') is not None:
1300 res += '%4dk' % fdict['vbr']
1301 if fdict.get('fps') is not None:
1302 res += ', %sfps' % fdict['fps']
# Audio codec details; 'none' marks video-only formats.
1303 if fdict.get('acodec') is not None:
1306 if fdict['acodec'] == 'none':
1309 res += '%-5s' % fdict['acodec']
1310 elif fdict.get('abr') is not None:
1314 if fdict.get('abr') is not None:
1315 res += '@%3dk' % fdict['abr']
1316 if fdict.get('asr') is not None:
1317 res += ' (%5dHz)' % fdict['asr']
# Exact file size preferred; approximate size is prefixed with '~'.
1318 if fdict.get('filesize') is not None:
1321 res += format_bytes(fdict['filesize'])
1322 elif fdict.get('filesize_approx') is not None:
1325 res += '~' + format_bytes(fdict['filesize_approx'])
1328 def list_formats(self, info_dict):
# Print the table of available formats for a video (the --list-formats
# output): one row per format with id, resolution and note columns.
# Local helper: render a single table row; idlen sizes the id column.
1329 def line(format, idlen=20):
1330 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1331 format['format_id'],
1333 self.format_resolution(format),
1334 self._format_note(format),
# A bare video entry without a 'formats' list is shown as its own format.
1337 formats = info_dict.get('formats', [info_dict])
# Size the id column to the longest format id (at least 'format code').
1338 idlen = max(len('format code'),
1339 max(len(f['format_id']) for f in formats))
# Formats with preference below -1000 are hidden from the listing.
1341 line(f, idlen) for f in formats
1342 if f.get('preference') is None or f['preference'] >= -1000]
# Formats are assumed sorted worst-first; annotate the two extremes.
1343 if len(formats) > 1:
1344 formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1345 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
# Header row rendered through the same helper so columns line up.
1347 header_line = line({
1348 'format_id': 'format code', 'ext': 'extension',
1349 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1350 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1351 (info_dict['id'], header_line, '\n'.join(formats_s)))
1353 def urlopen(self, req):
1354 """ Start an HTTP download """
1356 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1357 # always respected by websites, some tend to give out URLs with non percent-encoded
1358 # non-ASCII characters (see telemb.py, ard.py [#3412])
1359 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1360 # To work around aforementioned issue we will replace request's original URL with
1361 # percent-encoded one
# `req` may be either a plain URL string or a Request object; on Python 2
# strings are basestring, on Python 3 compat_str.
1362 req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1363 url = req if req_is_string else req.get_full_url()
1364 url_escaped = escape_url(url)
1366 # Substitute URL if any change after escaping
1367 if url != url_escaped:
# Rebuild the Request around the escaped URL, preserving its payload,
# headers and origin metadata.  NOTE(review): the string-request branch
# is missing from this excerpt (line numbers jump 1367 -> 1371).
1371 req = compat_urllib_request.Request(
1372 url_escaped, data=req.data, headers=req.headers,
1373 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
# All requests go through the opener built by _setup_opener, with the
# configured socket timeout.
1375 return self._opener.open(req, timeout=self._socket_timeout)
1377 def print_debug_header(self):
# Emit the '[debug] ...' banner (encodings, version, git revision, Python
# and helper-binary versions, proxy map).  Only active in verbose mode.
1378 if not self.params.get('verbose'):
# Sanity check: on some broken interpreters str literals are not compat_str.
1381 if type('') is not compat_str:
1382 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1383 self.report_warning(
1384 'Your Python is broken! Update to a newer and supported version')
# Report every relevant encoding: locale, filesystem, stdout and the
# user-preferred one.  NOTE(review): the line binding `encoding_str` and
# the stdout_encoding interpolation are partly missing from this excerpt.
1386 stdout_encoding = getattr(
1387 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1389 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1390 locale.getpreferredencoding(),
1391 sys.getfilesystemencoding(),
1393 self.get_encoding()))
1394 write_string(encoding_str, encoding=None)
1396 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
# Best effort: report the git revision when running from a checkout.
# NOTE(review): the enclosing try line is missing from this excerpt.
1398 sp = subprocess.Popen(
1399 ['git', 'rev-parse', '--short', 'HEAD'],
1400 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1401 cwd=os.path.dirname(os.path.abspath(__file__)))
1402 out, err = sp.communicate()
1403 out = out.decode().strip()
# Only print when the output looks like a hex commit hash.
1404 if re.match('[0-9a-f]+', out):
1405 self._write_string('[debug] Git HEAD: ' + out + '\n')
1411 self._write_string('[debug] Python version %s - %s\n' % (
1412 platform.python_version(), platform_name()))
# Versions of external helper binaries (ffmpeg/avconv family, rtmpdump).
1414 exe_versions = FFmpegPostProcessor.get_versions()
1415 exe_versions['rtmpdump'] = rtmpdump_version()
1416 exe_str = ', '.join(
1418 for exe, v in sorted(exe_versions.items())
1423 self._write_string('[debug] exe versions: %s\n' % exe_str)
# Collect the effective proxy configuration from the opener's handlers.
1426 for handler in self._opener.handlers:
1427 if hasattr(handler, 'proxies'):
1428 proxy_map.update(handler.proxies)
1429 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1431 def _setup_opener(self):
# Build the urllib opener used by urlopen(): cookie handling, proxy
# configuration, HTTPS handling and the custom YoutubeDLHandler.
# Default socket timeout is 600 seconds unless overridden by the user.
1432 timeout_val = self.params.get('socket_timeout')
1433 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1435 opts_cookiefile = self.params.get('cookiefile')
1436 opts_proxy = self.params.get('proxy')
# Without a cookie file use an in-memory jar; otherwise load a
# Mozilla-format jar from disk when it is readable.
1438 if opts_cookiefile is None:
1439 self.cookiejar = compat_cookiejar.CookieJar()
1441 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1443 if os.access(opts_cookiefile, os.R_OK):
1444 self.cookiejar.load()
1446 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
# Proxy selection: an explicit --proxy value is applied to both http and
# https; otherwise fall back to the environment's proxy settings.
# NOTE(review): the empty-string (disable proxies) branch body is missing
# from this excerpt.
1448 if opts_proxy is not None:
1449 if opts_proxy == '':
1452 proxies = {'http': opts_proxy, 'https': opts_proxy}
1454 proxies = compat_urllib_request.getproxies()
1455 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1456 if 'http' in proxies and 'https' not in proxies:
1457 proxies['https'] = proxies['http']
1458 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
# debug_printtraffic turns on urllib's request/response tracing.
1460 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1461 https_handler = make_HTTPS_handler(
1462 self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1463 ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1464 opener = compat_urllib_request.build_opener(
1465 https_handler, proxy_handler, cookie_processor, ydlh)
1466 # Delete the default user-agent header, which would otherwise apply in
1467 # cases where our custom HTTP handler doesn't come into play
1468 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1469 opener.addheaders = []
1470 self._opener = opener
1472 def encode(self, s):
# Encode a text string to bytes using the configured output encoding;
# bytes input is passed through unchanged.  NOTE(review): the try line
# and the re-raise after annotating the error are missing from this
# excerpt (line numbers jump 1474 -> 1477 and end at 1479).
1473 if isinstance(s, bytes):
1474 return s # Already encoded
1477 return s.encode(self.get_encoding())
# Enrich the UnicodeEncodeError with a hint about the --encoding option.
1478 except UnicodeEncodeError as err:
1479 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1482 def get_encoding(self):
1483 encoding = self.params.get('encoding')
1484 if encoding is None:
1485 encoding = preferredencoding()