2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
33 compat_urllib_request,
59 UnavailableVideoError,
68 from .cache import Cache
69 from .extractor import get_info_extractor, gen_extractors
70 from .downloader import get_suitable_downloader
71 from .downloader.rtmp import rtmpdump_version
72 from .postprocessor import (
77 from .version import __version__
80 class YoutubeDL(object):
83 YoutubeDL objects are the ones responsible of downloading the
84 actual video file and writing it to disk if the user has requested
85 it, among some other tasks. In most cases there should be one per
86 program. As, given a video URL, the downloader doesn't know how to
87 extract all the needed information, a task that InfoExtractors do, it
88 has to pass the URL to one of them.
90 For this, YoutubeDL objects have a method that allows
91 InfoExtractors to be registered in a given order. When it is passed
92 a URL, the YoutubeDL object handles it to the first InfoExtractor it
93 finds that reports being able to handle it. The InfoExtractor extracts
94 all the information about the video or videos the URL refers to, and
95 YoutubeDL processes the extracted information, possibly using a File
96 Downloader to download the video.
98 YoutubeDL objects accept a lot of parameters. In order not to saturate
99 the object constructor with arguments, it receives a dictionary of
100 options instead. These options are available through the params
101 attribute for the InfoExtractors to use. The YoutubeDL also
102 registers itself as the downloader in charge for the InfoExtractors
103 that are added to it, so this is a "mutual registration".
107 username: Username for authentication purposes.
108 password: Password for authentication purposes.
109 videopassword: Password for accessing a video.
110 usenetrc: Use netrc for authentication instead.
111 verbose: Print additional info to stdout.
112 quiet: Do not print messages to stdout.
113 no_warnings: Do not print out anything for warnings.
114 forceurl: Force printing final URL.
115 forcetitle: Force printing title.
116 forceid: Force printing ID.
117 forcethumbnail: Force printing thumbnail URL.
118 forcedescription: Force printing description.
119 forcefilename: Force printing final filename.
120 forceduration: Force printing duration.
121 forcejson: Force printing info_dict as JSON.
122 dump_single_json: Force printing the info_dict of the whole playlist
123 (or video) as a single JSON line.
124 simulate: Do not download the video files.
125 format: Video format code. See options.py for more information.
126 format_limit: Highest quality format to try.
127 outtmpl: Template for output names.
128 restrictfilenames: Do not allow "&" and spaces in file names
129 ignoreerrors: Do not stop on download errors.
130 nooverwrites: Prevent overwriting files.
131 playliststart: Playlist item to start at.
132 playlistend: Playlist item to end at.
133 playlistreverse: Download playlist items in reverse order.
134 matchtitle: Download only matching titles.
135 rejecttitle: Reject downloads for matching titles.
136 logger: Log messages to a logging.Logger instance.
137 logtostderr: Log messages to stderr instead of stdout.
138 writedescription: Write the video description to a .description file
139 writeinfojson: Write the video description to a .info.json file
140 writeannotations: Write the video annotations to a .annotations.xml file
141 writethumbnail: Write the thumbnail image to a file
142 writesubtitles: Write the video subtitles to a file
143 writeautomaticsub: Write the automatic subtitles to a file
144 allsubtitles: Downloads all the subtitles of the video
145 (requires writesubtitles or writeautomaticsub)
146 listsubtitles: Lists all available subtitles for the video
147 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
148 subtitleslangs: List of languages of the subtitles to download
149 keepvideo: Keep the video file after post-processing
150 daterange: A DateRange object, download only if the upload_date is in the range.
151 skip_download: Skip the actual download of the video file
152 cachedir: Location of the cache files in the filesystem.
153 False to disable filesystem cache.
154 noplaylist: Download single video instead of a playlist if in doubt.
155 age_limit: An integer representing the user's age in years.
156 Unsuitable videos for the given age are skipped.
157 min_views: An integer representing the minimum view count the video
158 must have in order to not be skipped.
159 Videos without view count information are always
160 downloaded. None for no limit.
161 max_views: An integer representing the maximum view count.
162 Videos that are more popular than that are not
164 Videos without view count information are always
165 downloaded. None for no limit.
166 download_archive: File name of a file where all downloads are recorded.
167 Videos already present in the file are not downloaded
169 cookiefile: File name where cookies should be read from and dumped to.
170 nocheckcertificate:Do not verify SSL certificates
171 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
172 At the moment, this is only supported by YouTube.
173 proxy: URL of the proxy server to use
174 socket_timeout: Time to wait for unresponsive hosts, in seconds
175 bidi_workaround: Work around buggy terminals without bidirectional text
176 support, using fribidi
177 debug_printtraffic:Print out sent and received HTTP traffic
178 include_ads: Download ads as well
179 default_search: Prepend this string if an input url is not valid.
180 'auto' for elaborate guessing
181 encoding: Use this encoding instead of the system-specified.
182 extract_flat: Do not resolve URLs, return the immediate result.
183 Pass in 'in_playlist' to only show this behavior for
185 postprocessors: A list of dictionaries, each with an entry
186 * key: The name of the postprocessor. See
187 youtube_dl/postprocessor/__init__.py for a list.
188 as well as any further keyword arguments for the
190 progress_hooks: A list of functions that get called on download
191 progress, with a dictionary with the entries
192 * filename: The final filename
193 * status: One of "downloading" and "finished"
195 The dict may also have some of the following entries:
197 * downloaded_bytes: Bytes on disk
198 * total_bytes: Size of the whole file, None if unknown
199 * tmpfilename: The filename we're currently writing to
200 * eta: The estimated time in seconds, None if unknown
201 * speed: The download speed in bytes/second, None if
204 Progress hooks are guaranteed to be called at least once
205 (with status "finished") if the download is successful.
208 The following parameters are not used by YoutubeDL itself, they are used by
210 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
211 noresizebuffer, retries, continuedl, noprogress, consoletitle
213 The following options are used by the post processors:
214 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
215 otherwise prefer avconv.
216 exec_cmd: Arbitrary command to run after downloading
# Class-level defaults; __init__ replaces both with real counters (0).
222 _download_retcode = None
223 _num_downloads = None
# Construct the downloader: set up extractor/postprocessor registries,
# counters, output streams, cache, the optional fribidi/bidiv terminal
# workaround, filename-restriction autodetection and user-supplied
# postprocessors and progress hooks.
# NOTE(review): jumps in the embedded source-line numbers show lines
# elided from this excerpt; the visible statements are not contiguous.
226 def __init__(self, params=None, auto_init=True):
227 """Create a FileDownloader object with the given options."""
231 self._ies_instances = {}
233 self._progress_hooks = []
234 self._download_retcode = 0
235 self._num_downloads = 0
# Index with the boolean 'logtostderr' option: False -> stdout, True -> stderr.
236 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
237 self._err_file = sys.stderr
239 self.cache = Cache(self)
# Bidirectional-text workaround: pipe output through bidiv (preferred)
# or fribidi, reading the reordered text back from a pty.
241 if params.get('bidi_workaround', False):
244 master, slave = pty.openpty()
245 width = get_term_width()
249 width_args = ['-w', str(width)]
251 stdin=subprocess.PIPE,
253 stderr=self._err_file)
255 self._output_process = subprocess.Popen(
256 ['bidiv'] + width_args, **sp_kwargs
259 self._output_process = subprocess.Popen(
260 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
261 self._output_channel = os.fdopen(master, 'rb')
262 except OSError as ose:
264 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
# Python 3 with an ASCII filesystem encoding cannot encode arbitrary
# titles (#1474), so force --restrict-filenames unless the user chose.
268 if (sys.version_info >= (3,) and sys.platform != 'win32' and
269 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
270 and not params.get('restrictfilenames', False)):
271 # On Python 3, the Unicode filesystem API will throw errors (#1474)
273 'Assuming --restrict-filenames since file system encoding '
274 'cannot encode all characters. '
275 'Set the LC_ALL environment variable to fix this.')
276 self.params['restrictfilenames'] = True
# NOTE(review): the warning text below is missing a space in "flag(which".
278 if '%(stitle)s' in self.params.get('outtmpl', ''):
279 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
284 self.print_debug_header()
285 self.add_default_info_extractors()
# Instantiate postprocessors declared in params; 'key' selects the class,
# remaining entries become keyword arguments.
287 for pp_def_raw in self.params.get('postprocessors', []):
288 pp_class = get_postprocessor(pp_def_raw['key'])
289 pp_def = dict(pp_def_raw)
291 pp = pp_class(self, **compat_kwargs(pp_def))
292 self.add_post_processor(pp)
294 for ph in self.params.get('progress_hooks', []):
295 self.add_progress_hook(ph)
# Warn when a command-line argument looks like a YouTube video ID that
# starts with '-' (10 chars of [0-9A-Za-z_-]), which argument parsing
# would misread as an option; suggest the '--' separator.
# NOTE(review): lines elided from this excerpt (see line-number jumps).
297 def warn_if_short_id(self, argv):
298 # short YouTube ID starting with dash?
300 i for i, a in enumerate(argv)
301 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
305 [a for i, a in enumerate(argv) if i not in idxs] +
306 ['--'] + [argv[i] for i in idxs]
309 'Long argument string detected. '
310 'Use -- to separate parameters and URLs, like this:\n%s\n' %
311 args_to_str(correct_argv))
# Register an InfoExtractor instance and hand it a back-reference to
# this downloader ("mutual registration", see class docstring).
# NOTE(review): embedded line 315 is elided here — presumably the append
# to the ordered extractor list; confirm against the full source.
313 def add_info_extractor(self, ie):
314 """Add an InfoExtractor object to the end of the list."""
316 self._ies_instances[ie.ie_key()] = ie
317 ie.set_downloader(self)
# Look up a cached extractor instance by key, instantiating and
# registering one on a miss (module-level get_info_extractor returns the
# class; note the trailing () to instantiate).
# NOTE(review): lines elided from this excerpt (docstring quotes, the
# miss check and the return).
319 def get_info_extractor(self, ie_key):
321 Get an instance of an IE with name ie_key, it will try to get one from
322 the _ies list, if there's no instance it will create a new one and add
323 it to the extractor list.
325 ie = self._ies_instances.get(ie_key)
327 ie = get_info_extractor(ie_key)()
328 self.add_info_extractor(ie)
# Register every extractor produced by gen_extractors (imported at top).
331 def add_default_info_extractors(self):
333 Add the InfoExtractors returned by gen_extractors to the end of the list
335 for ie in gen_extractors():
336 self.add_info_extractor(ie)
# Register a postprocessor and give it a back-reference to this object.
# NOTE(review): embedded line 340 is elided — presumably the append to
# the postprocessor chain; confirm against the full source.
338 def add_post_processor(self, pp):
339 """Add a PostProcessor object to the end of the chain."""
341 pp.set_downloader(self)
def add_progress_hook(self, ph):
    """Register *ph* to receive download-progress dictionaries
    (currently only used by the file downloader)."""
    self._progress_hooks.append(ph)
# Pass *message* through the external bidi process set up in __init__
# and return the reordered text. If the workaround was never enabled
# (no _output_channel attribute), the elided early path presumably
# returns the message unchanged — confirm against the full source.
347 def _bidi_workaround(self, message):
348 if not hasattr(self, '_output_channel'):
351 assert hasattr(self, '_output_process')
352 assert isinstance(message, compat_str)
353 line_count = message.count('\n') + 1
354 self._output_process.stdin.write((message + '\n').encode('utf-8'))
355 self._output_process.stdin.flush()
# Read back exactly as many lines as we wrote, then strip the trailing
# newline that was appended above.
356 res = ''.join(self._output_channel.readline().decode('utf-8')
357 for _ in range(line_count))
358 return res[:-len('\n')]
def to_screen(self, message, skip_eol=False):
    """Show *message* on the screen unless the 'quiet' option suppresses it."""
    # Delegate to to_stdout with quiet-checking enabled.
    return self.to_stdout(message, skip_eol, check_quiet=True)
def _write_string(self, s, out=None):
    """Write *s* via the module-level write_string helper, applying the
    user-selected 'encoding' option when one is set."""
    encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=encoding)
# Emit *message* to the screen file: route to a configured logger if
# present, otherwise print (applying the bidi workaround) unless
# check_quiet is set and the 'quiet' option is on.
# NOTE(review): embedded line 375 is elided (likely blank).
367 def to_stdout(self, message, skip_eol=False, check_quiet=False):
368 """Print message to stdout if not in quiet mode."""
369 if self.params.get('logger'):
370 self.params['logger'].debug(message)
371 elif not check_quiet or not self.params.get('quiet', False):
372 message = self._bidi_workaround(message)
# skip_eol indexes into ['\n', '']: True selects the empty terminator.
373 terminator = ['\n', ''][skip_eol]
374 output = message + terminator
376 self._write_string(output, self._screen_file)
# Emit *message* to stderr, preferring a configured logger.
# NOTE(review): embedded line 383 is elided — presumably the 'else:'
# introducing the direct-write branch; confirm against the full source.
378 def to_stderr(self, message):
379 """Print message to stderr."""
380 assert isinstance(message, compat_str)
381 if self.params.get('logger'):
382 self.params['logger'].error(message)
384 message = self._bidi_workaround(message)
385 output = message + '\n'
386 self._write_string(output, self._err_file)
# Set the terminal/console window title when 'consoletitle' is enabled:
# Windows console API on nt, xterm escape sequence elsewhere.
# NOTE(review): embedded line 390 (the early return when the option is
# off) is elided from this excerpt.
388 def to_console_title(self, message):
389 if not self.params.get('consoletitle', False):
391 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
392 # c_wchar_p() might not be necessary if `message` is
393 # already of type unicode()
394 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
395 elif 'TERM' in os.environ:
# '\033]0;…\007' is the xterm OSC sequence to set the window title.
396 self._write_string('\033]0;%s\007' % message, self._screen_file)
# Push the current terminal title onto the xterm title stack so it can
# be restored on exit. NOTE(review): early-return line elided.
398 def save_console_title(self):
399 if not self.params.get('consoletitle', False):
401 if 'TERM' in os.environ:
402 # Save the title on stack
403 self._write_string('\033[22;0t', self._screen_file)
# Pop the previously saved terminal title from the xterm title stack.
# NOTE(review): early-return line elided.
405 def restore_console_title(self):
406 if not self.params.get('consoletitle', False):
408 if 'TERM' in os.environ:
409 # Restore the title from stack
410 self._write_string('\033[23;0t', self._screen_file)
# Context-manager protocol: save the console title on entry, restore it
# and persist cookies on exit.
# NOTE(review): the 'def __enter__(self):' header (embedded line 412)
# and its 'return self' are elided from this excerpt; line 413 below is
# the body of __enter__.
413 self.save_console_title()
416 def __exit__(self, *args):
417 self.restore_console_title()
419 if self.params.get('cookiefile') is not None:
420 self.cookiejar.save()
# Central error handler: print *message*, optionally a traceback when
# verbose, then either raise DownloadError (default) or, with
# 'ignoreerrors', record a non-zero retcode and continue.
# NOTE(review): lines elided from this excerpt (see line-number jumps).
422 def trouble(self, message=None, tb=None):
423 """Determine action to take when a download problem appears.
425 Depending on if the downloader has been configured to ignore
426 download errors or not, this method may throw an exception or
427 not when errors are found, after printing the message.
429 tb, if given, is additional traceback information.
431 if message is not None:
432 self.to_stderr(message)
433 if self.params.get('verbose'):
435 if sys.exc_info()[0]: # if .trouble has been called from an except block
# Prefer the wrapped exception's own exc_info (set by ExtractorError)
# over the current one, so the user sees the original failure site.
437 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
438 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
439 tb += compat_str(traceback.format_exc())
441 tb_data = traceback.format_list(traceback.extract_stack())
442 tb = ''.join(tb_data)
444 if not self.params.get('ignoreerrors', False):
445 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
446 exc_info = sys.exc_info()[1].exc_info
448 exc_info = sys.exc_info()
449 raise DownloadError(message, exc_info)
450 self._download_retcode = 1
# Print a 'WARNING:'-prefixed message to stderr (ANSI-yellow on a tty
# outside Windows), or route to the configured logger; 'no_warnings'
# suppresses it. NOTE(review): elided lines include the returns after
# the logger/no_warnings branches and the non-tty 'else:'.
452 def report_warning(self, message):
454 Print the message to stderr, it will be prefixed with 'WARNING:'
455 If stderr is a tty file the 'WARNING:' will be colored
457 if self.params.get('logger') is not None:
458 self.params['logger'].warning(message)
460 if self.params.get('no_warnings'):
462 if self._err_file.isatty() and os.name != 'nt':
463 _msg_header = '\033[0;33mWARNING:\033[0m'
465 _msg_header = 'WARNING:'
466 warning_message = '%s %s' % (_msg_header, message)
467 self.to_stderr(warning_message)
# Like trouble(), but prefixes the message with 'ERROR:' (ANSI-red on a
# tty outside Windows) before delegating to trouble().
# NOTE(review): docstring quotes and the 'else:' line are elided.
469 def report_error(self, message, tb=None):
471 Do the same as trouble, but prefixes the message with 'ERROR:', colored
472 in red if stderr is a tty file.
474 if self._err_file.isatty() and os.name != 'nt':
475 _msg_header = '\033[0;31mERROR:\033[0m'
477 _msg_header = 'ERROR:'
478 error_message = '%s %s' % (_msg_header, message)
479 self.trouble(error_message, tb)
# Report that the target file already exists; falls back to a generic
# message if the filename itself cannot be encoded for output.
# NOTE(review): the 'try:' line (embedded 483) is elided here.
481 def report_file_already_downloaded(self, file_name):
482 """Report file has already been fully downloaded."""
484 self.to_screen('[download] %s has already been downloaded' % file_name)
485 except UnicodeEncodeError:
486 self.to_screen('[download] The file has already been downloaded')
# Build the output filename by filling the 'outtmpl' template with a
# sanitized copy of info_dict (plus derived fields: epoch, autonumber,
# zero-padded playlist_index, resolution); missing keys render as 'NA'.
# NOTE(review): lines elided from this excerpt (see line-number jumps),
# including the try/return around the template expansion.
488 def prepare_filename(self, info_dict):
489 """Generate the output filename."""
491 template_dict = dict(info_dict)
493 template_dict['epoch'] = int(time.time())
494 autonumber_size = self.params.get('autonumber_size')
495 if autonumber_size is None:
497 autonumber_templ = '%0' + str(autonumber_size) + 'd'
498 template_dict['autonumber'] = autonumber_templ % self._num_downloads
# Pad playlist_index to the width of the total entry count.
499 if template_dict.get('playlist_index') is not None:
500 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
501 if template_dict.get('resolution') is None:
502 if template_dict.get('width') and template_dict.get('height'):
503 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
504 elif template_dict.get('height'):
505 template_dict['resolution'] = '%sp' % template_dict['height']
506 elif template_dict.get('width'):
# NOTE(review): '?x%d' puts the known *width* in the height position;
# looks like the operands are swapped ('%dx?' expected) — confirm.
507 template_dict['resolution'] = '?x%d' % template_dict['width']
509 sanitize = lambda k, v: sanitize_filename(
511 restricted=self.params.get('restrictfilenames'),
513 template_dict = dict((k, sanitize(k, v))
514 for k, v in template_dict.items()
516 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
518 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
519 tmpl = compat_expanduser(outtmpl)
520 filename = tmpl % template_dict
522 except ValueError as err:
523 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
# Decide whether a video should be downloaded. Returns None to proceed,
# or a human-readable skip reason string (title match/reject, date
# range, view-count bounds, age restriction, download archive).
# NOTE(review): lines elided from this excerpt (see line-number jumps).
526 def _match_entry(self, info_dict):
527 """ Returns None iff the file should be downloaded """
529 video_title = info_dict.get('title', info_dict.get('id', 'video'))
530 if 'title' in info_dict:
531 # This can happen when we're just evaluating the playlist
532 title = info_dict['title']
533 matchtitle = self.params.get('matchtitle', False)
535 if not re.search(matchtitle, title, re.IGNORECASE):
536 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
537 rejecttitle = self.params.get('rejecttitle', False)
539 if re.search(rejecttitle, title, re.IGNORECASE):
540 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
541 date = info_dict.get('upload_date', None)
543 dateRange = self.params.get('daterange', DateRange())
544 if date not in dateRange:
545 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
546 view_count = info_dict.get('view_count', None)
547 if view_count is not None:
548 min_views = self.params.get('min_views')
549 if min_views is not None and view_count < min_views:
550 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
551 max_views = self.params.get('max_views')
552 if max_views is not None and view_count > max_views:
553 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
554 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
555 return 'Skipping "%s" because it is age restricted' % title
556 if self.in_download_archive(info_dict):
557 return '%s has already been recorded in archive' % video_title
def add_extra_info(info_dict, extra_info):
    """Copy each entry of *extra_info* into *info_dict* unless the key
    is already present (existing values win)."""
    for k in extra_info:
        info_dict.setdefault(k, extra_info[k])
# Resolve *url* with the first suitable InfoExtractor (or the one named
# by ie_key), wrap legacy list results as 'compat_list', attach default
# extra info, and hand the result to process_ie_result.
# NOTE(review): lines elided from this excerpt (see line-number jumps),
# including the extractor loop header, try:, and several returns.
# NOTE(review): mutable default argument extra_info={} — safe only if
# never mutated inside; flag for a fix when the full source is at hand.
566 def extract_info(self, url, download=True, ie_key=None, extra_info={},
569 Returns a list with a dictionary for each video we find.
570 If 'download', also downloads the videos.
571 extra_info is a dict containing the extra values to add to each result
575 ies = [self.get_info_extractor(ie_key)]
580 if not ie.suitable(url):
584 self.report_warning('The program functionality for this site has been marked as broken, '
585 'and will probably not work.')
588 ie_result = ie.extract(url)
589 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
591 if isinstance(ie_result, list):
592 # Backwards compatibility: old IE result format
594 '_type': 'compat_list',
595 'entries': ie_result,
597 self.add_default_extra_info(ie_result, ie, url)
599 return self.process_ie_result(ie_result, download, extra_info)
602 except ExtractorError as de: # An error we somewhat expected
603 self.report_error(compat_str(de), de.format_traceback())
605 except MaxDownloadsReached:
607 except Exception as e:
608 if self.params.get('ignoreerrors', False):
609 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
# Reached when no registered extractor claimed the URL.
614 self.report_error('no suitable InfoExtractor for URL %s' % url)
# Attach standard provenance fields (extractor name/key, URL basename)
# to an extractor result without overwriting existing keys.
# NOTE(review): elided lines include the 'webpage_url' entry and the
# closing brackets of this call.
616 def add_default_extra_info(self, ie_result, ie, url):
617 self.add_extra_info(ie_result, {
618 'extractor': ie.IE_NAME,
620 'webpage_url_basename': url_basename(url),
621 'extractor_key': ie.ie_key(),
# Dispatch on the result '_type': 'video' goes to process_video_result;
# 'url'/'url_transparent' are re-extracted (transparent results merge
# the embedding page's non-None fields over the target's); playlists
# and multi_video iterate entries with start/end/reverse handling; the
# legacy 'compat_list' form is fixed up entry by entry.
# NOTE(review): lines elided from this excerpt (see line-number jumps).
# NOTE(review): mutable default extra_info={} — flag for a fix when the
# full source is at hand.
624 def process_ie_result(self, ie_result, download=True, extra_info={}):
626 Take the result of the ie(may be modified) and resolve all unresolved
627 references (URLs, playlist items).
629 It will also download the videos if 'download'.
630 Returns the resolved ie_result.
633 result_type = ie_result.get('_type', 'video')
# With --flat-playlist (extract_flat), unresolved URLs inside playlists
# are returned as-is instead of being extracted.
635 if result_type in ('url', 'url_transparent'):
636 extract_flat = self.params.get('extract_flat', False)
637 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
638 extract_flat is True):
639 if self.params.get('forcejson', False):
640 self.to_stdout(json.dumps(ie_result))
643 if result_type == 'video':
644 self.add_extra_info(ie_result, extra_info)
645 return self.process_video_result(ie_result, download=download)
646 elif result_type == 'url':
647 # We have to add extra_info to the results because it may be
648 # contained in a playlist
649 return self.extract_info(ie_result['url'],
651 ie_key=ie_result.get('ie_key'),
652 extra_info=extra_info)
653 elif result_type == 'url_transparent':
654 # Use the information from the embedding page
655 info = self.extract_info(
656 ie_result['url'], ie_key=ie_result.get('ie_key'),
657 extra_info=extra_info, download=False, process=False)
# Non-None fields from the embedding page override the target's, except
# '_type' and 'url' which must come from the resolved result.
659 force_properties = dict(
660 (k, v) for k, v in ie_result.items() if v is not None)
661 for f in ('_type', 'url'):
662 if f in force_properties:
663 del force_properties[f]
664 new_result = info.copy()
665 new_result.update(force_properties)
667 assert new_result.get('_type') != 'url_transparent'
669 return self.process_ie_result(
670 new_result, download=download, extra_info=extra_info)
671 elif result_type == 'playlist' or result_type == 'multi_video':
672 # We process each entry in the playlist
673 playlist = ie_result.get('title', None) or ie_result.get('id', None)
674 self.to_screen('[download] Downloading playlist: %s' % playlist)
676 playlist_results = []
# playliststart is 1-based for the user; convert to a 0-based slice index.
678 playliststart = self.params.get('playliststart', 1) - 1
679 playlistend = self.params.get('playlistend', None)
680 # For backwards compatibility, interpret -1 as whole list
681 if playlistend == -1:
# Entries may be a plain list, a lazily-paged PagedList, or a generic
# iterable (sliced with itertools.islice in the elided else branch).
684 ie_entries = ie_result['entries']
685 if isinstance(ie_entries, list):
686 n_all_entries = len(ie_entries)
687 entries = ie_entries[playliststart:playlistend]
688 n_entries = len(entries)
690 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
691 (ie_result['extractor'], playlist, n_all_entries, n_entries))
692 elif isinstance(ie_entries, PagedList):
693 entries = ie_entries.getslice(
694 playliststart, playlistend)
695 n_entries = len(entries)
697 "[%s] playlist %s: Downloading %d videos" %
698 (ie_result['extractor'], playlist, n_entries))
700 entries = list(itertools.islice(
701 ie_entries, playliststart, playlistend))
702 n_entries = len(entries)
704 "[%s] playlist %s: Downloading %d videos" %
705 (ie_result['extractor'], playlist, n_entries))
707 if self.params.get('playlistreverse', False):
708 entries = entries[::-1]
710 for i, entry in enumerate(entries, 1):
711 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
# Per-entry extra info propagated into each recursive call below.
713 'n_entries': n_entries,
714 'playlist': playlist,
715 'playlist_id': ie_result.get('id'),
716 'playlist_title': ie_result.get('title'),
717 'playlist_index': i + playliststart,
718 'extractor': ie_result['extractor'],
719 'webpage_url': ie_result['webpage_url'],
720 'webpage_url_basename': url_basename(ie_result['webpage_url']),
721 'extractor_key': ie_result['extractor_key'],
724 reason = self._match_entry(entry)
725 if reason is not None:
726 self.to_screen('[download] ' + reason)
729 entry_result = self.process_ie_result(entry,
732 playlist_results.append(entry_result)
733 ie_result['entries'] = playlist_results
735 elif result_type == 'compat_list':
737 'Extractor %s returned a compat_list result. '
738 'It needs to be updated.' % ie_result.get('extractor'))
# The elided _fixup helper presumably stamps these provenance fields
# onto each legacy entry — confirm against the full source.
744 'extractor': ie_result['extractor'],
745 'webpage_url': ie_result['webpage_url'],
746 'webpage_url_basename': url_basename(ie_result['webpage_url']),
747 'extractor_key': ie_result['extractor_key'],
751 ie_result['entries'] = [
752 self.process_ie_result(_fixup(r), download, extra_info)
753 for r in ie_result['entries']
757 raise Exception('Invalid result type: %s' % result_type)
# Resolve a single format selector ('best', 'worst', 'bestaudio',
# 'worstaudio', 'bestvideo', 'worstvideo', an extension, or a format_id)
# against available_formats; returns a format dict (or, per the elided
# tail, presumably None when nothing matches — confirm).
# Indexing with [-1]/[0] assumes available_formats is sorted worst-first
# so the last element is best — TODO confirm against the caller.
# NOTE(review): lines elided from this excerpt (the audio/video list
# assignments, 'if' guards, 'else:' branches and the final return).
759 def select_format(self, format_spec, available_formats):
760 if format_spec == 'best' or format_spec is None:
761 return available_formats[-1]
762 elif format_spec == 'worst':
763 return available_formats[0]
# Audio-only formats are identified by vcodec == 'none'.
764 elif format_spec == 'bestaudio':
766 f for f in available_formats
767 if f.get('vcodec') == 'none']
769 return audio_formats[-1]
770 elif format_spec == 'worstaudio':
772 f for f in available_formats
773 if f.get('vcodec') == 'none']
775 return audio_formats[0]
# Video-only formats are identified by acodec == 'none'.
776 elif format_spec == 'bestvideo':
778 f for f in available_formats
779 if f.get('acodec') == 'none']
781 return video_formats[-1]
782 elif format_spec == 'worstvideo':
784 f for f in available_formats
785 if f.get('acodec') == 'none']
787 return video_formats[0]
# A bare extension matches on 'ext'; anything else matches 'format_id'.
789 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
790 if format_spec in extensions:
791 filter_f = lambda f: f['ext'] == format_spec
793 filter_f = lambda f: f['format_id'] == format_spec
794 matches = list(filter(filter_f, available_formats))
# Normalise a resolved 'video' result (required id/title, playlist
# fields, thumbnails, display_id, upload_date from timestamp), then
# select the requested format(s) — including 'a+b' merge requests and
# comma/slash-separated selector lists — and pass each choice to
# process_info.
# NOTE(review): lines elided from this excerpt (see line-number jumps).
799 def process_video_result(self, info_dict, download=True):
800 assert info_dict.get('_type', 'video') == 'video'
802 if 'id' not in info_dict:
803 raise ExtractorError('Missing "id" field in extractor result')
804 if 'title' not in info_dict:
805 raise ExtractorError('Missing "title" field in extractor result')
807 if 'playlist' not in info_dict:
808 # It isn't part of a playlist
809 info_dict['playlist'] = None
810 info_dict['playlist_index'] = None
# Sort thumbnails so the largest ends up last; the best one becomes
# the 'thumbnail' field below.
812 thumbnails = info_dict.get('thumbnails')
814 thumbnails.sort(key=lambda t: (
815 t.get('width'), t.get('height'), t.get('url')))
817 if 'width' in t and 'height' in t:
818 t['resolution'] = '%dx%d' % (t['width'], t['height'])
820 if thumbnails and 'thumbnail' not in info_dict:
821 info_dict['thumbnail'] = thumbnails[-1]['url']
823 if 'display_id' not in info_dict and 'id' in info_dict:
824 info_dict['display_id'] = info_dict['id']
826 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
827 # Working around negative timestamps in Windows
828 # (see http://bugs.python.org/issue1646728)
829 if info_dict['timestamp'] < 0 and os.name == 'nt':
830 info_dict['timestamp'] = 0
831 upload_date = datetime.datetime.utcfromtimestamp(
832 info_dict['timestamp'])
833 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
835 # These extractors handle format selection themselves
836 if info_dict['extractor'] in ['Youku']:
838 self.process_info(info_dict)
841 # We now pick which formats have to be downloaded
842 if info_dict.get('formats') is None:
843 # There's only one format available
844 formats = [info_dict]
846 formats = info_dict['formats']
849 raise ExtractorError('No video formats found!')
851 # We check that all the formats have the format and format_id fields
852 for i, format in enumerate(formats):
853 if 'url' not in format:
854 raise ExtractorError('Missing "url" key in result (index %d)' % i)
856 if format.get('format_id') is None:
857 format['format_id'] = compat_str(i)
858 if format.get('format') is None:
859 format['format'] = '{id} - {res}{note}'.format(
860 id=format['format_id'],
861 res=self.format_resolution(format),
862 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
864 # Automatically determine file extension if missing
865 if 'ext' not in format:
866 format['ext'] = determine_ext(format['url']).lower()
# 'format_limit' truncates the list just past the named format.
868 format_limit = self.params.get('format_limit', None)
870 formats = list(takewhile_inclusive(
871 lambda f: f['format_id'] != format_limit, formats
874 # TODO Central sorting goes here
876 if formats[0] is not info_dict:
877 # only set the 'formats' field if the original info_dict lists them
878 # otherwise we end up with a circular reference, the first (and unique)
879 # element in the 'formats' field in info_dict is info_dict itself,
880 # which can't be exported to json
881 info_dict['formats'] = formats
882 if self.params.get('listformats', None):
883 self.list_formats(info_dict)
886 req_format = self.params.get('format')
887 if req_format is None:
889 formats_to_download = []
890 # The -1 is for supporting YoutubeIE
891 if req_format in ('-1', 'all'):
892 formats_to_download = formats
894 for rfstr in req_format.split(','):
895 # We can accept formats requested in the format: 34/5/best, we pick
896 # the first that is available, starting from left
897 req_formats = rfstr.split('/')
898 for rf in req_formats:
899 if re.match(r'.+?\+.+?', rf) is not None:
900 # Two formats have been requested like '137+139'
901 format_1, format_2 = rf.split('+')
902 formats_info = (self.select_format(format_1, formats),
903 self.select_format(format_2, formats))
904 if all(formats_info):
905 # The first format must contain the video and the
907 if formats_info[0].get('vcodec') == 'none':
908 self.report_error('The first format must '
909 'contain the video, try using '
910 '"-f %s+%s"' % (format_2, format_1))
912 'requested_formats': formats_info,
915 'ext': formats_info[0]['ext'],
918 selected_format = None
920 selected_format = self.select_format(rf, formats)
921 if selected_format is not None:
922 formats_to_download.append(selected_format)
924 if not formats_to_download:
925 raise ExtractorError('requested format not available',
929 if len(formats_to_download) > 1:
930 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
931 for format in formats_to_download:
932 new_info = dict(info_dict)
933 new_info.update(format)
934 self.process_info(new_info)
935 # We update the info dict with the best quality format (backwards compatibility)
936 info_dict.update(formats_to_download[-1])
# NOTE(review): this chunk is a numbered listing -- every line keeps its original
# file line number and indentation was stripped.  Gaps in the numbering (e.g.
# 962-963, 998-1003, 1099-1102, 1140-1143) mean lines are elided from this view,
# including several `try:` / `return` / `else:` statements; confirm against the
# full file before changing any logic below.
#
# Purpose: handle ONE fully-resolved video result end to end -- enforce
# --max-downloads, print the --force-* fields, honour simulate mode, write the
# optional side files (description / annotations / subtitles / info-json /
# thumbnail), perform the actual download (one file per requested format with a
# deferred FFmpeg merge, or a single file), and finally run post-processing and
# record the download archive entry.
939 def process_info(self, info_dict):
940 """Process a single resolved IE result."""
942 assert info_dict.get('_type', 'video') == 'video'
# Abort the whole run once the --max-downloads quota is reached.
944 max_downloads = self.params.get('max_downloads')
945 if max_downloads is not None:
946 if self._num_downloads >= int(max_downloads):
947 raise MaxDownloadsReached()
# Cap overly long titles at 200 chars; 'fulltitle' keeps the untruncated one.
949 info_dict['fulltitle'] = info_dict['title']
950 if len(info_dict['title']) > 200:
951 info_dict['title'] = info_dict['title'][:197] + '...'
953 # Keep for backwards compatibility
954 info_dict['stitle'] = info_dict['title']
956 if 'format' not in info_dict:
957 info_dict['format'] = info_dict['ext']
# _match_entry returns a human-readable reason to skip this video, or None
# to proceed (the early `return` after the message is elided at 962-963).
959 reason = self._match_entry(info_dict)
960 if reason is not None:
961 self.to_screen('[download] ' + reason)
964 self._num_downloads += 1
966 filename = self.prepare_filename(info_dict)
# Forced printing of requested metadata fields (works in simulate mode too).
969 if self.params.get('forcetitle', False):
970 self.to_stdout(info_dict['fulltitle'])
971 if self.params.get('forceid', False):
972 self.to_stdout(info_dict['id'])
973 if self.params.get('forceurl', False):
974 if info_dict.get('requested_formats') is not None:
975 for f in info_dict['requested_formats']:
976 self.to_stdout(f['url'] + f.get('play_path', ''))
978 # For RTMP URLs, also include the playpath
979 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
980 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
981 self.to_stdout(info_dict['thumbnail'])
982 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
983 self.to_stdout(info_dict['description'])
984 if self.params.get('forcefilename', False) and filename is not None:
985 self.to_stdout(filename)
986 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
987 self.to_stdout(formatSeconds(info_dict['duration']))
988 if self.params.get('forceformat', False):
989 self.to_stdout(info_dict['format'])
990 if self.params.get('forcejson', False):
991 info_dict['_filename'] = filename
992 self.to_stdout(json.dumps(info_dict))
993 if self.params.get('dump_single_json', False):
994 info_dict['_filename'] = filename
996 # Do nothing else if in simulate mode
# (the `return` body of this guard is elided at 998)
997 if self.params.get('simulate', False):
1000 if filename is None:
# Ensure the target directory exists; the makedirs call inside the elided
# `try:` (1006) is not visible here.
1004 dn = os.path.dirname(encodeFilename(filename))
1005 if dn and not os.path.exists(dn):
1007 except (OSError, IOError) as err:
1008 self.report_error('unable to create directory ' + compat_str(err))
# --write-description: plain-text description next to the video file.
1011 if self.params.get('writedescription', False):
1012 descfn = filename + '.description'
1013 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1014 self.to_screen('[info] Video description is already present')
1015 elif info_dict.get('description') is None:
1016 self.report_warning('There\'s no description to write.')
1019 self.to_screen('[info] Writing video description to: ' + descfn)
1020 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1021 descfile.write(info_dict['description'])
1022 except (OSError, IOError):
1023 self.report_error('Cannot write description file ' + descfn)
# --write-annotations: XML annotations side file.
1026 if self.params.get('writeannotations', False):
1027 annofn = filename + '.annotations.xml'
1028 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1029 self.to_screen('[info] Video annotations are already present')
1032 self.to_screen('[info] Writing video annotations to: ' + annofn)
1033 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1034 annofile.write(info_dict['annotations'])
# KeyError/TypeError here means info_dict has no usable 'annotations' entry.
1035 except (KeyError, TypeError):
1036 self.report_warning('There are no annotations to write.')
1037 except (OSError, IOError):
1038 self.report_error('Cannot write annotations file: ' + annofn)
# Subtitles: one file per language in the requested format.
1041 subtitles_are_requested = any([self.params.get('writesubtitles', False),
1042 self.params.get('writeautomaticsub')])
1044 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1045 # subtitles download errors are already managed as troubles in relevant IE
1046 # that way it will silently go on when used with unsupporting IE
1047 subtitles = info_dict['subtitles']
1048 sub_format = self.params.get('subtitlesformat', 'srt')
1049 for sub_lang in subtitles.keys():
1050 sub = subtitles[sub_lang]
1054 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1055 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1056 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1058 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
# (the `subfile.write(sub)` line is elided at 1060)
1059 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1061 except (OSError, IOError):
1062 self.report_error('Cannot write subtitles file ' + sub_filename)
# --write-info-json: dump the whole info dict as <name>.info.json.
1065 if self.params.get('writeinfojson', False):
1066 infofn = os.path.splitext(filename)[0] + '.info.json'
1067 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1068 self.to_screen('[info] Video description metadata is already present')
1070 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1072 write_json_file(info_dict, infofn)
1073 except (OSError, IOError):
1074 self.report_error('Cannot write metadata to JSON file ' + infofn)
# --write-thumbnail: fetch the thumbnail URL; failures only warn, never abort.
1077 if self.params.get('writethumbnail', False):
1078 if info_dict.get('thumbnail') is not None:
1079 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
1080 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
1081 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1082 self.to_screen('[%s] %s: Thumbnail is already present' %
1083 (info_dict['extractor'], info_dict['id']))
1085 self.to_screen('[%s] %s: Downloading thumbnail ...' %
1086 (info_dict['extractor'], info_dict['id']))
1088 uf = self.urlopen(info_dict['thumbnail'])
1089 with open(thumb_filename, 'wb') as thumbf:
1090 shutil.copyfileobj(uf, thumbf)
1091 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1092 (info_dict['extractor'], info_dict['id'], thumb_filename))
1093 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1094 self.report_warning('Unable to download thumbnail "%s": %s' %
1095 (info_dict['thumbnail'], compat_str(err)))
# Actual download, unless --skip-download was given.  The local helper `dl`
# (its `def dl(name, info):` header is elided in the 1099-1102 gap) picks a
# suitable FileDownloader, attaches progress hooks and runs it.
1097 if not self.params.get('skip_download', False):
1098 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1103 fd = get_suitable_downloader(info)(self, self.params)
1104 for ph in self._progress_hooks:
1105 fd.add_progress_hook(ph)
1106 if self.params.get('verbose'):
1107 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1108 return fd.download(name, info)
# Multiple requested formats: download each into its own 'f<format_id>'
# file and queue an FFmpegMergerPP to combine them in post-processing.
1109 if info_dict.get('requested_formats') is not None:
1112 merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1113 if not merger._executable:
1115 self.report_warning('You have requested multiple '
1116 'formats but ffmpeg or avconv are not installed.'
1117 ' The formats won\'t be merged')
1119 postprocessors = [merger]
1120 for f in info_dict['requested_formats']:
1121 new_info = dict(info_dict)
1123 fname = self.prepare_filename(new_info)
1124 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1125 downloaded.append(fname)
1126 partial_success = dl(fname, new_info)
# overall success only if every part downloaded
1127 success = success and partial_success
1128 info_dict['__postprocessors'] = postprocessors
1129 info_dict['__files_to_merge'] = downloaded
1131 # Just a single file
1132 success = dl(filename, info_dict)
1133 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1134 self.report_error('unable to download video data: %s' % str(err))
# OS-level failures are escalated as UnavailableVideoError to the caller.
1136 except (OSError, IOError) as err:
1137 raise UnavailableVideoError(err)
1138 except (ContentTooShortError, ) as err:
1139 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
# Post-process and record in the archive (the `if success:`/`try:` guard
# lines are elided in the 1140-1143 gap).
1144 self.post_process(filename, info_dict)
1145 except (PostProcessingError) as err:
1146 self.report_error('postprocessing: %s' % str(err))
1148 self.record_download_archive(info_dict)
# NOTE(review): numbered listing with elided lines -- 1154 (presumably the
# check that outtmpl contains a '%' placeholder -- TODO confirm), 1159 (the
# `try:`), and 1166-1167 (re-raising MaxDownloadsReached and the `else:` that
# introduces the dump_single_json branch) are not visible here.
#
# Purpose: top-level entry point -- extract (and thereby download) every URL in
# url_list, then return the accumulated process return code.
1150 def download(self, url_list):
1151 """Download a given list of URLs."""
# Multiple URLs with a fixed (template-free) output name would overwrite
# the same file, unless only a single download is allowed anyway.
1152 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1153 if (len(url_list) > 1 and
1155 and self.params.get('max_downloads') != 1):
1156 raise SameFileError(outtmpl)
1158 for url in url_list:
1160 # It also downloads the videos
1161 res = self.extract_info(url)
1162 except UnavailableVideoError:
1163 self.report_error('unable to download video')
1164 except MaxDownloadsReached:
1165 self.to_screen('[info] Maximum number of downloaded files reached.')
# On success, --dump-single-json prints the full result as one JSON blob.
1168 if self.params.get('dump_single_json', False):
1169 self.to_stdout(json.dumps(res))
1171 return self._download_retcode
# NOTE(review): elided in this listing -- line 1175 (presumably
# `info = json.load(f)` -- TODO confirm) and 1183-1184 (the `else: raise` that
# re-raises DownloadError when no webpage_url fallback exists).
#
# Purpose: re-run processing from a previously saved .info.json file
# (--load-info); if that fails, fall back to re-downloading from the
# original webpage URL when one is recorded.
1173 def download_with_info_file(self, info_filename):
1174 with io.open(info_filename, 'r', encoding='utf-8') as f:
1177 self.process_ie_result(info, download=True)
1178 except DownloadError:
1179 webpage_url = info.get('webpage_url')
1180 if webpage_url is not None:
1181 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
# Delegate to the normal URL path; returns its retcode directly.
1182 return self.download([webpage_url])
1185 return self._download_retcode
# NOTE(review): elided lines -- 1191-1192 (presumably `keep_video = None` and
# `pps_chain = []` initializers -- TODO confirm), 1197 and 1208 (the `try:`
# statements whose `except` clauses are visible below).
#
# Purpose: run the per-download postprocessors (attached on info_dict by
# process_info) followed by the globally registered ones, then delete the
# original file unless a postprocessor or --keep-video asked to keep it.
1187 def post_process(self, filename, ie_info):
1188 """Run all the postprocessors on the given file."""
# Work on a copy so postprocessors cannot mutate the caller's dict.
1189 info = dict(ie_info)
1190 info['filepath'] = filename
1193 if ie_info.get('__postprocessors') is not None:
1194 pps_chain.extend(ie_info['__postprocessors'])
1195 pps_chain.extend(self._pps)
1196 for pp in pps_chain:
# Each pp returns (keep_video_wish, new_info); an explicit wish from a
# user-requested pp overrides, otherwise the first wish wins.
1198 keep_video_wish, new_info = pp.run(info)
1199 if keep_video_wish is not None:
1201 keep_video = keep_video_wish
1202 elif keep_video is None:
1203 # No clear decision yet, let IE decide
1204 keep_video = keep_video_wish
1205 except PostProcessingError as e:
1206 self.report_error(e.msg)
1207 if keep_video is False and not self.params.get('keepvideo', False):
1209 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1210 os.remove(encodeFilename(filename))
# Removal failure is non-fatal -- the converted output already exists.
1211 except (IOError, OSError):
1212 self.report_warning('Unable to remove downloaded video file')
1214 def _make_archive_id(self, info_dict):
1215 # Future-proof against any change in case
1216 # and backwards compatibility with prior versions
1217 extractor = info_dict.get('extractor_key')
1218 if extractor is None:
1219 if 'id' in info_dict:
1220 extractor = info_dict.get('ie_key') # key in a playlist
1221 if extractor is None:
1222 return None # Incomplete video information
1223 return extractor.lower() + ' ' + info_dict['id']
# NOTE(review): elided lines -- 1227-1229 (presumably the `if fn is None:
# return False` guard -- TODO confirm), 1231 (the `if vid_id is None:` guard
# for the visible `return False`), 1234 (the `try:`), 1238 (`return True` on a
# match), 1241 (re-raise for non-ENOENT errors) and the trailing
# `return False`.
#
# Purpose: report whether this video is already recorded in the
# --download-archive file (one '<extractor> <id>' key per line).
1225 def in_download_archive(self, info_dict):
1226 fn = self.params.get('download_archive')
1230 vid_id = self._make_archive_id(info_dict)
1232 return False # Incomplete video information
# Linear scan of the archive; a missing file (ENOENT) simply means
# nothing has been archived yet.
1235 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1236 for line in archive_file:
1237 if line.strip() == vid_id:
1239 except IOError as ioe:
1240 if ioe.errno != errno.ENOENT:
# NOTE(review): elided lines -- 1246-1247 (presumably the `if fn is None:
# return` guard -- TODO confirm) and 1249 (presumably an `assert vid_id`).
#
# Purpose: append this video's archive key to the --download-archive file so
# in_download_archive() skips it on future runs.
1244 def record_download_archive(self, info_dict):
1245 fn = self.params.get('download_archive')
1248 vid_id = self._make_archive_id(info_dict)
# locked_file guards against concurrent youtube-dl processes appending
# to the same archive.
1250 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1251 archive_file.write(vid_id + '\n')
# NOTE(review): elided lines -- 1253 (presumably a `@staticmethod` decorator,
# as there is no `self` parameter -- TODO confirm), 1256 (the return for the
# audio-only branch), 1262 (`else:`), 1266-1268 (the default fallback and the
# final `return res`).  Also note `format` shadows the builtin of the same
# name -- kept as-is since this is a listing.
#
# Purpose: produce a human-readable resolution string for one format dict:
# explicit 'resolution' wins, then 'WxH', then 'Hp', then '?xW', else default.
1254 def format_resolution(format, default='unknown'):
1255 if format.get('vcodec') == 'none':
1257 if format.get('resolution') is not None:
1258 return format['resolution']
1259 if format.get('height') is not None:
1260 if format.get('width') is not None:
1261 res = '%sx%s' % (format['width'], format['height'])
# height only: conventional 'NNNp' notation
1263 res = '%sp' % format['height']
1264 elif format.get('width') is not None:
1265 res = '?x%d' % format['width']
# NOTE(review): elided lines throughout -- 1271 (presumably `res = ''` -- TODO
# confirm), plus several branch bodies/else lines (1279-1280, 1284-1285, 1288,
# 1290, 1296-1297, 1299-1300, 1303-1305, 1311-1312, 1315-1316) and the final
# `return res` (1318).  The visible code only appends fragments to `res`.
#
# Purpose: build the free-form "note" column shown by --list-formats: format
# note, total bitrate, container, video codec + vbr, fps, audio codec + abr,
# sample rate, and (approximate) filesize.
1270 def _format_note(self, fdict):
# f4f/f4m (Adobe HDS) downloads are not supported -- flag them.
1272 if fdict.get('ext') in ['f4f', 'f4m']:
1273 res += '(unsupported) '
1274 if fdict.get('format_note') is not None:
1275 res += fdict['format_note'] + ' '
1276 if fdict.get('tbr') is not None:
1277 res += '%4dk ' % fdict['tbr']
1278 if fdict.get('container') is not None:
1281 res += '%s container' % fdict['container']
1282 if (fdict.get('vcodec') is not None and
1283 fdict.get('vcodec') != 'none'):
1286 res += fdict['vcodec']
1287 if fdict.get('vbr') is not None:
1289 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1291 if fdict.get('vbr') is not None:
1292 res += '%4dk' % fdict['vbr']
1293 if fdict.get('fps') is not None:
1294 res += ', %sfps' % fdict['fps']
1295 if fdict.get('acodec') is not None:
# acodec == 'none' marks video-only formats
1298 if fdict['acodec'] == 'none':
1301 res += '%-5s' % fdict['acodec']
1302 elif fdict.get('abr') is not None:
1306 if fdict.get('abr') is not None:
1307 res += '@%3dk' % fdict['abr']
1308 if fdict.get('asr') is not None:
1309 res += ' (%5dHz)' % fdict['asr']
1310 if fdict.get('filesize') is not None:
1313 res += format_bytes(fdict['filesize'])
# filesize_approx is an estimate, hence the '~' prefix
1314 elif fdict.get('filesize_approx') is not None:
1317 res += '~' + format_bytes(fdict['filesize_approx'])
# NOTE(review): elided lines -- 1324 (presumably `format['ext'],` -- TODO
# confirm against the 4-column format string), 1327-1328 (closing of the
# nested `line` helper) and 1332 (presumably `formats_s = [` opening the list
# comprehension whose body is visible at 1333-1334).
#
# Purpose: print the --list-formats table: one row per format (worst first,
# best last, per the sort order established upstream), with an id column wide
# enough for the longest format_id.
1320 def list_formats(self, info_dict):
# Nested helper rendering one table row; idlen sizes the id column.
1321 def line(format, idlen=20):
1322 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1323 format['format_id'],
1325 self.format_resolution(format),
1326 self._format_note(format),
# A bare video entry (no 'formats' list) is treated as its only format.
1329 formats = info_dict.get('formats', [info_dict])
1330 idlen = max(len('format code'),
1331 max(len(f['format_id']) for f in formats))
# Formats with preference < -1000 are hidden from the listing.
1333 line(f, idlen) for f in formats
1334 if f.get('preference') is None or f['preference'] >= -1000]
1335 if len(formats) > 1:
1336 formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1337 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1339 header_line = line({
1340 'format_id': 'format code', 'ext': 'extension',
1341 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1342 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1343 (info_dict['id'], header_line, '\n'.join(formats_s)))
# NOTE(review): elided lines -- 1360-1362 (presumably the
# `if req_is_string: req = url_escaped` / `else:` halves of the substitution,
# of which only the Request-rebuild branch at 1363-1365 is visible -- TODO
# confirm).
#
# Purpose: open any HTTP request through the shared opener (shared cookies,
# proxies, timeout), percent-escaping non-ASCII URL characters first.
1345 def urlopen(self, req):
1346 """ Start an HTTP download """
1348 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1349 # always respected by websites, some tend to give out URLs with non percent-encoded
1350 # non-ASCII characters (see telemb.py, ard.py [#3412])
1351 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1352 # To work around aforementioned issue we will replace request's original URL with
1353 # percent-encoded one
# `req` may be either a plain URL string or a urllib Request object;
# basestring exists on Python 2 only, hence the version check.
1354 req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1355 url = req if req_is_string else req.get_full_url()
1356 url_escaped = escape_url(url)
1358 # Substitute URL if any change after escaping
1359 if url != url_escaped:
# Rebuild the Request with the escaped URL, preserving payload,
# headers and the origin/unverifiable attributes.
1363 req = compat_urllib_request.Request(
1364 url_escaped, data=req.data, headers=req.headers,
1365 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1367 return self._opener.open(req, timeout=self._socket_timeout)
# NOTE(review): elided lines -- 1371-1372 (presumably the early `return` when
# not verbose -- TODO confirm), 1389 (the `try:` wrapping the git call),
# 1398-1402 (the matching `except:` suppressing git failures), 1409-1414
# (part of the exe-versions join, including the fallback when no versions are
# found) and 1416-1417 (presumably `proxy_map = {}`).
#
# Purpose: in --verbose mode, print the debug header: encodings, youtube-dl
# version, git HEAD (best-effort), Python/platform, external tool versions
# (ffmpeg/avconv/rtmpdump) and the active proxy map.
1369 def print_debug_header(self):
1370 if not self.params.get('verbose'):
# Guard against builds where str has been rebound (broken vendor Python).
1373 if type('') is not compat_str:
1374 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1375 self.report_warning(
1376 'Your Python is broken! Update to a newer and supported version')
1378 stdout_encoding = getattr(
1379 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1381 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1382 locale.getpreferredencoding(),
1383 sys.getfilesystemencoding(),
1385 self.get_encoding()))
1386 write_string(encoding_str, encoding=None)
1388 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
# Best-effort: report the git commit when running from a checkout.
1390 sp = subprocess.Popen(
1391 ['git', 'rev-parse', '--short', 'HEAD'],
1392 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1393 cwd=os.path.dirname(os.path.abspath(__file__)))
1394 out, err = sp.communicate()
1395 out = out.decode().strip()
1396 if re.match('[0-9a-f]+', out):
1397 self._write_string('[debug] Git HEAD: ' + out + '\n')
1403 self._write_string('[debug] Python version %s - %s\n' % (
1404 platform.python_version(), platform_name()))
1406 exe_versions = FFmpegPostProcessor.get_versions()
1407 exe_versions['rtmpdump'] = rtmpdump_version()
1408 exe_str = ', '.join(
1410 for exe, v in sorted(exe_versions.items())
1415 self._write_string('[debug] exe versions: %s\n' % exe_str)
# Collect proxies from every opener handler that declares them.
1418 for handler in self._opener.handlers:
1419 if hasattr(handler, 'proxies'):
1420 proxy_map.update(handler.proxies)
1421 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
# NOTE(review): elided lines -- 1432 (`else:` before the MozillaCookieJar
# branch), 1434 (presumably the `opts_cookiefile` argument to
# MozillaCookieJar -- TODO confirm), 1439 (closing of the
# HTTPCookieProcessor call, presumably `self.cookiejar)`), 1442-1443
# (presumably `proxies = {}` / `else:` for the empty-proxy case) and 1445
# (`else:` before the getproxies() fallback).
#
# Purpose: build self._opener, the urllib opener shared by all requests:
# cookie jar (in-memory or Mozilla-format file), proxy handling, HTTPS
# certificate checking and the custom YoutubeDLHandler.
1423 def _setup_opener(self):
# Default socket timeout is 10 minutes.
1424 timeout_val = self.params.get('socket_timeout')
1425 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1427 opts_cookiefile = self.params.get('cookiefile')
1428 opts_proxy = self.params.get('proxy')
1430 if opts_cookiefile is None:
1431 self.cookiejar = compat_cookiejar.CookieJar()
1433 self.cookiejar = compat_cookiejar.MozillaCookieJar(
# Only load the cookie file if it exists and is readable.
1435 if os.access(opts_cookiefile, os.R_OK):
1436 self.cookiejar.load()
1438 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
# --proxy '' means "no proxy at all"; otherwise use the given proxy for
# both http and https; with no --proxy fall back to the environment.
1440 if opts_proxy is not None:
1441 if opts_proxy == '':
1444 proxies = {'http': opts_proxy, 'https': opts_proxy}
1446 proxies = compat_urllib_request.getproxies()
1447 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1448 if 'http' in proxies and 'https' not in proxies:
1449 proxies['https'] = proxies['http']
1450 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1452 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1453 https_handler = make_HTTPS_handler(
1454 self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1455 ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1456 opener = compat_urllib_request.build_opener(
1457 https_handler, proxy_handler, cookie_processor, ydlh)
1458 # Delete the default user-agent header, which would otherwise apply in
1459 # cases where our custom HTTP handler doesn't come into play
1460 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1461 opener.addheaders = []
1462 self._opener = opener
# NOTE(review): elided lines -- 1468 (the `try:` matching the visible
# `except` at 1470) and 1472 (presumably the `raise` re-raising the annotated
# UnicodeEncodeError -- TODO confirm).
#
# Purpose: encode a text string to bytes using the configured output
# encoding; bytes pass through unchanged.  On failure the error's reason is
# extended with a hint before being re-raised.
1464 def encode(self, s):
1465 if isinstance(s, bytes):
1466 return s # Already encoded
1469 return s.encode(self.get_encoding())
1470 except UnicodeEncodeError as err:
1471 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1474 def get_encoding(self):
1475 encoding = self.params.get('encoding')
1476 if encoding is None:
1477 encoding = preferredencoding()