2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
30 compat_urllib_request,
53 UnavailableVideoError,
60 from .extractor import get_info_extractor, gen_extractors
61 from .downloader import get_suitable_downloader
62 from .postprocessor import FFmpegMergerPP
63 from .version import __version__
66 class YoutubeDL(object):
69 YoutubeDL objects are the ones responsible of downloading the
70 actual video file and writing it to disk if the user has requested
71 it, among some other tasks. In most cases there should be one per
72 program. As, given a video URL, the downloader doesn't know how to
73 extract all the needed information, task that InfoExtractors do, it
74 has to pass the URL to one of them.
76 For this, YoutubeDL objects have a method that allows
77 InfoExtractors to be registered in a given order. When it is passed
78 a URL, the YoutubeDL object handles it to the first InfoExtractor it
79 finds that reports being able to handle it. The InfoExtractor extracts
80 all the information about the video or videos the URL refers to, and
81 YoutubeDL process the extracted information, possibly using a File
82 Downloader to download the video.
84 YoutubeDL objects accept a lot of parameters. In order not to saturate
85 the object constructor with arguments, it receives a dictionary of
86 options instead. These options are available through the params
87 attribute for the InfoExtractors to use. The YoutubeDL also
88 registers itself as the downloader in charge for the InfoExtractors
89 that are added to it, so this is a "mutual registration".
93 username: Username for authentication purposes.
94 password: Password for authentication purposes.
videopassword: Password for accessing a video.
96 usenetrc: Use netrc for authentication instead.
97 verbose: Print additional info to stdout.
98 quiet: Do not print messages to stdout.
99 no_warnings: Do not print out anything for warnings.
100 forceurl: Force printing final URL.
101 forcetitle: Force printing title.
102 forceid: Force printing ID.
103 forcethumbnail: Force printing thumbnail URL.
104 forcedescription: Force printing description.
105 forcefilename: Force printing final filename.
106 forceduration: Force printing duration.
107 forcejson: Force printing info_dict as JSON.
108 simulate: Do not download the video files.
109 format: Video format code.
110 format_limit: Highest quality format to try.
111 outtmpl: Template for output names.
112 restrictfilenames: Do not allow "&" and spaces in file names
113 ignoreerrors: Do not stop on download errors.
114 nooverwrites: Prevent overwriting files.
115 playliststart: Playlist item to start at.
116 playlistend: Playlist item to end at.
117 matchtitle: Download only matching titles.
118 rejecttitle: Reject downloads for matching titles.
119 logger: Log messages to a logging.Logger instance.
120 logtostderr: Log messages to stderr instead of stdout.
121 writedescription: Write the video description to a .description file
122 writeinfojson: Write the video description to a .info.json file
123 writeannotations: Write the video annotations to a .annotations.xml file
124 writethumbnail: Write the thumbnail image to a file
125 writesubtitles: Write the video subtitles to a file
126 writeautomaticsub: Write the automatic subtitles to a file
127 allsubtitles: Downloads all the subtitles of the video
128 (requires writesubtitles or writeautomaticsub)
129 listsubtitles: Lists all available subtitles for the video
130 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
131 subtitleslangs: List of languages of the subtitles to download
132 keepvideo: Keep the video file after post-processing
133 daterange: A DateRange object, download only if the upload_date is in the range.
134 skip_download: Skip the actual download of the video file
135 cachedir: Location of the cache files in the filesystem.
136 None to disable filesystem cache.
137 noplaylist: Download single video instead of a playlist if in doubt.
138 age_limit: An integer representing the user's age in years.
139 Unsuitable videos for the given age are skipped.
140 min_views: An integer representing the minimum view count the video
141 must have in order to not be skipped.
142 Videos without view count information are always
143 downloaded. None for no limit.
144 max_views: An integer representing the maximum view count.
145 Videos that are more popular than that are not
147 Videos without view count information are always
148 downloaded. None for no limit.
149 download_archive: File name of a file where all downloads are recorded.
150 Videos already present in the file are not downloaded
152 cookiefile: File name where cookies should be read from and dumped to.
153 nocheckcertificate:Do not verify SSL certificates
154 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
155 At the moment, this is only supported by YouTube.
156 proxy: URL of the proxy server to use
157 socket_timeout: Time to wait for unresponsive hosts, in seconds
158 bidi_workaround: Work around buggy terminals without bidirectional text
support, using fribidi
160 debug_printtraffic:Print out sent and received HTTP traffic
161 include_ads: Download ads as well
162 default_search: Prepend this string if an input url is not valid.
163 'auto' for elaborate guessing
164 encoding: Use this encoding instead of the system-specified.
166 The following parameters are not used by YoutubeDL itself, they are used by
168 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
169 noresizebuffer, retries, continuedl, noprogress, consoletitle
171 The following options are used by the post processors:
172 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
173 otherwise prefer avconv.
    # Process-wide return code (0 = success); re-initialised per instance in
    # __init__ and set to 1 by trouble() when 'ignoreerrors' is active.
    _download_retcode = None
    # Running count of downloads; feeds the %(autonumber)s output template.
    _num_downloads = None
    def __init__(self, params=None):
        """Create a FileDownloader object with the given options."""
        # NOTE(review): this excerpt elides several connective lines (params
        # defaulting, try/else branches around the bidi setup); comments below
        # describe only the visible logic.
        # Cache of instantiated InfoExtractors, keyed by ie_key().
        self._ies_instances = {}
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Screen output goes to stderr instead of stdout when 'logtostderr' is set.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr

        if params.get('bidi_workaround', False):
            # Route screen output through an external bidi filter (bidiv,
            # falling back to fribidi) so RTL text renders on buggy terminals.
            master, slave = pty.openpty()
            width = get_term_width()
            width_args = ['-w', str(width)]
            stdin=subprocess.PIPE,
            stderr=self._err_file)
            self._output_process = subprocess.Popen(
                ['bidiv'] + width_args, **sp_kwargs
            self._output_process = subprocess.Popen(
                ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            # Read the filtered text back from the pty master side.
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

        # NOTE(review): params['restrictfilenames'] raises KeyError when the key
        # is absent; other reads in this class use params.get(...) — verify
        # against upstream, which uses params.get('restrictfilenames') here.
        if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params['restrictfilenames']):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all charactes. '
            'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        # %(stitle)s (sanitized title) is superseded by %(title)s + --restrict-filenames.
        if '%(stitle)s' in self.params.get('outtmpl', ''):
            self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        # NOTE(review): this excerpt elides a line between the docstring and the
        # cache insert — presumably the append to the ordered extractor list.
        # Cache the instance by key so get_info_extractor() can reuse it.
        self._ies_instances[ie.ie_key()] = ie
        # Mutual registration: the extractor gets a back-reference to us.
        ie.set_downloader(self)
    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key, it will try to get one from
        the _ies list, if there's no instance it will create a new one and add
        it to the extractor list.
        """
        # NOTE(review): the `if ie is None:` guard and the trailing `return ie`
        # are elided in this excerpt — verify against upstream.
        ie = self._ies_instances.get(ie_key)
        # Instantiate the extractor class looked up by key and register it.
        ie = get_info_extractor(ie_key)()
        self.add_info_extractor(ie)
    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractors():
            self.add_info_extractor(ie)
    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        # NOTE(review): the line storing pp (presumably appending to the
        # post-processor chain) is elided in this excerpt.
        # Mutual registration, mirroring add_info_extractor().
        pp.set_downloader(self)
269 def add_progress_hook(self, ph):
270 """Add the progress hook (currently only for the file downloader)"""
271 self._progress_hooks.append(ph)
    def _bidi_workaround(self, message):
        # No bidi filter process was set up (see __init__) — nothing to do.
        # NOTE(review): the early `return message` for this branch is elided in
        # this excerpt.
        if not hasattr(self, '_output_channel'):

        assert hasattr(self, '_output_process')
        assert type(message) == type('')
        # Feed the message through the external bidi process line by line and
        # read back exactly as many lines as we wrote.
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        # Drop the trailing newline we appended above.
        return res[:-len('\n')]
286 def to_screen(self, message, skip_eol=False):
287 """Print message to stdout if not in quiet mode."""
288 return self.to_stdout(message, skip_eol, check_quiet=True)
290 def _write_string(self, s, out=None):
291 write_string(s, out=out, encoding=self.params.get('encoding'))
    def to_stdout(self, message, skip_eol=False, check_quiet=False):
        """Print message to stdout if not in quiet mode."""
        # A configured logger takes precedence over direct screen output.
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        elif not check_quiet or not self.params.get('quiet', False):
            # Apply the bidi filter (no-op unless enabled) before writing.
            message = self._bidi_workaround(message)
            terminator = ['\n', ''][skip_eol]
            output = message + terminator

            self._write_string(output, self._screen_file)
    def to_stderr(self, message):
        """Print message to stderr."""
        assert type(message) == type('')
        # A configured logger takes precedence over direct stderr output.
        if self.params.get('logger'):
            self.params['logger'].error(message)
        # NOTE(review): the `else:` header for the branch below is elided in
        # this excerpt.
            message = self._bidi_workaround(message)
            output = message + '\n'
            self._write_string(output, self._err_file)
    def to_console_title(self, message):
        # No-op unless the user enabled 'consoletitle'.
        # NOTE(review): the early `return` after this guard is elided in this
        # excerpt.
        if not self.params.get('consoletitle', False):
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # xterm OSC 0 escape: sets both icon name and window title.
            self._write_string('\033]0;%s\007' % message, self._screen_file)
    def save_console_title(self):
        # No-op unless the user enabled 'consoletitle'.
        # NOTE(review): the early `return` after this guard is elided in this
        # excerpt.
        if not self.params.get('consoletitle', False):
        if 'TERM' in os.environ:
            # Save the title on stack
            self._write_string('\033[22;0t', self._screen_file)
    def restore_console_title(self):
        # No-op unless the user enabled 'consoletitle'.
        # NOTE(review): the early `return` after this guard is elided in this
        # excerpt.
        if not self.params.get('consoletitle', False):
        if 'TERM' in os.environ:
            # Restore the title from stack
            self._write_string('\033[23;0t', self._screen_file)
339 self.save_console_title()
    def __exit__(self, *args):
        # Context-manager exit: undo save_console_title() and persist state.
        self.restore_console_title()

        # Flush the cookie jar back to disk when a cookie file was configured.
        if self.params.get('cookiefile') is not None:
            self.cookiejar.save()
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        # NOTE(review): several guard/else lines inside this method are elided
        # in this excerpt; comments describe only the visible logic.
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            # Build a traceback string for verbose output.
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                # Prefer the exc_info wrapped inside an ExtractorError-style
                # exception so the root cause is shown.
                if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                    tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += compat_str(traceback.format_exc())
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = ''.join(tb_data)
        if not self.params.get('ignoreerrors', False):
            # Re-raise as DownloadError, pointing at the wrapped original
            # exc_info when one is available.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # With 'ignoreerrors', the failure is only recorded in the return code.
        self._download_retcode = 1
    def report_warning(self, message):
        """
        Print the message to stderr, it will be prefixed with 'WARNING:'
        If stderr is a tty file the 'WARNING:' will be colored
        """
        # NOTE(review): the early `return`s after the logger/no_warnings
        # branches and the `else:` header are elided in this excerpt.
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        if self.params.get('no_warnings'):
        # ANSI yellow header only on POSIX ttys (Windows consoles don't parse it).
        if self._err_file.isatty() and os.name != 'nt':
            _msg_header = '\033[0;33mWARNING:\033[0m'
            _msg_header = 'WARNING:'
        warning_message = '%s %s' % (_msg_header, message)
        self.to_stderr(warning_message)
    def report_error(self, message, tb=None):
        """
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        """
        # NOTE(review): the `else:` header between the two header assignments
        # is elided in this excerpt.
        if self._err_file.isatty() and os.name != 'nt':
            _msg_header = '\033[0;31mERROR:\033[0m'
            _msg_header = 'ERROR:'
        error_message = '%s %s' % (_msg_header, message)
        # trouble() decides between raising DownloadError and setting retcode.
        self.trouble(error_message, tb)
    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        # NOTE(review): the `try:` opener is elided in this excerpt.
        self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            # The filename may not be representable in the console encoding.
            self.to_screen('[download] The file has already been downloaded')
    def prepare_filename(self, info_dict):
        """Generate the output filename."""
        # NOTE(review): several connective lines (autonumber branch, sanitize
        # arguments, try: opener, return statements) are elided in this excerpt.
        # Work on a copy so the caller's info_dict is not polluted with
        # template-only fields.
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
        autonumber_templ = '%0' + str(autonumber_size) + 'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        # Zero-pad playlist indices for stable lexicographic sorting.
        if template_dict.get('playlist_index') is not None:
            template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
        # Derive a human-readable 'resolution' when the extractor gave none.
        if template_dict.get('resolution') is None:
            if template_dict.get('width') and template_dict.get('height'):
                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
            elif template_dict.get('height'):
                template_dict['resolution'] = '%sp' % template_dict['height']
            elif template_dict.get('width'):
                # NOTE(review): this renders as "?x<width>", labelling the
                # width as a height; upstream later changed it to '%dx?'.
                template_dict['resolution'] = '?x%d' % template_dict['width']

        sanitize = lambda k, v: sanitize_filename(
            restricted=self.params.get('restrictfilenames'),
        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items()
        # Missing template fields render as the literal string 'NA'.
        template_dict = collections.defaultdict(lambda: 'NA', template_dict)

            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
            tmpl = os.path.expanduser(outtmpl)
            filename = tmpl % template_dict
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
    def _match_entry(self, info_dict):
        """ Returns None iff the file should be downloaded """
        # NOTE(review): the `if matchtitle:` / `if rejecttitle:` /
        # `if date is not None:` guards and the final `return None` are elided
        # in this excerpt.

        # Fall back to the id (or 'video') for messages about title-less entries.
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date', None)
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        # View-count limits only apply when the extractor reported a count.
        view_count = info_dict.get('view_count', None)
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        age_limit = self.params.get('age_limit')
        if age_limit is not None:
            if age_limit < info_dict.get('age_limit', 0):
                # NOTE(review): `title` is unbound here when 'title' was missing
                # from info_dict; upstream later switched this to video_title.
                return 'Skipping "' + title + '" because it is age restricted'
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title
489 def add_extra_info(info_dict, extra_info):
490 '''Set the keys from extra_info in info dict if they are missing'''
491 for key, value in extra_info.items():
492 info_dict.setdefault(key, value)
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
        """
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        """
        # NOTE(review): this excerpt elides the `process=True):` signature tail,
        # loop/try headers and several else/break/raise lines; comments below
        # describe only the visible logic.
        # Restrict the search to one extractor when a key was given.
            ies = [self.get_info_extractor(ie_key)]
            if not ie.suitable(url):
            # Warn, but still try, when the site support is flagged broken.
                self.report_warning('The program functionality for this site has been marked as broken, '
                    'and will probably not work.')
                ie_result = ie.extract(url)
                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                        '_type': 'compat_list',
                        'entries': ie_result,
                self.add_default_extra_info(ie_result, ie, url)
                    # Resolve references (playlists, urls) and maybe download.
                    return self.process_ie_result(ie_result, download, extra_info)
            except ExtractorError as de:  # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
            except MaxDownloadsReached:
            except Exception as e:
                # With 'ignoreerrors', unexpected failures are reported and skipped.
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
        # Reached when no registered extractor accepted the URL.
        self.report_error('no suitable InfoExtractor for URL %s' % url)
    def add_default_extra_info(self, ie_result, ie, url):
        # Stamp standard bookkeeping fields onto every result, without
        # overwriting anything the extractor set itself (see add_extra_info).
        # NOTE(review): the 'webpage_url' entry and the closing `})` are elided
        # in this excerpt.
        self.add_extra_info(ie_result, {
            'extractor': ie.IE_NAME,
            'webpage_url_basename': url_basename(url),
            'extractor_key': ie.ie_key(),
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        # NOTE(review): this excerpt elides a number of connective lines
        # (returns, else branches, screen-output call headers); comments below
        # describe only the visible logic.
        result_type = ie_result.get('_type', 'video')  # If not given we suppose it's a video, support the default old system
        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                ie_key=ie_result.get('ie_key'),
                extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Overlay selected fields from the embedded result onto a copy of
            # the transparent result.
            # NOTE(review): the trailing `return new_result` of this helper is
            # elided in this excerpt.
            def make_result(embedded_info):
                new_result = ie_result.copy()
                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                          'entries', 'ie_key', 'duration',
                          'subtitles', 'annotations', 'format',
                          'thumbnail', 'thumbnails'):
                    if f in embedded_info:
                        new_result[f] = embedded_info[f]
            new_result = make_result(info)

            # A url_transparent must not resolve to another url_transparent.
            assert new_result.get('_type') != 'url_transparent'
            if new_result.get('_type') == 'compat_list':
                new_result['entries'] = [
                    make_result(e) for e in new_result['entries']]

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # playliststart is 1-based in the params, 0-based internally.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:

            # Entries may be a plain list or a lazily evaluated PagedList.
            if isinstance(ie_result['entries'], list):
                n_all_entries = len(ie_result['entries'])
                entries = ie_result['entries'][playliststart:playlistend]
                n_entries = len(entries)
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
                assert isinstance(ie_result['entries'], PagedList)
                entries = ie_result['entries'].getslice(
                    playliststart, playlistend)
                n_entries = len(entries)
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
                    'playlist': playlist,
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],

                # Skip entries the user filters exclude (title/date/views/...).
                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen('[download] ' + reason)

                entry_result = self.process_ie_result(entry,
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
        elif result_type == 'compat_list':
            # Legacy list-shaped results: fix up each entry, then process it.
                self.add_extra_info(r,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
        # Unknown '_type' values are programming errors in the extractor.
            raise Exception('Invalid result type: %s' % result_type)
    def select_format(self, format_spec, available_formats):
        # Pick one format dict according to format_spec. The 'best'/'worst'
        # branches show the list is assumed ordered worst -> best.
        # NOTE(review): the list-comprehension openers
        # (`audio_formats = [` / `video_formats = [`), their guarding `if`s and
        # several else/return lines are elided in this excerpt.
        if format_spec == 'best' or format_spec is None:
            return available_formats[-1]
        elif format_spec == 'worst':
            return available_formats[0]
        elif format_spec == 'bestaudio':
            # Audio-only formats are those with no video codec.
                f for f in available_formats
                if f.get('vcodec') == 'none']
                return audio_formats[-1]
        elif format_spec == 'worstaudio':
                f for f in available_formats
                if f.get('vcodec') == 'none']
                return audio_formats[0]
        elif format_spec == 'bestvideo':
            # Video-only formats are those with no audio codec.
                f for f in available_formats
                if f.get('acodec') == 'none']
                return video_formats[-1]
        elif format_spec == 'worstvideo':
                f for f in available_formats
                if f.get('acodec') == 'none']
                return video_formats[0]
            # Otherwise the spec is either an extension or a format_id.
            extensions = ['mp4', 'flv', 'webm', '3gp']
            if format_spec in extensions:
                filter_f = lambda f: f['ext'] == format_spec
                filter_f = lambda f: f['format_id'] == format_spec
            matches = list(filter(filter_f, available_formats))
    def process_video_result(self, info_dict, download=True):
        # Normalize a single 'video' result, select the requested format(s)
        # and hand each selected format off to process_info().
        # NOTE(review): this excerpt elides several connective lines (else
        # headers, early returns, `formats_to_download.append`/`break`);
        # comments below describe only the visible logic.
        assert info_dict.get('_type', 'video') == 'video'

        # Extractors must always provide id and title.
        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # Order thumbnails worst -> best and derive a 'resolution' label.
        thumbnails = info_dict.get('thumbnails')
            thumbnails.sort(key=lambda t: (
                t.get('width'), t.get('height'), t.get('url')))
                if 'width' in t and 'height' in t:
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])

        # Default to the best (last) thumbnail.
        if thumbnails and 'thumbnail' not in info_dict:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive upload_date (YYYYMMDD, UTC) from a raw timestamp if needed.
        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            upload_date = datetime.datetime.utcfromtimestamp(
                info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

        # These extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
                self.process_info(info_dict)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
            formats = info_dict['formats']

            raise ExtractorError('No video formats found!')

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()

        # 'format_limit' caps quality: keep formats up to (and including) it.
        format_limit = self.params.get('format_limit', None)
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats', None):
            self.list_formats(info_dict)

        req_format = self.params.get('format')
        if req_format is None:
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
            # We can accept formats requested in the format: 34/5/best, we pick
            # the first that is available, starting from left
            req_formats = req_format.split('/')
            for rf in req_formats:
                if re.match(r'.+?\+.+?', rf) is not None:
                    # Two formats have been requested like '137+139'
                    format_1, format_2 = rf.split('+')
                    formats_info = (self.select_format(format_1, formats),
                        self.select_format(format_2, formats))
                    if all(formats_info):
                        # Synthesize a merged-format entry for the pair.
                            'requested_formats': formats_info,
                            'ext': formats_info[0]['ext'],
                        selected_format = None
                    selected_format = self.select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download = [selected_format]

        if not formats_to_download:
            raise ExtractorError('requested format not available',

        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for format in formats_to_download:
            # Each selected format is processed as its own (merged) info dict.
            new_info = dict(info_dict)
            new_info.update(format)
            self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
835 def process_info(self, info_dict):
836 """Process a single resolved IE result."""
838 assert info_dict.get('_type', 'video') == 'video'
840 max_downloads = self.params.get('max_downloads')
841 if max_downloads is not None:
842 if self._num_downloads >= int(max_downloads):
843 raise MaxDownloadsReached()
845 info_dict['fulltitle'] = info_dict['title']
846 if len(info_dict['title']) > 200:
847 info_dict['title'] = info_dict['title'][:197] + '...'
849 # Keep for backwards compatibility
850 info_dict['stitle'] = info_dict['title']
852 if not 'format' in info_dict:
853 info_dict['format'] = info_dict['ext']
855 reason = self._match_entry(info_dict)
856 if reason is not None:
857 self.to_screen('[download] ' + reason)
860 self._num_downloads += 1
862 filename = self.prepare_filename(info_dict)
865 if self.params.get('forcetitle', False):
866 self.to_stdout(info_dict['fulltitle'])
867 if self.params.get('forceid', False):
868 self.to_stdout(info_dict['id'])
869 if self.params.get('forceurl', False):
870 # For RTMP URLs, also include the playpath
871 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
872 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
873 self.to_stdout(info_dict['thumbnail'])
874 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
875 self.to_stdout(info_dict['description'])
876 if self.params.get('forcefilename', False) and filename is not None:
877 self.to_stdout(filename)
878 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
879 self.to_stdout(formatSeconds(info_dict['duration']))
880 if self.params.get('forceformat', False):
881 self.to_stdout(info_dict['format'])
882 if self.params.get('forcejson', False):
883 info_dict['_filename'] = filename
884 self.to_stdout(json.dumps(info_dict))
886 # Do nothing else if in simulate mode
887 if self.params.get('simulate', False):
894 dn = os.path.dirname(encodeFilename(filename))
895 if dn and not os.path.exists(dn):
897 except (OSError, IOError) as err:
898 self.report_error('unable to create directory ' + compat_str(err))
901 if self.params.get('writedescription', False):
902 descfn = filename + '.description'
903 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
904 self.to_screen('[info] Video description is already present')
907 self.to_screen('[info] Writing video description to: ' + descfn)
908 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
909 descfile.write(info_dict['description'])
910 except (KeyError, TypeError):
911 self.report_warning('There\'s no description to write.')
912 except (OSError, IOError):
913 self.report_error('Cannot write description file ' + descfn)
916 if self.params.get('writeannotations', False):
917 annofn = filename + '.annotations.xml'
918 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
919 self.to_screen('[info] Video annotations are already present')
922 self.to_screen('[info] Writing video annotations to: ' + annofn)
923 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
924 annofile.write(info_dict['annotations'])
925 except (KeyError, TypeError):
926 self.report_warning('There are no annotations to write.')
927 except (OSError, IOError):
928 self.report_error('Cannot write annotations file: ' + annofn)
931 subtitles_are_requested = any([self.params.get('writesubtitles', False),
932 self.params.get('writeautomaticsub')])
934 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
935 # subtitles download errors are already managed as troubles in relevant IE
936 # that way it will silently go on when used with unsupporting IE
937 subtitles = info_dict['subtitles']
938 sub_format = self.params.get('subtitlesformat', 'srt')
939 for sub_lang in subtitles.keys():
940 sub = subtitles[sub_lang]
944 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
945 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
946 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
948 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
949 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
951 except (OSError, IOError):
952 self.report_error('Cannot write subtitles file ' + sub_filename)
955 if self.params.get('writeinfojson', False):
956 infofn = os.path.splitext(filename)[0] + '.info.json'
957 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
958 self.to_screen('[info] Video description metadata is already present')
960 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
962 write_json_file(info_dict, encodeFilename(infofn))
963 except (OSError, IOError):
964 self.report_error('Cannot write metadata to JSON file ' + infofn)
967 if self.params.get('writethumbnail', False):
968 if info_dict.get('thumbnail') is not None:
969 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
970 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
971 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
972 self.to_screen('[%s] %s: Thumbnail is already present' %
973 (info_dict['extractor'], info_dict['id']))
975 self.to_screen('[%s] %s: Downloading thumbnail ...' %
976 (info_dict['extractor'], info_dict['id']))
978 uf = self.urlopen(info_dict['thumbnail'])
979 with open(thumb_filename, 'wb') as thumbf:
980 shutil.copyfileobj(uf, thumbf)
981 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
982 (info_dict['extractor'], info_dict['id'], thumb_filename))
983 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
984 self.report_warning('Unable to download thumbnail "%s": %s' %
985 (info_dict['thumbnail'], compat_str(err)))
987 if not self.params.get('skip_download', False):
988 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
993 fd = get_suitable_downloader(info)(self, self.params)
994 for ph in self._progress_hooks:
995 fd.add_progress_hook(ph)
996 return fd.download(name, info)
997 if info_dict.get('requested_formats') is not None:
1000 merger = FFmpegMergerPP(self)
1001 if not merger._get_executable():
1003 self.report_warning('You have requested multiple '
1004 'formats but ffmpeg or avconv are not installed.'
1005 ' The formats won\'t be merged')
1007 postprocessors = [merger]
1008 for f in info_dict['requested_formats']:
1009 new_info = dict(info_dict)
1011 fname = self.prepare_filename(new_info)
1012 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1013 downloaded.append(fname)
1014 partial_success = dl(fname, new_info)
1015 success = success and partial_success
1016 info_dict['__postprocessors'] = postprocessors
1017 info_dict['__files_to_merge'] = downloaded
1019 # Just a single file
1020 success = dl(filename, info_dict)
1021 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1022 self.report_error('unable to download video data: %s' % str(err))
1024 except (OSError, IOError) as err:
1025 raise UnavailableVideoError(err)
1026 except (ContentTooShortError, ) as err:
1027 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1032 self.post_process(filename, info_dict)
1033 except (PostProcessingError) as err:
1034 self.report_error('postprocessing: %s' % str(err))
1037 self.record_download_archive(info_dict)
def download(self, url_list):
    """Download a given list of URLs.

    Returns the accumulated process return code. Raises SameFileError
    when several URLs would all be written to one fixed output file.
    """
    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    if (len(url_list) > 1 and
            '%' not in outtmpl
            and self.params.get('max_downloads') != 1):
        # A template without placeholders cannot hold more than one video
        raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            self.extract_info(url)
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            # Propagate so the caller can stop the whole run
            raise
    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Load an info dict from a .info.json file and process/download it.

    Falls back to re-extracting from the recorded webpage_url when the
    saved info dict alone is not enough to complete the download.
    """
    import json  # stdlib; function-scope import keeps this edit self-contained

    with io.open(info_filename, 'r', encoding='utf-8') as f:
        info = json.load(f)
    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        webpage_url = info.get('webpage_url')
        if webpage_url is not None:
            self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
            return self.download([webpage_url])
        else:
            # No URL to retry from — re-raise the original failure
            raise
    return self._download_retcode
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    Each postprocessor votes on whether the original file should be kept;
    the file is removed only when the overall vote is False and
    params['keepvideo'] is not set.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    keep_video = None
    pps_chain = []
    if ie_info.get('__postprocessors') is not None:
        pps_chain.extend(ie_info['__postprocessors'])
    pps_chain.extend(self._pps)
    for pp in pps_chain:
        try:
            # new_info is intentionally discarded: postprocessors mutate
            # `info` in place where needed.
            keep_video_wish, new_info = pp.run(info)
            if keep_video_wish is not None:
                if keep_video_wish:
                    # A positive wish always wins
                    keep_video = keep_video_wish
                elif keep_video is None:
                    # No clear decision yet, let IE decide
                    keep_video = keep_video_wish
        except PostProcessingError as e:
            self.report_error(e.msg)
    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded video file')
1100 def _make_archive_id(self, info_dict):
1101 # Future-proof against any change in case
1102 # and backwards compatibility with prior versions
1103 extractor = info_dict.get('extractor_key')
1104 if extractor is None:
1105 if 'id' in info_dict:
1106 extractor = info_dict.get('ie_key') # key in a playlist
1107 if extractor is None:
1108 return None # Incomplete video information
1109 return extractor.lower() + ' ' + info_dict['id']
def in_download_archive(self, info_dict):
    """Return True if the video is already recorded in the download archive.

    A missing archive file is treated as an empty archive; any other
    I/O error is propagated.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return False

    vid_id = self._make_archive_id(info_dict)
    if vid_id is None:
        return False  # Incomplete video information

    try:
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            for line in archive_file:
                if line.strip() == vid_id:
                    return True
    except IOError as ioe:
        if ioe.errno != errno.ENOENT:
            raise
    return False
def record_download_archive(self, info_dict):
    """Append the video's archive id to the download archive file.

    No-op when no archive file is configured.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return
    vid_id = self._make_archive_id(info_dict)
    assert vid_id  # caller must supply a complete info dict
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + '\n')
@staticmethod  # restored: call sites use self.format_resolution(format) with no instance arg
def format_resolution(format, default='unknown'):
    """Return a human-readable resolution label for a format dict.

    Precedence: explicit 'resolution' > WxH > Hp > ?xW > *default*;
    audio-only formats (vcodec == 'none') get a fixed label.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']
    if format.get('height') is not None:
        if format.get('width') is not None:
            res = '%sx%s' % (format['width'], format['height'])
        else:
            res = '%sp' % format['height']
    elif format.get('width') is not None:
        res = '?x%d' % format['width']
    else:
        res = default
    return res
1156 def _format_note(self, fdict):
1158 if fdict.get('ext') in ['f4f', 'f4m']:
1159 res += '(unsupported) '
1160 if fdict.get('format_note') is not None:
1161 res += fdict['format_note'] + ' '
1162 if fdict.get('tbr') is not None:
1163 res += '%4dk ' % fdict['tbr']
1164 if fdict.get('container') is not None:
1167 res += '%s container' % fdict['container']
1168 if (fdict.get('vcodec') is not None and
1169 fdict.get('vcodec') != 'none'):
1172 res += fdict['vcodec']
1173 if fdict.get('vbr') is not None:
1175 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1177 if fdict.get('vbr') is not None:
1178 res += '%4dk' % fdict['vbr']
1179 if fdict.get('acodec') is not None:
1182 if fdict['acodec'] == 'none':
1185 res += '%-5s' % fdict['acodec']
1186 elif fdict.get('abr') is not None:
1190 if fdict.get('abr') is not None:
1191 res += '@%3dk' % fdict['abr']
1192 if fdict.get('asr') is not None:
1193 res += ' (%5dHz)' % fdict['asr']
1194 if fdict.get('filesize') is not None:
1197 res += format_bytes(fdict['filesize'])
def list_formats(self, info_dict):
    """Print a table of the formats available for the video in info_dict."""
    def line(format, idlen=20):
        # One fixed-width row: format code, extension, resolution, note.
        return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            self._format_note(format),
        ))

    formats = info_dict.get('formats', [info_dict])
    idlen = max(len('format code'),
                max(len(f['format_id']) for f in formats))
    formats_s = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        # Formats are ordered worst-first by the extractor.
        formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'

    header_line = line({
        'format_id': 'format code', 'ext': 'extension',
        'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
    self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, '\n'.join(formats_s)))
def urlopen(self, req):
    """Open *req* through the shared opener, honouring the configured socket timeout."""
    opener = self._opener
    timeout = self._socket_timeout
    return opener.open(req, timeout=timeout)
def print_debug_header(self):
    """Write debug information (encodings, version, git HEAD, platform,
    proxy map) when the 'verbose' option is set; otherwise do nothing."""
    if not self.params.get('verbose'):
        return

    self._write_string(
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            sys.stdout.encoding,
            self.get_encoding()))

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        # Best effort: report the git commit when running from a checkout.
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        # Debug output only — never let the git probe break a run.
        pass
    self._write_string('[debug] Python version %s - %s' %
                       (platform.python_version(), platform_name()) + '\n')

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
def _setup_opener(self):
    """Build the urllib opener used for all HTTP(S) traffic.

    Configures socket timeout, cookie jar (optionally file-backed),
    proxies and the custom HTTPS/YoutubeDL handlers, then stores the
    result on self._opener.
    """
    timeout_val = self.params.get('socket_timeout')
    self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
            opts_cookiefile)
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)
    if opts_proxy is not None:
        # An empty --proxy disables proxying entirely.
        if opts_proxy == '':
            proxies = {}
        else:
            proxies = {'http': opts_proxy, 'https': opts_proxy}
    else:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(debuglevel=debuglevel)
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, ydlh)
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
def encode(self, s):
    """Encode *s* to bytes using the configured output encoding.

    bytes input is returned unchanged; a failing encode is re-raised
    with a hint about --encoding appended to the error reason.
    """
    if isinstance(s, bytes):
        return s  # Already encoded

    try:
        return s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
        raise
1315 def get_encoding(self):
1316 encoding = self.params.get('encoding')
1317 if encoding is None:
1318 encoding = preferredencoding()