2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
30 compat_urllib_request,
53 UnavailableVideoError,
60 from .extractor import get_info_extractor, gen_extractors
61 from .downloader import get_suitable_downloader
62 from .postprocessor import FFmpegMergerPP
63 from .version import __version__
66 class YoutubeDL(object):
69 YoutubeDL objects are responsible for downloading the
70 actual video file and writing it to disk if the user has requested
71 it, among other tasks. In most cases there should be one per
72 program. Given a video URL, the downloader does not know how to
73 extract all the needed information; that is the task of the
74 InfoExtractors, so it has to pass the URL to one of them.
76 For this, YoutubeDL objects have a method that allows
77 InfoExtractors to be registered in a given order. When it is passed
78 a URL, the YoutubeDL object hands it to the first InfoExtractor it
79 finds that reports being able to handle it. The InfoExtractor extracts
80 all the information about the video or videos the URL refers to, and
81 YoutubeDL processes the extracted information, possibly using a File
82 Downloader to download the video.
84 YoutubeDL objects accept a lot of parameters. In order not to saturate
85 the object constructor with arguments, it receives a dictionary of
86 options instead. These options are available through the params
87 attribute for the InfoExtractors to use. The YoutubeDL also
88 registers itself as the downloader in charge of the InfoExtractors
89 that are added to it, so this is a "mutual registration".
93 username: Username for authentication purposes.
94 password: Password for authentication purposes.
95 videopassword: Password for accessing a video.
96 usenetrc: Use netrc for authentication instead.
97 verbose: Print additional info to stdout.
98 quiet: Do not print messages to stdout.
99 no_warnings: Do not print out anything for warnings.
100 forceurl: Force printing final URL.
101 forcetitle: Force printing title.
102 forceid: Force printing ID.
103 forcethumbnail: Force printing thumbnail URL.
104 forcedescription: Force printing description.
105 forcefilename: Force printing final filename.
106 forceduration: Force printing duration.
107 forcejson: Force printing info_dict as JSON.
108 simulate: Do not download the video files.
109 format: Video format code.
110 format_limit: Highest quality format to try.
111 outtmpl: Template for output names.
112 restrictfilenames: Do not allow "&" and spaces in file names
113 ignoreerrors: Do not stop on download errors.
114 nooverwrites: Prevent overwriting files.
115 playliststart: Playlist item to start at.
116 playlistend: Playlist item to end at.
117 matchtitle: Download only matching titles.
118 rejecttitle: Reject downloads for matching titles.
119 logger: Log messages to a logging.Logger instance.
120 logtostderr: Log messages to stderr instead of stdout.
121 writedescription: Write the video description to a .description file
122 writeinfojson: Write the video metadata to a .info.json file
123 writeannotations: Write the video annotations to a .annotations.xml file
124 writethumbnail: Write the thumbnail image to a file
125 writesubtitles: Write the video subtitles to a file
126 writeautomaticsub: Write the automatic subtitles to a file
127 allsubtitles: Downloads all the subtitles of the video
128 (requires writesubtitles or writeautomaticsub)
129 listsubtitles: Lists all available subtitles for the video
130 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
131 subtitleslangs: List of languages of the subtitles to download
132 keepvideo: Keep the video file after post-processing
133 daterange: A DateRange object, download only if the upload_date is in the range.
134 skip_download: Skip the actual download of the video file
135 cachedir: Location of the cache files in the filesystem.
136 None to disable filesystem cache.
137 noplaylist: Download single video instead of a playlist if in doubt.
138 age_limit: An integer representing the user's age in years.
139 Videos unsuitable for the given age are skipped.
140 min_views: An integer representing the minimum view count the video
141 must have in order to not be skipped.
142 Videos without view count information are always
143 downloaded. None for no limit.
144 max_views: An integer representing the maximum view count.
145 Videos that are more popular than that are not downloaded.
147 Videos without view count information are always
148 downloaded. None for no limit.
149 download_archive: File name of a file where all downloads are recorded.
150 Videos already present in the file are not downloaded again.
152 cookiefile: File name where cookies should be read from and dumped to.
153 nocheckcertificate:Do not verify SSL certificates
154 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
155 At the moment, this is only supported by YouTube.
156 proxy: URL of the proxy server to use
157 socket_timeout: Time to wait for unresponsive hosts, in seconds
158 bidi_workaround: Work around buggy terminals without bidirectional text
159 support, using fribidi
160 debug_printtraffic:Print out sent and received HTTP traffic
161 include_ads: Download ads as well
162 default_search: Prepend this string if an input URL is not valid.
163 'auto' for elaborate guessing
164 encoding: Use this encoding instead of the system-specified one.
165 extract_flat: Do not resolve URLs, return the immediate result.
167 The following parameters are not used by YoutubeDL itself; they are used by
169 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
170 noresizebuffer, retries, continuedl, noprogress, consoletitle
172 The following options are used by the post processors:
173 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
174 otherwise prefer avconv.
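
A minimal usage sketch (the options are the ones documented above; the
import path assumes the package is importable as youtube_dl, and the
URL is just a placeholder):

    from youtube_dl import YoutubeDL

    ydl_opts = {
        'format': 'best',
        'outtmpl': '%(title)s-%(id)s.%(ext)s',
        'ignoreerrors': True,
    }
    with YoutubeDL(ydl_opts) as ydl:
        # Register all known InfoExtractors, then hand over the URL(s).
        ydl.add_default_info_extractors()
        ydl.download(['http://www.youtube.com/watch?v=PLACEHOLDER'])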
180 _download_retcode = None
181 _num_downloads = None
184 def __init__(self, params=None):
185 """Create a FileDownloader object with the given options."""
189 self._ies_instances = {}
191 self._progress_hooks = []
192 self._download_retcode = 0
193 self._num_downloads = 0
194 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
195 self._err_file = sys.stderr
198 if params.get('bidi_workaround', False):
201 master, slave = pty.openpty()
202 width = get_term_width()
206 width_args = ['-w', str(width)]
208 stdin=subprocess.PIPE,
210 stderr=self._err_file)
212 self._output_process = subprocess.Popen(
213 ['bidiv'] + width_args, **sp_kwargs
216 self._output_process = subprocess.Popen(
217 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
218 self._output_channel = os.fdopen(master, 'rb')
219 except OSError as ose:
221 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround. Make sure that fribidi is an executable file in one of the directories in your $PATH.')
225 if (sys.version_info >= (3,) and sys.platform != 'win32' and
226 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
227 and not params.get('restrictfilenames', False)):
228 # On Python 3, the Unicode filesystem API will throw errors (#1474)
230 'Assuming --restrict-filenames since file system encoding '
231 'cannot encode all characters. '
232 'Set the LC_ALL environment variable to fix this.')
233 self.params['restrictfilenames'] = True
235 if '%(stitle)s' in self.params.get('outtmpl', ''):
236 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag (which also secures %(uploader)s et al) instead.')
240 def add_info_extractor(self, ie):
241 """Add an InfoExtractor object to the end of the list."""
243 self._ies_instances[ie.ie_key()] = ie
244 ie.set_downloader(self)
246 def get_info_extractor(self, ie_key):
248 Get an instance of an IE with name ie_key. It will first try to get one from
249 the _ies list; if there is no instance, it will create a new one and add
250 it to the extractor list.
252 ie = self._ies_instances.get(ie_key)
254 ie = get_info_extractor(ie_key)()
255 self.add_info_extractor(ie)
258 def add_default_info_extractors(self):
260 Add the InfoExtractors returned by gen_extractors to the end of the list
262 for ie in gen_extractors():
263 self.add_info_extractor(ie)
265 def add_post_processor(self, pp):
266 """Add a PostProcessor object to the end of the chain."""
268 pp.set_downloader(self)
270 def add_progress_hook(self, ph):
271 """Add the progress hook (currently only for the file downloader)"""
272 self._progress_hooks.append(ph)
274 def _bidi_workaround(self, message):
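# Pipe the message through the bidi helper process (bidiv or fribidi)
# started in __init__, then read back exactly as many lines as were
# written, so right-to-left text renders correctly when the
# 'bidi_workaround' option is set.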
275 if not hasattr(self, '_output_channel'):
278 assert hasattr(self, '_output_process')
279 assert isinstance(message, compat_str)
280 line_count = message.count('\n') + 1
281 self._output_process.stdin.write((message + '\n').encode('utf-8'))
282 self._output_process.stdin.flush()
283 res = ''.join(self._output_channel.readline().decode('utf-8')
284 for _ in range(line_count))
285 return res[:-len('\n')]
287 def to_screen(self, message, skip_eol=False):
288 """Print message to stdout if not in quiet mode."""
289 return self.to_stdout(message, skip_eol, check_quiet=True)
291 def _write_string(self, s, out=None):
292 write_string(s, out=out, encoding=self.params.get('encoding'))
294 def to_stdout(self, message, skip_eol=False, check_quiet=False):
295 """Print message to stdout if not in quiet mode."""
296 if self.params.get('logger'):
297 self.params['logger'].debug(message)
298 elif not check_quiet or not self.params.get('quiet', False):
299 message = self._bidi_workaround(message)
300 terminator = ['\n', ''][skip_eol]
301 output = message + terminator
303 self._write_string(output, self._screen_file)
305 def to_stderr(self, message):
306 """Print message to stderr."""
307 assert isinstance(message, compat_str)
308 if self.params.get('logger'):
309 self.params['logger'].error(message)
311 message = self._bidi_workaround(message)
312 output = message + '\n'
313 self._write_string(output, self._err_file)
315 def to_console_title(self, message):
316 if not self.params.get('consoletitle', False):
318 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
319 # c_wchar_p() might not be necessary if `message` is
320 # already of type unicode()
321 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
322 elif 'TERM' in os.environ:
323 self._write_string('\033]0;%s\007' % message, self._screen_file)
325 def save_console_title(self):
326 if not self.params.get('consoletitle', False):
328 if 'TERM' in os.environ:
329 # Save the title on stack
330 self._write_string('\033[22;0t', self._screen_file)
332 def restore_console_title(self):
333 if not self.params.get('consoletitle', False):
335 if 'TERM' in os.environ:
336 # Restore the title from stack
337 self._write_string('\033[23;0t', self._screen_file)
340 self.save_console_title()
343 def __exit__(self, *args):
344 self.restore_console_title()
346 if self.params.get('cookiefile') is not None:
347 self.cookiejar.save()
349 def trouble(self, message=None, tb=None):
350 """Determine action to take when a download problem appears.
352 Depending on whether the downloader has been configured to ignore
353 download errors, this method may raise an exception
354 when errors are found, after printing the message.
356 tb, if given, is additional traceback information.
358 if message is not None:
359 self.to_stderr(message)
360 if self.params.get('verbose'):
362 if sys.exc_info()[0]: # if .trouble has been called from an except block
364 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
365 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
366 tb += compat_str(traceback.format_exc())
368 tb_data = traceback.format_list(traceback.extract_stack())
369 tb = ''.join(tb_data)
371 if not self.params.get('ignoreerrors', False):
372 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
373 exc_info = sys.exc_info()[1].exc_info
375 exc_info = sys.exc_info()
376 raise DownloadError(message, exc_info)
377 self._download_retcode = 1
379 def report_warning(self, message):
381 Print the message to stderr; it will be prefixed with 'WARNING:'.
382 If stderr is a tty file, the 'WARNING:' prefix will be colored.
384 if self.params.get('logger') is not None:
385 self.params['logger'].warning(message)
387 if self.params.get('no_warnings'):
389 if self._err_file.isatty() and os.name != 'nt':
390 _msg_header = '\033[0;33mWARNING:\033[0m'
392 _msg_header = 'WARNING:'
393 warning_message = '%s %s' % (_msg_header, message)
394 self.to_stderr(warning_message)
396 def report_error(self, message, tb=None):
398 Do the same as trouble, but prefixes the message with 'ERROR:', colored
399 in red if stderr is a tty file.
401 if self._err_file.isatty() and os.name != 'nt':
402 _msg_header = '\033[0;31mERROR:\033[0m'
404 _msg_header = 'ERROR:'
405 error_message = '%s %s' % (_msg_header, message)
406 self.trouble(error_message, tb)
408 def report_file_already_downloaded(self, file_name):
409 """Report file has already been fully downloaded."""
411 self.to_screen('[download] %s has already been downloaded' % file_name)
412 except UnicodeEncodeError:
413 self.to_screen('[download] The file has already been downloaded')
415 def prepare_filename(self, info_dict):
416 """Generate the output filename."""
418 template_dict = dict(info_dict)
420 template_dict['epoch'] = int(time.time())
421 autonumber_size = self.params.get('autonumber_size')
422 if autonumber_size is None:
424 autonumber_templ = '%0' + str(autonumber_size) + 'd'
425 template_dict['autonumber'] = autonumber_templ % self._num_downloads
426 if template_dict.get('playlist_index') is not None:
427 template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
428 if template_dict.get('resolution') is None:
429 if template_dict.get('width') and template_dict.get('height'):
430 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
431 elif template_dict.get('height'):
432 template_dict['resolution'] = '%sp' % template_dict['height']
433 elif template_dict.get('width'):
434 template_dict['resolution'] = '%dx?' % template_dict['width']
436 sanitize = lambda k, v: sanitize_filename(
438 restricted=self.params.get('restrictfilenames'),
440 template_dict = dict((k, sanitize(k, v))
441 for k, v in template_dict.items()
443 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
445 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
446 tmpl = os.path.expanduser(outtmpl)
447 filename = tmpl % template_dict
449 except ValueError as err:
450 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
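# Illustrative only: with a template such as '%(title)s-%(id)s.%(ext)s'
# and an info_dict like {'title': 'Test video', 'id': 'abc123', 'ext': 'mp4'},
# this method yields 'Test video-abc123.mp4' (all field values pass through
# sanitize_filename, more aggressively when 'restrictfilenames' is set).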
453 def _match_entry(self, info_dict):
454 """ Returns None iff the file should be downloaded """
456 video_title = info_dict.get('title', info_dict.get('id', 'video'))
457 if 'title' in info_dict:
458 # This can happen when we're just evaluating the playlist
459 title = info_dict['title']
460 matchtitle = self.params.get('matchtitle', False)
462 if not re.search(matchtitle, title, re.IGNORECASE):
463 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
464 rejecttitle = self.params.get('rejecttitle', False)
466 if re.search(rejecttitle, title, re.IGNORECASE):
467 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
468 date = info_dict.get('upload_date', None)
470 dateRange = self.params.get('daterange', DateRange())
471 if date not in dateRange:
472 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
473 view_count = info_dict.get('view_count', None)
474 if view_count is not None:
475 min_views = self.params.get('min_views')
476 if min_views is not None and view_count < min_views:
477 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
478 max_views = self.params.get('max_views')
479 if max_views is not None and view_count > max_views:
480 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
481 age_limit = self.params.get('age_limit')
482 if age_limit is not None:
483 if age_limit < info_dict.get('age_limit', 0):
484 return 'Skipping "' + video_title + '" because it is age restricted'
485 if self.in_download_archive(info_dict):
486 return '%s has already been recorded in archive' % video_title
490 def add_extra_info(info_dict, extra_info):
491 '''Set the keys from extra_info in info dict if they are missing'''
492 for key, value in extra_info.items():
493 info_dict.setdefault(key, value)
495 def extract_info(self, url, download=True, ie_key=None, extra_info={},
498 Returns a list with a dictionary for each video we find.
499 If 'download', also downloads the videos.
500 extra_info is a dict containing the extra values to add to each result
504 ies = [self.get_info_extractor(ie_key)]
509 if not ie.suitable(url):
513 self.report_warning('The program functionality for this site has been marked as broken, '
514 'and will probably not work.')
517 ie_result = ie.extract(url)
518 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
520 if isinstance(ie_result, list):
521 # Backwards compatibility: old IE result format
523 '_type': 'compat_list',
524 'entries': ie_result,
526 self.add_default_extra_info(ie_result, ie, url)
528 return self.process_ie_result(ie_result, download, extra_info)
531 except ExtractorError as de: # An error we somewhat expected
532 self.report_error(compat_str(de), de.format_traceback())
534 except MaxDownloadsReached:
536 except Exception as e:
537 if self.params.get('ignoreerrors', False):
538 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
543 self.report_error('no suitable InfoExtractor for URL %s' % url)
545 def add_default_extra_info(self, ie_result, ie, url):
546 self.add_extra_info(ie_result, {
547 'extractor': ie.IE_NAME,
549 'webpage_url_basename': url_basename(url),
550 'extractor_key': ie.ie_key(),
553 def process_ie_result(self, ie_result, download=True, extra_info={}):
555 Take the result of the ie (may be modified) and resolve all unresolved
556 references (URLs, playlist items).
558 It will also download the videos if 'download'.
559 Returns the resolved ie_result.
562 result_type = ie_result.get('_type', 'video')
564 if self.params.get('extract_flat', False):
565 if result_type in ('url', 'url_transparent'):
568 if result_type == 'video':
569 self.add_extra_info(ie_result, extra_info)
570 return self.process_video_result(ie_result, download=download)
571 elif result_type == 'url':
572 # We have to add extra_info to the results because it may be
573 # contained in a playlist
574 return self.extract_info(ie_result['url'],
576 ie_key=ie_result.get('ie_key'),
577 extra_info=extra_info)
578 elif result_type == 'url_transparent':
579 # Use the information from the embedding page
580 info = self.extract_info(
581 ie_result['url'], ie_key=ie_result.get('ie_key'),
582 extra_info=extra_info, download=False, process=False)
584 def make_result(embedded_info):
585 new_result = ie_result.copy()
586 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
587 'entries', 'ie_key', 'duration',
588 'subtitles', 'annotations', 'format',
589 'thumbnail', 'thumbnails'):
592 if f in embedded_info:
593 new_result[f] = embedded_info[f]
595 new_result = make_result(info)
597 assert new_result.get('_type') != 'url_transparent'
598 if new_result.get('_type') == 'compat_list':
599 new_result['entries'] = [
600 make_result(e) for e in new_result['entries']]
602 return self.process_ie_result(
603 new_result, download=download, extra_info=extra_info)
604 elif result_type == 'playlist':
605 # We process each entry in the playlist
606 playlist = ie_result.get('title', None) or ie_result.get('id', None)
607 self.to_screen('[download] Downloading playlist: %s' % playlist)
609 playlist_results = []
611 playliststart = self.params.get('playliststart', 1) - 1
612 playlistend = self.params.get('playlistend', None)
613 # For backwards compatibility, interpret -1 as whole list
614 if playlistend == -1:
617 if isinstance(ie_result['entries'], list):
618 n_all_entries = len(ie_result['entries'])
619 entries = ie_result['entries'][playliststart:playlistend]
620 n_entries = len(entries)
622 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
623 (ie_result['extractor'], playlist, n_all_entries, n_entries))
625 assert isinstance(ie_result['entries'], PagedList)
626 entries = ie_result['entries'].getslice(
627 playliststart, playlistend)
628 n_entries = len(entries)
630 "[%s] playlist %s: Downloading %d videos" %
631 (ie_result['extractor'], playlist, n_entries))
633 for i, entry in enumerate(entries, 1):
634 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
636 'playlist': playlist,
637 'playlist_index': i + playliststart,
638 'extractor': ie_result['extractor'],
639 'webpage_url': ie_result['webpage_url'],
640 'webpage_url_basename': url_basename(ie_result['webpage_url']),
641 'extractor_key': ie_result['extractor_key'],
644 reason = self._match_entry(entry)
645 if reason is not None:
646 self.to_screen('[download] ' + reason)
649 entry_result = self.process_ie_result(entry,
652 playlist_results.append(entry_result)
653 ie_result['entries'] = playlist_results
655 elif result_type == 'compat_list':
657 self.add_extra_info(r,
659 'extractor': ie_result['extractor'],
660 'webpage_url': ie_result['webpage_url'],
661 'webpage_url_basename': url_basename(ie_result['webpage_url']),
662 'extractor_key': ie_result['extractor_key'],
665 ie_result['entries'] = [
666 self.process_ie_result(_fixup(r), download, extra_info)
667 for r in ie_result['entries']
671 raise Exception('Invalid result type: %s' % result_type)
673 def select_format(self, format_spec, available_formats):
674 if format_spec == 'best' or format_spec is None:
675 return available_formats[-1]
676 elif format_spec == 'worst':
677 return available_formats[0]
678 elif format_spec == 'bestaudio':
680 f for f in available_formats
681 if f.get('vcodec') == 'none']
683 return audio_formats[-1]
684 elif format_spec == 'worstaudio':
686 f for f in available_formats
687 if f.get('vcodec') == 'none']
689 return audio_formats[0]
690 elif format_spec == 'bestvideo':
692 f for f in available_formats
693 if f.get('acodec') == 'none']
695 return video_formats[-1]
696 elif format_spec == 'worstvideo':
698 f for f in available_formats
699 if f.get('acodec') == 'none']
701 return video_formats[0]
703 extensions = ['mp4', 'flv', 'webm', '3gp']
704 if format_spec in extensions:
705 filter_f = lambda f: f['ext'] == format_spec
707 filter_f = lambda f: f['format_id'] == format_spec
708 matches = list(filter(filter_f, available_formats))
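# Note: the branches above rely on available_formats being sorted from
# worst to best quality; list_formats() below makes the same assumption
# when it labels the first entry '(worst)' and the last '(best)'.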
713 def process_video_result(self, info_dict, download=True):
714 assert info_dict.get('_type', 'video') == 'video'
716 if 'id' not in info_dict:
717 raise ExtractorError('Missing "id" field in extractor result')
718 if 'title' not in info_dict:
719 raise ExtractorError('Missing "title" field in extractor result')
721 if 'playlist' not in info_dict:
722 # It isn't part of a playlist
723 info_dict['playlist'] = None
724 info_dict['playlist_index'] = None
726 thumbnails = info_dict.get('thumbnails')
728 thumbnails.sort(key=lambda t: (
729 t.get('width'), t.get('height'), t.get('url')))
731 if 'width' in t and 'height' in t:
732 t['resolution'] = '%dx%d' % (t['width'], t['height'])
734 if thumbnails and 'thumbnail' not in info_dict:
735 info_dict['thumbnail'] = thumbnails[-1]['url']
737 if 'display_id' not in info_dict and 'id' in info_dict:
738 info_dict['display_id'] = info_dict['id']
740 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
741 upload_date = datetime.datetime.utcfromtimestamp(
742 info_dict['timestamp'])
743 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
745 # These extractors handle format selection themselves
746 if info_dict['extractor'] in ['Youku']:
748 self.process_info(info_dict)
751 # We now pick which formats have to be downloaded
752 if info_dict.get('formats') is None:
753 # There's only one format available
754 formats = [info_dict]
756 formats = info_dict['formats']
759 raise ExtractorError('No video formats found!')
761 # We check that all the formats have the format and format_id fields
762 for i, format in enumerate(formats):
763 if 'url' not in format:
764 raise ExtractorError('Missing "url" key in result (index %d)' % i)
766 if format.get('format_id') is None:
767 format['format_id'] = compat_str(i)
768 if format.get('format') is None:
769 format['format'] = '{id} - {res}{note}'.format(
770 id=format['format_id'],
771 res=self.format_resolution(format),
772 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
774 # Automatically determine file extension if missing
775 if 'ext' not in format:
776 format['ext'] = determine_ext(format['url']).lower()
778 format_limit = self.params.get('format_limit', None)
780 formats = list(takewhile_inclusive(
781 lambda f: f['format_id'] != format_limit, formats
784 # TODO Central sorting goes here
786 if formats[0] is not info_dict:
787 # only set the 'formats' field if the original info_dict lists them
788 # otherwise we end up with a circular reference, the first (and unique)
789 # element in the 'formats' field in info_dict is info_dict itself,
790 # which can't be exported to json
791 info_dict['formats'] = formats
792 if self.params.get('listformats', None):
793 self.list_formats(info_dict)
796 req_format = self.params.get('format')
797 if req_format is None:
799 formats_to_download = []
800 # The -1 is for supporting YoutubeIE
801 if req_format in ('-1', 'all'):
802 formats_to_download = formats
804 # We can accept formats requested in the form '34/5/best': we pick
805 # the first one that is available, starting from the left
806 req_formats = req_format.split('/')
807 for rf in req_formats:
808 if re.match(r'.+?\+.+?', rf) is not None:
809 # Two formats have been requested like '137+139'
810 format_1, format_2 = rf.split('+')
811 formats_info = (self.select_format(format_1, formats),
812 self.select_format(format_2, formats))
813 if all(formats_info):
815 'requested_formats': formats_info,
817 'ext': formats_info[0]['ext'],
820 selected_format = None
822 selected_format = self.select_format(rf, formats)
823 if selected_format is not None:
824 formats_to_download = [selected_format]
826 if not formats_to_download:
827 raise ExtractorError('requested format not available',
831 if len(formats_to_download) > 1:
832 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
833 for format in formats_to_download:
834 new_info = dict(info_dict)
835 new_info.update(format)
836 self.process_info(new_info)
837 # We update the info dict with the best quality format (backwards compatibility)
838 info_dict.update(formats_to_download[-1])
841 def process_info(self, info_dict):
842 """Process a single resolved IE result."""
844 assert info_dict.get('_type', 'video') == 'video'
846 max_downloads = self.params.get('max_downloads')
847 if max_downloads is not None:
848 if self._num_downloads >= int(max_downloads):
849 raise MaxDownloadsReached()
851 info_dict['fulltitle'] = info_dict['title']
852 if len(info_dict['title']) > 200:
853 info_dict['title'] = info_dict['title'][:197] + '...'
855 # Keep for backwards compatibility
856 info_dict['stitle'] = info_dict['title']
858 if 'format' not in info_dict:
859 info_dict['format'] = info_dict['ext']
861 reason = self._match_entry(info_dict)
862 if reason is not None:
863 self.to_screen('[download] ' + reason)
866 self._num_downloads += 1
868 filename = self.prepare_filename(info_dict)
871 if self.params.get('forcetitle', False):
872 self.to_stdout(info_dict['fulltitle'])
873 if self.params.get('forceid', False):
874 self.to_stdout(info_dict['id'])
875 if self.params.get('forceurl', False):
876 # For RTMP URLs, also include the playpath
877 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
878 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
879 self.to_stdout(info_dict['thumbnail'])
880 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
881 self.to_stdout(info_dict['description'])
882 if self.params.get('forcefilename', False) and filename is not None:
883 self.to_stdout(filename)
884 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
885 self.to_stdout(formatSeconds(info_dict['duration']))
886 if self.params.get('forceformat', False):
887 self.to_stdout(info_dict['format'])
888 if self.params.get('forcejson', False):
889 info_dict['_filename'] = filename
890 self.to_stdout(json.dumps(info_dict))
892 # Do nothing else if in simulate mode
893 if self.params.get('simulate', False):
900 dn = os.path.dirname(encodeFilename(filename))
901 if dn and not os.path.exists(dn):
903 except (OSError, IOError) as err:
904 self.report_error('unable to create directory ' + compat_str(err))
907 if self.params.get('writedescription', False):
908 descfn = filename + '.description'
909 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
910 self.to_screen('[info] Video description is already present')
913 self.to_screen('[info] Writing video description to: ' + descfn)
914 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
915 descfile.write(info_dict['description'])
916 except (KeyError, TypeError):
917 self.report_warning('There\'s no description to write.')
918 except (OSError, IOError):
919 self.report_error('Cannot write description file ' + descfn)
922 if self.params.get('writeannotations', False):
923 annofn = filename + '.annotations.xml'
924 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
925 self.to_screen('[info] Video annotations are already present')
928 self.to_screen('[info] Writing video annotations to: ' + annofn)
929 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
930 annofile.write(info_dict['annotations'])
931 except (KeyError, TypeError):
932 self.report_warning('There are no annotations to write.')
933 except (OSError, IOError):
934 self.report_error('Cannot write annotations file: ' + annofn)
937 subtitles_are_requested = any([self.params.get('writesubtitles', False),
938 self.params.get('writeautomaticsub')])
940 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
941 # subtitle download errors are already managed as troubles in the relevant IE,
942 # so this silently goes on when used with an IE that does not support them
943 subtitles = info_dict['subtitles']
944 sub_format = self.params.get('subtitlesformat', 'srt')
945 for sub_lang in subtitles.keys():
946 sub = subtitles[sub_lang]
950 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
951 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
952 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
954 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
955 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
957 except (OSError, IOError):
958 self.report_error('Cannot write subtitles file ' + sub_filename)
961 if self.params.get('writeinfojson', False):
962 infofn = os.path.splitext(filename)[0] + '.info.json'
963 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
964 self.to_screen('[info] Video description metadata is already present')
966 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
968 write_json_file(info_dict, encodeFilename(infofn))
969 except (OSError, IOError):
970 self.report_error('Cannot write metadata to JSON file ' + infofn)
973 if self.params.get('writethumbnail', False):
974 if info_dict.get('thumbnail') is not None:
975 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
976 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
977 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
978 self.to_screen('[%s] %s: Thumbnail is already present' %
979 (info_dict['extractor'], info_dict['id']))
981 self.to_screen('[%s] %s: Downloading thumbnail ...' %
982 (info_dict['extractor'], info_dict['id']))
984 uf = self.urlopen(info_dict['thumbnail'])
985 with open(thumb_filename, 'wb') as thumbf:
986 shutil.copyfileobj(uf, thumbf)
987 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
988 (info_dict['extractor'], info_dict['id'], thumb_filename))
989 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
990 self.report_warning('Unable to download thumbnail "%s": %s' %
991 (info_dict['thumbnail'], compat_str(err)))
993 if not self.params.get('skip_download', False):
994 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
999 fd = get_suitable_downloader(info)(self, self.params)
1000 for ph in self._progress_hooks:
1001 fd.add_progress_hook(ph)
1002 if self.params.get('verbose'):
1003 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1004 return fd.download(name, info)
1005 if info_dict.get('requested_formats') is not None:
1008 merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1009 if not merger._get_executable():
1011 self.report_warning('You have requested multiple '
1012 'formats but ffmpeg or avconv are not installed.'
1013 ' The formats won\'t be merged')
1015 postprocessors = [merger]
1016 for f in info_dict['requested_formats']:
1017 new_info = dict(info_dict)
1019 fname = self.prepare_filename(new_info)
1020 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1021 downloaded.append(fname)
1022 partial_success = dl(fname, new_info)
1023 success = success and partial_success
1024 info_dict['__postprocessors'] = postprocessors
1025 info_dict['__files_to_merge'] = downloaded
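# At this point each requested format has been downloaded to its own
# 'f<format_id>'-prefixed file; FFmpegMergerPP (added above as a
# post-processor) is expected to merge them into the final filename.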
1027 # Just a single file
1028 success = dl(filename, info_dict)
1029 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1030 self.report_error('unable to download video data: %s' % str(err))
1032 except (OSError, IOError) as err:
1033 raise UnavailableVideoError(err)
1034 except (ContentTooShortError, ) as err:
1035 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1040 self.post_process(filename, info_dict)
1041 except (PostProcessingError) as err:
1042 self.report_error('postprocessing: %s' % str(err))
1045 self.record_download_archive(info_dict)
1047 def download(self, url_list):
1048 """Download a given list of URLs."""
1049 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1050 if (len(url_list) > 1 and
1052 and self.params.get('max_downloads') != 1):
1053 raise SameFileError(outtmpl)
1055 for url in url_list:
1057 # It also downloads the videos
1058 self.extract_info(url)
1059 except UnavailableVideoError:
1060 self.report_error('unable to download video')
1061 except MaxDownloadsReached:
1062 self.to_screen('[info] Maximum number of downloaded files reached.')
1065 return self._download_retcode
1067 def download_with_info_file(self, info_filename):
1068 with io.open(info_filename, 'r', encoding='utf-8') as f:
1071 self.process_ie_result(info, download=True)
1072 except DownloadError:
1073 webpage_url = info.get('webpage_url')
1074 if webpage_url is not None:
1075 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1076 return self.download([webpage_url])
1079 return self._download_retcode
1081 def post_process(self, filename, ie_info):
1082 """Run all the postprocessors on the given file."""
1083 info = dict(ie_info)
1084 info['filepath'] = filename
1087 if ie_info.get('__postprocessors') is not None:
1088 pps_chain.extend(ie_info['__postprocessors'])
1089 pps_chain.extend(self._pps)
1090 for pp in pps_chain:
1092 keep_video_wish, new_info = pp.run(info)
1093 if keep_video_wish is not None:
1095 keep_video = keep_video_wish
1096 elif keep_video is None:
1097 # No clear decision yet, let IE decide
1098 keep_video = keep_video_wish
1099 except PostProcessingError as e:
1100 self.report_error(e.msg)
1101 if keep_video is False and not self.params.get('keepvideo', False):
1103 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1104 os.remove(encodeFilename(filename))
1105 except (IOError, OSError):
1106 self.report_warning('Unable to remove downloaded video file')
1108 def _make_archive_id(self, info_dict):
1109 # Future-proof against any change in case
1110 # and backwards compatibility with prior versions
1111 extractor = info_dict.get('extractor_key')
1112 if extractor is None:
1113 if 'id' in info_dict:
1114 extractor = info_dict.get('ie_key') # key in a playlist
1115 if extractor is None:
1116 return None # Incomplete video information
1117 return extractor.lower() + ' ' + info_dict['id']
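# Illustrative only: for a video extracted by the YouTube extractor with
# id 'abc123', the resulting archive entry would be the line
# 'youtube abc123' (lowercased extractor key, a space, then the video id).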
1119 def in_download_archive(self, info_dict):
1120 fn = self.params.get('download_archive')
1124 vid_id = self._make_archive_id(info_dict)
1126 return False # Incomplete video information
1129 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1130 for line in archive_file:
1131 if line.strip() == vid_id:
1133 except IOError as ioe:
1134 if ioe.errno != errno.ENOENT:
1138 def record_download_archive(self, info_dict):
1139 fn = self.params.get('download_archive')
1142 vid_id = self._make_archive_id(info_dict)
1144 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1145 archive_file.write(vid_id + '\n')
1148 def format_resolution(format, default='unknown'):
1149 if format.get('vcodec') == 'none':
1151 if format.get('resolution') is not None:
1152 return format['resolution']
1153 if format.get('height') is not None:
1154 if format.get('width') is not None:
1155 res = '%sx%s' % (format['width'], format['height'])
1157 res = '%sp' % format['height']
1158 elif format.get('width') is not None:
1159 res = '%dx?' % format['width']
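# Illustrative only: a format with {'width': 1280, 'height': 720} is
# reported as '1280x720', while one with only {'height': 720} is
# reported as '720p'.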
1164 def _format_note(self, fdict):
1166 if fdict.get('ext') in ['f4f', 'f4m']:
1167 res += '(unsupported) '
1168 if fdict.get('format_note') is not None:
1169 res += fdict['format_note'] + ' '
1170 if fdict.get('tbr') is not None:
1171 res += '%4dk ' % fdict['tbr']
1172 if fdict.get('container') is not None:
1175 res += '%s container' % fdict['container']
1176 if (fdict.get('vcodec') is not None and
1177 fdict.get('vcodec') != 'none'):
1180 res += fdict['vcodec']
1181 if fdict.get('vbr') is not None:
1183 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1185 if fdict.get('vbr') is not None:
1186 res += '%4dk' % fdict['vbr']
1187 if fdict.get('acodec') is not None:
1190 if fdict['acodec'] == 'none':
1193 res += '%-5s' % fdict['acodec']
1194 elif fdict.get('abr') is not None:
1198 if fdict.get('abr') is not None:
1199 res += '@%3dk' % fdict['abr']
1200 if fdict.get('asr') is not None:
1201 res += ' (%5dHz)' % fdict['asr']
1202 if fdict.get('filesize') is not None:
1205 res += format_bytes(fdict['filesize'])
1206 elif fdict.get('filesize_approx') is not None:
1209 res += '~' + format_bytes(fdict['filesize_approx'])
1212 def list_formats(self, info_dict):
1213 def line(format, idlen=20):
1214 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1215 format['format_id'],
1217 self.format_resolution(format),
1218 self._format_note(format),
1221 formats = info_dict.get('formats', [info_dict])
1222 idlen = max(len('format code'),
1223 max(len(f['format_id']) for f in formats))
1224 formats_s = [line(f, idlen) for f in formats]
1225 if len(formats) > 1:
1226 formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1227 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1229 header_line = line({
1230 'format_id': 'format code', 'ext': 'extension',
1231 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1232 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1233 (info_dict['id'], header_line, '\n'.join(formats_s)))
1235 def urlopen(self, req):
1236 """ Start an HTTP download """
1237 return self._opener.open(req, timeout=self._socket_timeout)
1239 def print_debug_header(self):
1240 if not self.params.get('verbose'):
1243 if type('') is not compat_str:
1244 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1245 self.report_warning(
1246 'Your Python is broken! Update to a newer and supported version')
1249 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1250 locale.getpreferredencoding(),
1251 sys.getfilesystemencoding(),
1252 sys.stdout.encoding,
1253 self.get_encoding()))
1254 write_string(encoding_str, encoding=None)
1256 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1258 sp = subprocess.Popen(
1259 ['git', 'rev-parse', '--short', 'HEAD'],
1260 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1261 cwd=os.path.dirname(os.path.abspath(__file__)))
1262 out, err = sp.communicate()
1263 out = out.decode().strip()
1264 if re.match('[0-9a-f]+', out):
1265 self._write_string('[debug] Git HEAD: ' + out + '\n')
1271 self._write_string('[debug] Python version %s - %s' %
1272 (platform.python_version(), platform_name()) + '\n')
1275 for handler in self._opener.handlers:
1276 if hasattr(handler, 'proxies'):
1277 proxy_map.update(handler.proxies)
1278 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1280 def _setup_opener(self):
1281 timeout_val = self.params.get('socket_timeout')
1282 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1284 opts_cookiefile = self.params.get('cookiefile')
1285 opts_proxy = self.params.get('proxy')
1287 if opts_cookiefile is None:
1288 self.cookiejar = compat_cookiejar.CookieJar()
1290 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1292 if os.access(opts_cookiefile, os.R_OK):
1293 self.cookiejar.load()
1295 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1297 if opts_proxy is not None:
1298 if opts_proxy == '':
1301 proxies = {'http': opts_proxy, 'https': opts_proxy}
1303 proxies = compat_urllib_request.getproxies()
1304 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1305 if 'http' in proxies and 'https' not in proxies:
1306 proxies['https'] = proxies['http']
1307 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1309 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1310 https_handler = make_HTTPS_handler(
1311 self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1312 ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1313 opener = compat_urllib_request.build_opener(
1314 https_handler, proxy_handler, cookie_processor, ydlh)
1315 # Delete the default user-agent header, which would otherwise apply in
1316 # cases where our custom HTTP handler doesn't come into play
1317 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1318 opener.addheaders = []
1319 self._opener = opener
1321 def encode(self, s):
1322 if isinstance(s, bytes):
1323 return s # Already encoded
1326 return s.encode(self.get_encoding())
1327 except UnicodeEncodeError as err:
1328 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1331 def get_encoding(self):
1332 encoding = self.params.get('encoding')
1333 if encoding is None:
1334 encoding = preferredencoding()