2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
30 compat_urllib_request,
53 UnavailableVideoError,
60 from .cache import Cache
61 from .extractor import get_info_extractor, gen_extractors
62 from .downloader import get_suitable_downloader
63 from .postprocessor import FFmpegMergerPP
64 from .version import __version__
67 class YoutubeDL(object):
70 YoutubeDL objects are the ones responsible of downloading the
71 actual video file and writing it to disk if the user has requested
72 it, among some other tasks. In most cases there should be one per
73 program. As, given a video URL, the downloader doesn't know how to
74 extract all the needed information, task that InfoExtractors do, it
75 has to pass the URL to one of them.
77 For this, YoutubeDL objects have a method that allows
78 InfoExtractors to be registered in a given order. When it is passed
79 a URL, the YoutubeDL object handles it to the first InfoExtractor it
80 finds that reports being able to handle it. The InfoExtractor extracts
81 all the information about the video or videos the URL refers to, and
YoutubeDL processes the extracted information, possibly using a File
83 Downloader to download the video.
85 YoutubeDL objects accept a lot of parameters. In order not to saturate
86 the object constructor with arguments, it receives a dictionary of
87 options instead. These options are available through the params
88 attribute for the InfoExtractors to use. The YoutubeDL also
89 registers itself as the downloader in charge for the InfoExtractors
90 that are added to it, so this is a "mutual registration".
94 username: Username for authentication purposes.
95 password: Password for authentication purposes.
videopassword:     Password for accessing a video.
97 usenetrc: Use netrc for authentication instead.
98 verbose: Print additional info to stdout.
99 quiet: Do not print messages to stdout.
100 no_warnings: Do not print out anything for warnings.
101 forceurl: Force printing final URL.
102 forcetitle: Force printing title.
103 forceid: Force printing ID.
104 forcethumbnail: Force printing thumbnail URL.
105 forcedescription: Force printing description.
106 forcefilename: Force printing final filename.
107 forceduration: Force printing duration.
108 forcejson: Force printing info_dict as JSON.
109 simulate: Do not download the video files.
110 format: Video format code.
111 format_limit: Highest quality format to try.
112 outtmpl: Template for output names.
113 restrictfilenames: Do not allow "&" and spaces in file names
114 ignoreerrors: Do not stop on download errors.
115 nooverwrites: Prevent overwriting files.
116 playliststart: Playlist item to start at.
117 playlistend: Playlist item to end at.
118 matchtitle: Download only matching titles.
119 rejecttitle: Reject downloads for matching titles.
120 logger: Log messages to a logging.Logger instance.
121 logtostderr: Log messages to stderr instead of stdout.
122 writedescription: Write the video description to a .description file
123 writeinfojson: Write the video description to a .info.json file
124 writeannotations: Write the video annotations to a .annotations.xml file
125 writethumbnail: Write the thumbnail image to a file
126 writesubtitles: Write the video subtitles to a file
127 writeautomaticsub: Write the automatic subtitles to a file
128 allsubtitles: Downloads all the subtitles of the video
129 (requires writesubtitles or writeautomaticsub)
130 listsubtitles: Lists all available subtitles for the video
131 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
132 subtitleslangs: List of languages of the subtitles to download
133 keepvideo: Keep the video file after post-processing
134 daterange: A DateRange object, download only if the upload_date is in the range.
135 skip_download: Skip the actual download of the video file
136 cachedir: Location of the cache files in the filesystem.
137 False to disable filesystem cache.
138 noplaylist: Download single video instead of a playlist if in doubt.
139 age_limit: An integer representing the user's age in years.
140 Unsuitable videos for the given age are skipped.
141 min_views: An integer representing the minimum view count the video
142 must have in order to not be skipped.
143 Videos without view count information are always
144 downloaded. None for no limit.
145 max_views: An integer representing the maximum view count.
146 Videos that are more popular than that are not
148 Videos without view count information are always
149 downloaded. None for no limit.
150 download_archive: File name of a file where all downloads are recorded.
151 Videos already present in the file are not downloaded
153 cookiefile: File name where cookies should be read from and dumped to.
154 nocheckcertificate:Do not verify SSL certificates
155 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
156 At the moment, this is only supported by YouTube.
157 proxy: URL of the proxy server to use
158 socket_timeout: Time to wait for unresponsive hosts, in seconds
159 bidi_workaround: Work around buggy terminals without bidirectional text
support, using fribidi
161 debug_printtraffic:Print out sent and received HTTP traffic
162 include_ads: Download ads as well
163 default_search: Prepend this string if an input url is not valid.
164 'auto' for elaborate guessing
165 encoding: Use this encoding instead of the system-specified.
166 extract_flat: Do not resolve URLs, return the immediate result.
168 The following parameters are not used by YoutubeDL itself, they are used by
170 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
171 noresizebuffer, retries, continuedl, noprogress, consoletitle
173 The following options are used by the post processors:
174 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
175 otherwise prefer avconv.
176 exec_cmd: Arbitrary command to run after downloading
182 _download_retcode = None
183 _num_downloads = None
186 def __init__(self, params=None):
187 """Create a FileDownloader object with the given options."""
191 self._ies_instances = {}
193 self._progress_hooks = []
194 self._download_retcode = 0
195 self._num_downloads = 0
196 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
197 self._err_file = sys.stderr
199 self.cache = Cache(self)
201 if params.get('bidi_workaround', False):
204 master, slave = pty.openpty()
205 width = get_term_width()
209 width_args = ['-w', str(width)]
211 stdin=subprocess.PIPE,
213 stderr=self._err_file)
215 self._output_process = subprocess.Popen(
216 ['bidiv'] + width_args, **sp_kwargs
219 self._output_process = subprocess.Popen(
220 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
221 self._output_channel = os.fdopen(master, 'rb')
222 except OSError as ose:
224 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
228 if (sys.version_info >= (3,) and sys.platform != 'win32' and
229 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
230 and not params['restrictfilenames']):
231 # On Python 3, the Unicode filesystem API will throw errors (#1474)
233 'Assuming --restrict-filenames since file system encoding '
234 'cannot encode all charactes. '
235 'Set the LC_ALL environment variable to fix this.')
236 self.params['restrictfilenames'] = True
238 if '%(stitle)s' in self.params.get('outtmpl', ''):
239 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
243 def add_info_extractor(self, ie):
244 """Add an InfoExtractor object to the end of the list."""
246 self._ies_instances[ie.ie_key()] = ie
247 ie.set_downloader(self)
249 def get_info_extractor(self, ie_key):
251 Get an instance of an IE with name ie_key, it will try to get one from
252 the _ies list, if there's no instance it will create a new one and add
253 it to the extractor list.
255 ie = self._ies_instances.get(ie_key)
257 ie = get_info_extractor(ie_key)()
258 self.add_info_extractor(ie)
261 def add_default_info_extractors(self):
263 Add the InfoExtractors returned by gen_extractors to the end of the list
265 for ie in gen_extractors():
266 self.add_info_extractor(ie)
268 def add_post_processor(self, pp):
269 """Add a PostProcessor object to the end of the chain."""
271 pp.set_downloader(self)
273 def add_progress_hook(self, ph):
274 """Add the progress hook (currently only for the file downloader)"""
275 self._progress_hooks.append(ph)
277 def _bidi_workaround(self, message):
278 if not hasattr(self, '_output_channel'):
281 assert hasattr(self, '_output_process')
282 assert isinstance(message, compat_str)
283 line_count = message.count('\n') + 1
284 self._output_process.stdin.write((message + '\n').encode('utf-8'))
285 self._output_process.stdin.flush()
286 res = ''.join(self._output_channel.readline().decode('utf-8')
287 for _ in range(line_count))
288 return res[:-len('\n')]
290 def to_screen(self, message, skip_eol=False):
291 """Print message to stdout if not in quiet mode."""
292 return self.to_stdout(message, skip_eol, check_quiet=True)
294 def _write_string(self, s, out=None):
295 write_string(s, out=out, encoding=self.params.get('encoding'))
297 def to_stdout(self, message, skip_eol=False, check_quiet=False):
298 """Print message to stdout if not in quiet mode."""
299 if self.params.get('logger'):
300 self.params['logger'].debug(message)
301 elif not check_quiet or not self.params.get('quiet', False):
302 message = self._bidi_workaround(message)
303 terminator = ['\n', ''][skip_eol]
304 output = message + terminator
306 self._write_string(output, self._screen_file)
308 def to_stderr(self, message):
309 """Print message to stderr."""
310 assert isinstance(message, compat_str)
311 if self.params.get('logger'):
312 self.params['logger'].error(message)
314 message = self._bidi_workaround(message)
315 output = message + '\n'
316 self._write_string(output, self._err_file)
318 def to_console_title(self, message):
319 if not self.params.get('consoletitle', False):
321 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
322 # c_wchar_p() might not be necessary if `message` is
323 # already of type unicode()
324 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
325 elif 'TERM' in os.environ:
326 self._write_string('\033]0;%s\007' % message, self._screen_file)
328 def save_console_title(self):
329 if not self.params.get('consoletitle', False):
331 if 'TERM' in os.environ:
332 # Save the title on stack
333 self._write_string('\033[22;0t', self._screen_file)
335 def restore_console_title(self):
336 if not self.params.get('consoletitle', False):
338 if 'TERM' in os.environ:
339 # Restore the title from stack
340 self._write_string('\033[23;0t', self._screen_file)
343 self.save_console_title()
346 def __exit__(self, *args):
347 self.restore_console_title()
349 if self.params.get('cookiefile') is not None:
350 self.cookiejar.save()
352 def trouble(self, message=None, tb=None):
353 """Determine action to take when a download problem appears.
355 Depending on if the downloader has been configured to ignore
356 download errors or not, this method may throw an exception or
357 not when errors are found, after printing the message.
359 tb, if given, is additional traceback information.
361 if message is not None:
362 self.to_stderr(message)
363 if self.params.get('verbose'):
365 if sys.exc_info()[0]: # if .trouble has been called from an except block
367 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
368 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
369 tb += compat_str(traceback.format_exc())
371 tb_data = traceback.format_list(traceback.extract_stack())
372 tb = ''.join(tb_data)
374 if not self.params.get('ignoreerrors', False):
375 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
376 exc_info = sys.exc_info()[1].exc_info
378 exc_info = sys.exc_info()
379 raise DownloadError(message, exc_info)
380 self._download_retcode = 1
382 def report_warning(self, message):
384 Print the message to stderr, it will be prefixed with 'WARNING:'
385 If stderr is a tty file the 'WARNING:' will be colored
387 if self.params.get('logger') is not None:
388 self.params['logger'].warning(message)
390 if self.params.get('no_warnings'):
392 if self._err_file.isatty() and os.name != 'nt':
393 _msg_header = '\033[0;33mWARNING:\033[0m'
395 _msg_header = 'WARNING:'
396 warning_message = '%s %s' % (_msg_header, message)
397 self.to_stderr(warning_message)
399 def report_error(self, message, tb=None):
401 Do the same as trouble, but prefixes the message with 'ERROR:', colored
402 in red if stderr is a tty file.
404 if self._err_file.isatty() and os.name != 'nt':
405 _msg_header = '\033[0;31mERROR:\033[0m'
407 _msg_header = 'ERROR:'
408 error_message = '%s %s' % (_msg_header, message)
409 self.trouble(error_message, tb)
411 def report_file_already_downloaded(self, file_name):
412 """Report file has already been fully downloaded."""
414 self.to_screen('[download] %s has already been downloaded' % file_name)
415 except UnicodeEncodeError:
416 self.to_screen('[download] The file has already been downloaded')
418 def prepare_filename(self, info_dict):
419 """Generate the output filename."""
421 template_dict = dict(info_dict)
423 template_dict['epoch'] = int(time.time())
424 autonumber_size = self.params.get('autonumber_size')
425 if autonumber_size is None:
427 autonumber_templ = '%0' + str(autonumber_size) + 'd'
428 template_dict['autonumber'] = autonumber_templ % self._num_downloads
429 if template_dict.get('playlist_index') is not None:
430 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
431 if template_dict.get('resolution') is None:
432 if template_dict.get('width') and template_dict.get('height'):
433 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
434 elif template_dict.get('height'):
435 template_dict['resolution'] = '%sp' % template_dict['height']
436 elif template_dict.get('width'):
437 template_dict['resolution'] = '?x%d' % template_dict['width']
439 sanitize = lambda k, v: sanitize_filename(
441 restricted=self.params.get('restrictfilenames'),
443 template_dict = dict((k, sanitize(k, v))
444 for k, v in template_dict.items()
446 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
448 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
449 tmpl = os.path.expanduser(outtmpl)
450 filename = tmpl % template_dict
452 except ValueError as err:
453 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
456 def _match_entry(self, info_dict):
457 """ Returns None iff the file should be downloaded """
459 video_title = info_dict.get('title', info_dict.get('id', 'video'))
460 if 'title' in info_dict:
461 # This can happen when we're just evaluating the playlist
462 title = info_dict['title']
463 matchtitle = self.params.get('matchtitle', False)
465 if not re.search(matchtitle, title, re.IGNORECASE):
466 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
467 rejecttitle = self.params.get('rejecttitle', False)
469 if re.search(rejecttitle, title, re.IGNORECASE):
470 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
471 date = info_dict.get('upload_date', None)
473 dateRange = self.params.get('daterange', DateRange())
474 if date not in dateRange:
475 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
476 view_count = info_dict.get('view_count', None)
477 if view_count is not None:
478 min_views = self.params.get('min_views')
479 if min_views is not None and view_count < min_views:
480 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
481 max_views = self.params.get('max_views')
482 if max_views is not None and view_count > max_views:
483 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
484 age_limit = self.params.get('age_limit')
485 if age_limit is not None:
486 actual_age_limit = info_dict.get('age_limit')
487 if actual_age_limit is None:
489 if age_limit < actual_age_limit:
490 return 'Skipping "' + title + '" because it is age restricted'
491 if self.in_download_archive(info_dict):
492 return '%s has already been recorded in archive' % video_title
496 def add_extra_info(info_dict, extra_info):
497 '''Set the keys from extra_info in info dict if they are missing'''
498 for key, value in extra_info.items():
499 info_dict.setdefault(key, value)
501 def extract_info(self, url, download=True, ie_key=None, extra_info={},
504 Returns a list with a dictionary for each video we find.
505 If 'download', also downloads the videos.
506 extra_info is a dict containing the extra values to add to each result
510 ies = [self.get_info_extractor(ie_key)]
515 if not ie.suitable(url):
519 self.report_warning('The program functionality for this site has been marked as broken, '
520 'and will probably not work.')
523 ie_result = ie.extract(url)
524 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
526 if isinstance(ie_result, list):
527 # Backwards compatibility: old IE result format
529 '_type': 'compat_list',
530 'entries': ie_result,
532 self.add_default_extra_info(ie_result, ie, url)
534 return self.process_ie_result(ie_result, download, extra_info)
537 except ExtractorError as de: # An error we somewhat expected
538 self.report_error(compat_str(de), de.format_traceback())
540 except MaxDownloadsReached:
542 except Exception as e:
543 if self.params.get('ignoreerrors', False):
544 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
549 self.report_error('no suitable InfoExtractor for URL %s' % url)
551 def add_default_extra_info(self, ie_result, ie, url):
552 self.add_extra_info(ie_result, {
553 'extractor': ie.IE_NAME,
555 'webpage_url_basename': url_basename(url),
556 'extractor_key': ie.ie_key(),
559 def process_ie_result(self, ie_result, download=True, extra_info={}):
561 Take the result of the ie(may be modified) and resolve all unresolved
562 references (URLs, playlist items).
564 It will also download the videos if 'download'.
565 Returns the resolved ie_result.
568 result_type = ie_result.get('_type', 'video')
570 if self.params.get('extract_flat', False):
571 if result_type in ('url', 'url_transparent'):
574 if result_type == 'video':
575 self.add_extra_info(ie_result, extra_info)
576 return self.process_video_result(ie_result, download=download)
577 elif result_type == 'url':
578 # We have to add extra_info to the results because it may be
579 # contained in a playlist
580 return self.extract_info(ie_result['url'],
582 ie_key=ie_result.get('ie_key'),
583 extra_info=extra_info)
584 elif result_type == 'url_transparent':
585 # Use the information from the embedding page
586 info = self.extract_info(
587 ie_result['url'], ie_key=ie_result.get('ie_key'),
588 extra_info=extra_info, download=False, process=False)
590 def make_result(embedded_info):
591 new_result = ie_result.copy()
592 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
593 'entries', 'ie_key', 'duration',
594 'subtitles', 'annotations', 'format',
595 'thumbnail', 'thumbnails'):
598 if f in embedded_info:
599 new_result[f] = embedded_info[f]
601 new_result = make_result(info)
603 assert new_result.get('_type') != 'url_transparent'
604 if new_result.get('_type') == 'compat_list':
605 new_result['entries'] = [
606 make_result(e) for e in new_result['entries']]
608 return self.process_ie_result(
609 new_result, download=download, extra_info=extra_info)
610 elif result_type == 'playlist':
611 # We process each entry in the playlist
612 playlist = ie_result.get('title', None) or ie_result.get('id', None)
613 self.to_screen('[download] Downloading playlist: %s' % playlist)
615 playlist_results = []
617 playliststart = self.params.get('playliststart', 1) - 1
618 playlistend = self.params.get('playlistend', None)
619 # For backwards compatibility, interpret -1 as whole list
620 if playlistend == -1:
623 if isinstance(ie_result['entries'], list):
624 n_all_entries = len(ie_result['entries'])
625 entries = ie_result['entries'][playliststart:playlistend]
626 n_entries = len(entries)
628 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
629 (ie_result['extractor'], playlist, n_all_entries, n_entries))
631 assert isinstance(ie_result['entries'], PagedList)
632 entries = ie_result['entries'].getslice(
633 playliststart, playlistend)
634 n_entries = len(entries)
636 "[%s] playlist %s: Downloading %d videos" %
637 (ie_result['extractor'], playlist, n_entries))
639 for i, entry in enumerate(entries, 1):
640 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
642 'n_entries': n_entries,
643 'playlist': playlist,
644 'playlist_index': i + playliststart,
645 'extractor': ie_result['extractor'],
646 'webpage_url': ie_result['webpage_url'],
647 'webpage_url_basename': url_basename(ie_result['webpage_url']),
648 'extractor_key': ie_result['extractor_key'],
651 reason = self._match_entry(entry)
652 if reason is not None:
653 self.to_screen('[download] ' + reason)
656 entry_result = self.process_ie_result(entry,
659 playlist_results.append(entry_result)
660 ie_result['entries'] = playlist_results
662 elif result_type == 'compat_list':
664 self.add_extra_info(r,
666 'extractor': ie_result['extractor'],
667 'webpage_url': ie_result['webpage_url'],
668 'webpage_url_basename': url_basename(ie_result['webpage_url']),
669 'extractor_key': ie_result['extractor_key'],
672 ie_result['entries'] = [
673 self.process_ie_result(_fixup(r), download, extra_info)
674 for r in ie_result['entries']
678 raise Exception('Invalid result type: %s' % result_type)
680 def select_format(self, format_spec, available_formats):
681 if format_spec == 'best' or format_spec is None:
682 return available_formats[-1]
683 elif format_spec == 'worst':
684 return available_formats[0]
685 elif format_spec == 'bestaudio':
687 f for f in available_formats
688 if f.get('vcodec') == 'none']
690 return audio_formats[-1]
691 elif format_spec == 'worstaudio':
693 f for f in available_formats
694 if f.get('vcodec') == 'none']
696 return audio_formats[0]
697 elif format_spec == 'bestvideo':
699 f for f in available_formats
700 if f.get('acodec') == 'none']
702 return video_formats[-1]
703 elif format_spec == 'worstvideo':
705 f for f in available_formats
706 if f.get('acodec') == 'none']
708 return video_formats[0]
710 extensions = ['mp4', 'flv', 'webm', '3gp']
711 if format_spec in extensions:
712 filter_f = lambda f: f['ext'] == format_spec
714 filter_f = lambda f: f['format_id'] == format_spec
715 matches = list(filter(filter_f, available_formats))
720 def process_video_result(self, info_dict, download=True):
721 assert info_dict.get('_type', 'video') == 'video'
723 if 'id' not in info_dict:
724 raise ExtractorError('Missing "id" field in extractor result')
725 if 'title' not in info_dict:
726 raise ExtractorError('Missing "title" field in extractor result')
728 if 'playlist' not in info_dict:
729 # It isn't part of a playlist
730 info_dict['playlist'] = None
731 info_dict['playlist_index'] = None
733 thumbnails = info_dict.get('thumbnails')
735 thumbnails.sort(key=lambda t: (
736 t.get('width'), t.get('height'), t.get('url')))
738 if 'width' in t and 'height' in t:
739 t['resolution'] = '%dx%d' % (t['width'], t['height'])
741 if thumbnails and 'thumbnail' not in info_dict:
742 info_dict['thumbnail'] = thumbnails[-1]['url']
744 if 'display_id' not in info_dict and 'id' in info_dict:
745 info_dict['display_id'] = info_dict['id']
747 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
748 upload_date = datetime.datetime.utcfromtimestamp(
749 info_dict['timestamp'])
750 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
752 # This extractors handle format selection themselves
753 if info_dict['extractor'] in ['Youku']:
755 self.process_info(info_dict)
758 # We now pick which formats have to be downloaded
759 if info_dict.get('formats') is None:
760 # There's only one format available
761 formats = [info_dict]
763 formats = info_dict['formats']
766 raise ExtractorError('No video formats found!')
768 # We check that all the formats have the format and format_id fields
769 for i, format in enumerate(formats):
770 if 'url' not in format:
771 raise ExtractorError('Missing "url" key in result (index %d)' % i)
773 if format.get('format_id') is None:
774 format['format_id'] = compat_str(i)
775 if format.get('format') is None:
776 format['format'] = '{id} - {res}{note}'.format(
777 id=format['format_id'],
778 res=self.format_resolution(format),
779 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
781 # Automatically determine file extension if missing
782 if 'ext' not in format:
783 format['ext'] = determine_ext(format['url']).lower()
785 format_limit = self.params.get('format_limit', None)
787 formats = list(takewhile_inclusive(
788 lambda f: f['format_id'] != format_limit, formats
791 # TODO Central sorting goes here
793 if formats[0] is not info_dict:
794 # only set the 'formats' fields if the original info_dict list them
795 # otherwise we end up with a circular reference, the first (and unique)
796 # element in the 'formats' field in info_dict is info_dict itself,
797 # wich can't be exported to json
798 info_dict['formats'] = formats
799 if self.params.get('listformats', None):
800 self.list_formats(info_dict)
803 req_format = self.params.get('format')
804 if req_format is None:
806 formats_to_download = []
807 # The -1 is for supporting YoutubeIE
808 if req_format in ('-1', 'all'):
809 formats_to_download = formats
811 # We can accept formats requested in the format: 34/5/best, we pick
812 # the first that is available, starting from left
813 req_formats = req_format.split('/')
814 for rf in req_formats:
815 if re.match(r'.+?\+.+?', rf) is not None:
816 # Two formats have been requested like '137+139'
817 format_1, format_2 = rf.split('+')
818 formats_info = (self.select_format(format_1, formats),
819 self.select_format(format_2, formats))
820 if all(formats_info):
822 'requested_formats': formats_info,
824 'ext': formats_info[0]['ext'],
827 selected_format = None
829 selected_format = self.select_format(rf, formats)
830 if selected_format is not None:
831 formats_to_download = [selected_format]
833 if not formats_to_download:
834 raise ExtractorError('requested format not available',
838 if len(formats_to_download) > 1:
839 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
840 for format in formats_to_download:
841 new_info = dict(info_dict)
842 new_info.update(format)
843 self.process_info(new_info)
844 # We update the info dict with the best quality format (backwards compatibility)
845 info_dict.update(formats_to_download[-1])
848 def process_info(self, info_dict):
849 """Process a single resolved IE result."""
851 assert info_dict.get('_type', 'video') == 'video'
853 max_downloads = self.params.get('max_downloads')
854 if max_downloads is not None:
855 if self._num_downloads >= int(max_downloads):
856 raise MaxDownloadsReached()
858 info_dict['fulltitle'] = info_dict['title']
859 if len(info_dict['title']) > 200:
860 info_dict['title'] = info_dict['title'][:197] + '...'
862 # Keep for backwards compatibility
863 info_dict['stitle'] = info_dict['title']
865 if 'format' not in info_dict:
866 info_dict['format'] = info_dict['ext']
868 reason = self._match_entry(info_dict)
869 if reason is not None:
870 self.to_screen('[download] ' + reason)
873 self._num_downloads += 1
875 filename = self.prepare_filename(info_dict)
878 if self.params.get('forcetitle', False):
879 self.to_stdout(info_dict['fulltitle'])
880 if self.params.get('forceid', False):
881 self.to_stdout(info_dict['id'])
882 if self.params.get('forceurl', False):
883 # For RTMP URLs, also include the playpath
884 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
885 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
886 self.to_stdout(info_dict['thumbnail'])
887 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
888 self.to_stdout(info_dict['description'])
889 if self.params.get('forcefilename', False) and filename is not None:
890 self.to_stdout(filename)
891 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
892 self.to_stdout(formatSeconds(info_dict['duration']))
893 if self.params.get('forceformat', False):
894 self.to_stdout(info_dict['format'])
895 if self.params.get('forcejson', False):
896 info_dict['_filename'] = filename
897 self.to_stdout(json.dumps(info_dict))
899 # Do nothing else if in simulate mode
900 if self.params.get('simulate', False):
907 dn = os.path.dirname(encodeFilename(filename))
908 if dn and not os.path.exists(dn):
910 except (OSError, IOError) as err:
911 self.report_error('unable to create directory ' + compat_str(err))
914 if self.params.get('writedescription', False):
915 descfn = filename + '.description'
916 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
917 self.to_screen('[info] Video description is already present')
920 self.to_screen('[info] Writing video description to: ' + descfn)
921 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
922 descfile.write(info_dict['description'])
923 except (KeyError, TypeError):
924 self.report_warning('There\'s no description to write.')
925 except (OSError, IOError):
926 self.report_error('Cannot write description file ' + descfn)
929 if self.params.get('writeannotations', False):
930 annofn = filename + '.annotations.xml'
931 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
932 self.to_screen('[info] Video annotations are already present')
935 self.to_screen('[info] Writing video annotations to: ' + annofn)
936 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
937 annofile.write(info_dict['annotations'])
938 except (KeyError, TypeError):
939 self.report_warning('There are no annotations to write.')
940 except (OSError, IOError):
941 self.report_error('Cannot write annotations file: ' + annofn)
944 subtitles_are_requested = any([self.params.get('writesubtitles', False),
945 self.params.get('writeautomaticsub')])
947 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
948 # subtitles download errors are already managed as troubles in relevant IE
949 # that way it will silently go on when used with unsupporting IE
950 subtitles = info_dict['subtitles']
951 sub_format = self.params.get('subtitlesformat', 'srt')
952 for sub_lang in subtitles.keys():
953 sub = subtitles[sub_lang]
957 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
958 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
959 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
961 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
962 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
964 except (OSError, IOError):
965 self.report_error('Cannot write subtitles file ' + sub_filename)
968 if self.params.get('writeinfojson', False):
969 infofn = os.path.splitext(filename)[0] + '.info.json'
970 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
971 self.to_screen('[info] Video description metadata is already present')
973 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
975 write_json_file(info_dict, encodeFilename(infofn))
976 except (OSError, IOError):
977 self.report_error('Cannot write metadata to JSON file ' + infofn)
980 if self.params.get('writethumbnail', False):
981 if info_dict.get('thumbnail') is not None:
982 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
983 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
984 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
985 self.to_screen('[%s] %s: Thumbnail is already present' %
986 (info_dict['extractor'], info_dict['id']))
988 self.to_screen('[%s] %s: Downloading thumbnail ...' %
989 (info_dict['extractor'], info_dict['id']))
991 uf = self.urlopen(info_dict['thumbnail'])
992 with open(thumb_filename, 'wb') as thumbf:
993 shutil.copyfileobj(uf, thumbf)
994 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
995 (info_dict['extractor'], info_dict['id'], thumb_filename))
996 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
997 self.report_warning('Unable to download thumbnail "%s": %s' %
998 (info_dict['thumbnail'], compat_str(err)))
1000 if not self.params.get('skip_download', False):
1001 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1006 fd = get_suitable_downloader(info)(self, self.params)
1007 for ph in self._progress_hooks:
1008 fd.add_progress_hook(ph)
1009 if self.params.get('verbose'):
1010 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1011 return fd.download(name, info)
1012 if info_dict.get('requested_formats') is not None:
1015 merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1016 if not merger._get_executable():
1018 self.report_warning('You have requested multiple '
1019 'formats but ffmpeg or avconv are not installed.'
1020 ' The formats won\'t be merged')
1022 postprocessors = [merger]
1023 for f in info_dict['requested_formats']:
1024 new_info = dict(info_dict)
1026 fname = self.prepare_filename(new_info)
1027 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1028 downloaded.append(fname)
1029 partial_success = dl(fname, new_info)
1030 success = success and partial_success
1031 info_dict['__postprocessors'] = postprocessors
1032 info_dict['__files_to_merge'] = downloaded
1034 # Just a single file
1035 success = dl(filename, info_dict)
1036 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1037 self.report_error('unable to download video data: %s' % str(err))
1039 except (OSError, IOError) as err:
1040 raise UnavailableVideoError(err)
1041 except (ContentTooShortError, ) as err:
1042 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1047 self.post_process(filename, info_dict)
1048 except (PostProcessingError) as err:
1049 self.report_error('postprocessing: %s' % str(err))
1052 self.record_download_archive(info_dict)
def download(self, url_list):
    """Download a given list of URLs.

    Returns the accumulated process return code
    (self._download_retcode).  Raises SameFileError when several URLs
    would all be written to one fixed (non-templated) output file.
    """
    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    if (len(url_list) > 1 and
            '%' not in outtmpl
            and self.params.get('max_downloads') != 1):
        # A template without any '%' field cannot vary per video, so
        # multiple downloads would overwrite each other.
        raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            self.extract_info(url)
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            # Re-raise so callers can stop the whole run.
            raise

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Download using pre-extracted metadata from a .info.json file.

    On a DownloadError, retries with a fresh extraction of the saved
    'webpage_url' when available; otherwise re-raises.  Returns the
    accumulated process return code.
    """
    with io.open(info_filename, 'r', encoding='utf-8') as f:
        # TODO: Check for errors
        info = json.load(f)
    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        webpage_url = info.get('webpage_url')
        if webpage_url is not None:
            self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
            return self.download([webpage_url])
        else:
            raise
    return self._download_retcode
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file."""
    # Shallow copy so postprocessors never mutate the caller's dict.
    info = dict(ie_info)
    info['filepath'] = filename
    keep_video = None
    pps_chain = []
    # Per-download postprocessors (e.g. the format merger) run before
    # the globally registered ones.
    if ie_info.get('__postprocessors') is not None:
        pps_chain.extend(ie_info['__postprocessors'])
    pps_chain.extend(self._pps)
    for pp in pps_chain:
        try:
            keep_video_wish, new_info = pp.run(info)
            if keep_video_wish is not None:
                if keep_video_wish:
                    # Any postprocessor asking to keep the video wins.
                    keep_video = keep_video_wish
                elif keep_video is None:
                    # No clear decision yet, let IE decide
                    keep_video = keep_video_wish
        except PostProcessingError as e:
            self.report_error(e.msg)
    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded video file')
1115 def _make_archive_id(self, info_dict):
1116 # Future-proof against any change in case
1117 # and backwards compatibility with prior versions
1118 extractor = info_dict.get('extractor_key')
1119 if extractor is None:
1120 if 'id' in info_dict:
1121 extractor = info_dict.get('ie_key') # key in a playlist
1122 if extractor is None:
1123 return None # Incomplete video information
1124 return extractor.lower() + ' ' + info_dict['id']
def in_download_archive(self, info_dict):
    """Return True if this video's archive id is recorded in the
    --download-archive file.

    False when no archive is configured, the video information is
    incomplete, or the id is not present in the file.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return False

    vid_id = self._make_archive_id(info_dict)
    if vid_id is None:
        return False  # Incomplete video information

    try:
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            for line in archive_file:
                if line.strip() == vid_id:
                    return True
    except IOError as ioe:
        # A missing archive file just means nothing was recorded yet.
        if ioe.errno != errno.ENOENT:
            raise
    return False
def record_download_archive(self, info_dict):
    """Append the video's archive id to the --download-archive file.

    No-op when no archive is configured; the archive id must be
    constructible (complete video information) at this point.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return
    vid_id = self._make_archive_id(info_dict)
    assert vid_id
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + '\n')
# NOTE(review): takes no 'self' — presumably decorated @staticmethod on the
# line preceding this view; confirm against the full file.
def format_resolution(format, default='unknown'):
    """Return a human-readable resolution string for a format dict.

    Preference order: 'audio only' for audio-only formats, an explicit
    'resolution' field, then WxH / <height>p / ?x<width> built from the
    'width'/'height' fields, else *default*.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']
    if format.get('height') is not None:
        if format.get('width') is not None:
            res = '%sx%s' % (format['width'], format['height'])
        else:
            res = '%sp' % format['height']
    elif format.get('width') is not None:
        res = '?x%d' % format['width']
    else:
        res = default
    return res
1171 def _format_note(self, fdict):
1173 if fdict.get('ext') in ['f4f', 'f4m']:
1174 res += '(unsupported) '
1175 if fdict.get('format_note') is not None:
1176 res += fdict['format_note'] + ' '
1177 if fdict.get('tbr') is not None:
1178 res += '%4dk ' % fdict['tbr']
1179 if fdict.get('container') is not None:
1182 res += '%s container' % fdict['container']
1183 if (fdict.get('vcodec') is not None and
1184 fdict.get('vcodec') != 'none'):
1187 res += fdict['vcodec']
1188 if fdict.get('vbr') is not None:
1190 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1192 if fdict.get('vbr') is not None:
1193 res += '%4dk' % fdict['vbr']
1194 if fdict.get('acodec') is not None:
1197 if fdict['acodec'] == 'none':
1200 res += '%-5s' % fdict['acodec']
1201 elif fdict.get('abr') is not None:
1205 if fdict.get('abr') is not None:
1206 res += '@%3dk' % fdict['abr']
1207 if fdict.get('asr') is not None:
1208 res += ' (%5dHz)' % fdict['asr']
1209 if fdict.get('filesize') is not None:
1212 res += format_bytes(fdict['filesize'])
1213 elif fdict.get('filesize_approx') is not None:
1216 res += '~' + format_bytes(fdict['filesize_approx'])
def list_formats(self, info_dict):
    """Print a table of the formats available for a video."""
    def line(format, idlen=20):
        # One row: format id, extension, resolution, note.
        return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            self._format_note(format),
        ))

    formats = info_dict.get('formats', [info_dict])
    # The id column must fit both the header text and the longest id.
    idlen = max(len('format code'),
                max(len(f['format_id']) for f in formats))
    formats_s = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        # Formats are ordered worst-to-best by the extraction code.
        formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'

    header_line = line({
        'format_id': 'format code', 'ext': 'extension',
        'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
    self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, '\n'.join(formats_s)))
def urlopen(self, req):
    """Open *req* through the configured opener, applying the
    instance's socket timeout."""
    opener = self._opener
    timeout = self._socket_timeout
    return opener.open(req, timeout=timeout)
def print_debug_header(self):
    """Write the [debug] header (versions, encodings, git HEAD, proxy
    map) when the 'verbose' option is set; otherwise do nothing."""
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            sys.stdout.encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        # Best effort: show the git commit when running from a checkout.
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        # Never let debug output break the program.
        try:
            sys.exc_clear()
        except Exception:
            pass
    self._write_string('[debug] Python version %s - %s' %
                       (platform.python_version(), platform_name()) + '\n')

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
def _setup_opener(self):
    """Build self._opener (cookies, proxies, HTTPS handling) and
    self._socket_timeout from self.params."""
    timeout_val = self.params.get('socket_timeout')
    self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
            opts_cookiefile)
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)
    if opts_proxy is not None:
        # An explicit empty --proxy disables proxying entirely.
        if opts_proxy == '':
            proxies = {}
        else:
            proxies = {'http': opts_proxy, 'https': opts_proxy}
    else:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(debuglevel=debuglevel)
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, ydlh)
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
def encode(self, s):
    """Encode text *s* to bytes with the configured output encoding.

    Bytes pass through unchanged; a UnicodeEncodeError is augmented
    with a hint about --encoding and re-raised.
    """
    if isinstance(s, bytes):
        return s  # Already encoded

    try:
        return s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
        raise
1338 def get_encoding(self):
1339 encoding = self.params.get('encoding')
1340 if encoding is None:
1341 encoding = preferredencoding()