2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
30 compat_urllib_request,
52 UnavailableVideoError,
59 from .extractor import get_info_extractor, gen_extractors
60 from .downloader import get_suitable_downloader
61 from .postprocessor import FFmpegMergerPP
62 from .version import __version__
65 class YoutubeDL(object):
68     YoutubeDL objects are the ones responsible for downloading the
69 actual video file and writing it to disk if the user has requested
70 it, among some other tasks. In most cases there should be one per
71 program. As, given a video URL, the downloader doesn't know how to
72 extract all the needed information, task that InfoExtractors do, it
73 has to pass the URL to one of them.
75 For this, YoutubeDL objects have a method that allows
76 InfoExtractors to be registered in a given order. When it is passed
77     a URL, the YoutubeDL object hands it to the first InfoExtractor it
78 finds that reports being able to handle it. The InfoExtractor extracts
79 all the information about the video or videos the URL refers to, and
80     YoutubeDL processes the extracted information, possibly using a File
81 Downloader to download the video.
83 YoutubeDL objects accept a lot of parameters. In order not to saturate
84 the object constructor with arguments, it receives a dictionary of
85 options instead. These options are available through the params
86 attribute for the InfoExtractors to use. The YoutubeDL also
87 registers itself as the downloader in charge for the InfoExtractors
88 that are added to it, so this is a "mutual registration".
92 username: Username for authentication purposes.
93 password: Password for authentication purposes.
94     videopassword:     Password for accessing a video.
95 usenetrc: Use netrc for authentication instead.
96 verbose: Print additional info to stdout.
97 quiet: Do not print messages to stdout.
98 no_warnings: Do not print out anything for warnings.
99 forceurl: Force printing final URL.
100 forcetitle: Force printing title.
101 forceid: Force printing ID.
102 forcethumbnail: Force printing thumbnail URL.
103 forcedescription: Force printing description.
104 forcefilename: Force printing final filename.
105 forceduration: Force printing duration.
106 forcejson: Force printing info_dict as JSON.
107 simulate: Do not download the video files.
108 format: Video format code.
109 format_limit: Highest quality format to try.
110 outtmpl: Template for output names.
111 restrictfilenames: Do not allow "&" and spaces in file names
112 ignoreerrors: Do not stop on download errors.
113 nooverwrites: Prevent overwriting files.
114 playliststart: Playlist item to start at.
115 playlistend: Playlist item to end at.
116 matchtitle: Download only matching titles.
117 rejecttitle: Reject downloads for matching titles.
118 logger: Log messages to a logging.Logger instance.
119 logtostderr: Log messages to stderr instead of stdout.
120 writedescription: Write the video description to a .description file
121 writeinfojson: Write the video description to a .info.json file
122 writeannotations: Write the video annotations to a .annotations.xml file
123 writethumbnail: Write the thumbnail image to a file
124 writesubtitles: Write the video subtitles to a file
125 writeautomaticsub: Write the automatic subtitles to a file
126 allsubtitles: Downloads all the subtitles of the video
127 (requires writesubtitles or writeautomaticsub)
128 listsubtitles: Lists all available subtitles for the video
129 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
130 subtitleslangs: List of languages of the subtitles to download
131 keepvideo: Keep the video file after post-processing
132 daterange: A DateRange object, download only if the upload_date is in the range.
133 skip_download: Skip the actual download of the video file
134 cachedir: Location of the cache files in the filesystem.
135 None to disable filesystem cache.
136 noplaylist: Download single video instead of a playlist if in doubt.
137 age_limit: An integer representing the user's age in years.
138 Unsuitable videos for the given age are skipped.
139 min_views: An integer representing the minimum view count the video
140 must have in order to not be skipped.
141 Videos without view count information are always
142 downloaded. None for no limit.
143 max_views: An integer representing the maximum view count.
144 Videos that are more popular than that are not
146 Videos without view count information are always
147 downloaded. None for no limit.
148 download_archive: File name of a file where all downloads are recorded.
149 Videos already present in the file are not downloaded
151 cookiefile: File name where cookies should be read from and dumped to.
152 nocheckcertificate:Do not verify SSL certificates
153 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
154 At the moment, this is only supported by YouTube.
155 proxy: URL of the proxy server to use
156 socket_timeout: Time to wait for unresponsive hosts, in seconds
157 bidi_workaround: Work around buggy terminals without bidirectional text
158                        support, using fribidi
159 debug_printtraffic:Print out sent and received HTTP traffic
160 include_ads: Download ads as well
161 default_search: Prepend this string if an input url is not valid.
162 'auto' for elaborate guessing
163 encoding: Use this encoding instead of the system-specified.
165 The following parameters are not used by YoutubeDL itself, they are used by
167 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
168 noresizebuffer, retries, continuedl, noprogress, consoletitle
170 The following options are used by the post processors:
171 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
172 otherwise prefer avconv.
178 _download_retcode = None
179 _num_downloads = None
182 def __init__(self, params=None):
183 """Create a FileDownloader object with the given options."""
187 self._ies_instances = {}
189 self._progress_hooks = []
190 self._download_retcode = 0
191 self._num_downloads = 0
192 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
193 self._err_file = sys.stderr
196 if params.get('bidi_workaround', False):
199 master, slave = pty.openpty()
200 width = get_term_width()
204 width_args = ['-w', str(width)]
206 stdin=subprocess.PIPE,
208 stderr=self._err_file)
210 self._output_process = subprocess.Popen(
211 ['bidiv'] + width_args, **sp_kwargs
214 self._output_process = subprocess.Popen(
215 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
216 self._output_channel = os.fdopen(master, 'rb')
217 except OSError as ose:
219 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
223 if (sys.version_info >= (3,) and sys.platform != 'win32' and
224 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
225 and not params['restrictfilenames']):
226 # On Python 3, the Unicode filesystem API will throw errors (#1474)
228 'Assuming --restrict-filenames since file system encoding '
229 'cannot encode all charactes. '
230 'Set the LC_ALL environment variable to fix this.')
231 self.params['restrictfilenames'] = True
233 if '%(stitle)s' in self.params.get('outtmpl', ''):
234 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
238 def add_info_extractor(self, ie):
239 """Add an InfoExtractor object to the end of the list."""
241 self._ies_instances[ie.ie_key()] = ie
242 ie.set_downloader(self)
244 def get_info_extractor(self, ie_key):
246 Get an instance of an IE with name ie_key, it will try to get one from
247 the _ies list, if there's no instance it will create a new one and add
248 it to the extractor list.
250 ie = self._ies_instances.get(ie_key)
252 ie = get_info_extractor(ie_key)()
253 self.add_info_extractor(ie)
256 def add_default_info_extractors(self):
258 Add the InfoExtractors returned by gen_extractors to the end of the list
260 for ie in gen_extractors():
261 self.add_info_extractor(ie)
263 def add_post_processor(self, pp):
264 """Add a PostProcessor object to the end of the chain."""
266 pp.set_downloader(self)
268 def add_progress_hook(self, ph):
269 """Add the progress hook (currently only for the file downloader)"""
270 self._progress_hooks.append(ph)
272 def _bidi_workaround(self, message):
273 if not hasattr(self, '_output_channel'):
276 assert hasattr(self, '_output_process')
277 assert type(message) == type('')
278 line_count = message.count('\n') + 1
279 self._output_process.stdin.write((message + '\n').encode('utf-8'))
280 self._output_process.stdin.flush()
281 res = ''.join(self._output_channel.readline().decode('utf-8')
282 for _ in range(line_count))
283 return res[:-len('\n')]
285 def to_screen(self, message, skip_eol=False):
286 """Print message to stdout if not in quiet mode."""
287 return self.to_stdout(message, skip_eol, check_quiet=True)
289 def to_stdout(self, message, skip_eol=False, check_quiet=False):
290 """Print message to stdout if not in quiet mode."""
291 if self.params.get('logger'):
292 self.params['logger'].debug(message)
293 elif not check_quiet or not self.params.get('quiet', False):
294 message = self._bidi_workaround(message)
295 terminator = ['\n', ''][skip_eol]
296 output = message + terminator
298 write_string(output, self._screen_file)
300 def to_stderr(self, message):
301 """Print message to stderr."""
302 assert type(message) == type('')
303 if self.params.get('logger'):
304 self.params['logger'].error(message)
306 message = self._bidi_workaround(message)
307 output = message + '\n'
308 write_string(output, self._err_file)
310 def to_console_title(self, message):
311 if not self.params.get('consoletitle', False):
313 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
314 # c_wchar_p() might not be necessary if `message` is
315 # already of type unicode()
316 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
317 elif 'TERM' in os.environ:
318 write_string('\033]0;%s\007' % message, self._screen_file)
320 def save_console_title(self):
321 if not self.params.get('consoletitle', False):
323 if 'TERM' in os.environ:
324 # Save the title on stack
325 write_string('\033[22;0t', self._screen_file)
327 def restore_console_title(self):
328 if not self.params.get('consoletitle', False):
330 if 'TERM' in os.environ:
331 # Restore the title from stack
332 write_string('\033[23;0t', self._screen_file)
335 self.save_console_title()
338 def __exit__(self, *args):
339 self.restore_console_title()
341 if self.params.get('cookiefile') is not None:
342 self.cookiejar.save()
344 def trouble(self, message=None, tb=None):
345 """Determine action to take when a download problem appears.
347 Depending on if the downloader has been configured to ignore
348 download errors or not, this method may throw an exception or
349 not when errors are found, after printing the message.
351 tb, if given, is additional traceback information.
353 if message is not None:
354 self.to_stderr(message)
355 if self.params.get('verbose'):
357 if sys.exc_info()[0]: # if .trouble has been called from an except block
359 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
360 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
361 tb += compat_str(traceback.format_exc())
363 tb_data = traceback.format_list(traceback.extract_stack())
364 tb = ''.join(tb_data)
366 if not self.params.get('ignoreerrors', False):
367 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
368 exc_info = sys.exc_info()[1].exc_info
370 exc_info = sys.exc_info()
371 raise DownloadError(message, exc_info)
372 self._download_retcode = 1
374 def report_warning(self, message):
376 Print the message to stderr, it will be prefixed with 'WARNING:'
377 If stderr is a tty file the 'WARNING:' will be colored
379 if self.params.get('logger') is not None:
380 self.params['logger'].warning(message)
382 if self.params.get('no_warnings'):
384 if self._err_file.isatty() and os.name != 'nt':
385 _msg_header = '\033[0;33mWARNING:\033[0m'
387 _msg_header = 'WARNING:'
388 warning_message = '%s %s' % (_msg_header, message)
389 self.to_stderr(warning_message)
391 def report_error(self, message, tb=None):
393 Do the same as trouble, but prefixes the message with 'ERROR:', colored
394 in red if stderr is a tty file.
396 if self._err_file.isatty() and os.name != 'nt':
397 _msg_header = '\033[0;31mERROR:\033[0m'
399 _msg_header = 'ERROR:'
400 error_message = '%s %s' % (_msg_header, message)
401 self.trouble(error_message, tb)
403 def report_file_already_downloaded(self, file_name):
404 """Report file has already been fully downloaded."""
406 self.to_screen('[download] %s has already been downloaded' % file_name)
407 except UnicodeEncodeError:
408 self.to_screen('[download] The file has already been downloaded')
410 def prepare_filename(self, info_dict):
411 """Generate the output filename."""
413 template_dict = dict(info_dict)
415 template_dict['epoch'] = int(time.time())
416 autonumber_size = self.params.get('autonumber_size')
417 if autonumber_size is None:
419 autonumber_templ = '%0' + str(autonumber_size) + 'd'
420 template_dict['autonumber'] = autonumber_templ % self._num_downloads
421 if template_dict.get('playlist_index') is not None:
422 template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
423 if template_dict.get('resolution') is None:
424 if template_dict.get('width') and template_dict.get('height'):
425 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
426 elif template_dict.get('height'):
427 template_dict['resolution'] = '%sp' % template_dict['height']
428 elif template_dict.get('width'):
429 template_dict['resolution'] = '?x%d' % template_dict['width']
431 sanitize = lambda k, v: sanitize_filename(
433 restricted=self.params.get('restrictfilenames'),
435 template_dict = dict((k, sanitize(k, v))
436 for k, v in template_dict.items()
438 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
440 tmpl = os.path.expanduser(self.params['outtmpl'])
441 filename = tmpl % template_dict
443 except ValueError as err:
444 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
447 def _match_entry(self, info_dict):
448 """ Returns None iff the file should be downloaded """
450 video_title = info_dict.get('title', info_dict.get('id', 'video'))
451 if 'title' in info_dict:
452 # This can happen when we're just evaluating the playlist
453 title = info_dict['title']
454 matchtitle = self.params.get('matchtitle', False)
456 if not re.search(matchtitle, title, re.IGNORECASE):
457 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
458 rejecttitle = self.params.get('rejecttitle', False)
460 if re.search(rejecttitle, title, re.IGNORECASE):
461 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
462 date = info_dict.get('upload_date', None)
464 dateRange = self.params.get('daterange', DateRange())
465 if date not in dateRange:
466 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
467 view_count = info_dict.get('view_count', None)
468 if view_count is not None:
469 min_views = self.params.get('min_views')
470 if min_views is not None and view_count < min_views:
471 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
472 max_views = self.params.get('max_views')
473 if max_views is not None and view_count > max_views:
474 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
475 age_limit = self.params.get('age_limit')
476 if age_limit is not None:
477 if age_limit < info_dict.get('age_limit', 0):
478 return 'Skipping "' + title + '" because it is age restricted'
479 if self.in_download_archive(info_dict):
480 return '%s has already been recorded in archive' % video_title
484 def add_extra_info(info_dict, extra_info):
485 '''Set the keys from extra_info in info dict if they are missing'''
486 for key, value in extra_info.items():
487 info_dict.setdefault(key, value)
489 def extract_info(self, url, download=True, ie_key=None, extra_info={},
492 Returns a list with a dictionary for each video we find.
493 If 'download', also downloads the videos.
494 extra_info is a dict containing the extra values to add to each result
498 ies = [self.get_info_extractor(ie_key)]
503 if not ie.suitable(url):
507 self.report_warning('The program functionality for this site has been marked as broken, '
508 'and will probably not work.')
511 ie_result = ie.extract(url)
512 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
514 if isinstance(ie_result, list):
515 # Backwards compatibility: old IE result format
517 '_type': 'compat_list',
518 'entries': ie_result,
520 self.add_default_extra_info(ie_result, ie, url)
522 return self.process_ie_result(ie_result, download, extra_info)
525 except ExtractorError as de: # An error we somewhat expected
526 self.report_error(compat_str(de), de.format_traceback())
528 except MaxDownloadsReached:
530 except Exception as e:
531 if self.params.get('ignoreerrors', False):
532 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
537 self.report_error('no suitable InfoExtractor for URL %s' % url)
539 def add_default_extra_info(self, ie_result, ie, url):
540 self.add_extra_info(ie_result, {
541 'extractor': ie.IE_NAME,
543 'webpage_url_basename': url_basename(url),
544 'extractor_key': ie.ie_key(),
547 def process_ie_result(self, ie_result, download=True, extra_info={}):
549 Take the result of the ie(may be modified) and resolve all unresolved
550 references (URLs, playlist items).
552 It will also download the videos if 'download'.
553 Returns the resolved ie_result.
556 result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
557 if result_type == 'video':
558 self.add_extra_info(ie_result, extra_info)
559 return self.process_video_result(ie_result, download=download)
560 elif result_type == 'url':
561 # We have to add extra_info to the results because it may be
562 # contained in a playlist
563 return self.extract_info(ie_result['url'],
565 ie_key=ie_result.get('ie_key'),
566 extra_info=extra_info)
567 elif result_type == 'url_transparent':
568 # Use the information from the embedding page
569 info = self.extract_info(
570 ie_result['url'], ie_key=ie_result.get('ie_key'),
571 extra_info=extra_info, download=False, process=False)
573 def make_result(embedded_info):
574 new_result = ie_result.copy()
575 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
576 'entries', 'ie_key', 'duration',
577 'subtitles', 'annotations', 'format',
578 'thumbnail', 'thumbnails'):
581 if f in embedded_info:
582 new_result[f] = embedded_info[f]
584 new_result = make_result(info)
586 assert new_result.get('_type') != 'url_transparent'
587 if new_result.get('_type') == 'compat_list':
588 new_result['entries'] = [
589 make_result(e) for e in new_result['entries']]
591 return self.process_ie_result(
592 new_result, download=download, extra_info=extra_info)
593 elif result_type == 'playlist':
594 # We process each entry in the playlist
595 playlist = ie_result.get('title', None) or ie_result.get('id', None)
596 self.to_screen('[download] Downloading playlist: %s' % playlist)
598 playlist_results = []
600 playliststart = self.params.get('playliststart', 1) - 1
601 playlistend = self.params.get('playlistend', None)
602 # For backwards compatibility, interpret -1 as whole list
603 if playlistend == -1:
606 if isinstance(ie_result['entries'], list):
607 n_all_entries = len(ie_result['entries'])
608 entries = ie_result['entries'][playliststart:playlistend]
609 n_entries = len(entries)
611 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
612 (ie_result['extractor'], playlist, n_all_entries, n_entries))
614 assert isinstance(ie_result['entries'], PagedList)
615 entries = ie_result['entries'].getslice(
616 playliststart, playlistend)
617 n_entries = len(entries)
619 "[%s] playlist %s: Downloading %d videos" %
620 (ie_result['extractor'], playlist, n_entries))
622 for i, entry in enumerate(entries, 1):
623 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
625 'playlist': playlist,
626 'playlist_index': i + playliststart,
627 'extractor': ie_result['extractor'],
628 'webpage_url': ie_result['webpage_url'],
629 'webpage_url_basename': url_basename(ie_result['webpage_url']),
630 'extractor_key': ie_result['extractor_key'],
633 reason = self._match_entry(entry)
634 if reason is not None:
635 self.to_screen('[download] ' + reason)
638 entry_result = self.process_ie_result(entry,
641 playlist_results.append(entry_result)
642 ie_result['entries'] = playlist_results
644 elif result_type == 'compat_list':
646 self.add_extra_info(r,
648 'extractor': ie_result['extractor'],
649 'webpage_url': ie_result['webpage_url'],
650 'webpage_url_basename': url_basename(ie_result['webpage_url']),
651 'extractor_key': ie_result['extractor_key'],
654 ie_result['entries'] = [
655 self.process_ie_result(_fixup(r), download, extra_info)
656 for r in ie_result['entries']
660 raise Exception('Invalid result type: %s' % result_type)
662 def select_format(self, format_spec, available_formats):
663 if format_spec == 'best' or format_spec is None:
664 return available_formats[-1]
665 elif format_spec == 'worst':
666 return available_formats[0]
667 elif format_spec == 'bestaudio':
669 f for f in available_formats
670 if f.get('vcodec') == 'none']
672 return audio_formats[-1]
673 elif format_spec == 'worstaudio':
675 f for f in available_formats
676 if f.get('vcodec') == 'none']
678 return audio_formats[0]
679 elif format_spec == 'bestvideo':
681 f for f in available_formats
682 if f.get('acodec') == 'none']
684 return video_formats[-1]
685 elif format_spec == 'worstvideo':
687 f for f in available_formats
688 if f.get('acodec') == 'none']
690 return video_formats[0]
692 extensions = ['mp4', 'flv', 'webm', '3gp']
693 if format_spec in extensions:
694 filter_f = lambda f: f['ext'] == format_spec
696 filter_f = lambda f: f['format_id'] == format_spec
697 matches = list(filter(filter_f, available_formats))
702 def process_video_result(self, info_dict, download=True):
703 assert info_dict.get('_type', 'video') == 'video'
705 if 'id' not in info_dict:
706 raise ExtractorError('Missing "id" field in extractor result')
707 if 'title' not in info_dict:
708 raise ExtractorError('Missing "title" field in extractor result')
710 if 'playlist' not in info_dict:
711 # It isn't part of a playlist
712 info_dict['playlist'] = None
713 info_dict['playlist_index'] = None
715 if 'display_id' not in info_dict and 'id' in info_dict:
716 info_dict['display_id'] = info_dict['id']
718 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
719 upload_date = datetime.datetime.utcfromtimestamp(
720 info_dict['timestamp'])
721 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
723 # This extractors handle format selection themselves
724 if info_dict['extractor'] in ['Youku']:
726 self.process_info(info_dict)
729 # We now pick which formats have to be downloaded
730 if info_dict.get('formats') is None:
731 # There's only one format available
732 formats = [info_dict]
734 formats = info_dict['formats']
737 raise ExtractorError('No video formats found!')
739 # We check that all the formats have the format and format_id fields
740 for i, format in enumerate(formats):
741 if 'url' not in format:
742 raise ExtractorError('Missing "url" key in result (index %d)' % i)
744 if format.get('format_id') is None:
745 format['format_id'] = compat_str(i)
746 if format.get('format') is None:
747 format['format'] = '{id} - {res}{note}'.format(
748 id=format['format_id'],
749 res=self.format_resolution(format),
750 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
752 # Automatically determine file extension if missing
753 if 'ext' not in format:
754 format['ext'] = determine_ext(format['url']).lower()
756 format_limit = self.params.get('format_limit', None)
758 formats = list(takewhile_inclusive(
759 lambda f: f['format_id'] != format_limit, formats
762 # TODO Central sorting goes here
764 if formats[0] is not info_dict:
765 # only set the 'formats' fields if the original info_dict list them
766 # otherwise we end up with a circular reference, the first (and unique)
767 # element in the 'formats' field in info_dict is info_dict itself,
768 # wich can't be exported to json
769 info_dict['formats'] = formats
770 if self.params.get('listformats', None):
771 self.list_formats(info_dict)
774 req_format = self.params.get('format')
775 if req_format is None:
777 formats_to_download = []
778 # The -1 is for supporting YoutubeIE
779 if req_format in ('-1', 'all'):
780 formats_to_download = formats
782 # We can accept formats requested in the format: 34/5/best, we pick
783 # the first that is available, starting from left
784 req_formats = req_format.split('/')
785 for rf in req_formats:
786 if re.match(r'.+?\+.+?', rf) is not None:
787 # Two formats have been requested like '137+139'
788 format_1, format_2 = rf.split('+')
789 formats_info = (self.select_format(format_1, formats),
790 self.select_format(format_2, formats))
791 if all(formats_info):
793 'requested_formats': formats_info,
795 'ext': formats_info[0]['ext'],
798 selected_format = None
800 selected_format = self.select_format(rf, formats)
801 if selected_format is not None:
802 formats_to_download = [selected_format]
804 if not formats_to_download:
805 raise ExtractorError('requested format not available',
809 if len(formats_to_download) > 1:
810 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
811 for format in formats_to_download:
812 new_info = dict(info_dict)
813 new_info.update(format)
814 self.process_info(new_info)
815 # We update the info dict with the best quality format (backwards compatibility)
816 info_dict.update(formats_to_download[-1])
819 def process_info(self, info_dict):
820 """Process a single resolved IE result."""
822 assert info_dict.get('_type', 'video') == 'video'
824 max_downloads = self.params.get('max_downloads')
825 if max_downloads is not None:
826 if self._num_downloads >= int(max_downloads):
827 raise MaxDownloadsReached()
829 info_dict['fulltitle'] = info_dict['title']
830 if len(info_dict['title']) > 200:
831 info_dict['title'] = info_dict['title'][:197] + '...'
833 # Keep for backwards compatibility
834 info_dict['stitle'] = info_dict['title']
836 if not 'format' in info_dict:
837 info_dict['format'] = info_dict['ext']
839 reason = self._match_entry(info_dict)
840 if reason is not None:
841 self.to_screen('[download] ' + reason)
844 self._num_downloads += 1
846 filename = self.prepare_filename(info_dict)
849 if self.params.get('forcetitle', False):
850 self.to_stdout(info_dict['fulltitle'])
851 if self.params.get('forceid', False):
852 self.to_stdout(info_dict['id'])
853 if self.params.get('forceurl', False):
854 # For RTMP URLs, also include the playpath
855 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
856 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
857 self.to_stdout(info_dict['thumbnail'])
858 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
859 self.to_stdout(info_dict['description'])
860 if self.params.get('forcefilename', False) and filename is not None:
861 self.to_stdout(filename)
862 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
863 self.to_stdout(formatSeconds(info_dict['duration']))
864 if self.params.get('forceformat', False):
865 self.to_stdout(info_dict['format'])
866 if self.params.get('forcejson', False):
867 info_dict['_filename'] = filename
868 self.to_stdout(json.dumps(info_dict))
870 # Do nothing else if in simulate mode
871 if self.params.get('simulate', False):
878 dn = os.path.dirname(encodeFilename(filename))
879 if dn and not os.path.exists(dn):
881 except (OSError, IOError) as err:
882 self.report_error('unable to create directory ' + compat_str(err))
885 if self.params.get('writedescription', False):
886 descfn = filename + '.description'
887 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
888 self.to_screen('[info] Video description is already present')
891 self.to_screen('[info] Writing video description to: ' + descfn)
892 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
893 descfile.write(info_dict['description'])
894 except (KeyError, TypeError):
895 self.report_warning('There\'s no description to write.')
896 except (OSError, IOError):
897 self.report_error('Cannot write description file ' + descfn)
900 if self.params.get('writeannotations', False):
901 annofn = filename + '.annotations.xml'
902 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
903 self.to_screen('[info] Video annotations are already present')
906 self.to_screen('[info] Writing video annotations to: ' + annofn)
907 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
908 annofile.write(info_dict['annotations'])
909 except (KeyError, TypeError):
910 self.report_warning('There are no annotations to write.')
911 except (OSError, IOError):
912 self.report_error('Cannot write annotations file: ' + annofn)
915 subtitles_are_requested = any([self.params.get('writesubtitles', False),
916 self.params.get('writeautomaticsub')])
918 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
919 # subtitles download errors are already managed as troubles in relevant IE
920 # that way it will silently go on when used with unsupporting IE
921 subtitles = info_dict['subtitles']
922 sub_format = self.params.get('subtitlesformat', 'srt')
923 for sub_lang in subtitles.keys():
924 sub = subtitles[sub_lang]
928 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
929 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
930 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
932 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
933 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
935 except (OSError, IOError):
936 self.report_error('Cannot write subtitles file ' + descfn)
939 if self.params.get('writeinfojson', False):
940 infofn = os.path.splitext(filename)[0] + '.info.json'
941 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
942 self.to_screen('[info] Video description metadata is already present')
944 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
946 write_json_file(info_dict, encodeFilename(infofn))
947 except (OSError, IOError):
948 self.report_error('Cannot write metadata to JSON file ' + infofn)
951 if self.params.get('writethumbnail', False):
952 if info_dict.get('thumbnail') is not None:
953 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
954 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
955 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
956 self.to_screen('[%s] %s: Thumbnail is already present' %
957 (info_dict['extractor'], info_dict['id']))
959 self.to_screen('[%s] %s: Downloading thumbnail ...' %
960 (info_dict['extractor'], info_dict['id']))
962 uf = self.urlopen(info_dict['thumbnail'])
963 with open(thumb_filename, 'wb') as thumbf:
964 shutil.copyfileobj(uf, thumbf)
965 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
966 (info_dict['extractor'], info_dict['id'], thumb_filename))
967 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
968 self.report_warning('Unable to download thumbnail "%s": %s' %
969 (info_dict['thumbnail'], compat_str(err)))
971 if not self.params.get('skip_download', False):
972 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
977 fd = get_suitable_downloader(info)(self, self.params)
978 for ph in self._progress_hooks:
979 fd.add_progress_hook(ph)
980 return fd.download(name, info)
981 if info_dict.get('requested_formats') is not None:
984 merger = FFmpegMergerPP(self)
985 if not merger._get_executable():
987 self.report_warning('You have requested multiple '
988 'formats but ffmpeg or avconv are not installed.'
989 ' The formats won\'t be merged')
991 postprocessors = [merger]
992 for f in info_dict['requested_formats']:
993 new_info = dict(info_dict)
995 fname = self.prepare_filename(new_info)
996 fname = prepend_extension(fname, 'f%s' % f['format_id'])
997 downloaded.append(fname)
998 partial_success = dl(fname, new_info)
999 success = success and partial_success
1000 info_dict['__postprocessors'] = postprocessors
1001 info_dict['__files_to_merge'] = downloaded
1003 # Just a single file
1004 success = dl(filename, info_dict)
1005 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1006 self.report_error('unable to download video data: %s' % str(err))
1008 except (OSError, IOError) as err:
1009 raise UnavailableVideoError(err)
1010 except (ContentTooShortError, ) as err:
1011 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1016 self.post_process(filename, info_dict)
1017 except (PostProcessingError) as err:
1018 self.report_error('postprocessing: %s' % str(err))
1021 self.record_download_archive(info_dict)
1023 def download(self, url_list):
1024 """Download a given list of URLs."""
1025 if (len(url_list) > 1 and
1026 '%' not in self.params['outtmpl']
1027 and self.params.get('max_downloads') != 1):
1028 raise SameFileError(self.params['outtmpl'])
1030 for url in url_list:
1032 #It also downloads the videos
1033 self.extract_info(url)
1034 except UnavailableVideoError:
1035 self.report_error('unable to download video')
1036 except MaxDownloadsReached:
1037 self.to_screen('[info] Maximum number of downloaded files reached.')
1040 return self._download_retcode
1042 def download_with_info_file(self, info_filename):
1043 with io.open(info_filename, 'r', encoding='utf-8') as f:
1046 self.process_ie_result(info, download=True)
1047 except DownloadError:
1048 webpage_url = info.get('webpage_url')
1049 if webpage_url is not None:
1050 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1051 return self.download([webpage_url])
1054 return self._download_retcode
1056 def post_process(self, filename, ie_info):
1057 """Run all the postprocessors on the given file."""
1058 info = dict(ie_info)
1059 info['filepath'] = filename
1062 if ie_info.get('__postprocessors') is not None:
1063 pps_chain.extend(ie_info['__postprocessors'])
1064 pps_chain.extend(self._pps)
1065 for pp in pps_chain:
1067 keep_video_wish, new_info = pp.run(info)
1068 if keep_video_wish is not None:
1070 keep_video = keep_video_wish
1071 elif keep_video is None:
1072 # No clear decision yet, let IE decide
1073 keep_video = keep_video_wish
1074 except PostProcessingError as e:
1075 self.report_error(e.msg)
1076 if keep_video is False and not self.params.get('keepvideo', False):
1078 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1079 os.remove(encodeFilename(filename))
1080 except (IOError, OSError):
1081 self.report_warning('Unable to remove downloaded video file')
1083 def _make_archive_id(self, info_dict):
1084 # Future-proof against any change in case
1085 # and backwards compatibility with prior versions
1086 extractor = info_dict.get('extractor_key')
1087 if extractor is None:
1088 if 'id' in info_dict:
1089 extractor = info_dict.get('ie_key') # key in a playlist
1090 if extractor is None:
1091 return None # Incomplete video information
1092 return extractor.lower() + ' ' + info_dict['id']
1094 def in_download_archive(self, info_dict):
1095 fn = self.params.get('download_archive')
1099 vid_id = self._make_archive_id(info_dict)
1101 return False # Incomplete video information
1104 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1105 for line in archive_file:
1106 if line.strip() == vid_id:
1108 except IOError as ioe:
1109 if ioe.errno != errno.ENOENT:
1113 def record_download_archive(self, info_dict):
1114 fn = self.params.get('download_archive')
1117 vid_id = self._make_archive_id(info_dict)
1119 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1120 archive_file.write(vid_id + '\n')
1123 def format_resolution(format, default='unknown'):
1124 if format.get('vcodec') == 'none':
1126 if format.get('resolution') is not None:
1127 return format['resolution']
1128 if format.get('height') is not None:
1129 if format.get('width') is not None:
1130 res = '%sx%s' % (format['width'], format['height'])
1132 res = '%sp' % format['height']
1133 elif format.get('width') is not None:
1134 res = '?x%d' % format['width']
    def list_formats(self, info_dict):
        # Pretty-print a table of the available formats of info_dict
        # (id, extension, resolution, free-text note) to the screen.
        # NOTE(review): several interior lines of this method are not
        # visible in this chunk; the structure below is partial.
        def format_note(fdict):
            # Build a short free-text note for one format dict: flags,
            # bitrates, container, codecs, sample rate and filesize.
            if fdict.get('ext') in ['f4f', 'f4m']:
                res += '(unsupported) '
            if fdict.get('format_note') is not None:
                res += fdict['format_note'] + ' '
            if fdict.get('tbr') is not None:
                # total bitrate, right-aligned in kbit/s
                res += '%4dk ' % fdict['tbr']
            if fdict.get('container') is not None:
                res += '%s container' % fdict['container']
            if (fdict.get('vcodec') is not None and
                    fdict.get('vcodec') != 'none'):
                res += fdict['vcodec']
                if fdict.get('vbr') is not None:
            elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            if fdict.get('vbr') is not None:
                res += '%4dk' % fdict['vbr']
            if fdict.get('acodec') is not None:
                if fdict['acodec'] == 'none':
                    res += '%-5s' % fdict['acodec']
            elif fdict.get('abr') is not None:
            if fdict.get('abr') is not None:
                res += '@%3dk' % fdict['abr']
            if fdict.get('asr') is not None:
                # audio sample rate in Hz
                res += ' (%5dHz)' % fdict['asr']
            if fdict.get('filesize') is not None:
                res += format_bytes(fdict['filesize'])

        def line(format, idlen=20):
            # One table row; the format id column is padded to idlen chars.
            return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
                format['format_id'],
                self.format_resolution(format),
                format_note(format),

        # A dict without a 'formats' list is treated as its own only format.
        formats = info_dict.get('formats', [info_dict])
        idlen = max(len('format code'),
                    max(len(f['format_id']) for f in formats))
        formats_s = [line(f, idlen) for f in formats]
        if len(formats) > 1:
            # Formats are assumed sorted worst-to-best; tag the extremes.
            formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
            formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

        header_line = line({
            'format_id': 'format code', 'ext': 'extension',
            'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
        self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                       (info_dict['id'], header_line, '\n'.join(formats_s)))
1206 def urlopen(self, req):
1207 """ Start an HTTP download """
1208 return self._opener.open(req, timeout=self._socket_timeout)
    def print_debug_header(self):
        # Dump diagnostic info (encodings, version, git HEAD, Python
        # version/platform, proxy map); no-op unless 'verbose' is set.
        # NOTE(review): some interior lines of this method are not
        # visible in this chunk; the structure below is partial.
        if not self.params.get('verbose'):
        write_string('[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' %
                     (locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, self.get_encoding()))
        write_string('[debug] youtube-dl version ' + __version__ + '\n')
            # Best-effort: report the git commit when running from a checkout.
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                write_string('[debug] Git HEAD: ' + out + '\n')
        write_string('[debug] Python version %s - %s' %
                     (platform.python_version(), platform_name()) + '\n')
        # Collect the proxies of every opener handler that exposes them.
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
    def _setup_opener(self):
        # Build self._opener: cookie jar, proxy handling, HTTPS handler
        # and the project's YoutubeDLHandler, wired into one urllib opener.
        # NOTE(review): some interior lines (else branches, call
        # continuations) are not visible in this chunk.
        timeout_val = self.params.get('socket_timeout')
        # 600 seconds is the default socket timeout when none is given.
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # No cookie file configured: keep cookies in memory only.
            self.cookiejar = compat_cookiejar.CookieJar()
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
            if os.access(opts_cookiefile, os.R_OK):
                # Load existing cookies only when the file is readable.
                self.cookiejar.load()

        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        if opts_proxy is not None:
            if opts_proxy == '':
                # Explicit proxy setting: use it for both http and https.
                proxies = {'http': opts_proxy, 'https': opts_proxy}
            # No explicit setting: fall back to environment proxies.
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = compat_urllib_request.ProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(
            self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(debuglevel=debuglevel)
        opener = compat_urllib_request.build_opener(
            https_handler, proxy_handler, cookie_processor, ydlh)
        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
1281 def encode(self, s):
1282 if isinstance(s, bytes):
1283 return s # Already encoded
1286 return s.encode(self.get_encoding())
1287 except UnicodeEncodeError as err:
1288 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1291 def get_encoding(self):
1292 encoding = self.params.get('encoding')
1293 if encoding is None:
1294 encoding = preferredencoding()