2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
31 compat_urllib_request,
57 UnavailableVideoError,
64 from .cache import Cache
65 from .extractor import get_info_extractor, gen_extractors
66 from .downloader import get_suitable_downloader
67 from .downloader.rtmp import rtmpdump_version
68 from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor
69 from .version import __version__
72 class YoutubeDL(object):
75 YoutubeDL objects are the ones responsible for downloading the
76 actual video file and writing it to disk if the user has requested
77 it, among some other tasks. In most cases there should be one per
78 program. As, given a video URL, the downloader doesn't know how to
79 extract all the needed information, task that InfoExtractors do, it
80 has to pass the URL to one of them.
82 For this, YoutubeDL objects have a method that allows
83 InfoExtractors to be registered in a given order. When it is passed
84 a URL, the YoutubeDL object handles it to the first InfoExtractor it
85 finds that reports being able to handle it. The InfoExtractor extracts
86 all the information about the video or videos the URL refers to, and
87 YoutubeDL processes the extracted information, possibly using a File
88 Downloader to download the video.
90 YoutubeDL objects accept a lot of parameters. In order not to saturate
91 the object constructor with arguments, it receives a dictionary of
92 options instead. These options are available through the params
93 attribute for the InfoExtractors to use. The YoutubeDL also
94 registers itself as the downloader in charge for the InfoExtractors
95 that are added to it, so this is a "mutual registration".
99 username: Username for authentication purposes.
100 password: Password for authentication purposes.
101 videopassword: Password for accessing a video.
102 usenetrc: Use netrc for authentication instead.
103 verbose: Print additional info to stdout.
104 quiet: Do not print messages to stdout.
105 no_warnings: Do not print out anything for warnings.
106 forceurl: Force printing final URL.
107 forcetitle: Force printing title.
108 forceid: Force printing ID.
109 forcethumbnail: Force printing thumbnail URL.
110 forcedescription: Force printing description.
111 forcefilename: Force printing final filename.
112 forceduration: Force printing duration.
113 forcejson: Force printing info_dict as JSON.
114 dump_single_json: Force printing the info_dict of the whole playlist
115 (or video) as a single JSON line.
116 simulate: Do not download the video files.
117 format: Video format code.
118 format_limit: Highest quality format to try.
119 outtmpl: Template for output names.
120 restrictfilenames: Do not allow "&" and spaces in file names
121 ignoreerrors: Do not stop on download errors.
122 nooverwrites: Prevent overwriting files.
123 playliststart: Playlist item to start at.
124 playlistend: Playlist item to end at.
125 matchtitle: Download only matching titles.
126 rejecttitle: Reject downloads for matching titles.
127 logger: Log messages to a logging.Logger instance.
128 logtostderr: Log messages to stderr instead of stdout.
129 writedescription: Write the video description to a .description file
130 writeinfojson: Write the video metadata to a .info.json file
131 writeannotations: Write the video annotations to a .annotations.xml file
132 writethumbnail: Write the thumbnail image to a file
133 writesubtitles: Write the video subtitles to a file
134 writeautomaticsub: Write the automatic subtitles to a file
135 allsubtitles: Downloads all the subtitles of the video
136 (requires writesubtitles or writeautomaticsub)
137 listsubtitles: Lists all available subtitles for the video
138 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
139 subtitleslangs: List of languages of the subtitles to download
140 keepvideo: Keep the video file after post-processing
141 daterange: A DateRange object, download only if the upload_date is in the range.
142 skip_download: Skip the actual download of the video file
143 cachedir: Location of the cache files in the filesystem.
144 False to disable filesystem cache.
145 noplaylist: Download single video instead of a playlist if in doubt.
146 age_limit: An integer representing the user's age in years.
147 Unsuitable videos for the given age are skipped.
148 min_views: An integer representing the minimum view count the video
149 must have in order to not be skipped.
150 Videos without view count information are always
151 downloaded. None for no limit.
152 max_views: An integer representing the maximum view count.
153 Videos that are more popular than that are not
155 Videos without view count information are always
156 downloaded. None for no limit.
157 download_archive: File name of a file where all downloads are recorded.
158 Videos already present in the file are not downloaded
160 cookiefile: File name where cookies should be read from and dumped to.
161 nocheckcertificate:Do not verify SSL certificates
162 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
163 At the moment, this is only supported by YouTube.
164 proxy: URL of the proxy server to use
165 socket_timeout: Time to wait for unresponsive hosts, in seconds
166 bidi_workaround: Work around buggy terminals without bidirectional text
167 support, using fribidi
168 debug_printtraffic:Print out sent and received HTTP traffic
169 include_ads: Download ads as well
170 default_search: Prepend this string if an input url is not valid.
171 'auto' for elaborate guessing
172 encoding: Use this encoding instead of the system-specified.
173 extract_flat: Do not resolve URLs, return the immediate result.
174 Pass in 'in_playlist' to only show this behavior for
177 The following parameters are not used by YoutubeDL itself, they are used by
179 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
180 noresizebuffer, retries, continuedl, noprogress, consoletitle
182 The following options are used by the post processors:
183 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
184 otherwise prefer avconv.
185 exec_cmd: Arbitrary command to run after downloading
191 _download_retcode = None
192 _num_downloads = None
# Constructor: initialises per-instance bookkeeping, optionally starts an
# external bidi helper process, forces 'restrictfilenames' on ASCII-only
# filesystems under Python 3, warns about the deprecated %(stitle)s template
# field and finally prints the debug header and registers the default
# extractors.
# NOTE(review): this listing is lossy -- statements are missing between several
# of the lines below (for example nothing visible assigns self.params although
# it is read further down, and the try/if/else headers of the bidi section are
# absent). Only what is shown is described; confirm against the full file
# before changing code here.
# NOTE(review): the docstring still says "FileDownloader"; the object being
# constructed is a YoutubeDL.
195 def __init__(self, params=None, auto_init=True):
196 """Create a FileDownloader object with the given options."""
# Cache of extractor instances, keyed by ie_key().
200 self._ies_instances = {}
202 self._progress_hooks = []
203 self._download_retcode = 0
204 self._num_downloads = 0
# Indexing the two-element list with the boolean selects stderr when
# 'logtostderr' is set and stdout otherwise.
205 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
206 self._err_file = sys.stderr
208 self.cache = Cache(self)
# Optional bidi workaround: a helper process ('bidiv', or 'fribidi' in the
# second Popen call) is started with a pipe as stdin; the master end of the
# pty is kept as _output_channel, which _bidi_workaround() reads from.
210 if params.get('bidi_workaround', False):
213 master, slave = pty.openpty()
214 width = get_term_width()
218 width_args = ['-w', str(width)]
220 stdin=subprocess.PIPE,
222 stderr=self._err_file)
224 self._output_process = subprocess.Popen(
225 ['bidiv'] + width_args, **sp_kwargs
228 self._output_process = subprocess.Popen(
229 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
230 self._output_channel = os.fdopen(master, 'rb')
231 except OSError as ose:
233 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
# Python 3 on a non-Windows platform with an ASCII filesystem encoding:
# 'restrictfilenames' is switched on unless the caller already set it.
237 if (sys.version_info >= (3,) and sys.platform != 'win32' and
238 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
239 and not params.get('restrictfilenames', False)):
240 # On Python 3, the Unicode filesystem API will throw errors (#1474)
242 'Assuming --restrict-filenames since file system encoding '
243 'cannot encode all characters. '
244 'Set the LC_ALL environment variable to fix this.')
245 self.params['restrictfilenames'] = True
247 if '%(stitle)s' in self.params.get('outtmpl', ''):
248 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
# NOTE(review): presumably guarded by auto_init -- the guard is not visible.
253 self.print_debug_header()
254 self.add_default_info_extractors()
# Registers *ie* in the instance cache under its ie_key() and tells the
# extractor that this object is its downloader.
# NOTE(review): the docstring speaks of a list, but no list append is visible
# in this listing (a line is missing after the docstring) -- confirm against
# the full file.
256 def add_info_extractor(self, ie):
257 """Add an InfoExtractor object to the end of the list."""
259 self._ies_instances[ie.ie_key()] = ie
260 ie.set_downloader(self)
def get_info_extractor(self, ie_key):
    """
    Return the InfoExtractor instance registered under ie_key.

    The per-key instance cache is consulted first. On a miss the extractor
    class is looked up with the module-level get_info_extractor(), a new
    instance is created, registered through add_info_extractor() and
    returned.
    """
    instance = self._ies_instances.get(ie_key)
    # restored: the "is None" test and the final return were dropped from the
    # listing; both follow from the docstring ("get ... or create a new one").
    if instance is not None:
        return instance
    instance = get_info_extractor(ie_key)()
    self.add_info_extractor(instance)
    return instance
def add_default_info_extractors(self):
    """
    Register every InfoExtractor yielded by gen_extractors(), in the order
    they are produced.
    """
    for extractor in gen_extractors():
        self.add_info_extractor(extractor)
# Tells the post processor *pp* that this object is its downloader.
# NOTE(review): the docstring speaks of a chain, but the statement that stores
# pp is not visible in this listing (a line is missing after the docstring) --
# confirm against the full file.
281 def add_post_processor(self, pp):
282 """Add a PostProcessor object to the end of the chain."""
284 pp.set_downloader(self)
def add_progress_hook(self, ph):
    """Register ph as a progress hook (currently only used by the file downloader)."""
    hooks = self._progress_hooks
    hooks.append(ph)
290 def _bidi_workaround(self, message):
291 if not hasattr(self, '_output_channel'):
294 assert hasattr(self, '_output_process')
295 assert isinstance(message, compat_str)
296 line_count = message.count('\n') + 1
297 self._output_process.stdin.write((message + '\n').encode('utf-8'))
298 self._output_process.stdin.flush()
299 res = ''.join(self._output_channel.readline().decode('utf-8')
300 for _ in range(line_count))
301 return res[:-len('\n')]
def to_screen(self, message, skip_eol=False):
    """Print message through to_stdout(), honouring the 'quiet' option."""
    result = self.to_stdout(message, skip_eol, check_quiet=True)
    return result
def _write_string(self, s, out=None):
    """Write s to out via write_string(), using the 'encoding' param if set."""
    configured_encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=configured_encoding)
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """
    Print message to the screen file.

    When a 'logger' param is configured the message goes to logger.debug()
    instead. When check_quiet is true and the 'quiet' param is set, nothing
    is printed. A newline is appended unless skip_eol is true.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return
    text = self._bidi_workaround(message)
    if not skip_eol:
        text = text + '\n'
    self._write_string(text, self._screen_file)
def to_stderr(self, message):
    """
    Print message to stderr, or to logger.error() when a 'logger' param is
    configured.
    """
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        # restored: the listing dropped the line separating the two paths;
        # an else-branch is assumed, matching to_stdout().
        return
    line = self._bidi_workaround(message) + '\n'
    self._write_string(line, self._err_file)
def to_console_title(self, message):
    """
    Set the console window title to message when 'consoletitle' is enabled.

    On Windows with a console window the Win32 API is used; otherwise, if
    TERM is set in the environment, an escape sequence is written to the
    screen file.
    """
    if not self.params.get('consoletitle', False):
        # restored: the body of this guard was dropped from the listing.
        return
    if os.name == 'nt':
        kernel32 = ctypes.windll.kernel32
        if kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
            return
    if 'TERM' in os.environ:
        self._write_string('\033]0;%s\007' % message, self._screen_file)
def save_console_title(self):
    """
    Push the current terminal title onto the terminal's title stack.

    Does nothing unless 'consoletitle' is enabled and TERM is set.
    """
    # restored: the early return of the original guard was dropped from the
    # listing; the two conditions are combined here instead.
    if self.params.get('consoletitle', False) and 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
def restore_console_title(self):
    """
    Pop the terminal title saved by save_console_title() off the title stack.

    Does nothing unless 'consoletitle' is enabled and TERM is set.
    """
    # restored: the early return of the original guard was dropped from the
    # listing; the two conditions are combined here instead.
    if self.params.get('consoletitle', False) and 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
356 self.save_console_title()
359 def __exit__(self, *args):
360 self.restore_console_title()
362 if self.params.get('cookiefile') is not None:
363 self.cookiejar.save()
def trouble(self, message=None, tb=None):
    """Decide what to do when a download problem appears.

    The message, if any, is printed to stderr. In verbose mode a traceback
    is printed as well: the given tb, or one built from the exception
    currently being handled (including the exc_info it wraps, if any), or
    the current call stack when no exception is active.

    Unless the 'ignoreerrors' param is set, DownloadError is raised
    afterwards; otherwise the download return code is set to 1.

    tb, if given, is additional traceback information.
    """
    if message is not None:
        self.to_stderr(message)

    active = sys.exc_info()

    if self.params.get('verbose'):
        # restored: "if tb is None", the initial empty tb and the final
        # to_stderr(tb) were dropped from the listing; they follow from the
        # tb parameter and from tb otherwise being computed but never used.
        if tb is None:
            if active[0]:  # if .trouble has been called from an except block
                pieces = []
                if hasattr(active[1], 'exc_info') and active[1].exc_info[0]:
                    pieces.extend(traceback.format_exception(*active[1].exc_info))
                pieces.append(compat_str(traceback.format_exc()))
                tb = ''.join(pieces)
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    # Prefer the exc_info carried by the active exception, when it has one.
    if active[0] and hasattr(active[1], 'exc_info') and active[1].exc_info[0]:
        exc_info = active[1].exc_info
    else:
        exc_info = active
    raise DownloadError(message, exc_info)
def report_warning(self, message):
    '''
    Emit a warning.

    With a 'logger' param the message goes to logger.warning(). Otherwise,
    unless 'no_warnings' is set, it is printed to stderr prefixed with
    'WARNING:', which is coloured when stderr is a tty (not on Windows).
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        # restored: the else-branch header was dropped from the listing.
        return
    if self.params.get('no_warnings'):
        return
    use_colour = self._err_file.isatty() and os.name != 'nt'
    header = '\033[0;33mWARNING:\033[0m' if use_colour else 'WARNING:'
    self.to_stderr('%s %s' % (header, message))
def report_error(self, message, tb=None):
    '''
    Hand the message to trouble() prefixed with 'ERROR:', which is coloured
    red when stderr is a tty (not on Windows).
    '''
    use_colour = self._err_file.isatty() and os.name != 'nt'
    header = '\033[0;31mERROR:\033[0m' if use_colour else 'ERROR:'
    self.trouble('%s %s' % (header, message), tb)
def report_file_already_downloaded(self, file_name):
    """
    Tell the user that file_name is already fully downloaded.

    Falls back to a message without the file name when printing the name
    raises UnicodeEncodeError.
    """
    # restored: the "try:" header was dropped from the listing; the except
    # clause below requires it.
    try:
        notice = '[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')
# Builds the output file name by applying the 'outtmpl' template (with the %
# operator) to a copy of info_dict enriched with a few derived fields.
# NOTE(review): this listing is lossy -- the try: header, the default for
# autonumber_size, part of the sanitize lambda and of the dict() call, and the
# return statements are not visible. Only what is shown is described.
431 def prepare_filename(self, info_dict):
432 """Generate the output filename."""
# Work on a copy so the caller's info_dict is not modified.
434 template_dict = dict(info_dict)
436 template_dict['epoch'] = int(time.time())
437 autonumber_size = self.params.get('autonumber_size')
438 if autonumber_size is None:
# 'autonumber' is the running download counter, zero-padded to
# autonumber_size digits.
440 autonumber_templ = '%0' + str(autonumber_size) + 'd'
441 template_dict['autonumber'] = autonumber_templ % self._num_downloads
# 'playlist_index' is zero-padded to the number of digits of 'n_entries'.
442 if template_dict.get('playlist_index') is not None:
443 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
# Derive 'resolution' from width/height when it is not given: 'WxH',
# 'Hp' (height only) or '?xW' (width only).
444 if template_dict.get('resolution') is None:
445 if template_dict.get('width') and template_dict.get('height'):
446 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
447 elif template_dict.get('height'):
448 template_dict['resolution'] = '%sp' % template_dict['height']
449 elif template_dict.get('width'):
450 template_dict['resolution'] = '?x%d' % template_dict['width']
# Values are passed through sanitize_filename(), honouring 'restrictfilenames'.
452 sanitize = lambda k, v: sanitize_filename(
454 restricted=self.params.get('restrictfilenames'),
456 template_dict = dict((k, sanitize(k, v))
457 for k, v in template_dict.items()
# Fields that are absent from the dict render as 'NA' in the template.
459 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
461 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
462 tmpl = compat_expanduser(outtmpl)
463 filename = tmpl % template_dict
# A malformed template surfaces as ValueError and is reported as an error.
465 except ValueError as err:
466 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
469 def _match_entry(self, info_dict):
470 """ Returns None iff the file should be downloaded """
472 video_title = info_dict.get('title', info_dict.get('id', 'video'))
473 if 'title' in info_dict:
474 # This can happen when we're just evaluating the playlist
475 title = info_dict['title']
476 matchtitle = self.params.get('matchtitle', False)
478 if not re.search(matchtitle, title, re.IGNORECASE):
479 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
480 rejecttitle = self.params.get('rejecttitle', False)
482 if re.search(rejecttitle, title, re.IGNORECASE):
483 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
484 date = info_dict.get('upload_date', None)
486 dateRange = self.params.get('daterange', DateRange())
487 if date not in dateRange:
488 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
489 view_count = info_dict.get('view_count', None)
490 if view_count is not None:
491 min_views = self.params.get('min_views')
492 if min_views is not None and view_count < min_views:
493 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
494 max_views = self.params.get('max_views')
495 if max_views is not None and view_count > max_views:
496 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
497 age_limit = self.params.get('age_limit')
498 if age_limit is not None:
499 actual_age_limit = info_dict.get('age_limit')
500 if actual_age_limit is None:
502 if age_limit < actual_age_limit:
503 return 'Skipping "' + title + '" because it is age restricted'
504 if self.in_download_archive(info_dict):
505 return '%s has already been recorded in archive' % video_title
def add_extra_info(info_dict, extra_info):
    '''Copy into info_dict every key of extra_info that info_dict lacks'''
    # NOTE(review): takes no self although callers invoke it through self
    # with two arguments; a @staticmethod decorator is presumably on the line
    # above, which this listing does not show -- confirm.
    for key in extra_info:
        if key not in info_dict:
            info_dict[key] = extra_info[key]
# Entry point for a URL: a suitable InfoExtractor is asked to extract it and
# the result is handed to process_ie_result().
# NOTE(review): this listing is lossy -- the end of the signature, the
# docstring delimiters, the loop over the extractors, the try: header and
# several branch bodies are not visible. Only what is shown is described.
514 def extract_info(self, url, download=True, ie_key=None, extra_info={},
517 Returns a list with a dictionary for each video we find.
518 If 'download', also downloads the videos.
519 extra_info is a dict containing the extra values to add to each result
# An explicit ie_key restricts the search to that single extractor.
523 ies = [self.get_info_extractor(ie_key)]
528 if not ie.suitable(url):
532 self.report_warning('The program functionality for this site has been marked as broken, '
533 'and will probably not work.')
536 ie_result = ie.extract(url)
537 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
# A bare list is the legacy result format; it is wrapped as a 'compat_list'.
539 if isinstance(ie_result, list):
540 # Backwards compatibility: old IE result format
542 '_type': 'compat_list',
543 'entries': ie_result,
545 self.add_default_extra_info(ie_result, ie, url)
547 return self.process_ie_result(ie_result, download, extra_info)
# Extractor errors are reported together with the traceback they carry.
550 except ExtractorError as de: # An error we somewhat expected
551 self.report_error(compat_str(de), de.format_traceback())
553 except MaxDownloadsReached:
# Any other exception is reported as an error when 'ignoreerrors' is set.
555 except Exception as e:
556 if self.params.get('ignoreerrors', False):
557 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
562 self.report_error('no suitable InfoExtractor for URL %s' % url)
# Fills in, via add_extra_info() (so existing keys are kept), the fields that
# identify where a result came from: the extractor's IE_NAME and ie_key() and
# the basename of the page URL.
# NOTE(review): one dict entry and the closing brackets are not visible in
# this listing.
564 def add_default_extra_info(self, ie_result, ie, url):
565 self.add_extra_info(ie_result, {
566 'extractor': ie.IE_NAME,
568 'webpage_url_basename': url_basename(url),
569 'extractor_key': ie.ie_key(),
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie (may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    ie_result is dispatched on its '_type' key ('video' when absent):
    'video', 'url', 'url_transparent', 'playlist' / 'multi_video' and the
    legacy 'compat_list'. Any other value raises Exception.
    extra_info is only read and forwarded here, never modified.

    NOTE(review): the listing this method was restored from had dropped
    lines; statements marked "restored" were filled in from the surrounding
    syntax and data flow -- confirm them against the full file.
    """
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                extract_flat is True):
            if self.params.get('forcejson', False):
                self.to_stdout(json.dumps(ie_result))
            # restored: flat extraction returns the unresolved result as is.
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,  # restored
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        def make_result(embedded_info):
            new_result = ie_result.copy()
            for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                      'entries', 'ie_key', 'duration',
                      'subtitles', 'annotations', 'format',
                      'thumbnail', 'thumbnails'):
                # restored: the embedding page's value is dropped first, so
                # a stale one never survives (otherwise the assertion below
                # would trip for embedded results without a '_type').
                if f in new_result:
                    del new_result[f]
                if f in embedded_info:
                    new_result[f] = embedded_info[f]
            return new_result  # restored
        new_result = make_result(info)

        assert new_result.get('_type') != 'url_transparent'
        if new_result.get('_type') == 'compat_list':
            new_result['entries'] = [
                make_result(e) for e in new_result['entries']]

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    # This used to test the not-yet-assigned local `playlist` instead of
    # result_type, raising UnboundLocalError for every result type that got
    # this far without being 'playlist'.
    elif result_type == 'playlist' or result_type == 'multi_video':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', None)
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None  # restored

        if isinstance(ie_result['entries'], list):
            n_all_entries = len(ie_result['entries'])
            entries = ie_result['entries'][playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        else:
            assert isinstance(ie_result['entries'], PagedList)
            entries = ie_result['entries'].getslice(
                playliststart, playlistend)
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
            # Playlist context handed down to each entry.
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                continue  # restored: a rejected entry is skipped

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result  # restored
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r  # restored
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result  # restored
    else:
        raise Exception('Invalid result type: %s' % result_type)
def select_format(self, format_spec, available_formats):
    """
    Pick one format dict out of available_formats according to format_spec.

    available_formats is expected to be ordered from worst to best.
    format_spec may be:
      'best' or None             -- the last format
      'worst'                    -- the first format
      'bestaudio'/'worstaudio'   -- last/first format whose 'vcodec' is 'none'
      'bestvideo'/'worstvideo'   -- last/first format whose 'acodec' is 'none'
      a known file extension     -- the last format with that 'ext'
      anything else              -- the last format with that 'format_id'

    Returns None when nothing matches, including when available_formats is
    empty (which previously raised IndexError for 'best' and 'worst').

    NOTE(review): the listing this method was restored from had dropped the
    guards around the indexing and the tail after `matches`; returning the
    last match, or None, is assumed from how the visible branches and the
    callers' "is not None" checks behave -- confirm against the full file.
    """
    if not available_formats:
        return None
    if format_spec == 'best' or format_spec is None:
        return available_formats[-1]
    if format_spec == 'worst':
        return available_formats[0]

    # spec -> (codec field that must be 'none', index of the pick)
    codec_selectors = {
        'bestaudio': ('vcodec', -1),
        'worstaudio': ('vcodec', 0),
        'bestvideo': ('acodec', -1),
        'worstvideo': ('acodec', 0),
    }
    if format_spec in codec_selectors:
        codec_field, pick = codec_selectors[format_spec]
        candidates = [
            f for f in available_formats
            if f.get(codec_field) == 'none']
        return candidates[pick] if candidates else None

    extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
    field = 'ext' if format_spec in extensions else 'format_id'
    matches = [f for f in available_formats if f[field] == format_spec]
    return matches[-1] if matches else None
# Normalises a single 'video' result (required fields, thumbnails, display_id,
# upload_date, per-format defaults), selects the format(s) requested through
# the 'format' param and hands each selected format to process_info().
# NOTE(review): this listing is lossy -- roughly two dozen statements (if/else
# headers, returns, dict entries, closing brackets) are not visible. Only what
# is shown is described; confirm against the full file before changing code.
742 def process_video_result(self, info_dict, download=True):
743 assert info_dict.get('_type', 'video') == 'video'
# 'id' and 'title' are mandatory in every extractor result.
745 if 'id' not in info_dict:
746 raise ExtractorError('Missing "id" field in extractor result')
747 if 'title' not in info_dict:
748 raise ExtractorError('Missing "title" field in extractor result')
750 if 'playlist' not in info_dict:
751 # It isn't part of a playlist
752 info_dict['playlist'] = None
753 info_dict['playlist_index'] = None
# Thumbnails are sorted by (width, height, url); the last one becomes the
# default 'thumbnail' below.
# NOTE(review): t.get() yields None for missing keys, and None does not order
# against int on Python 3 -- verify thumbnails with partial size information.
755 thumbnails = info_dict.get('thumbnails')
757 thumbnails.sort(key=lambda t: (
758 t.get('width'), t.get('height'), t.get('url')))
760 if 'width' in t and 'height' in t:
761 t['resolution'] = '%dx%d' % (t['width'], t['height'])
763 if thumbnails and 'thumbnail' not in info_dict:
764 info_dict['thumbnail'] = thumbnails[-1]['url']
766 if 'display_id' not in info_dict and 'id' in info_dict:
767 info_dict['display_id'] = info_dict['id']
# Derive 'upload_date' (YYYYMMDD, UTC) from 'timestamp' when only the latter
# is present.
769 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
770 upload_date = datetime.datetime.utcfromtimestamp(
771 info_dict['timestamp'])
772 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
774 # These extractors handle format selection themselves
775 if info_dict['extractor'] in ['Youku']:
777 self.process_info(info_dict)
780 # We now pick which formats have to be downloaded
781 if info_dict.get('formats') is None:
782 # There's only one format available
783 formats = [info_dict]
785 formats = info_dict['formats']
788 raise ExtractorError('No video formats found!')
790 # We check that all the formats have the format and format_id fields
791 for i, format in enumerate(formats):
792 if 'url' not in format:
793 raise ExtractorError('Missing "url" key in result (index %d)' % i)
# Missing 'format_id' defaults to the format's index in the list; missing
# 'format' is built from the id, the resolution and the optional note.
795 if format.get('format_id') is None:
796 format['format_id'] = compat_str(i)
797 if format.get('format') is None:
798 format['format'] = '{id} - {res}{note}'.format(
799 id=format['format_id'],
800 res=self.format_resolution(format),
801 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
803 # Automatically determine file extension if missing
804 if 'ext' not in format:
805 format['ext'] = determine_ext(format['url']).lower()
# 'format_limit': formats are cut off with takewhile_inclusive() at the
# format whose id equals the limit.
807 format_limit = self.params.get('format_limit', None)
809 formats = list(takewhile_inclusive(
810 lambda f: f['format_id'] != format_limit, formats
813 # TODO Central sorting goes here
815 if formats[0] is not info_dict:
816 # only set the 'formats' fields if the original info_dict list them
817 # otherwise we end up with a circular reference, the first (and unique)
818 # element in the 'formats' field in info_dict is info_dict itself,
819 # which can't be exported to json
820 info_dict['formats'] = formats
821 if self.params.get('listformats', None):
822 self.list_formats(info_dict)
825 req_format = self.params.get('format')
826 if req_format is None:
828 formats_to_download = []
829 # The -1 is for supporting YoutubeIE
830 if req_format in ('-1', 'all'):
831 formats_to_download = formats
# The request is a comma-separated list; each item is a '/'-separated list of
# alternatives, each of which may be a 'video+audio' pair.
833 for rfstr in req_format.split(','):
834 # We can accept formats requested in the format: 34/5/best, we pick
835 # the first that is available, starting from left
836 req_formats = rfstr.split('/')
837 for rf in req_formats:
838 if re.match(r'.+?\+.+?', rf) is not None:
839 # Two formats have been requested like '137+139'
840 format_1, format_2 = rf.split('+')
841 formats_info = (self.select_format(format_1, formats),
842 self.select_format(format_2, formats))
843 if all(formats_info):
844 # The first format must contain the video and the
846 if formats_info[0].get('vcodec') == 'none':
847 self.report_error('The first format must '
848 'contain the video, try using '
849 '"-f %s+%s"' % (format_2, format_1))
852 'requested_formats': formats_info,
854 'ext': formats_info[0]['ext'],
857 selected_format = None
859 selected_format = self.select_format(rf, formats)
860 if selected_format is not None:
861 formats_to_download.append(selected_format)
863 if not formats_to_download:
864 raise ExtractorError('requested format not available',
868 if len(formats_to_download) > 1:
869 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
# Each selected format is merged over a copy of info_dict and processed.
870 for format in formats_to_download:
871 new_info = dict(info_dict)
872 new_info.update(format)
873 self.process_info(new_info)
874 # We update the info dict with the best quality format (backwards compatibility)
875 info_dict.update(formats_to_download[-1])
878 def process_info(self, info_dict):
879 """Process a single resolved IE result."""
881 assert info_dict.get('_type', 'video') == 'video'
883 max_downloads = self.params.get('max_downloads')
884 if max_downloads is not None:
885 if self._num_downloads >= int(max_downloads):
886 raise MaxDownloadsReached()
888 info_dict['fulltitle'] = info_dict['title']
889 if len(info_dict['title']) > 200:
890 info_dict['title'] = info_dict['title'][:197] + '...'
892 # Keep for backwards compatibility
893 info_dict['stitle'] = info_dict['title']
895 if 'format' not in info_dict:
896 info_dict['format'] = info_dict['ext']
898 reason = self._match_entry(info_dict)
899 if reason is not None:
900 self.to_screen('[download] ' + reason)
903 self._num_downloads += 1
905 filename = self.prepare_filename(info_dict)
908 if self.params.get('forcetitle', False):
909 self.to_stdout(info_dict['fulltitle'])
910 if self.params.get('forceid', False):
911 self.to_stdout(info_dict['id'])
912 if self.params.get('forceurl', False):
913 # For RTMP URLs, also include the playpath
914 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
915 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
916 self.to_stdout(info_dict['thumbnail'])
917 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
918 self.to_stdout(info_dict['description'])
919 if self.params.get('forcefilename', False) and filename is not None:
920 self.to_stdout(filename)
921 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
922 self.to_stdout(formatSeconds(info_dict['duration']))
923 if self.params.get('forceformat', False):
924 self.to_stdout(info_dict['format'])
925 if self.params.get('forcejson', False):
926 info_dict['_filename'] = filename
927 self.to_stdout(json.dumps(info_dict))
928 if self.params.get('dump_single_json', False):
929 info_dict['_filename'] = filename
931 # Do nothing else if in simulate mode
932 if self.params.get('simulate', False):
939 dn = os.path.dirname(encodeFilename(filename))
940 if dn and not os.path.exists(dn):
942 except (OSError, IOError) as err:
943 self.report_error('unable to create directory ' + compat_str(err))
946 if self.params.get('writedescription', False):
947 descfn = filename + '.description'
948 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
949 self.to_screen('[info] Video description is already present')
952 self.to_screen('[info] Writing video description to: ' + descfn)
953 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
954 descfile.write(info_dict['description'])
955 except (KeyError, TypeError):
956 self.report_warning('There\'s no description to write.')
957 except (OSError, IOError):
958 self.report_error('Cannot write description file ' + descfn)
961 if self.params.get('writeannotations', False):
962 annofn = filename + '.annotations.xml'
963 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
964 self.to_screen('[info] Video annotations are already present')
967 self.to_screen('[info] Writing video annotations to: ' + annofn)
968 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
969 annofile.write(info_dict['annotations'])
970 except (KeyError, TypeError):
971 self.report_warning('There are no annotations to write.')
972 except (OSError, IOError):
973 self.report_error('Cannot write annotations file: ' + annofn)
976 subtitles_are_requested = any([self.params.get('writesubtitles', False),
977 self.params.get('writeautomaticsub')])
979 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
980 # subtitles download errors are already managed as troubles in relevant IE
981 # that way it will silently go on when used with unsupporting IE
982 subtitles = info_dict['subtitles']
983 sub_format = self.params.get('subtitlesformat', 'srt')
984 for sub_lang in subtitles.keys():
985 sub = subtitles[sub_lang]
989 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
990 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
991 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
993 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
994 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
996 except (OSError, IOError):
997 self.report_error('Cannot write subtitles file ' + sub_filename)
1000 if self.params.get('writeinfojson', False):
1001 infofn = os.path.splitext(filename)[0] + '.info.json'
1002 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1003 self.to_screen('[info] Video description metadata is already present')
1005 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1007 write_json_file(info_dict, infofn)
1008 except (OSError, IOError):
1009 self.report_error('Cannot write metadata to JSON file ' + infofn)
1012 if self.params.get('writethumbnail', False):
1013 if info_dict.get('thumbnail') is not None:
1014 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
1015 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
1016 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1017 self.to_screen('[%s] %s: Thumbnail is already present' %
1018 (info_dict['extractor'], info_dict['id']))
1020 self.to_screen('[%s] %s: Downloading thumbnail ...' %
1021 (info_dict['extractor'], info_dict['id']))
1023 uf = self.urlopen(info_dict['thumbnail'])
1024 with open(thumb_filename, 'wb') as thumbf:
1025 shutil.copyfileobj(uf, thumbf)
1026 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1027 (info_dict['extractor'], info_dict['id'], thumb_filename))
1028 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1029 self.report_warning('Unable to download thumbnail "%s": %s' %
1030 (info_dict['thumbnail'], compat_str(err)))
1032 if not self.params.get('skip_download', False):
1033 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1038 fd = get_suitable_downloader(info)(self, self.params)
1039 for ph in self._progress_hooks:
1040 fd.add_progress_hook(ph)
1041 if self.params.get('verbose'):
1042 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1043 return fd.download(name, info)
1044 if info_dict.get('requested_formats') is not None:
1047 merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1048 if not merger._executable:
1050 self.report_warning('You have requested multiple '
1051 'formats but ffmpeg or avconv are not installed.'
1052 ' The formats won\'t be merged')
1054 postprocessors = [merger]
1055 for f in info_dict['requested_formats']:
1056 new_info = dict(info_dict)
1058 fname = self.prepare_filename(new_info)
1059 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1060 downloaded.append(fname)
1061 partial_success = dl(fname, new_info)
1062 success = success and partial_success
1063 info_dict['__postprocessors'] = postprocessors
1064 info_dict['__files_to_merge'] = downloaded
1066 # Just a single file
1067 success = dl(filename, info_dict)
1068 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1069 self.report_error('unable to download video data: %s' % str(err))
1071 except (OSError, IOError) as err:
1072 raise UnavailableVideoError(err)
1073 except (ContentTooShortError, ) as err:
1074 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1079 self.post_process(filename, info_dict)
1080 except (PostProcessingError) as err:
1081 self.report_error('postprocessing: %s' % str(err))
1084 self.record_download_archive(info_dict)
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError before doing any work when more than one URL is
    given, the output template contains no '%' field and 'max_downloads'
    is not 1.  Returns self._download_retcode once every URL was handled.
    """
    template = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    if (len(url_list) > 1
            and '%' not in template
            and self.params.get('max_downloads') != 1):
        raise SameFileError(template)

    for url in url_list:
        try:
            # extract_info() also downloads the videos
            result = self.extract_info(url)
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise
        # Only reached when extraction went through without an exception.
        if self.params.get('dump_single_json', False):
            self.to_stdout(json.dumps(result))

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Process the info dict stored as JSON in info_filename.

    If processing raises DownloadError and the stored info carries a
    'webpage_url', a warning is issued and that URL is downloaded instead;
    without a 'webpage_url' the DownloadError is re-raised.
    Returns the download return code.
    """
    with io.open(info_filename, 'r', encoding='utf-8') as info_file:
        info = json.load(info_file)

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        fallback_url = info.get('webpage_url')
        if fallback_url is None:
            # Nothing to fall back to
            raise
        self.report_warning('The info failed to download, trying with "%s"' % fallback_url)
        return self.download([fallback_url])
    return self._download_retcode
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    The chain is the postprocessors listed under ie_info['__postprocessors']
    (if any) followed by the ones in self._pps.  Each run() returns a
    (keep_video_wish, info) pair; the returned info is handed to the next
    postprocessor so that changes made by one step are visible to the
    following ones.  A PostProcessingError is reported and the chain goes on.

    The original file is removed afterwards only when the combined wish is
    exactly False and the 'keepvideo' parameter is not set.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    keep_video = None

    pps_chain = []
    if ie_info.get('__postprocessors') is not None:
        pps_chain.extend(ie_info['__postprocessors'])
    pps_chain.extend(self._pps)

    for pp in pps_chain:
        try:
            keep_video_wish, new_info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)
            continue
        # Thread the updated info through the chain instead of dropping it;
        # keep the previous dict if a postprocessor hands back nothing.
        if new_info is not None:
            info = new_info
        if keep_video_wish is not None:
            if keep_video_wish:
                keep_video = keep_video_wish
            elif keep_video is None:
                # No clear decision yet, let IE decide
                keep_video = keep_video_wish

    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded video file')
1150 def _make_archive_id(self, info_dict):
1151 # Future-proof against any change in case
1152 # and backwards compatibility with prior versions
1153 extractor = info_dict.get('extractor_key')
1154 if extractor is None:
1155 if 'id' in info_dict:
1156 extractor = info_dict.get('ie_key') # key in a playlist
1157 if extractor is None:
1158 return None # Incomplete video information
1159 return extractor.lower() + ' ' + info_dict['id']
def in_download_archive(self, info_dict):
    """Tell whether info_dict's archive id is listed in the download archive.

    Returns False when no 'download_archive' file is configured, when no
    archive id can be built, or when the archive file does not exist yet.
    Any other IOError is propagated.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    wanted = self._make_archive_id(info_dict)
    if wanted is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            if any(entry.strip() == wanted for entry in archive_file):
                return True
    except IOError as ioe:
        # A missing archive file just means nothing was recorded so far
        if ioe.errno != errno.ENOENT:
            raise
    return False
def record_download_archive(self, info_dict):
    """Append info_dict's archive id to the download archive file.

    Does nothing when the 'download_archive' parameter is not set.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        entry = self._make_archive_id(info_dict)
        assert entry
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(entry + '\n')
def format_resolution(format, default='unknown'):
    """Return a short human-readable resolution string for a format dict.

    Checked in this order:
      * vcodec == 'none'       -> 'audio only'
      * explicit 'resolution'  -> returned unchanged
      * width and height       -> '<width>x<height>'
      * height only            -> '<height>p'
      * width only             -> '<width>x?'
      * nothing known          -> default
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']

    width = format.get('width')
    height = format.get('height')
    if height is not None:
        if width is not None:
            return '%sx%s' % (width, height)
        return '%sp' % height
    if width is not None:
        # The known value is the width, so it belongs before the 'x';
        # %s (as in the other branches) also accepts non-integer values.
        return '%sx?' % width
    return default
1206 def _format_note(self, fdict):
1208 if fdict.get('ext') in ['f4f', 'f4m']:
1209 res += '(unsupported) '
1210 if fdict.get('format_note') is not None:
1211 res += fdict['format_note'] + ' '
1212 if fdict.get('tbr') is not None:
1213 res += '%4dk ' % fdict['tbr']
1214 if fdict.get('container') is not None:
1217 res += '%s container' % fdict['container']
1218 if (fdict.get('vcodec') is not None and
1219 fdict.get('vcodec') != 'none'):
1222 res += fdict['vcodec']
1223 if fdict.get('vbr') is not None:
1225 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1227 if fdict.get('vbr') is not None:
1228 res += '%4dk' % fdict['vbr']
1229 if fdict.get('fps') is not None:
1230 res += ', %sfps' % fdict['fps']
1231 if fdict.get('acodec') is not None:
1234 if fdict['acodec'] == 'none':
1237 res += '%-5s' % fdict['acodec']
1238 elif fdict.get('abr') is not None:
1242 if fdict.get('abr') is not None:
1243 res += '@%3dk' % fdict['abr']
1244 if fdict.get('asr') is not None:
1245 res += ' (%5dHz)' % fdict['asr']
1246 if fdict.get('filesize') is not None:
1249 res += format_bytes(fdict['filesize'])
1250 elif fdict.get('filesize_approx') is not None:
1253 res += '~' + format_bytes(fdict['filesize_approx'])
def list_formats(self, info_dict):
    """Print a table of the formats available for info_dict.

    Uses info_dict['formats'] when present, otherwise treats info_dict
    itself as the only format.  With more than one format the first row
    is tagged '(worst)' and the last one '(best)'.
    """
    formats = info_dict.get('formats', [info_dict])
    id_width = max(len('format code'),
                   max(len(f['format_id']) for f in formats))
    # Columns: format id (padded to the longest id), extension, resolution, note
    row_template = '%-' + compat_str(id_width + 1) + 's%-10s%-12s%s'

    def render(fmt):
        return row_template % (
            fmt['format_id'],
            fmt['ext'],
            self.format_resolution(fmt),
            self._format_note(fmt),
        )

    rows = [render(f) for f in formats]
    if len(formats) > 1:
        for index, tag in ((0, '(worst)'), (-1, '(best)')):
            spacer = ' ' if self._format_note(formats[index]) else ''
            rows[index] += spacer + tag

    # The header is rendered through the same path as a regular row
    header_line = render({
        'format_id': 'format code', 'ext': 'extension',
        'resolution': 'resolution', 'format_note': 'note'})
    self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, '\n'.join(rows)))
def urlopen(self, req):
    """ Start an HTTP download """

    # RFC 3986 does not allow non-ASCII characters in URLs, but some
    # websites hand out URLs containing non-ASCII characters that are not
    # percent-encoded (see telemb.py, ard.py [#3412]).
    # urllib chokes on such URLs (see http://bugs.python.org/issue3991),
    # so the request's URL is swapped for its percent-encoded form.
    text_type = basestring if sys.version_info < (3, 0) else compat_str
    given_as_string = isinstance(req, text_type)
    if given_as_string:
        url = req
    else:
        url = req.get_full_url()
    url_escaped = escape_url(url)

    # Substitute URL if any change after escaping
    if url != url_escaped:
        if given_as_string:
            req = url_escaped
        else:
            # Rebuild the request around the escaped URL, carrying over
            # the attributes of the one that was passed in
            req = compat_urllib_request.Request(
                url_escaped, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

    return self._opener.open(req, timeout=self._socket_timeout)
def print_debug_header(self):
    """Write the '[debug]' diagnostic header; a no-op unless 'verbose' is set.

    Reports, in order: the encodings in use, the youtube-dl version, the
    short git HEAD revision (best effort, skipped if it cannot be
    determined), the Python version and platform, the versions of the
    external executables and the proxy map of the opener's handlers.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, _ = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        # Best effort only: git may be missing or this may not be a
        # checkout.  Exception (rather than a bare except) lets
        # KeyboardInterrupt and SystemExit through.
        try:
            # Only available on Python 2; fails harmlessly elsewhere
            sys.exc_clear()
        except Exception:
            pass
    self._write_string('[debug] Python version %s - %s\n' % (
        platform.python_version(), platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions()
    exe_versions['rtmpdump'] = rtmpdump_version()
    # Only executables with a truthy version are listed
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    )
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
def _setup_opener(self):
    """Create the URL opener used for all requests and store it in self._opener.

    Also sets self._socket_timeout (600 unless 'socket_timeout' is given)
    and self.cookiejar (loaded from 'cookiefile' when that file is readable).
    """
    timeout_val = self.params.get('socket_timeout')
    if timeout_val is None:
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(opts_cookiefile)
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()
    cookie_processor = compat_urllib_request.HTTPCookieProcessor(self.cookiejar)

    if opts_proxy is None:
        # No explicit option: use what getproxies() reports
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif opts_proxy == '':
        # An empty string explicitly requests no proxies at all
        proxies = {}
    else:
        proxies = {'http': opts_proxy, 'https': opts_proxy}
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(debuglevel=debuglevel)
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, ydlh)
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
def encode(self, s):
    """Return s as bytes, encoded with the encoding from get_encoding().

    bytes input is returned unchanged.  A UnicodeEncodeError is re-raised
    with a hint about the encoding configuration appended to its reason.
    """
    if not isinstance(s, bytes):
        try:
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s  # Already encoded
1408 def get_encoding(self):
1409 encoding = self.params.get('encoding')
1410 if encoding is None:
1411 encoding = preferredencoding()