2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
31 compat_urllib_request,
57 UnavailableVideoError,
64 from .cache import Cache
65 from .extractor import get_info_extractor, gen_extractors
66 from .downloader import get_suitable_downloader
67 from .downloader.rtmp import rtmpdump_version
68 from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor
69 from .version import __version__
72 class YoutubeDL(object):
75 YoutubeDL objects are the ones responsible of downloading the
76 actual video file and writing it to disk if the user has requested
77 it, among some other tasks. In most cases there should be one per
78 program. As, given a video URL, the downloader doesn't know how to
79 extract all the needed information, task that InfoExtractors do, it
80 has to pass the URL to one of them.
82 For this, YoutubeDL objects have a method that allows
83 InfoExtractors to be registered in a given order. When it is passed
84 a URL, the YoutubeDL object handles it to the first InfoExtractor it
85 finds that reports being able to handle it. The InfoExtractor extracts
86 all the information about the video or videos the URL refers to, and
87 YoutubeDL processes the extracted information, possibly using a File
88 Downloader to download the video.
90 YoutubeDL objects accept a lot of parameters. In order not to saturate
91 the object constructor with arguments, it receives a dictionary of
92 options instead. These options are available through the params
93 attribute for the InfoExtractors to use. The YoutubeDL also
94 registers itself as the downloader in charge for the InfoExtractors
95 that are added to it, so this is a "mutual registration".
99 username: Username for authentication purposes.
100 password: Password for authentication purposes.
101 videopassword: Password for accessing a video.
102 usenetrc: Use netrc for authentication instead.
103 verbose: Print additional info to stdout.
104 quiet: Do not print messages to stdout.
105 no_warnings: Do not print out anything for warnings.
106 forceurl: Force printing final URL.
107 forcetitle: Force printing title.
108 forceid: Force printing ID.
109 forcethumbnail: Force printing thumbnail URL.
110 forcedescription: Force printing description.
111 forcefilename: Force printing final filename.
112 forceduration: Force printing duration.
113 forcejson: Force printing info_dict as JSON.
114 dump_single_json: Force printing the info_dict of the whole playlist
115 (or video) as a single JSON line.
116 simulate: Do not download the video files.
117 format: Video format code.
118 format_limit: Highest quality format to try.
119 outtmpl: Template for output names.
120 restrictfilenames: Do not allow "&" and spaces in file names
121 ignoreerrors: Do not stop on download errors.
122 nooverwrites: Prevent overwriting files.
123 playliststart: Playlist item to start at.
124 playlistend: Playlist item to end at.
125 matchtitle: Download only matching titles.
126 rejecttitle: Reject downloads for matching titles.
127 logger: Log messages to a logging.Logger instance.
128 logtostderr: Log messages to stderr instead of stdout.
129 writedescription: Write the video description to a .description file
130 writeinfojson: Write the video description to a .info.json file
131 writeannotations: Write the video annotations to a .annotations.xml file
132 writethumbnail: Write the thumbnail image to a file
133 writesubtitles: Write the video subtitles to a file
134 writeautomaticsub: Write the automatic subtitles to a file
135 allsubtitles: Downloads all the subtitles of the video
136 (requires writesubtitles or writeautomaticsub)
137 listsubtitles: Lists all available subtitles for the video
138 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
139 subtitleslangs: List of languages of the subtitles to download
140 keepvideo: Keep the video file after post-processing
141 daterange: A DateRange object, download only if the upload_date is in the range.
142 skip_download: Skip the actual download of the video file
143 cachedir: Location of the cache files in the filesystem.
144 False to disable filesystem cache.
145 noplaylist: Download single video instead of a playlist if in doubt.
146 age_limit: An integer representing the user's age in years.
147 Unsuitable videos for the given age are skipped.
148 min_views: An integer representing the minimum view count the video
149 must have in order to not be skipped.
150 Videos without view count information are always
151 downloaded. None for no limit.
152 max_views: An integer representing the maximum view count.
153 Videos that are more popular than that are not
155 Videos without view count information are always
156 downloaded. None for no limit.
157 download_archive: File name of a file where all downloads are recorded.
158 Videos already present in the file are not downloaded
160 cookiefile: File name where cookies should be read from and dumped to.
161 nocheckcertificate:Do not verify SSL certificates
162 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
163 At the moment, this is only supported by YouTube.
164 proxy: URL of the proxy server to use
165 socket_timeout: Time to wait for unresponsive hosts, in seconds
166 bidi_workaround: Work around buggy terminals without bidirectional text
167 support, using fribidi
168 debug_printtraffic:Print out sent and received HTTP traffic
169 include_ads: Download ads as well
170 default_search: Prepend this string if an input url is not valid.
171 'auto' for elaborate guessing
172 encoding: Use this encoding instead of the system-specified.
173 extract_flat: Do not resolve URLs, return the immediate result.
174 Pass in 'in_playlist' to only show this behavior for
177 The following parameters are not used by YoutubeDL itself, they are used by
179 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
180 noresizebuffer, retries, continuedl, noprogress, consoletitle
182 The following options are used by the post processors:
183 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
184 otherwise prefer avconv.
185 exec_cmd: Arbitrary command to run after downloading
191 _download_retcode = None
192 _num_downloads = None
# NOTE(review): this region is a sampled/garbled paste — every line carries a
# stray leading line number and several statements are missing (e.g. the
# try/except around the pty/bidi setup, the sp_kwargs construction, the
# self.params assignment). Comments below annotate only what is visible.
195 def __init__(self, params=None, auto_init=True):
196 """Create a FileDownloader object with the given options."""
# Per-instance registries: extractor instances by key, progress callbacks.
200 self._ies_instances = {}
202 self._progress_hooks = []
203 self._download_retcode = 0
204 self._num_downloads = 0
# 'logtostderr' routes screen output to stderr instead of stdout.
205 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
206 self._err_file = sys.stderr
208 self.cache = Cache(self)
# Optional workaround for terminals without bidirectional text support:
# pipe screen output through bidiv/fribidi via a pty.
210 if params.get('bidi_workaround', False):
213 master, slave = pty.openpty()
214 width = get_term_width()
218 width_args = ['-w', str(width)]
220 stdin=subprocess.PIPE,
222 stderr=self._err_file)
224 self._output_process = subprocess.Popen(
225 ['bidiv'] + width_args, **sp_kwargs
# Second Popen: presumably a fallback when 'bidiv' is unavailable —
# TODO(review) confirm against the unsampled original.
228 self._output_process = subprocess.Popen(
229 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
230 self._output_channel = os.fdopen(master, 'rb')
231 except OSError as ose:
233 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
# Force --restrict-filenames when the filesystem encoding cannot represent
# arbitrary Unicode titles (Python 3, non-Windows).
237 if (sys.version_info >= (3,) and sys.platform != 'win32' and
238 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
239 and not params.get('restrictfilenames', False)):
240 # On Python 3, the Unicode filesystem API will throw errors (#1474)
242 'Assuming --restrict-filenames since file system encoding '
243 'cannot encode all characters. '
244 'Set the LC_ALL environment variable to fix this.')
245 self.params['restrictfilenames'] = True
# %(stitle)s output-template placeholder is deprecated; warn once here.
247 if '%(stitle)s' in self.params.get('outtmpl', ''):
248 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
253 self.print_debug_header()
254 self.add_default_info_extractors()
def add_info_extractor(self, ie):
    """Add an InfoExtractor object to the end of the list.

    Registers *ie* under its key for later lookup and hands it a
    back-reference to this downloader ("mutual registration").
    """
    # NOTE(review): the sampled source dropped the statement that appends to
    # self._ies; restored so ordered iteration over extractors keeps working.
    self._ies.append(ie)
    self._ies_instances[ie.ie_key()] = ie
    ie.set_downloader(self)
def get_info_extractor(self, ie_key):
    """
    Get an instance of an IE with name ie_key, it will try to get one from
    the _ies list, if there's no instance it will create a new one and add
    it to the extractor list.
    """
    ie = self._ies_instances.get(ie_key)
    # NOTE(review): the 'if ie is None' guard and the final return were
    # missing from the sampled source; restored here.
    if ie is None:
        ie = get_info_extractor(ie_key)()
        self.add_info_extractor(ie)
    return ie
def add_default_info_extractors(self):
    """
    Add the InfoExtractors returned by gen_extractors to the end of the list
    """
    for ie in gen_extractors():
        self.add_info_extractor(ie)
def add_post_processor(self, pp):
    """Add a PostProcessor object to the end of the chain."""
    # NOTE(review): the append to self._pps was missing from the sampled
    # source; restored so the post-processor chain actually grows.
    self._pps.append(pp)
    pp.set_downloader(self)
def add_progress_hook(self, ph):
    """Add the progress hook (currently only for the file downloader)"""
    self._progress_hooks.append(ph)
def _bidi_workaround(self, message):
    """Filter *message* through the bidi subprocess, line by line.

    Returns the message unchanged when the bidi workaround was not set up
    in __init__ (no _output_channel attribute).
    """
    # NOTE(review): this early return was missing from the sampled source.
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)
    line_count = message.count('\n') + 1
    self._output_process.stdin.write((message + '\n').encode('utf-8'))
    self._output_process.stdin.flush()
    # Read back exactly as many lines as were written, then drop the
    # trailing newline added above.
    res = ''.join(self._output_channel.readline().decode('utf-8')
                  for _ in range(line_count))
    return res[:-len('\n')]
def to_screen(self, message, skip_eol=False):
    """Print message to stdout if not in quiet mode."""
    return self.to_stdout(message, skip_eol, check_quiet=True)
def _write_string(self, s, out=None):
    """Write *s* to *out* using the user-configured output encoding."""
    write_string(s, out=out, encoding=self.params.get('encoding'))
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to stdout if not in quiet mode.

    A configured logger takes precedence over direct screen output; the
    'quiet' param only suppresses output when check_quiet is set (i.e. for
    to_screen callers, not forced output).
    """
    if self.params.get('logger'):
        self.params['logger'].debug(message)
    elif not check_quiet or not self.params.get('quiet', False):
        message = self._bidi_workaround(message)
        terminator = ['\n', ''][skip_eol]
        output = message + terminator

        self._write_string(output, self._screen_file)
def to_stderr(self, message):
    """Print message to stderr."""
    assert isinstance(message, compat_str)
    if self.params.get('logger'):
        self.params['logger'].error(message)
    # NOTE(review): this else branch was missing from the sampled source;
    # without it the message would be printed twice when a logger is set.
    else:
        message = self._bidi_workaround(message)
        output = message + '\n'
        self._write_string(output, self._err_file)
def to_console_title(self, message):
    """Set the terminal/console window title to *message* (best effort)."""
    if not self.params.get('consoletitle', False):
        # Missing from the sampled source: bail out when the feature is off.
        return
    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    elif 'TERM' in os.environ:
        # xterm title-setting escape sequence.
        self._write_string('\033]0;%s\007' % message, self._screen_file)
def save_console_title(self):
    """Push the current terminal title onto the xterm title stack."""
    if not self.params.get('consoletitle', False):
        # Missing from the sampled source: no-op when the feature is off.
        return
    if 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
def restore_console_title(self):
    """Pop the previously saved terminal title from the xterm title stack."""
    if not self.params.get('consoletitle', False):
        # Missing from the sampled source: no-op when the feature is off.
        return
    if 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
356 self.save_console_title()
def __exit__(self, *args):
    """Context-manager exit: restore the console title and persist cookies."""
    self.restore_console_title()

    if self.params.get('cookiefile') is not None:
        self.cookiejar.save()
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    Depending on if the downloader has been configured to ignore
    download errors or not, this method may throw an exception or
    not when errors are found, after printing the message.

    tb, if given, is additional traceback information.
    """
    if message is not None:
        self.to_stderr(message)
    if self.params.get('verbose'):
        # NOTE(review): the `if tb is None:`, `tb = ''`, `else:` and the
        # final to_stderr(tb) were missing from the sampled source; restored.
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                tb = ''
                if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                    tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += compat_str(traceback.format_exc())
            else:
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = ''.join(tb_data)
        self.to_stderr(tb)
    if not self.params.get('ignoreerrors', False):
        # Prefer the wrapped extractor exception info when available.
        if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
            exc_info = sys.exc_info()[1].exc_info
        else:
            exc_info = sys.exc_info()
        raise DownloadError(message, exc_info)
    self._download_retcode = 1
def report_warning(self, message):
    '''
    Print the message to stderr, it will be prefixed with 'WARNING:'
    If stderr is a tty file the 'WARNING:' will be colored
    '''
    if self.params.get('logger') is not None:
        self.params['logger'].warning(message)
        # Missing from the sampled source: stop after delegating to logger.
        return
    if self.params.get('no_warnings'):
        # Missing from the sampled source: honour --no-warnings.
        return
    # ANSI colour only on a real tty and never on Windows consoles.
    if self._err_file.isatty() and os.name != 'nt':
        _msg_header = '\033[0;33mWARNING:\033[0m'
    else:
        _msg_header = 'WARNING:'
    warning_message = '%s %s' % (_msg_header, message)
    self.to_stderr(warning_message)
def report_error(self, message, tb=None):
    '''
    Do the same as trouble, but prefixes the message with 'ERROR:', colored
    in red if stderr is a tty file.
    '''
    if self._err_file.isatty() and os.name != 'nt':
        _msg_header = '\033[0;31mERROR:\033[0m'
    # NOTE(review): this else was missing from the sampled source.
    else:
        _msg_header = 'ERROR:'
    error_message = '%s %s' % (_msg_header, message)
    self.trouble(error_message, tb)
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded."""
    # NOTE(review): the `try:` was missing from the sampled source; without
    # it the except clause is orphaned. The fallback message avoids crashing
    # on filenames the screen encoding cannot represent.
    try:
        self.to_screen('[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')
# NOTE(review): sampled source — stray line-number prefixes and missing
# statements (the `try:`, the sanitize lambda body, the closing of the dict()
# call, and the `return filename`). Annotations cover visible lines only.
431 def prepare_filename(self, info_dict):
432 """Generate the output filename."""
434 template_dict = dict(info_dict)
# Synthetic template fields: current epoch and a zero-padded autonumber.
436 template_dict['epoch'] = int(time.time())
437 autonumber_size = self.params.get('autonumber_size')
438 if autonumber_size is None:
440 autonumber_templ = '%0' + str(autonumber_size) + 'd'
441 template_dict['autonumber'] = autonumber_templ % self._num_downloads
# Pad playlist_index to the width of the playlist's entry count.
442 if template_dict.get('playlist_index') is not None:
443 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
# Derive a human-readable resolution string when the extractor gave none.
444 if template_dict.get('resolution') is None:
445 if template_dict.get('width') and template_dict.get('height'):
446 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
447 elif template_dict.get('height'):
448 template_dict['resolution'] = '%sp' % template_dict['height']
449 elif template_dict.get('width'):
450 template_dict['resolution'] = '?x%d' % template_dict['width']
# Sanitize every template value for filesystem use; unknown keys fall back
# to the literal 'NA' via the defaultdict below.
452 sanitize = lambda k, v: sanitize_filename(
454 restricted=self.params.get('restrictfilenames'),
456 template_dict = dict((k, sanitize(k, v))
457 for k, v in template_dict.items()
459 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
461 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
462 tmpl = compat_expanduser(outtmpl)
463 filename = tmpl % template_dict
# A malformed output template raises ValueError from the % operator.
465 except ValueError as err:
466 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
# NOTE(review): sampled source — stray line-number prefixes and missing
# statements (the `if matchtitle:` / `if rejecttitle:` / `if date is not
# None:` guards and the final `return None`). Annotations cover visible
# lines only. Each early return below is a human-readable skip reason.
469 def _match_entry(self, info_dict):
470 """ Returns None iff the file should be downloaded """
472 video_title = info_dict.get('title', info_dict.get('id', 'video'))
473 if 'title' in info_dict:
474 # This can happen when we're just evaluating the playlist
475 title = info_dict['title']
# --match-title / --reject-title regex filters (case-insensitive).
476 matchtitle = self.params.get('matchtitle', False)
478 if not re.search(matchtitle, title, re.IGNORECASE):
479 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
480 rejecttitle = self.params.get('rejecttitle', False)
482 if re.search(rejecttitle, title, re.IGNORECASE):
483 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
# --date / --datebefore / --dateafter range check.
484 date = info_dict.get('upload_date', None)
486 dateRange = self.params.get('daterange', DateRange())
487 if date not in dateRange:
488 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
# View-count window: videos without a count are never filtered here.
489 view_count = info_dict.get('view_count', None)
490 if view_count is not None:
491 min_views = self.params.get('min_views')
492 if min_views is not None and view_count < min_views:
493 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
494 max_views = self.params.get('max_views')
495 if max_views is not None and view_count > max_views:
496 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
# Age gate: skip when the configured age is below the video's limit.
497 age_limit = self.params.get('age_limit')
498 if age_limit is not None:
499 actual_age_limit = info_dict.get('age_limit')
500 if actual_age_limit is None:
502 if age_limit < actual_age_limit:
503 return 'Skipping "' + title + '" because it is age restricted'
504 if self.in_download_archive(info_dict):
505 return '%s has already been recorded in archive' % video_title
# NOTE(review): takes no self — presumably decorated @staticmethod in the
# unsampled original; the decorator line is not visible here.
def add_extra_info(info_dict, extra_info):
    '''Set the keys from extra_info in info dict if they are missing'''
    for key, value in extra_info.items():
        info_dict.setdefault(key, value)
# NOTE(review): sampled source — stray line-number prefixes and missing
# statements (the signature's closing `process=True):`, docstring delimiters,
# the extractor loop header, the `try:`, `break`/`raise` statements and
# `else:` branches). Annotations cover visible lines only.
# NOTE(review): extra_info={} is a mutable default argument — shared across
# calls if ever mutated; worth fixing once the file is un-garbled.
514 def extract_info(self, url, download=True, ie_key=None, extra_info={},
517 Returns a list with a dictionary for each video we find.
518 If 'download', also downloads the videos.
519 extra_info is a dict containing the extra values to add to each result
# With an explicit ie_key, consult only that extractor; otherwise the full
# registered list is tried in order (loop header not visible here).
523 ies = [self.get_info_extractor(ie_key)]
528 if not ie.suitable(url):
532 self.report_warning('The program functionality for this site has been marked as broken, '
533 'and will probably not work.')
536 ie_result = ie.extract(url)
537 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
539 if isinstance(ie_result, list):
540 # Backwards compatibility: old IE result format
542 '_type': 'compat_list',
543 'entries': ie_result,
545 self.add_default_extra_info(ie_result, ie, url)
547 return self.process_ie_result(ie_result, download, extra_info)
# Expected extractor failures are reported but may be swallowed depending
# on ignoreerrors (surrounding control flow not visible here).
550 except ExtractorError as de: # An error we somewhat expected
551 self.report_error(compat_str(de), de.format_traceback())
553 except MaxDownloadsReached:
555 except Exception as e:
556 if self.params.get('ignoreerrors', False):
557 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
562 self.report_error('no suitable InfoExtractor for URL %s' % url)
def add_default_extra_info(self, ie_result, ie, url):
    """Stamp extractor identity and source-URL fields onto *ie_result*."""
    # NOTE(review): the 'webpage_url' entry and the closing `})` were missing
    # from the sampled source; restored here.
    self.add_extra_info(ie_result, {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    })
# NOTE(review): sampled source — stray line-number prefixes and many missing
# statements (docstring delimiters, `return ie_result` for the flat case,
# the `else:` arms, the `entry.update({` opener, `continue`, the `_fixup`
# definition and `return` statements). Annotations cover visible lines only.
# NOTE(review): extra_info={} is a mutable default argument — shared across
# calls if ever mutated; worth fixing once the file is un-garbled.
572 def process_ie_result(self, ie_result, download=True, extra_info={}):
574 Take the result of the ie(may be modified) and resolve all unresolved
575 references (URLs, playlist items).
577 It will also download the videos if 'download'.
578 Returns the resolved ie_result.
581 result_type = ie_result.get('_type', 'video')
# extract_flat short-circuits URL results without resolving them further.
583 if result_type in ('url', 'url_transparent'):
584 extract_flat = self.params.get('extract_flat', False)
585 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
586 extract_flat is True):
587 if self.params.get('forcejson', False):
588 self.to_stdout(json.dumps(ie_result))
591 if result_type == 'video':
592 self.add_extra_info(ie_result, extra_info)
593 return self.process_video_result(ie_result, download=download)
594 elif result_type == 'url':
595 # We have to add extra_info to the results because it may be
596 # contained in a playlist
597 return self.extract_info(ie_result['url'],
599 ie_key=ie_result.get('ie_key'),
600 extra_info=extra_info)
601 elif result_type == 'url_transparent':
602 # Use the information from the embedding page
603 info = self.extract_info(
604 ie_result['url'], ie_key=ie_result.get('ie_key'),
605 extra_info=extra_info, download=False, process=False)
# Merge selected fields from the embedded page into a copy of the
# embedding result.
607 def make_result(embedded_info):
608 new_result = ie_result.copy()
609 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
610 'entries', 'ie_key', 'duration',
611 'subtitles', 'annotations', 'format',
612 'thumbnail', 'thumbnails'):
615 if f in embedded_info:
616 new_result[f] = embedded_info[f]
618 new_result = make_result(info)
# Guard against infinite recursion through nested url_transparent results.
620 assert new_result.get('_type') != 'url_transparent'
621 if new_result.get('_type') == 'compat_list':
622 new_result['entries'] = [
623 make_result(e) for e in new_result['entries']]
625 return self.process_ie_result(
626 new_result, download=download, extra_info=extra_info)
627 elif result_type == 'playlist':
628 # We process each entry in the playlist
629 playlist = ie_result.get('title', None) or ie_result.get('id', None)
630 self.to_screen('[download] Downloading playlist: %s' % playlist)
632 playlist_results = []
# --playlist-start is 1-based on the CLI; convert to 0-based slice index.
634 playliststart = self.params.get('playliststart', 1) - 1
635 playlistend = self.params.get('playlistend', None)
636 # For backwards compatibility, interpret -1 as whole list
637 if playlistend == -1:
# Entries may be a concrete list or a lazily-evaluated PagedList.
640 if isinstance(ie_result['entries'], list):
641 n_all_entries = len(ie_result['entries'])
642 entries = ie_result['entries'][playliststart:playlistend]
643 n_entries = len(entries)
645 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
646 (ie_result['extractor'], playlist, n_all_entries, n_entries))
648 assert isinstance(ie_result['entries'], PagedList)
649 entries = ie_result['entries'].getslice(
650 playliststart, playlistend)
651 n_entries = len(entries)
653 "[%s] playlist %s: Downloading %d videos" %
654 (ie_result['extractor'], playlist, n_entries))
656 for i, entry in enumerate(entries, 1):
657 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
# Per-entry extra info propagated into each recursive call.
659 'n_entries': n_entries,
660 'playlist': playlist,
661 'playlist_id': ie_result.get('id'),
662 'playlist_title': ie_result.get('title'),
663 'playlist_index': i + playliststart,
664 'extractor': ie_result['extractor'],
665 'webpage_url': ie_result['webpage_url'],
666 'webpage_url_basename': url_basename(ie_result['webpage_url']),
667 'extractor_key': ie_result['extractor_key'],
670 reason = self._match_entry(entry)
671 if reason is not None:
672 self.to_screen('[download] ' + reason)
675 entry_result = self.process_ie_result(entry,
678 playlist_results.append(entry_result)
679 ie_result['entries'] = playlist_results
681 elif result_type == 'compat_list':
# Legacy list-shaped results: stamp extractor info on each entry, then
# process them individually.
683 self.add_extra_info(r,
685 'extractor': ie_result['extractor'],
686 'webpage_url': ie_result['webpage_url'],
687 'webpage_url_basename': url_basename(ie_result['webpage_url']),
688 'extractor_key': ie_result['extractor_key'],
691 ie_result['entries'] = [
692 self.process_ie_result(_fixup(r), download, extra_info)
693 for r in ie_result['entries']
697 raise Exception('Invalid result type: %s' % result_type)
def select_format(self, format_spec, available_formats):
    """Pick a single format dict out of *available_formats*.

    *available_formats* is ordered worst-to-best (keyword 'best' returns
    the last element).  *format_spec* may be a quality keyword
    (best/worst/bestaudio/worstaudio/bestvideo/worstvideo), a known file
    extension, or a format_id.  Returns None when nothing matches.

    NOTE(review): the list-comprehension openers, the `if …:` guards, both
    `else:` branches and the final `return None` were missing from the
    sampled source; restored here.
    """
    if format_spec == 'best' or format_spec is None:
        return available_formats[-1]
    elif format_spec == 'worst':
        return available_formats[0]
    elif format_spec == 'bestaudio':
        # Audio-only formats are marked by vcodec == 'none'.
        audio_formats = [
            f for f in available_formats
            if f.get('vcodec') == 'none']
        if audio_formats:
            return audio_formats[-1]
    elif format_spec == 'worstaudio':
        audio_formats = [
            f for f in available_formats
            if f.get('vcodec') == 'none']
        if audio_formats:
            return audio_formats[0]
    elif format_spec == 'bestvideo':
        # Video-only formats are marked by acodec == 'none'.
        video_formats = [
            f for f in available_formats
            if f.get('acodec') == 'none']
        if video_formats:
            return video_formats[-1]
    elif format_spec == 'worstvideo':
        video_formats = [
            f for f in available_formats
            if f.get('acodec') == 'none']
        if video_formats:
            return video_formats[0]
    else:
        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
        if format_spec in extensions:
            # A bare extension means "best format with this container".
            filter_f = lambda f: f['ext'] == format_spec
        else:
            filter_f = lambda f: f['format_id'] == format_spec
        matches = list(filter(filter_f, available_formats))
        if matches:
            return matches[-1]
    return None
# NOTE(review): sampled source — stray line-number prefixes and many missing
# statements (the thumbnail loop header, `else:` branches, `if not formats:`,
# the `return` after delegated format selection, closing parentheses,
# `req_format = 'best'` default, the merged-format dict opener, `break`
# statements and `expected=True` on the ExtractorError). Annotations cover
# visible lines only.
739 def process_video_result(self, info_dict, download=True):
740 assert info_dict.get('_type', 'video') == 'video'
# Hard requirements on every extractor result.
742 if 'id' not in info_dict:
743 raise ExtractorError('Missing "id" field in extractor result')
744 if 'title' not in info_dict:
745 raise ExtractorError('Missing "title" field in extractor result')
747 if 'playlist' not in info_dict:
748 # It isn't part of a playlist
749 info_dict['playlist'] = None
750 info_dict['playlist_index'] = None
# Sort thumbnails worst-to-best and derive a resolution string per entry.
752 thumbnails = info_dict.get('thumbnails')
754 thumbnails.sort(key=lambda t: (
755 t.get('width'), t.get('height'), t.get('url')))
757 if 'width' in t and 'height' in t:
758 t['resolution'] = '%dx%d' % (t['width'], t['height'])
# Best (last) thumbnail becomes the default 'thumbnail' field.
760 if thumbnails and 'thumbnail' not in info_dict:
761 info_dict['thumbnail'] = thumbnails[-1]['url']
763 if 'display_id' not in info_dict and 'id' in info_dict:
764 info_dict['display_id'] = info_dict['id']
# Derive upload_date (YYYYMMDD, UTC) from a raw timestamp when absent.
766 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
767 upload_date = datetime.datetime.utcfromtimestamp(
768 info_dict['timestamp'])
769 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
771 # These extractors handle format selection themselves
772 if info_dict['extractor'] in ['Youku']:
774 self.process_info(info_dict)
777 # We now pick which formats have to be downloaded
778 if info_dict.get('formats') is None:
779 # There's only one format available
780 formats = [info_dict]
782 formats = info_dict['formats']
785 raise ExtractorError('No video formats found!')
787 # We check that all the formats have the format and format_id fields
788 for i, format in enumerate(formats):
789 if 'url' not in format:
790 raise ExtractorError('Missing "url" key in result (index %d)' % i)
792 if format.get('format_id') is None:
793 format['format_id'] = compat_str(i)
794 if format.get('format') is None:
795 format['format'] = '{id} - {res}{note}'.format(
796 id=format['format_id'],
797 res=self.format_resolution(format),
798 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
800 # Automatically determine file extension if missing
801 if 'ext' not in format:
802 format['ext'] = determine_ext(format['url']).lower()
# --format-limit: keep formats up to and including the limit id.
804 format_limit = self.params.get('format_limit', None)
806 formats = list(takewhile_inclusive(
807 lambda f: f['format_id'] != format_limit, formats
810 # TODO Central sorting goes here
812 if formats[0] is not info_dict:
813 # only set the 'formats' fields if the original info_dict list them
814 # otherwise we end up with a circular reference, the first (and unique)
815 # element in the 'formats' field in info_dict is info_dict itself,
816 # which can't be exported to json
817 info_dict['formats'] = formats
818 if self.params.get('listformats', None):
819 self.list_formats(info_dict)
822 req_format = self.params.get('format')
823 if req_format is None:
825 formats_to_download = []
826 # The -1 is for supporting YoutubeIE
827 if req_format in ('-1', 'all'):
828 formats_to_download = formats
830 for rfstr in req_format.split(','):
831 # We can accept formats requested in the format: 34/5/best, we pick
832 # the first that is available, starting from left
833 req_formats = rfstr.split('/')
834 for rf in req_formats:
835 if re.match(r'.+?\+.+?', rf) is not None:
836 # Two formats have been requested like '137+139'
837 format_1, format_2 = rf.split('+')
838 formats_info = (self.select_format(format_1, formats),
839 self.select_format(format_2, formats))
840 if all(formats_info):
841 # The first format must contain the video and the
843 if formats_info[0].get('vcodec') == 'none':
844 self.report_error('The first format must '
845 'contain the video, try using '
846 '"-f %s+%s"' % (format_2, format_1))
# Merged download: both halves recorded for the merger post-processor.
849 'requested_formats': formats_info,
851 'ext': formats_info[0]['ext'],
854 selected_format = None
856 selected_format = self.select_format(rf, formats)
857 if selected_format is not None:
858 formats_to_download.append(selected_format)
860 if not formats_to_download:
861 raise ExtractorError('requested format not available',
865 if len(formats_to_download) > 1:
866 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
867 for format in formats_to_download:
868 new_info = dict(info_dict)
869 new_info.update(format)
870 self.process_info(new_info)
871 # We update the info dict with the best quality format (backwards compatibility)
872 info_dict.update(formats_to_download[-1])
875 def process_info(self, info_dict):
876 """Process a single resolved IE result."""
878 assert info_dict.get('_type', 'video') == 'video'
880 max_downloads = self.params.get('max_downloads')
881 if max_downloads is not None:
882 if self._num_downloads >= int(max_downloads):
883 raise MaxDownloadsReached()
885 info_dict['fulltitle'] = info_dict['title']
886 if len(info_dict['title']) > 200:
887 info_dict['title'] = info_dict['title'][:197] + '...'
889 # Keep for backwards compatibility
890 info_dict['stitle'] = info_dict['title']
892 if 'format' not in info_dict:
893 info_dict['format'] = info_dict['ext']
895 reason = self._match_entry(info_dict)
896 if reason is not None:
897 self.to_screen('[download] ' + reason)
900 self._num_downloads += 1
902 filename = self.prepare_filename(info_dict)
905 if self.params.get('forcetitle', False):
906 self.to_stdout(info_dict['fulltitle'])
907 if self.params.get('forceid', False):
908 self.to_stdout(info_dict['id'])
909 if self.params.get('forceurl', False):
910 # For RTMP URLs, also include the playpath
911 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
912 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
913 self.to_stdout(info_dict['thumbnail'])
914 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
915 self.to_stdout(info_dict['description'])
916 if self.params.get('forcefilename', False) and filename is not None:
917 self.to_stdout(filename)
918 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
919 self.to_stdout(formatSeconds(info_dict['duration']))
920 if self.params.get('forceformat', False):
921 self.to_stdout(info_dict['format'])
922 if self.params.get('forcejson', False):
923 info_dict['_filename'] = filename
924 self.to_stdout(json.dumps(info_dict))
925 if self.params.get('dump_single_json', False):
926 info_dict['_filename'] = filename
928 # Do nothing else if in simulate mode
929 if self.params.get('simulate', False):
936 dn = os.path.dirname(encodeFilename(filename))
937 if dn and not os.path.exists(dn):
939 except (OSError, IOError) as err:
940 self.report_error('unable to create directory ' + compat_str(err))
943 if self.params.get('writedescription', False):
944 descfn = filename + '.description'
945 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
946 self.to_screen('[info] Video description is already present')
949 self.to_screen('[info] Writing video description to: ' + descfn)
950 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
951 descfile.write(info_dict['description'])
952 except (KeyError, TypeError):
953 self.report_warning('There\'s no description to write.')
954 except (OSError, IOError):
955 self.report_error('Cannot write description file ' + descfn)
958 if self.params.get('writeannotations', False):
959 annofn = filename + '.annotations.xml'
960 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
961 self.to_screen('[info] Video annotations are already present')
964 self.to_screen('[info] Writing video annotations to: ' + annofn)
965 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
966 annofile.write(info_dict['annotations'])
967 except (KeyError, TypeError):
968 self.report_warning('There are no annotations to write.')
969 except (OSError, IOError):
970 self.report_error('Cannot write annotations file: ' + annofn)
973 subtitles_are_requested = any([self.params.get('writesubtitles', False),
974 self.params.get('writeautomaticsub')])
976 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
977 # subtitles download errors are already managed as troubles in relevant IE
978 # that way it will silently go on when used with unsupporting IE
979 subtitles = info_dict['subtitles']
980 sub_format = self.params.get('subtitlesformat', 'srt')
981 for sub_lang in subtitles.keys():
982 sub = subtitles[sub_lang]
986 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
987 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
988 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
990 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
991 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
993 except (OSError, IOError):
994 self.report_error('Cannot write subtitles file ' + sub_filename)
997 if self.params.get('writeinfojson', False):
998 infofn = os.path.splitext(filename)[0] + '.info.json'
999 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1000 self.to_screen('[info] Video description metadata is already present')
1002 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1004 write_json_file(info_dict, encodeFilename(infofn))
1005 except (OSError, IOError):
1006 self.report_error('Cannot write metadata to JSON file ' + infofn)
1009 if self.params.get('writethumbnail', False):
1010 if info_dict.get('thumbnail') is not None:
1011 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
1012 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
1013 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1014 self.to_screen('[%s] %s: Thumbnail is already present' %
1015 (info_dict['extractor'], info_dict['id']))
1017 self.to_screen('[%s] %s: Downloading thumbnail ...' %
1018 (info_dict['extractor'], info_dict['id']))
1020 uf = self.urlopen(info_dict['thumbnail'])
1021 with open(thumb_filename, 'wb') as thumbf:
1022 shutil.copyfileobj(uf, thumbf)
1023 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1024 (info_dict['extractor'], info_dict['id'], thumb_filename))
1025 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1026 self.report_warning('Unable to download thumbnail "%s": %s' %
1027 (info_dict['thumbnail'], compat_str(err)))
1029 if not self.params.get('skip_download', False):
1030 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1035 fd = get_suitable_downloader(info)(self, self.params)
1036 for ph in self._progress_hooks:
1037 fd.add_progress_hook(ph)
1038 if self.params.get('verbose'):
1039 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1040 return fd.download(name, info)
1041 if info_dict.get('requested_formats') is not None:
1044 merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1045 if not merger._executable:
1047 self.report_warning('You have requested multiple '
1048 'formats but ffmpeg or avconv are not installed.'
1049 ' The formats won\'t be merged')
1051 postprocessors = [merger]
1052 for f in info_dict['requested_formats']:
1053 new_info = dict(info_dict)
1055 fname = self.prepare_filename(new_info)
1056 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1057 downloaded.append(fname)
1058 partial_success = dl(fname, new_info)
1059 success = success and partial_success
1060 info_dict['__postprocessors'] = postprocessors
1061 info_dict['__files_to_merge'] = downloaded
1063 # Just a single file
1064 success = dl(filename, info_dict)
1065 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1066 self.report_error('unable to download video data: %s' % str(err))
1068 except (OSError, IOError) as err:
1069 raise UnavailableVideoError(err)
1070 except (ContentTooShortError, ) as err:
1071 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1076 self.post_process(filename, info_dict)
1077 except (PostProcessingError) as err:
1078 self.report_error('postprocessing: %s' % str(err))
1081 self.record_download_archive(info_dict)
1083 def download(self, url_list):
1084 """Download a given list of URLs."""
1085 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1086 if (len(url_list) > 1 and
1088 and self.params.get('max_downloads') != 1):
1089 raise SameFileError(outtmpl)
1091 for url in url_list:
1093 #It also downloads the videos
1094 res = self.extract_info(url)
1095 except UnavailableVideoError:
1096 self.report_error('unable to download video')
1097 except MaxDownloadsReached:
1098 self.to_screen('[info] Maximum number of downloaded files reached.')
1101 if self.params.get('dump_single_json', False):
1102 self.to_stdout(json.dumps(res))
1104 return self._download_retcode
1106 def download_with_info_file(self, info_filename):
1107 with io.open(info_filename, 'r', encoding='utf-8') as f:
1110 self.process_ie_result(info, download=True)
1111 except DownloadError:
1112 webpage_url = info.get('webpage_url')
1113 if webpage_url is not None:
1114 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1115 return self.download([webpage_url])
1118 return self._download_retcode
    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file."""
        # Work on a copy so the caller's info dict is not mutated;
        # postprocessors receive the path under 'filepath'.
        info = dict(ie_info)
        info['filepath'] = filename
        # Per-download postprocessors (e.g. the ffmpeg merger attached in
        # process_info) run before the globally registered ones (self._pps).
        # NOTE(review): the initializations of `pps_chain` (list) and
        # `keep_video` (None) appear elided from this excerpt — confirm
        # against the full source.
        if ie_info.get('__postprocessors') is not None:
            pps_chain.extend(ie_info['__postprocessors'])
        pps_chain.extend(self._pps)
        for pp in pps_chain:
            # Each postprocessor reports whether the original file should be
            # kept.  NOTE(review): the `try:` matching the handler below seems
            # to be elided here.
            keep_video_wish, new_info = pp.run(info)
            if keep_video_wish is not None:
                    keep_video = keep_video_wish
                elif keep_video is None:
                    # No clear decision yet, let IE decide
                    keep_video = keep_video_wish
            except PostProcessingError as e:
                self.report_error(e.msg)
        # Delete the source file only when a postprocessor explicitly said so
        # and the user did not pass --keep-video.
        if keep_video is False and not self.params.get('keepvideo', False):
                self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
                os.remove(encodeFilename(filename))
            except (IOError, OSError):
                # Best effort: a failed delete is only a warning.
                self.report_warning('Unable to remove downloaded video file')
1147 def _make_archive_id(self, info_dict):
1148 # Future-proof against any change in case
1149 # and backwards compatibility with prior versions
1150 extractor = info_dict.get('extractor_key')
1151 if extractor is None:
1152 if 'id' in info_dict:
1153 extractor = info_dict.get('ie_key') # key in a playlist
1154 if extractor is None:
1155 return None # Incomplete video information
1156 return extractor.lower() + ' ' + info_dict['id']
    def in_download_archive(self, info_dict):
        """Return True when this video is already listed in the --download-archive file."""
        fn = self.params.get('download_archive')
        # NOTE(review): the early `return False` when no archive file is
        # configured appears elided from this excerpt.
        vid_id = self._make_archive_id(info_dict)
            return False  # Incomplete video information
        # One archive id per line; compare against stripped lines.
        # NOTE(review): the `try:` for the IOError handler and the
        # `return True` on a match seem to be elided here.
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            for line in archive_file:
                if line.strip() == vid_id:
        except IOError as ioe:
            # A missing archive file just means "not recorded yet";
            # any other I/O failure should propagate.
            if ioe.errno != errno.ENOENT:
    def record_download_archive(self, info_dict):
        """Append this video's archive id to the --download-archive file."""
        fn = self.params.get('download_archive')
        # NOTE(review): the guard returning early when no archive file is
        # configured appears elided from this excerpt.
        vid_id = self._make_archive_id(info_dict)
        # One id per line, matching the stripped-line comparison done by
        # in_download_archive(); locked_file serializes concurrent writers.
        with locked_file(fn, 'a', encoding='utf-8') as archive_file:
            archive_file.write(vid_id + '\n')
1187 def format_resolution(format, default='unknown'):
1188 if format.get('vcodec') == 'none':
1190 if format.get('resolution') is not None:
1191 return format['resolution']
1192 if format.get('height') is not None:
1193 if format.get('width') is not None:
1194 res = '%sx%s' % (format['width'], format['height'])
1196 res = '%sp' % format['height']
1197 elif format.get('width') is not None:
1198 res = '?x%d' % format['width']
    def _format_note(self, fdict):
        """Build a short human-readable note for a format dict: container,
        codecs, bitrates (tbr/vbr/abr), fps, sample rate and file size.

        NOTE(review): the initialization of `res` (empty string) and several
        branch bodies (separators, 'video only'/'audio' labels) appear elided
        from this excerpt — confirm against the full source.
        """
        # f4f/f4m (HDS) fragments cannot be downloaded directly.
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        # Total bitrate.
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            res += '%s container' % fdict['container']
        # Video codec (when present and not audio-only).
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            res += ', %sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if fdict['acodec'] == 'none':
            res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
        # Audio bitrate and sample rate.
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        # Exact file size wins over the approximate one.
        if fdict.get('filesize') is not None:
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            res += '~' + format_bytes(fdict['filesize_approx'])
    def list_formats(self, info_dict):
        """Print a table of the available formats for this video to the screen."""
        def line(format, idlen=20):
            # One table row: format code, (extension,) resolution, note.
            # NOTE(review): the extension column and the closing of this
            # expression appear elided from this excerpt.
            return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
                format['format_id'],
                self.format_resolution(format),
                self._format_note(format),

        # A plain video info dict (no 'formats' list) is shown as one row.
        formats = info_dict.get('formats', [info_dict])
        # Column width: at least as wide as the header label.
        idlen = max(len('format code'),
                    max(len(f['format_id']) for f in formats))
        formats_s = [line(f, idlen) for f in formats]
        if len(formats) > 1:
            # Formats are ordered worst-to-best; annotate the two ends.
            formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
            formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'

        header_line = line({
            'format_id': 'format code', 'ext': 'extension',
            'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
        self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                       (info_dict['id'], header_line, '\n'.join(formats_s)))
    def urlopen(self, req):
        """ Start an HTTP download """
        # `req` is either a URL string or a urllib Request object.

        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
        url = req if req_is_string else req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            # NOTE(review): the branch substituting a plain string URL appears
            # elided here; the visible code rebuilds the Request with the
            # escaped URL while preserving data, headers and origin info.
            req = compat_urllib_request.Request(
                url_escaped, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

        # All requests go through the shared opener built by _setup_opener().
        return self._opener.open(req, timeout=self._socket_timeout)
    def print_debug_header(self):
        """Write debug information (encodings, versions, git revision, proxies)
        when --verbose is enabled.

        NOTE(review): several lines (the early return after the verbose check,
        the `encoding_str = (` opener, the try/except around the git call and
        the exe-version join) appear elided from this excerpt.
        """
        if not self.params.get('verbose'):
        # `type('')` differs from compat_str only on broken 2.x builds.
        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # sys.stdout may lack .encoding (e.g. when replaced by a wrapper).
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        # Best effort: report the git revision when running from a checkout.
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: ' + out + '\n')
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        # Versions of the external tools we may shell out to.
        exe_versions = FFmpegPostProcessor.get_versions()
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_str = ', '.join(
            for exe, v in sorted(exe_versions.items())
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Merge the proxies declared by every handler in the opener.
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
    def _setup_opener(self):
        """Build the urllib opener (cookies, proxies, HTTPS handling) used by
        urlopen() and store it on self._opener.

        NOTE(review): a few `else:` branches and call arguments (cookie file
        name, HTTPCookieProcessor argument, the empty-proxy branch) appear
        elided from this excerpt.
        """
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout: 600 seconds.
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        # In-memory cookie jar unless a cookie file was given; an existing,
        # readable file is loaded so session cookies persist across runs.
        if opts_cookiefile is None:
            self.cookiejar = compat_cookiejar.CookieJar()
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        # --proxy wins over environment proxies; the same proxy is used for
        # both http and https.
        if opts_proxy is not None:
            if opts_proxy == '':
                proxies = {'http': opts_proxy, 'https': opts_proxy}
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = compat_urllib_request.ProxyHandler(proxies)

        # --dump-traffic support: debuglevel=1 makes urllib print wire traffic.
        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(
            self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(debuglevel=debuglevel)
        opener = compat_urllib_request.build_opener(
            https_handler, proxy_handler, cookie_processor, ydlh)
        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
1395 def encode(self, s):
1396 if isinstance(s, bytes):
1397 return s # Already encoded
1400 return s.encode(self.get_encoding())
1401 except UnicodeEncodeError as err:
1402 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1405 def get_encoding(self):
1406 encoding = self.params.get('encoding')
1407 if encoding is None:
1408 encoding = preferredencoding()