2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
29 compat_urllib_request,
51 UnavailableVideoError,
58 from .extractor import get_info_extractor, gen_extractors
59 from .downloader import get_suitable_downloader
60 from .postprocessor import FFmpegMergerPP
61 from .version import __version__
class YoutubeDL(object):
    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, a task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceduration:     Force printing duration.
    forcejson:         Force printing info_dict as JSON.
    simulate:          Do not download the video files.
    format:            Video format code.
    format_limit:      Highest quality format to try.
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    nooverwrites:      Prevent overwriting files.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       None to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
    cookiefile:        File name where cookies should be read from and dumped to.
    nocheckcertificate:Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    proxy:             URL of the proxy server to use
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing

    The following parameters are not used by YoutubeDL itself, they are used by
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle

    The following options are used by the post processors:
    prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
                       otherwise prefer avconv.

    # Overall process return code; set to 1 when an error is tolerated
    # under 'ignoreerrors' (see trouble()).
    _download_retcode = None
    # Count of videos processed so far; drives the %(autonumber)s output
    # template field and the 'max_downloads' limit.
    _num_downloads = None
def __init__(self, params=None):
    """Create a FileDownloader object with the given options."""
    # NOTE(review): several lines of this method are elided in this excerpt;
    # comments below describe only the visible code.
    # Registry of instantiated InfoExtractors, keyed by their ie_key().
    self._ies_instances = {}
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Screen output goes to stderr when 'logtostderr' is set (the boolean
    # indexes the two-element list).
    self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
    self._err_file = sys.stderr

    if params.get('bidi_workaround', False):
        # Work around terminals lacking bidirectional-text support by piping
        # screen output through an external filter: 'bidiv' first, falling
        # back to 'fribidi'. A pty pair carries the filter's output back.
        master, slave = pty.openpty()
        width = get_term_width()
        width_args = ['-w', str(width)]
        stdin=subprocess.PIPE,
        stderr=self._err_file)
        self._output_process = subprocess.Popen(
            ['bidiv'] + width_args, **sp_kwargs
        self._output_process = subprocess.Popen(
            ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
        # Read side of the pty; _bidi_workaround() reads filtered lines here.
        self._output_channel = os.fdopen(master, 'rb')
    except OSError as ose:
        # Neither filter executable was found: warn and continue unfiltered.
        self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

    # On Python 3 with an ASCII filesystem encoding, force restricted
    # filenames unless the user explicitly configured them.
    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params['restrictfilenames']):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        'Assuming --restrict-filenames since file system encoding '
        'cannot encode all charactes. '
        'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    # %(stitle)s was a pre-sanitized title field; kept only as a deprecation warning.
    if '%(stitle)s' in self.params.get('outtmpl', ''):
        self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
def add_info_extractor(self, ie):
    """Add an InfoExtractor object to the end of the list."""
    # Cache the instance by key and complete the "mutual registration"
    # described in the class docstring.
    self._ies_instances[ie.ie_key()] = ie
    ie.set_downloader(self)
def get_info_extractor(self, ie_key):
    Get an instance of an IE with name ie_key, it will try to get one from
    the _ies list, if there's no instance it will create a new one and add
    it to the extractor list.
    # Look up a cached instance first; on a miss, instantiate via the
    # module-level get_info_extractor() factory and register it.
    ie = self._ies_instances.get(ie_key)
    ie = get_info_extractor(ie_key)()
    self.add_info_extractor(ie)
def add_default_info_extractors(self):
    Add the InfoExtractors returned by gen_extractors to the end of the list
    # Register every known extractor in gen_extractors() order (order
    # matters: the first suitable extractor wins).
    for ie in gen_extractors():
        self.add_info_extractor(ie)
def add_post_processor(self, pp):
    """Add a PostProcessor object to the end of the chain."""
    # Post-processors also get a back-reference to this downloader.
    pp.set_downloader(self)
def add_progress_hook(self, ph):
    """Register a progress hook (currently only for the file downloader)."""
    hooks = self._progress_hooks
    hooks.append(ph)
def _bidi_workaround(self, message):
    # When the bidi filter was not set up in __init__, there is nothing to
    # do (the early-return line is elided in this excerpt).
    if not hasattr(self, '_output_channel'):

    assert hasattr(self, '_output_process')
    assert type(message) == type('')
    # Round-trip the message through the external bidi filter process:
    # write it line-by-line to the filter's stdin, then read the same
    # number of (reordered) lines back from the pty channel.
    line_count = message.count('\n') + 1
    self._output_process.stdin.write((message + '\n').encode('utf-8'))
    self._output_process.stdin.flush()
    res = ''.join(self._output_channel.readline().decode('utf-8')
                  for _ in range(line_count))
    # Drop the trailing newline we appended above.
    return res[:-len('\n')]
def to_screen(self, message, skip_eol=False):
    """Write *message* to the screen output, suppressed in quiet mode."""
    return self.to_stdout(
        message, skip_eol=skip_eol, check_quiet=True)
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to stdout if not in quiet mode."""
    # A configured logger takes precedence over direct screen output.
    if self.params.get('logger'):
        self.params['logger'].debug(message)
    elif not check_quiet or not self.params.get('quiet', False):
        # Reorder bidirectional text if the workaround is active.
        message = self._bidi_workaround(message)
        # skip_eol=True suppresses the trailing newline.
        terminator = ['\n', ''][skip_eol]
        output = message + terminator

        write_string(output, self._screen_file)
def to_stderr(self, message):
    """Print message to stderr."""
    assert type(message) == type('')
    # A configured logger takes precedence; otherwise write to stderr
    # directly (the 'else:' line is elided in this excerpt).
    if self.params.get('logger'):
        self.params['logger'].error(message)
        message = self._bidi_workaround(message)
        output = message + '\n'
        write_string(output, self._err_file)
def to_console_title(self, message):
    # No-op unless the user enabled console-title updates.
    if not self.params.get('consoletitle', False):
    # On Windows, set the title via the Win32 API when a console window exists.
    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    elif 'TERM' in os.environ:
        # xterm OSC 0 escape: set both icon name and window title.
        write_string('\033]0;%s\007' % message, self._screen_file)
def save_console_title(self):
    # No-op unless the user enabled console-title updates.
    if not self.params.get('consoletitle', False):
    if 'TERM' in os.environ:
        # Save the title on stack
        # (xterm 'push title' control sequence; paired with restore_console_title)
        write_string('\033[22;0t', self._screen_file)
def restore_console_title(self):
    # No-op unless the user enabled console-title updates.
    if not self.params.get('consoletitle', False):
    if 'TERM' in os.environ:
        # Restore the title from stack
        # (xterm 'pop title' control sequence; paired with save_console_title)
        write_string('\033[23;0t', self._screen_file)
# NOTE(review): this line is the visible body of a context-manager
# __enter__ whose 'def' line is not shown in this excerpt.
self.save_console_title()
def __exit__(self, *args):
    # Undo save_console_title() from __enter__.
    self.restore_console_title()

    # Persist cookies on exit when a cookie jar file was configured.
    if self.params.get('cookiefile') is not None:
        self.cookiejar.save()
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    Depending on if the downloader has been configured to ignore
    download errors or not, this method may throw an exception or
    not when errors are found, after printing the message.

    tb, if given, is additional traceback information.
    if message is not None:
        self.to_stderr(message)
    if self.params.get('verbose'):
        if sys.exc_info()[0]:  # if .trouble has been called from an except block
            # Prefer the exception's own attached exc_info (set by some
            # wrapped errors) before the current traceback.
            if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
            tb += compat_str(traceback.format_exc())
        # Not inside an except block: fall back to the current call stack.
        tb_data = traceback.format_list(traceback.extract_stack())
        tb = ''.join(tb_data)
    if not self.params.get('ignoreerrors', False):
        # Re-raise as DownloadError, preferring the wrapped exc_info when present.
        if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
            exc_info = sys.exc_info()[1].exc_info
            exc_info = sys.exc_info()
        raise DownloadError(message, exc_info)
    # ignoreerrors: record failure in the process return code and continue.
    self._download_retcode = 1
def report_warning(self, message):
    Print the message to stderr, it will be prefixed with 'WARNING:'
    If stderr is a tty file the 'WARNING:' will be colored
    # A configured logger takes precedence over stderr output.
    if self.params.get('logger') is not None:
        self.params['logger'].warning(message)
    # 'no_warnings' silences warnings entirely (early return elided here).
    if self.params.get('no_warnings'):
    # ANSI-color the prefix only on a tty, and never on Windows consoles.
    if self._err_file.isatty() and os.name != 'nt':
        _msg_header = '\033[0;33mWARNING:\033[0m'
        _msg_header = 'WARNING:'
    warning_message = '%s %s' % (_msg_header, message)
    self.to_stderr(warning_message)
def report_error(self, message, tb=None):
    Do the same as trouble, but prefixes the message with 'ERROR:', colored
    in red if stderr is a tty file.
    # ANSI-color the prefix only on a tty, and never on Windows consoles.
    if self._err_file.isatty() and os.name != 'nt':
        _msg_header = '\033[0;31mERROR:\033[0m'
        _msg_header = 'ERROR:'
    error_message = '%s %s' % (_msg_header, message)
    # Delegate the raise-or-continue decision to trouble().
    self.trouble(error_message, tb)
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded."""
    self.to_screen('[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        # Terminal cannot encode the filename: fall back to a generic message.
        self.to_screen('[download] The file has already been downloaded')
def prepare_filename(self, info_dict):
    """Generate the output filename."""
    # Work on a copy so template-only fields never leak into info_dict.
    template_dict = dict(info_dict)

    template_dict['epoch'] = int(time.time())
    # %(autonumber)s is zero-padded to 'autonumber_size' digits.
    autonumber_size = self.params.get('autonumber_size')
    if autonumber_size is None:
    autonumber_templ = '%0' + str(autonumber_size) + 'd'
    template_dict['autonumber'] = autonumber_templ % self._num_downloads
    if template_dict.get('playlist_index') is not None:
        template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']

    # Derive a human-readable 'resolution' when the extractor did not set one.
    if template_dict.get('resolution') is None:
        if template_dict.get('width') and template_dict.get('height'):
            template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
        elif template_dict.get('height'):
            template_dict['resolution'] = '%sp' % template_dict['height']
        elif template_dict.get('width'):
            # NOTE(review): this renders the *width* after the 'x' (e.g.
            # '?x1280'), which reads as an unknown width and a known height —
            # '%dx?' would match the height-only branch; confirm upstream.
            template_dict['resolution'] = '?x%d' % template_dict['width']

    sanitize = lambda k, v: sanitize_filename(
        restricted=self.params.get('restrictfilenames'),
    template_dict = dict((k, sanitize(k, v))
                         for k, v in template_dict.items()
    # Any field the template references but the dict lacks renders as 'NA'.
    template_dict = collections.defaultdict(lambda: 'NA', template_dict)

    tmpl = os.path.expanduser(self.params['outtmpl'])
    filename = tmpl % template_dict
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
def _match_entry(self, info_dict):
    """ Returns None iff the file should be downloaded """

    video_title = info_dict.get('title', info_dict.get('id', 'video'))
    # Title-based filters only apply when a title is known.
    if 'title' in info_dict:
        # This can happen when we're just evaluating the playlist
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if not re.search(matchtitle, title, re.IGNORECASE):
            return '"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if re.search(rejecttitle, title, re.IGNORECASE):
            return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    # Date-range filter (a default DateRange accepts everything).
    date = info_dict.get('upload_date', None)
    dateRange = self.params.get('daterange', DateRange())
    if date not in dateRange:
        return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    # View-count filters only apply when the extractor reported a count.
    view_count = info_dict.get('view_count', None)
    if view_count is not None:
        min_views = self.params.get('min_views')
        if min_views is not None and view_count < min_views:
            return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
        max_views = self.params.get('max_views')
        if max_views is not None and view_count > max_views:
            return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
    # Age gate: skip when the video's limit exceeds the configured user age.
    age_limit = self.params.get('age_limit')
    if age_limit is not None:
        if age_limit < info_dict.get('age_limit', 0):
            return 'Skipping "' + title + '" because it is age restricted'
    # Skip anything already recorded in the download archive.
    if self.in_download_archive(info_dict):
        return '%s has already been recorded in archive' % video_title
def add_extra_info(info_dict, extra_info):
    '''Copy entries from extra_info into info_dict, never overwriting
    a key that info_dict already has.'''
    for extra_key in extra_info:
        if extra_key not in info_dict:
            info_dict[extra_key] = extra_info[extra_key]
def extract_info(self, url, download=True, ie_key=None, extra_info={},
    Returns a list with a dictionary for each video we find.
    If 'download', also downloads the videos.
    extra_info is a dict containing the extra values to add to each result
    # NOTE(review): extra_info={} is a shared mutable default argument;
    # the visible code only reads/forwards it, but worth confirming no
    # caller mutates it.
    # With an explicit ie_key, consult only that extractor.
    ies = [self.get_info_extractor(ie_key)]

    # Skip extractors that do not claim this URL.
    if not ie.suitable(url):

    self.report_warning('The program functionality for this site has been marked as broken, '
                        'and will probably not work.')

    ie_result = ie.extract(url)
    if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        '_type': 'compat_list',
        'entries': ie_result,
    # Stamp provenance fields (extractor name/key, originating URL).
    self.add_default_extra_info(ie_result, ie, url)

    return self.process_ie_result(ie_result, download, extra_info)
    except ExtractorError as de: # An error we somewhat expected
        self.report_error(compat_str(de), de.format_traceback())
    except MaxDownloadsReached:
    except Exception as e:
        # Only swallow unexpected errors when 'ignoreerrors' is set.
        if self.params.get('ignoreerrors', False):
            self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
    # No extractor claimed the URL.
    self.report_error('no suitable InfoExtractor for URL %s' % url)
def add_default_extra_info(self, ie_result, ie, url):
    # Record which extractor produced the result and where it came from;
    # existing keys in ie_result are preserved (add_extra_info semantics).
    self.add_extra_info(ie_result, {
        'extractor': ie.IE_NAME,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
def process_ie_result(self, ie_result, download=True, extra_info={}):
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.
    # NOTE(review): extra_info={} is a shared mutable default argument.
    result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        def make_result(embedded_info):
            # Start from the embedding result and copy selected fields
            # from the embedded one when they are present.
            new_result = ie_result.copy()
            for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                      'entries', 'ie_key', 'duration',
                      'subtitles', 'annotations', 'format',
                      'thumbnail', 'thumbnails'):
                if f in embedded_info:
                    new_result[f] = embedded_info[f]
        new_result = make_result(info)

        assert new_result.get('_type') != 'url_transparent'
        if new_result.get('_type') == 'compat_list':
            new_result['entries'] = [
                make_result(e) for e in new_result['entries']]

        # Recurse to resolve the merged result.
        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type == 'playlist':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        # Convert the 1-based user options to a Python slice.
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', None)
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:

        # Entries may be a concrete list or a lazily-evaluated PagedList.
        if isinstance(ie_result['entries'], list):
            n_all_entries = len(ie_result['entries'])
            entries = ie_result['entries'][playliststart:playlistend]
            n_entries = len(entries)
            "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
            (ie_result['extractor'], playlist, n_all_entries, n_entries))
            assert isinstance(ie_result['entries'], PagedList)
            entries = ie_result['entries'].getslice(
                playliststart, playlistend)
            n_entries = len(entries)
            "[%s] playlist %s: Downloading %d videos" %
            (ie_result['extractor'], playlist, n_entries))

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
            # Extra provenance fields attached to every playlist entry.
            'playlist': playlist,
            'playlist_index': i + playliststart,
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],

            # Apply the user filters (title/date/views/archive) per entry.
            reason = self._match_entry(entry)
            if reason is not None:
                self.to_screen('[download] ' + reason)

            entry_result = self.process_ie_result(entry,
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
    elif result_type == 'compat_list':
        # Legacy list-of-dicts result: fix up each entry, then process it.
        self.add_extra_info(r,
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        # Unknown '_type' values are a programming error.
        raise Exception('Invalid result type: %s' % result_type)
def select_format(self, format_spec, available_formats):
    # 'available_formats' is ordered worst-to-best, so [-1] is the best
    # candidate and [0] the worst.
    if format_spec == 'best' or format_spec is None:
        return available_formats[-1]
    elif format_spec == 'worst':
        return available_formats[0]
    elif format_spec == 'bestaudio':
        # Audio-only formats are those with no video codec.
        f for f in available_formats
        if f.get('vcodec') == 'none']
        return audio_formats[-1]
    elif format_spec == 'worstaudio':
        f for f in available_formats
        if f.get('vcodec') == 'none']
        return audio_formats[0]
    elif format_spec == 'bestvideo':
        # Video-only formats are those with no audio codec.
        f for f in available_formats
        if f.get('acodec') == 'none']
        return video_formats[-1]
    elif format_spec == 'worstvideo':
        f for f in available_formats
        if f.get('acodec') == 'none']
        return video_formats[0]
    # Otherwise interpret the spec as an extension or an exact format_id.
    extensions = ['mp4', 'flv', 'webm', '3gp']
    if format_spec in extensions:
        filter_f = lambda f: f['ext'] == format_spec
        filter_f = lambda f: f['format_id'] == format_spec
    matches = list(filter(filter_f, available_formats))
def process_video_result(self, info_dict, download=True):
    """Fill in defaults on a resolved 'video' result, pick the requested
    format(s) and hand each one to process_info()."""
    assert info_dict.get('_type', 'video') == 'video'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    # Derive upload_date (YYYYMMDD) from a raw timestamp when absent.
    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        upload_date = datetime.datetime.utcfromtimestamp(
            info_dict['timestamp'])
        info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

    # These extractors handle format selection themselves
    if info_dict['extractor'] in ['Youku']:
        self.process_info(info_dict)

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
        formats = info_dict['formats']

        raise ExtractorError('No video formats found!')

    # We check that all the formats have the format and format_id fields
    for i, format in enumerate(formats):
        if format.get('format_id') is None:
            format['format_id'] = compat_str(i)
        if format.get('format') is None:
            format['format'] = '{id} - {res}{note}'.format(
                id=format['format_id'],
                res=self.format_resolution(format),
                note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
        # Automatically determine file extension if missing
        if 'ext' not in format:
            format['ext'] = determine_ext(format['url'])

    # 'format_limit' caps quality: keep formats up to and including the limit.
    format_limit = self.params.get('format_limit', None)
    formats = list(takewhile_inclusive(
        lambda f: f['format_id'] != format_limit, formats

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # only set the 'formats' field if the original info_dict lists them,
        # otherwise we end up with a circular reference: the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats', None):
        self.list_formats(info_dict)

    req_format = self.params.get('format')
    if req_format is None:
    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
        # We can accept formats requested in the format: 34/5/best, we pick
        # the first that is available, starting from left
        req_formats = req_format.split('/')
        for rf in req_formats:
            if re.match(r'.+?\+.+?', rf) is not None:
                # Two formats have been requested like '137+139'
                format_1, format_2 = rf.split('+')
                formats_info = (self.select_format(format_1, formats),
                                self.select_format(format_2, formats))
                # Only merge when both halves resolved to a format.
                if all(formats_info):
                    'requested_formats': formats_info,
                    'ext': formats_info[0]['ext'],
                selected_format = None
                selected_format = self.select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download = [selected_format]
    if not formats_to_download:
        raise ExtractorError('requested format not available',

    if len(formats_to_download) > 1:
        self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
    for format in formats_to_download:
        # Each selected format is processed as its own merged info dict.
        new_info = dict(info_dict)
        new_info.update(format)
        self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
def process_info(self, info_dict):
    """Process a single resolved IE result."""

    assert info_dict.get('_type', 'video') == 'video'

    # Enforce the 'max_downloads' cap before doing any work.
    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # Preserve the untruncated title; cap 'title' at 200 characters.
    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + '...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if not 'format' in info_dict:
        info_dict['format'] = info_dict['ext']

    # Apply user filters (title/date/views/archive); a non-None reason skips.
    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen('[download] ' + reason)

    self._num_downloads += 1

    filename = self.prepare_filename(info_dict)

    # Forced printings requested via the --force-* / --print style options.
    if self.params.get('forcetitle', False):
        self.to_stdout(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        self.to_stdout(info_dict['id'])
    if self.params.get('forceurl', False):
        # For RTMP URLs, also include the playpath
        self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        self.to_stdout(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        self.to_stdout(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    if self.params.get('forceformat', False):
        self.to_stdout(info_dict['format'])
    if self.params.get('forcejson', False):
        info_dict['_filename'] = filename
        self.to_stdout(json.dumps(info_dict))

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):

    # Ensure the target directory exists before writing anything.
    dn = os.path.dirname(encodeFilename(filename))
    if dn != '' and not os.path.exists(dn):
    except (OSError, IOError) as err:
        self.report_error('unable to create directory ' + compat_str(err))

    if self.params.get('writedescription', False):
        descfn = filename + '.description'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
            self.to_screen('[info] Writing video description to: ' + descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(info_dict['description'])
        except (KeyError, TypeError):
            # 'description' missing or not a string: warn, don't abort.
            self.report_warning('There\'s no description to write.')
        except (OSError, IOError):
            self.report_error('Cannot write description file ' + descfn)

    if self.params.get('writeannotations', False):
        annofn = filename + '.annotations.xml'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
            self.to_screen('[info] Writing video annotations to: ' + annofn)
            with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                annofile.write(info_dict['annotations'])
        except (KeyError, TypeError):
            self.report_warning('There are no annotations to write.')
        except (OSError, IOError):
            self.report_error('Cannot write annotations file: ' + annofn)

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
            sub_filename = subtitles_filename(filename, sub_lang, sub_format)
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
            except (OSError, IOError):
                # NOTE(review): this message interpolates 'descfn' (the
                # description filename) instead of 'sub_filename' — looks
                # like a copy-paste slip; confirm against upstream.
                self.report_error('Cannot write subtitles file ' + descfn)

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + '.info.json'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
            write_json_file(info_dict, encodeFilename(infofn))
        except (OSError, IOError):
            self.report_error('Cannot write metadata to JSON file ' + infofn)

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            # Thumbnail extension is guessed from the URL, defaulting to jpg.
            thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
            thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail is already present' %
                               (info_dict['extractor'], info_dict['id']))
                self.to_screen('[%s] %s: Downloading thumbnail ...' %
                               (info_dict['extractor'], info_dict['id']))
                uf = self.urlopen(info_dict['thumbnail'])
                with open(thumb_filename, 'wb') as thumbf:
                    shutil.copyfileobj(uf, thumbf)
                self.to_screen('[%s] %s: Writing thumbnail to: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_filename))
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                # Thumbnail failures are non-fatal.
                self.report_warning('Unable to download thumbnail "%s": %s' %
                                    (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            # NOTE(review): the next four lines are the body of a nested
            # download helper (its 'def dl(name, info):' line is not visible
            # in this excerpt); it builds a suitable FileDownloader, attaches
            # the registered progress hooks and runs the download.
            fd = get_suitable_downloader(info)(self, self.params)
            for ph in self._progress_hooks:
                fd.add_progress_hook(ph)
            return fd.download(name, info)
        if info_dict.get('requested_formats') is not None:
            # Two formats were requested (e.g. '137+139'): download both
            # separately, then merge them with ffmpeg/avconv afterwards.
            merger = FFmpegMergerPP(self)
            if not merger._get_executable():
                self.report_warning('You have requested multiple '
                                    'formats but ffmpeg or avconv are not installed.'
                                    ' The formats won\'t be merged')
                postprocessors = [merger]
            for f in info_dict['requested_formats']:
                new_info = dict(info_dict)
                # Give each half a distinguishing 'f<format_id>' extension.
                fname = self.prepare_filename(new_info)
                fname = prepend_extension(fname, 'f%s' % f['format_id'])
                downloaded.append(fname)
                partial_success = dl(fname, new_info)
                success = success and partial_success
            info_dict['__postprocessors'] = postprocessors
            info_dict['__files_to_merge'] = downloaded
            # Just a single file
            success = dl(filename, info_dict)
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self.report_error('unable to download video data: %s' % str(err))
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))

            self.post_process(filename, info_dict)
        except (PostProcessingError) as err:
            self.report_error('postprocessing: %s' % str(err))

        # Remember the finished download in the archive file, if configured.
        self.record_download_archive(info_dict)
def download(self, url_list):
    """Download a given list of URLs.

    Returns self._download_retcode (0 on full success).  Raises
    SameFileError up front when several URLs would be written to one
    fixed output template, and re-raises MaxDownloadsReached so the
    caller can stop the whole run.
    """
    # With more than one URL, a literal outtmpl (no '%' field) would make
    # every download overwrite the same file -- refuse early.  A single
    # allowed download (--max-downloads 1) cannot collide.
    if (len(url_list) > 1 and
            '%' not in self.params['outtmpl']
            and self.params.get('max_downloads') != 1):
        raise SameFileError(self.params['outtmpl'])

    for url in url_list:
        try:
            # It also downloads the videos
            self.extract_info(url)
        except UnavailableVideoError:
            # Keep going with the remaining URLs.
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            # Propagate so the surrounding loop/driver stops as well.
            raise

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Download the video described by a previously dumped .info.json file.

    Falls back to a fresh extraction from the recorded webpage_url when
    processing the stored info dict fails with a DownloadError; without
    a webpage_url the error is re-raised.  Returns self._download_retcode.
    """
    import json  # local so this block resolves regardless of file-top imports
    with io.open(info_filename, 'r', encoding='utf-8') as f:
        # TODO: Check for errors
        info = json.load(f)
    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        webpage_url = info.get('webpage_url')
        if webpage_url is not None:
            self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
            # Retry via the normal URL pipeline; its retcode is ours.
            return self.download([webpage_url])
        else:
            raise
    return self._download_retcode
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    Per-download postprocessors recorded under '__postprocessors' run
    before the globally registered ones (self._pps).  When every voting
    postprocessor says the original file is no longer needed and the
    user did not pass --keep-video, the original file is deleted.
    """
    info = dict(ie_info)  # work on a copy; callers keep their dict intact
    info['filepath'] = filename
    keep_video = None
    pps_chain = []
    if ie_info.get('__postprocessors') is not None:
        pps_chain.extend(ie_info['__postprocessors'])
    pps_chain.extend(self._pps)
    for pp in pps_chain:
        try:
            keep_video_wish, new_info = pp.run(info)
            if keep_video_wish is not None:
                if keep_video_wish:
                    # An explicit "keep" always wins.
                    keep_video = keep_video_wish
                elif keep_video is None:
                    # No clear decision yet, let IE decide
                    keep_video = keep_video_wish
        except PostProcessingError as e:
            # One failing postprocessor must not abort the chain.
            self.report_error(e.msg)
    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded video file')
1073 def _make_archive_id(self, info_dict):
1074 # Future-proof against any change in case
1075 # and backwards compatibility with prior versions
1076 extractor = info_dict.get('extractor_key')
1077 if extractor is None:
1078 if 'id' in info_dict:
1079 extractor = info_dict.get('ie_key') # key in a playlist
1080 if extractor is None:
1081 return None # Incomplete video information
1082 return extractor.lower() + ' ' + info_dict['id']
def in_download_archive(self, info_dict):
    """Return True when this video's archive id is already recorded in
    the --download-archive file; False when no archive is configured,
    the info dict is incomplete, or the id is absent."""
    fn = self.params.get('download_archive')
    if fn is None:
        return False

    vid_id = self._make_archive_id(info_dict)
    if vid_id is None:
        return False  # Incomplete video information

    try:
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            for line in archive_file:
                if line.strip() == vid_id:
                    return True
    except IOError as ioe:
        # A missing archive file simply means "nothing recorded yet".
        if ioe.errno != errno.ENOENT:
            raise
    return False
def record_download_archive(self, info_dict):
    """Append this video's archive id to the --download-archive file,
    if one is configured."""
    fn = self.params.get('download_archive')
    if fn is None:
        return
    vid_id = self._make_archive_id(info_dict)
    # Callers only record after a successful download, so the info dict
    # must be complete enough to produce an id.
    assert vid_id
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + '\n')
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a human-readable resolution string for a format dict:
    'audio only', an explicit 'resolution' value, 'WxH', 'Hp', '?xW',
    or *default* when nothing usable is present.

    NOTE: called as self.format_resolution(format) elsewhere in this
    class, hence the @staticmethod (the signature has no self).
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']
    if format.get('height') is not None:
        if format.get('width') is not None:
            res = '%sx%s' % (format['width'], format['height'])
        else:
            res = '%sp' % format['height']
    elif format.get('width') is not None:
        res = '?x%d' % format['width']
    else:
        res = default
    return res
def list_formats(self, info_dict):
    """Print a table of the available formats for info_dict to the screen
    (used by --list-formats).  Formats are assumed ordered worst-to-best,
    hence the '(worst)'/'(best)' markers on the first/last rows."""

    def format_note(fdict):
        # Build a comma-separated human-readable note for one format dict.
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # No codec name known, but separate video/audio bitrates exist.
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        return res

    def line(format, idlen=20):
        # One table row: id, extension, resolution, note -- column widths
        # derived from the longest format id.
        return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            format_note(format),
        ))

    # A bare info_dict without a 'formats' list is itself the only format.
    formats = info_dict.get('formats', [info_dict])
    idlen = max(len('format code'),
                max(len(f['format_id']) for f in formats))
    formats_s = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

    header_line = line({
        'format_id': 'format code', 'ext': 'extension',
        'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
    self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, '\n'.join(formats_s)))
def urlopen(self, req):
    """Start an HTTP download: open *req* through this instance's
    shared opener, honouring the configured socket timeout."""
    timeout = self._socket_timeout
    return self._opener.open(req, timeout=timeout)
def print_debug_header(self):
    """Write version / environment diagnostics to stderr.

    No-op unless --verbose was given.  Best-effort only: the git
    revision lookup may fail (not a checkout, git missing) and is
    silently skipped in that case.
    """
    if not self.params.get('verbose'):
        return
    write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        # Diagnostics only -- never let this break a real run.
        # (Replaces the historical bare except / sys.exc_clear pattern.)
        pass
    write_string('[debug] Python version %s - %s' %
                 (platform.python_version(), platform_name()) + '\n')

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
def _setup_opener(self):
    """Build the urllib opener used for all HTTP(S) requests.

    Configures, in order: socket timeout, cookie jar (in-memory or the
    Mozilla-format file given by --cookies), proxy handling (explicit
    --proxy, '' to disable, else environment proxies), and the custom
    HTTPS / YoutubeDL handlers.  Stores the result in self._opener.
    """
    timeout_val = self.params.get('socket_timeout')
    # 600s default keeps very slow servers working while still bounding hangs.
    self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
            opts_cookiefile)
        # Only load when readable; a fresh jar file may not exist yet.
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()
    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)
    if opts_proxy is not None:
        if opts_proxy == '':
            proxies = {}  # --proxy "" means: no proxy at all
        else:
            proxies = {'http': opts_proxy, 'https': opts_proxy}
    else:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(debuglevel=debuglevel)
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, ydlh)
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener