2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
29 compat_urllib_request,
51 UnavailableVideoError,
58 from .extractor import get_info_extractor, gen_extractors
59 from .downloader import get_suitable_downloader
60 from .postprocessor import FFmpegMergerPP
61 from .version import __version__
64 class YoutubeDL(object):
67 YoutubeDL objects are the ones responsible for downloading the
68 actual video file and writing it to disk if the user has requested
69 it, among some other tasks. In most cases there should be one per
70 program. As, given a video URL, the downloader doesn't know how to
71 extract all the needed information, task that InfoExtractors do, it
72 has to pass the URL to one of them.
74 For this, YoutubeDL objects have a method that allows
75 InfoExtractors to be registered in a given order. When it is passed
76 a URL, the YoutubeDL object hands it to the first InfoExtractor it
77 finds that reports being able to handle it. The InfoExtractor extracts
78 all the information about the video or videos the URL refers to, and
79 YoutubeDL process the extracted information, possibly using a File
80 Downloader to download the video.
82 YoutubeDL objects accept a lot of parameters. In order not to saturate
83 the object constructor with arguments, it receives a dictionary of
84 options instead. These options are available through the params
85 attribute for the InfoExtractors to use. The YoutubeDL also
86 registers itself as the downloader in charge for the InfoExtractors
87 that are added to it, so this is a "mutual registration".
91 username: Username for authentication purposes.
92 password: Password for authentication purposes.
93 videopassword:   Password for accessing a video.
94 usenetrc: Use netrc for authentication instead.
95 verbose: Print additional info to stdout.
96 quiet: Do not print messages to stdout.
97 forceurl: Force printing final URL.
98 forcetitle: Force printing title.
99 forceid: Force printing ID.
100 forcethumbnail: Force printing thumbnail URL.
101 forcedescription: Force printing description.
102 forcefilename: Force printing final filename.
103 forceduration: Force printing duration.
104 forcejson: Force printing info_dict as JSON.
105 simulate: Do not download the video files.
106 format: Video format code.
107 format_limit: Highest quality format to try.
108 outtmpl: Template for output names.
109 restrictfilenames: Do not allow "&" and spaces in file names
110 ignoreerrors: Do not stop on download errors.
111 nooverwrites: Prevent overwriting files.
112 playliststart: Playlist item to start at.
113 playlistend: Playlist item to end at.
114 matchtitle: Download only matching titles.
115 rejecttitle: Reject downloads for matching titles.
116 logger: Log messages to a logging.Logger instance.
117 logtostderr: Log messages to stderr instead of stdout.
118 writedescription: Write the video description to a .description file
119 writeinfojson: Write the video description to a .info.json file
120 writeannotations: Write the video annotations to a .annotations.xml file
121 writethumbnail: Write the thumbnail image to a file
122 writesubtitles: Write the video subtitles to a file
123 writeautomaticsub: Write the automatic subtitles to a file
124 allsubtitles: Downloads all the subtitles of the video
125 (requires writesubtitles or writeautomaticsub)
126 listsubtitles: Lists all available subtitles for the video
127 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
128 subtitleslangs: List of languages of the subtitles to download
129 keepvideo: Keep the video file after post-processing
130 daterange: A DateRange object, download only if the upload_date is in the range.
131 skip_download: Skip the actual download of the video file
132 cachedir: Location of the cache files in the filesystem.
133 None to disable filesystem cache.
134 noplaylist: Download single video instead of a playlist if in doubt.
135 age_limit: An integer representing the user's age in years.
136 Unsuitable videos for the given age are skipped.
137 min_views: An integer representing the minimum view count the video
138 must have in order to not be skipped.
139 Videos without view count information are always
140 downloaded. None for no limit.
141 max_views: An integer representing the maximum view count.
142 Videos that are more popular than that are not
144 Videos without view count information are always
145 downloaded. None for no limit.
146 download_archive: File name of a file where all downloads are recorded.
147 Videos already present in the file are not downloaded
149 cookiefile: File name where cookies should be read from and dumped to.
150 nocheckcertificate:Do not verify SSL certificates
151 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
152 At the moment, this is only supported by YouTube.
153 proxy: URL of the proxy server to use
154 socket_timeout: Time to wait for unresponsive hosts, in seconds
155 bidi_workaround: Work around buggy terminals without bidirectional text
156 support, using fribidi
157 debug_printtraffic:Print out sent and received HTTP traffic
158 include_ads: Download ads as well
159 default_search: Prepend this string if an input url is not valid.
160 'auto' for elaborate guessing
162 The following parameters are not used by YoutubeDL itself, they are used by
164 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
165 noresizebuffer, retries, continuedl, noprogress, consoletitle
167 The following options are used by the post processors:
168 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
169 otherwise prefer avconv.
175 _download_retcode = None
176 _num_downloads = None
179 def __init__(self, params=None):
180 """Create a FileDownloader object with the given options."""
184 self._ies_instances = {}
186 self._progress_hooks = []
187 self._download_retcode = 0
188 self._num_downloads = 0
189 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
190 self._err_file = sys.stderr
193 if params.get('bidi_workaround', False):
196 master, slave = pty.openpty()
197 width = get_term_width()
201 width_args = ['-w', str(width)]
203 stdin=subprocess.PIPE,
205 stderr=self._err_file)
207 self._output_process = subprocess.Popen(
208 ['bidiv'] + width_args, **sp_kwargs
211 self._output_process = subprocess.Popen(
212 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
213 self._output_channel = os.fdopen(master, 'rb')
214 except OSError as ose:
216 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
220 if (sys.version_info >= (3,) and sys.platform != 'win32' and
221 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
222 and not params['restrictfilenames']):
223 # On Python 3, the Unicode filesystem API will throw errors (#1474)
225 'Assuming --restrict-filenames since file system encoding '
226 'cannot encode all charactes. '
227 'Set the LC_ALL environment variable to fix this.')
228 self.params['restrictfilenames'] = True
230 if '%(stitle)s' in self.params.get('outtmpl', ''):
231 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
235 def add_info_extractor(self, ie):
236 """Add an InfoExtractor object to the end of the list."""
238 self._ies_instances[ie.ie_key()] = ie
239 ie.set_downloader(self)
241 def get_info_extractor(self, ie_key):
243 Get an instance of an IE with name ie_key, it will try to get one from
244 the _ies list, if there's no instance it will create a new one and add
245 it to the extractor list.
247 ie = self._ies_instances.get(ie_key)
249 ie = get_info_extractor(ie_key)()
250 self.add_info_extractor(ie)
253 def add_default_info_extractors(self):
255 Add the InfoExtractors returned by gen_extractors to the end of the list
257 for ie in gen_extractors():
258 self.add_info_extractor(ie)
260 def add_post_processor(self, pp):
261 """Add a PostProcessor object to the end of the chain."""
263 pp.set_downloader(self)
265 def add_progress_hook(self, ph):
266 """Add the progress hook (currently only for the file downloader)"""
267 self._progress_hooks.append(ph)
269 def _bidi_workaround(self, message):
270 if not hasattr(self, '_output_channel'):
273 assert hasattr(self, '_output_process')
274 assert type(message) == type('')
275 line_count = message.count('\n') + 1
276 self._output_process.stdin.write((message + '\n').encode('utf-8'))
277 self._output_process.stdin.flush()
278 res = ''.join(self._output_channel.readline().decode('utf-8')
279 for _ in range(line_count))
280 return res[:-len('\n')]
282 def to_screen(self, message, skip_eol=False):
283 """Print message to stdout if not in quiet mode."""
284 return self.to_stdout(message, skip_eol, check_quiet=True)
286 def to_stdout(self, message, skip_eol=False, check_quiet=False):
287 """Print message to stdout if not in quiet mode."""
288 if self.params.get('logger'):
289 self.params['logger'].debug(message)
290 elif not check_quiet or not self.params.get('quiet', False):
291 message = self._bidi_workaround(message)
292 terminator = ['\n', ''][skip_eol]
293 output = message + terminator
295 write_string(output, self._screen_file)
297 def to_stderr(self, message):
298 """Print message to stderr."""
299 assert type(message) == type('')
300 if self.params.get('logger'):
301 self.params['logger'].error(message)
303 message = self._bidi_workaround(message)
304 output = message + '\n'
305 write_string(output, self._err_file)
307 def to_console_title(self, message):
308 if not self.params.get('consoletitle', False):
310 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
311 # c_wchar_p() might not be necessary if `message` is
312 # already of type unicode()
313 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
314 elif 'TERM' in os.environ:
315 write_string('\033]0;%s\007' % message, self._screen_file)
317 def save_console_title(self):
318 if not self.params.get('consoletitle', False):
320 if 'TERM' in os.environ:
321 # Save the title on stack
322 write_string('\033[22;0t', self._screen_file)
324 def restore_console_title(self):
325 if not self.params.get('consoletitle', False):
327 if 'TERM' in os.environ:
328 # Restore the title from stack
329 write_string('\033[23;0t', self._screen_file)
332 self.save_console_title()
335 def __exit__(self, *args):
336 self.restore_console_title()
338 if self.params.get('cookiefile') is not None:
339 self.cookiejar.save()
341 def trouble(self, message=None, tb=None):
342 """Determine action to take when a download problem appears.
344 Depending on if the downloader has been configured to ignore
345 download errors or not, this method may throw an exception or
346 not when errors are found, after printing the message.
348 tb, if given, is additional traceback information.
350 if message is not None:
351 self.to_stderr(message)
352 if self.params.get('verbose'):
354 if sys.exc_info()[0]: # if .trouble has been called from an except block
356 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
357 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
358 tb += compat_str(traceback.format_exc())
360 tb_data = traceback.format_list(traceback.extract_stack())
361 tb = ''.join(tb_data)
363 if not self.params.get('ignoreerrors', False):
364 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
365 exc_info = sys.exc_info()[1].exc_info
367 exc_info = sys.exc_info()
368 raise DownloadError(message, exc_info)
369 self._download_retcode = 1
371 def report_warning(self, message):
373 Print the message to stderr, it will be prefixed with 'WARNING:'
374 If stderr is a tty file the 'WARNING:' will be colored
376 if self.params.get('logger') is not None:
377 self.params['logger'].warning(message)
379 if self._err_file.isatty() and os.name != 'nt':
380 _msg_header = '\033[0;33mWARNING:\033[0m'
382 _msg_header = 'WARNING:'
383 warning_message = '%s %s' % (_msg_header, message)
384 self.to_stderr(warning_message)
386 def report_error(self, message, tb=None):
388 Do the same as trouble, but prefixes the message with 'ERROR:', colored
389 in red if stderr is a tty file.
391 if self._err_file.isatty() and os.name != 'nt':
392 _msg_header = '\033[0;31mERROR:\033[0m'
394 _msg_header = 'ERROR:'
395 error_message = '%s %s' % (_msg_header, message)
396 self.trouble(error_message, tb)
398 def report_file_already_downloaded(self, file_name):
399 """Report file has already been fully downloaded."""
401 self.to_screen('[download] %s has already been downloaded' % file_name)
402 except UnicodeEncodeError:
403 self.to_screen('[download] The file has already been downloaded')
405 def prepare_filename(self, info_dict):
406 """Generate the output filename."""
408 template_dict = dict(info_dict)
410 template_dict['epoch'] = int(time.time())
411 autonumber_size = self.params.get('autonumber_size')
412 if autonumber_size is None:
414 autonumber_templ = '%0' + str(autonumber_size) + 'd'
415 template_dict['autonumber'] = autonumber_templ % self._num_downloads
416 if template_dict.get('playlist_index') is not None:
417 template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
418 if template_dict.get('resolution') is None:
419 if template_dict.get('width') and template_dict.get('height'):
420 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
421 elif template_dict.get('height'):
422 template_dict['resolution'] = '%sp' % template_dict['height']
423 elif template_dict.get('width'):
424 template_dict['resolution'] = '?x%d' % template_dict['width']
426 sanitize = lambda k, v: sanitize_filename(
428 restricted=self.params.get('restrictfilenames'),
430 template_dict = dict((k, sanitize(k, v))
431 for k, v in template_dict.items()
433 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
435 tmpl = os.path.expanduser(self.params['outtmpl'])
436 filename = tmpl % template_dict
438 except ValueError as err:
439 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
442 def _match_entry(self, info_dict):
443 """ Returns None iff the file should be downloaded """
445 video_title = info_dict.get('title', info_dict.get('id', 'video'))
446 if 'title' in info_dict:
447 # This can happen when we're just evaluating the playlist
448 title = info_dict['title']
449 matchtitle = self.params.get('matchtitle', False)
451 if not re.search(matchtitle, title, re.IGNORECASE):
452 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
453 rejecttitle = self.params.get('rejecttitle', False)
455 if re.search(rejecttitle, title, re.IGNORECASE):
456 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
457 date = info_dict.get('upload_date', None)
459 dateRange = self.params.get('daterange', DateRange())
460 if date not in dateRange:
461 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
462 view_count = info_dict.get('view_count', None)
463 if view_count is not None:
464 min_views = self.params.get('min_views')
465 if min_views is not None and view_count < min_views:
466 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
467 max_views = self.params.get('max_views')
468 if max_views is not None and view_count > max_views:
469 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
470 age_limit = self.params.get('age_limit')
471 if age_limit is not None:
472 if age_limit < info_dict.get('age_limit', 0):
473 return 'Skipping "' + title + '" because it is age restricted'
474 if self.in_download_archive(info_dict):
475 return '%s has already been recorded in archive' % video_title
479 def add_extra_info(info_dict, extra_info):
480 '''Set the keys from extra_info in info dict if they are missing'''
481 for key, value in extra_info.items():
482 info_dict.setdefault(key, value)
484 def extract_info(self, url, download=True, ie_key=None, extra_info={},
487 Returns a list with a dictionary for each video we find.
488 If 'download', also downloads the videos.
489 extra_info is a dict containing the extra values to add to each result
493 ies = [self.get_info_extractor(ie_key)]
498 if not ie.suitable(url):
502 self.report_warning('The program functionality for this site has been marked as broken, '
503 'and will probably not work.')
506 ie_result = ie.extract(url)
507 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
509 if isinstance(ie_result, list):
510 # Backwards compatibility: old IE result format
512 '_type': 'compat_list',
513 'entries': ie_result,
515 self.add_default_extra_info(ie_result, ie, url)
517 return self.process_ie_result(ie_result, download, extra_info)
520 except ExtractorError as de: # An error we somewhat expected
521 self.report_error(compat_str(de), de.format_traceback())
523 except MaxDownloadsReached:
525 except Exception as e:
526 if self.params.get('ignoreerrors', False):
527 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
532 self.report_error('no suitable InfoExtractor for URL %s' % url)
534 def add_default_extra_info(self, ie_result, ie, url):
535 self.add_extra_info(ie_result, {
536 'extractor': ie.IE_NAME,
538 'webpage_url_basename': url_basename(url),
539 'extractor_key': ie.ie_key(),
542 def process_ie_result(self, ie_result, download=True, extra_info={}):
544 Take the result of the ie(may be modified) and resolve all unresolved
545 references (URLs, playlist items).
547 It will also download the videos if 'download'.
548 Returns the resolved ie_result.
551 result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
552 if result_type == 'video':
553 self.add_extra_info(ie_result, extra_info)
554 return self.process_video_result(ie_result, download=download)
555 elif result_type == 'url':
556 # We have to add extra_info to the results because it may be
557 # contained in a playlist
558 return self.extract_info(ie_result['url'],
560 ie_key=ie_result.get('ie_key'),
561 extra_info=extra_info)
562 elif result_type == 'url_transparent':
563 # Use the information from the embedding page
564 info = self.extract_info(
565 ie_result['url'], ie_key=ie_result.get('ie_key'),
566 extra_info=extra_info, download=False, process=False)
568 def make_result(embedded_info):
569 new_result = ie_result.copy()
570 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
571 'entries', 'ie_key', 'duration',
572 'subtitles', 'annotations', 'format',
573 'thumbnail', 'thumbnails'):
576 if f in embedded_info:
577 new_result[f] = embedded_info[f]
579 new_result = make_result(info)
581 assert new_result.get('_type') != 'url_transparent'
582 if new_result.get('_type') == 'compat_list':
583 new_result['entries'] = [
584 make_result(e) for e in new_result['entries']]
586 return self.process_ie_result(
587 new_result, download=download, extra_info=extra_info)
588 elif result_type == 'playlist':
589 # We process each entry in the playlist
590 playlist = ie_result.get('title', None) or ie_result.get('id', None)
591 self.to_screen('[download] Downloading playlist: %s' % playlist)
593 playlist_results = []
595 playliststart = self.params.get('playliststart', 1) - 1
596 playlistend = self.params.get('playlistend', None)
597 # For backwards compatibility, interpret -1 as whole list
598 if playlistend == -1:
601 if isinstance(ie_result['entries'], list):
602 n_all_entries = len(ie_result['entries'])
603 entries = ie_result['entries'][playliststart:playlistend]
604 n_entries = len(entries)
606 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
607 (ie_result['extractor'], playlist, n_all_entries, n_entries))
609 assert isinstance(ie_result['entries'], PagedList)
610 entries = ie_result['entries'].getslice(
611 playliststart, playlistend)
612 n_entries = len(entries)
614 "[%s] playlist %s: Downloading %d videos" %
615 (ie_result['extractor'], playlist, n_entries))
617 for i, entry in enumerate(entries, 1):
618 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
620 'playlist': playlist,
621 'playlist_index': i + playliststart,
622 'extractor': ie_result['extractor'],
623 'webpage_url': ie_result['webpage_url'],
624 'webpage_url_basename': url_basename(ie_result['webpage_url']),
625 'extractor_key': ie_result['extractor_key'],
628 reason = self._match_entry(entry)
629 if reason is not None:
630 self.to_screen('[download] ' + reason)
633 entry_result = self.process_ie_result(entry,
636 playlist_results.append(entry_result)
637 ie_result['entries'] = playlist_results
639 elif result_type == 'compat_list':
641 self.add_extra_info(r,
643 'extractor': ie_result['extractor'],
644 'webpage_url': ie_result['webpage_url'],
645 'webpage_url_basename': url_basename(ie_result['webpage_url']),
646 'extractor_key': ie_result['extractor_key'],
649 ie_result['entries'] = [
650 self.process_ie_result(_fixup(r), download, extra_info)
651 for r in ie_result['entries']
655 raise Exception('Invalid result type: %s' % result_type)
657 def select_format(self, format_spec, available_formats):
658 if format_spec == 'best' or format_spec is None:
659 return available_formats[-1]
660 elif format_spec == 'worst':
661 return available_formats[0]
662 elif format_spec == 'bestaudio':
664 f for f in available_formats
665 if f.get('vcodec') == 'none']
667 return audio_formats[-1]
668 elif format_spec == 'worstaudio':
670 f for f in available_formats
671 if f.get('vcodec') == 'none']
673 return audio_formats[0]
674 elif format_spec == 'bestvideo':
676 f for f in available_formats
677 if f.get('acodec') == 'none']
679 return video_formats[-1]
680 elif format_spec == 'worstvideo':
682 f for f in available_formats
683 if f.get('acodec') == 'none']
685 return video_formats[0]
687 extensions = ['mp4', 'flv', 'webm', '3gp']
688 if format_spec in extensions:
689 filter_f = lambda f: f['ext'] == format_spec
691 filter_f = lambda f: f['format_id'] == format_spec
692 matches = list(filter(filter_f, available_formats))
697 def process_video_result(self, info_dict, download=True):
698 assert info_dict.get('_type', 'video') == 'video'
700 if 'playlist' not in info_dict:
701 # It isn't part of a playlist
702 info_dict['playlist'] = None
703 info_dict['playlist_index'] = None
705 if 'display_id' not in info_dict and 'id' in info_dict:
706 info_dict['display_id'] = info_dict['id']
708 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
709 upload_date = datetime.datetime.utcfromtimestamp(
710 info_dict['timestamp'])
711 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
713 # This extractors handle format selection themselves
714 if info_dict['extractor'] in ['Youku']:
716 self.process_info(info_dict)
719 # We now pick which formats have to be downloaded
720 if info_dict.get('formats') is None:
721 # There's only one format available
722 formats = [info_dict]
724 formats = info_dict['formats']
727 raise ExtractorError('No video formats found!')
729 # We check that all the formats have the format and format_id fields
730 for i, format in enumerate(formats):
731 if format.get('format_id') is None:
732 format['format_id'] = compat_str(i)
733 if format.get('format') is None:
734 format['format'] = '{id} - {res}{note}'.format(
735 id=format['format_id'],
736 res=self.format_resolution(format),
737 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
739 # Automatically determine file extension if missing
740 if 'ext' not in format:
741 format['ext'] = determine_ext(format['url'])
743 format_limit = self.params.get('format_limit', None)
745 formats = list(takewhile_inclusive(
746 lambda f: f['format_id'] != format_limit, formats
749 # TODO Central sorting goes here
751 if formats[0] is not info_dict:
752 # only set the 'formats' fields if the original info_dict list them
753 # otherwise we end up with a circular reference, the first (and unique)
754 # element in the 'formats' field in info_dict is info_dict itself,
755 # wich can't be exported to json
756 info_dict['formats'] = formats
757 if self.params.get('listformats', None):
758 self.list_formats(info_dict)
761 req_format = self.params.get('format')
762 if req_format is None:
764 formats_to_download = []
765 # The -1 is for supporting YoutubeIE
766 if req_format in ('-1', 'all'):
767 formats_to_download = formats
769 # We can accept formats requested in the format: 34/5/best, we pick
770 # the first that is available, starting from left
771 req_formats = req_format.split('/')
772 for rf in req_formats:
773 if re.match(r'.+?\+.+?', rf) is not None:
774 # Two formats have been requested like '137+139'
775 format_1, format_2 = rf.split('+')
776 formats_info = (self.select_format(format_1, formats),
777 self.select_format(format_2, formats))
778 if all(formats_info):
780 'requested_formats': formats_info,
782 'ext': formats_info[0]['ext'],
785 selected_format = None
787 selected_format = self.select_format(rf, formats)
788 if selected_format is not None:
789 formats_to_download = [selected_format]
791 if not formats_to_download:
792 raise ExtractorError('requested format not available',
796 if len(formats_to_download) > 1:
797 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
798 for format in formats_to_download:
799 new_info = dict(info_dict)
800 new_info.update(format)
801 self.process_info(new_info)
802 # We update the info dict with the best quality format (backwards compatibility)
803 info_dict.update(formats_to_download[-1])
806 def process_info(self, info_dict):
807 """Process a single resolved IE result."""
809 assert info_dict.get('_type', 'video') == 'video'
811 max_downloads = self.params.get('max_downloads')
812 if max_downloads is not None:
813 if self._num_downloads >= int(max_downloads):
814 raise MaxDownloadsReached()
816 info_dict['fulltitle'] = info_dict['title']
817 if len(info_dict['title']) > 200:
818 info_dict['title'] = info_dict['title'][:197] + '...'
820 # Keep for backwards compatibility
821 info_dict['stitle'] = info_dict['title']
823 if not 'format' in info_dict:
824 info_dict['format'] = info_dict['ext']
826 reason = self._match_entry(info_dict)
827 if reason is not None:
828 self.to_screen('[download] ' + reason)
831 self._num_downloads += 1
833 filename = self.prepare_filename(info_dict)
836 if self.params.get('forcetitle', False):
837 self.to_stdout(info_dict['fulltitle'])
838 if self.params.get('forceid', False):
839 self.to_stdout(info_dict['id'])
840 if self.params.get('forceurl', False):
841 # For RTMP URLs, also include the playpath
842 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
843 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
844 self.to_stdout(info_dict['thumbnail'])
845 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
846 self.to_stdout(info_dict['description'])
847 if self.params.get('forcefilename', False) and filename is not None:
848 self.to_stdout(filename)
849 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
850 self.to_stdout(formatSeconds(info_dict['duration']))
851 if self.params.get('forceformat', False):
852 self.to_stdout(info_dict['format'])
853 if self.params.get('forcejson', False):
854 info_dict['_filename'] = filename
855 self.to_stdout(json.dumps(info_dict))
857 # Do nothing else if in simulate mode
858 if self.params.get('simulate', False):
865 dn = os.path.dirname(encodeFilename(filename))
866 if dn != '' and not os.path.exists(dn):
868 except (OSError, IOError) as err:
869 self.report_error('unable to create directory ' + compat_str(err))
872 if self.params.get('writedescription', False):
873 descfn = filename + '.description'
874 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
875 self.to_screen('[info] Video description is already present')
878 self.to_screen('[info] Writing video description to: ' + descfn)
879 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
880 descfile.write(info_dict['description'])
881 except (KeyError, TypeError):
882 self.report_warning('There\'s no description to write.')
883 except (OSError, IOError):
884 self.report_error('Cannot write description file ' + descfn)
887 if self.params.get('writeannotations', False):
888 annofn = filename + '.annotations.xml'
889 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
890 self.to_screen('[info] Video annotations are already present')
893 self.to_screen('[info] Writing video annotations to: ' + annofn)
894 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
895 annofile.write(info_dict['annotations'])
896 except (KeyError, TypeError):
897 self.report_warning('There are no annotations to write.')
898 except (OSError, IOError):
899 self.report_error('Cannot write annotations file: ' + annofn)
902 subtitles_are_requested = any([self.params.get('writesubtitles', False),
903 self.params.get('writeautomaticsub')])
905 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
906 # subtitles download errors are already managed as troubles in relevant IE
907 # that way it will silently go on when used with unsupporting IE
908 subtitles = info_dict['subtitles']
909 sub_format = self.params.get('subtitlesformat', 'srt')
910 for sub_lang in subtitles.keys():
911 sub = subtitles[sub_lang]
915 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
916 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
917 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
919 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
920 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
922 except (OSError, IOError):
923 self.report_error('Cannot write subtitles file ' + descfn)
926 if self.params.get('writeinfojson', False):
927 infofn = os.path.splitext(filename)[0] + '.info.json'
928 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
929 self.to_screen('[info] Video description metadata is already present')
931 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
933 write_json_file(info_dict, encodeFilename(infofn))
934 except (OSError, IOError):
935 self.report_error('Cannot write metadata to JSON file ' + infofn)
938 if self.params.get('writethumbnail', False):
939 if info_dict.get('thumbnail') is not None:
940 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
941 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
942 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
943 self.to_screen('[%s] %s: Thumbnail is already present' %
944 (info_dict['extractor'], info_dict['id']))
946 self.to_screen('[%s] %s: Downloading thumbnail ...' %
947 (info_dict['extractor'], info_dict['id']))
949 uf = self.urlopen(info_dict['thumbnail'])
950 with open(thumb_filename, 'wb') as thumbf:
951 shutil.copyfileobj(uf, thumbf)
952 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
953 (info_dict['extractor'], info_dict['id'], thumb_filename))
954 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
955 self.report_warning('Unable to download thumbnail "%s": %s' %
956 (info_dict['thumbnail'], compat_str(err)))
958 if not self.params.get('skip_download', False):
959 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
964 fd = get_suitable_downloader(info)(self, self.params)
965 for ph in self._progress_hooks:
966 fd.add_progress_hook(ph)
967 return fd.download(name, info)
968 if info_dict.get('requested_formats') is not None:
971 merger = FFmpegMergerPP(self)
972 if not merger._get_executable():
974 self.report_warning('You have requested multiple '
975 'formats but ffmpeg or avconv are not installed.'
976 ' The formats won\'t be merged')
978 postprocessors = [merger]
979 for f in info_dict['requested_formats']:
980 new_info = dict(info_dict)
982 fname = self.prepare_filename(new_info)
983 fname = prepend_extension(fname, 'f%s' % f['format_id'])
984 downloaded.append(fname)
985 partial_success = dl(fname, new_info)
986 success = success and partial_success
987 info_dict['__postprocessors'] = postprocessors
988 info_dict['__files_to_merge'] = downloaded
991 success = dl(filename, info_dict)
992 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
993 self.report_error('unable to download video data: %s' % str(err))
995 except (OSError, IOError) as err:
996 raise UnavailableVideoError(err)
997 except (ContentTooShortError, ) as err:
998 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1003 self.post_process(filename, info_dict)
1004 except (PostProcessingError) as err:
1005 self.report_error('postprocessing: %s' % str(err))
1008 self.record_download_archive(info_dict)
1010 def download(self, url_list):
1011 """Download a given list of URLs."""
1012 if (len(url_list) > 1 and
1013 '%' not in self.params['outtmpl']
1014 and self.params.get('max_downloads') != 1):
1015 raise SameFileError(self.params['outtmpl'])
1017 for url in url_list:
1019 #It also downloads the videos
1020 self.extract_info(url)
1021 except UnavailableVideoError:
1022 self.report_error('unable to download video')
1023 except MaxDownloadsReached:
1024 self.to_screen('[info] Maximum number of downloaded files reached.')
1027 return self._download_retcode
1029 def download_with_info_file(self, info_filename):
1030 with io.open(info_filename, 'r', encoding='utf-8') as f:
1033 self.process_ie_result(info, download=True)
1034 except DownloadError:
1035 webpage_url = info.get('webpage_url')
1036 if webpage_url is not None:
1037 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1038 return self.download([webpage_url])
1041 return self._download_retcode
1043 def post_process(self, filename, ie_info):
1044 """Run all the postprocessors on the given file."""
1045 info = dict(ie_info)
1046 info['filepath'] = filename
1049 if ie_info.get('__postprocessors') is not None:
1050 pps_chain.extend(ie_info['__postprocessors'])
1051 pps_chain.extend(self._pps)
1052 for pp in pps_chain:
1054 keep_video_wish, new_info = pp.run(info)
1055 if keep_video_wish is not None:
1057 keep_video = keep_video_wish
1058 elif keep_video is None:
1059 # No clear decision yet, let IE decide
1060 keep_video = keep_video_wish
1061 except PostProcessingError as e:
1062 self.report_error(e.msg)
1063 if keep_video is False and not self.params.get('keepvideo', False):
1065 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1066 os.remove(encodeFilename(filename))
1067 except (IOError, OSError):
1068 self.report_warning('Unable to remove downloaded video file')
1070 def _make_archive_id(self, info_dict):
1071 # Future-proof against any change in case
1072 # and backwards compatibility with prior versions
1073 extractor = info_dict.get('extractor_key')
1074 if extractor is None:
1075 if 'id' in info_dict:
1076 extractor = info_dict.get('ie_key') # key in a playlist
1077 if extractor is None:
1078 return None # Incomplete video information
1079 return extractor.lower() + ' ' + info_dict['id']
1081 def in_download_archive(self, info_dict):
1082 fn = self.params.get('download_archive')
1086 vid_id = self._make_archive_id(info_dict)
1088 return False # Incomplete video information
1091 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1092 for line in archive_file:
1093 if line.strip() == vid_id:
1095 except IOError as ioe:
1096 if ioe.errno != errno.ENOENT:
1100 def record_download_archive(self, info_dict):
1101 fn = self.params.get('download_archive')
1104 vid_id = self._make_archive_id(info_dict)
1106 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1107 archive_file.write(vid_id + '\n')
1110 def format_resolution(format, default='unknown'):
1111 if format.get('vcodec') == 'none':
1113 if format.get('resolution') is not None:
1114 return format['resolution']
1115 if format.get('height') is not None:
1116 if format.get('width') is not None:
1117 res = '%sx%s' % (format['width'], format['height'])
1119 res = '%sp' % format['height']
1120 elif format.get('width') is not None:
1121 res = '?x%d' % format['width']
1126 def list_formats(self, info_dict):
1127 def format_note(fdict):
1129 if fdict.get('ext') in ['f4f', 'f4m']:
1130 res += '(unsupported) '
1131 if fdict.get('format_note') is not None:
1132 res += fdict['format_note'] + ' '
1133 if fdict.get('tbr') is not None:
1134 res += '%4dk ' % fdict['tbr']
1135 if fdict.get('container') is not None:
1138 res += '%s container' % fdict['container']
1139 if (fdict.get('vcodec') is not None and
1140 fdict.get('vcodec') != 'none'):
1143 res += fdict['vcodec']
1144 if fdict.get('vbr') is not None:
1146 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1148 if fdict.get('vbr') is not None:
1149 res += '%4dk' % fdict['vbr']
1150 if fdict.get('acodec') is not None:
1153 if fdict['acodec'] == 'none':
1156 res += '%-5s' % fdict['acodec']
1157 elif fdict.get('abr') is not None:
1161 if fdict.get('abr') is not None:
1162 res += '@%3dk' % fdict['abr']
1163 if fdict.get('asr') is not None:
1164 res += ' (%5dHz)' % fdict['asr']
1165 if fdict.get('filesize') is not None:
1168 res += format_bytes(fdict['filesize'])
1171 def line(format, idlen=20):
1172 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1173 format['format_id'],
1175 self.format_resolution(format),
1176 format_note(format),
1179 formats = info_dict.get('formats', [info_dict])
1180 idlen = max(len('format code'),
1181 max(len(f['format_id']) for f in formats))
1182 formats_s = [line(f, idlen) for f in formats]
1183 if len(formats) > 1:
1184 formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
1185 formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
1187 header_line = line({
1188 'format_id': 'format code', 'ext': 'extension',
1189 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1190 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1191 (info_dict['id'], header_line, '\n'.join(formats_s)))
1193 def urlopen(self, req):
1194 """ Start an HTTP download """
1195 return self._opener.open(req, timeout=self._socket_timeout)
1197 def print_debug_header(self):
1198 if not self.params.get('verbose'):
1200 write_string('[debug] youtube-dl version ' + __version__ + '\n')
1202 sp = subprocess.Popen(
1203 ['git', 'rev-parse', '--short', 'HEAD'],
1204 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1205 cwd=os.path.dirname(os.path.abspath(__file__)))
1206 out, err = sp.communicate()
1207 out = out.decode().strip()
1208 if re.match('[0-9a-f]+', out):
1209 write_string('[debug] Git HEAD: ' + out + '\n')
1215 write_string('[debug] Python version %s - %s' %
1216 (platform.python_version(), platform_name()) + '\n')
1219 for handler in self._opener.handlers:
1220 if hasattr(handler, 'proxies'):
1221 proxy_map.update(handler.proxies)
1222 write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1224 def _setup_opener(self):
1225 timeout_val = self.params.get('socket_timeout')
1226 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1228 opts_cookiefile = self.params.get('cookiefile')
1229 opts_proxy = self.params.get('proxy')
1231 if opts_cookiefile is None:
1232 self.cookiejar = compat_cookiejar.CookieJar()
1234 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1236 if os.access(opts_cookiefile, os.R_OK):
1237 self.cookiejar.load()
1239 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1241 if opts_proxy is not None:
1242 if opts_proxy == '':
1245 proxies = {'http': opts_proxy, 'https': opts_proxy}
1247 proxies = compat_urllib_request.getproxies()
1248 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1249 if 'http' in proxies and 'https' not in proxies:
1250 proxies['https'] = proxies['http']
1251 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1253 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1254 https_handler = make_HTTPS_handler(
1255 self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1256 ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1257 opener = compat_urllib_request.build_opener(
1258 https_handler, proxy_handler, cookie_processor, ydlh)
1259 # Delete the default user-agent header, which would otherwise apply in
1260 # cases where our custom HTTP handler doesn't come into play
1261 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1262 opener.addheaders = []
1263 self._opener = opener