2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
29 compat_urllib_request,
51 UnavailableVideoError,
58 from .extractor import get_info_extractor, gen_extractors
59 from .downloader import get_suitable_downloader
60 from .postprocessor import FFmpegMergerPP
61 from .version import __version__
class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object handles it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL process the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for access a video.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceduration:     Force printing duration.
    forcejson:         Force printing info_dict as JSON.
    simulate:          Do not download the video files.
    format:            Video format code.
    format_limit:      Highest quality format to try.
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    nooverwrites:      Prevent overwriting files.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       None to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    cookiefile:        File name where cookies should be read from and dumped to.
    nocheckcertificate:Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    proxy:             URL of the proxy server to use
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing

    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle

    The following options are used by the post processors:
    prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
                       otherwise prefer avconv.
    """

    # Exit code accumulated across downloads (set to 0 in __init__,
    # flipped to 1 by trouble() when errors are ignored).
    _download_retcode = None
    # Count of files downloaded so far; feeds the %(autonumber)s template.
    _num_downloads = None
def __init__(self, params=None):
    """Create a FileDownloader object with the given options."""
    # NOTE(review): several statements from the upstream version appear to be
    # missing in this copy (e.g. `params = {}` defaulting, self._ies /
    # self.params initialisation, and the try: around the bidi subprocess
    # setup) — confirm against upstream before relying on this.
    self._ies_instances = {}
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Status messages go to stdout, or to stderr when --logtostderr is set
    # (the boolean indexes the two-element list).
    self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
    self._err_file = sys.stderr

    if params.get('bidi_workaround', False):
        # Route screen output through an external bidi filter process
        # (bidiv, falling back to fribidi) connected via a pty.
        master, slave = pty.openpty()
        width = get_term_width()
        width_args = ['-w', str(width)]
        # NOTE(review): the sp_kwargs = dict(...) literal these keyword
        # arguments belong to is not visible in this copy.
        stdin=subprocess.PIPE,
        stderr=self._err_file)
        self._output_process = subprocess.Popen(
            ['bidiv'] + width_args, **sp_kwargs
        self._output_process = subprocess.Popen(
            ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
        self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # Neither bidiv nor fribidi is installed: warn and carry on
            # without the workaround.
            self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params['restrictfilenames']):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        # NOTE(review): the self.report_warning( call these string fragments
        # belong to is not visible in this copy.
        'Assuming --restrict-filenames since file system encoding '
        'cannot encode all charactes. '
        'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    if '%(stitle)s' in self.params.get('outtmpl', ''):
        self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
def add_info_extractor(self, ie):
    """Add an InfoExtractor object to the end of the list."""
    # NOTE(review): the docstring mentions an ordered list, but only the
    # keyed registry is populated here — upstream also appends ie to a
    # self._ies list; confirm whether that line was lost in this copy.
    self._ies_instances[ie.ie_key()] = ie
    # Mutual registration: the IE gets a back-reference to this downloader.
    ie.set_downloader(self)
241 def get_info_extractor(self, ie_key):
243 Get an instance of an IE with name ie_key, it will try to get one from
244 the _ies list, if there's no instance it will create a new one and add
245 it to the extractor list.
247 ie = self._ies_instances.get(ie_key)
249 ie = get_info_extractor(ie_key)()
250 self.add_info_extractor(ie)
def add_default_info_extractors(self):
    """Add the InfoExtractors returned by gen_extractors to the end of the list."""
    # (The docstring above was a bare prose line in the previous copy —
    # a SyntaxError — because its triple-quote delimiters were lost.)
    for ie in gen_extractors():
        self.add_info_extractor(ie)
def add_post_processor(self, pp):
    """Add a PostProcessor object to the end of the chain."""
    # NOTE(review): upstream also appends pp to a self._pps chain; only the
    # back-reference registration is visible in this copy — confirm.
    pp.set_downloader(self)
265 def add_progress_hook(self, ph):
266 """Add the progress hook (currently only for the file downloader)"""
267 self._progress_hooks.append(ph)
def _bidi_workaround(self, message):
    # Filter *message* through the external bidi process set up in
    # __init__ and return the filtered text.
    if not hasattr(self, '_output_channel'):
        # NOTE(review): upstream returns `message` unchanged here when the
        # bidi workaround is not active; the early return is missing in
        # this copy, leaving this `if` without a body.
    assert hasattr(self, '_output_process')
    assert type(message) == type('')
    line_count = message.count('\n') + 1
    # Feed the message through the subprocess, then read back exactly as
    # many lines as were sent.
    self._output_process.stdin.write((message + '\n').encode('utf-8'))
    self._output_process.stdin.flush()
    res = ''.join(self._output_channel.readline().decode('utf-8')
                  for _ in range(line_count))
    # Drop the trailing newline that was appended before writing.
    return res[:-len('\n')]
282 def to_screen(self, message, skip_eol=False):
283 """Print message to stdout if not in quiet mode."""
284 return self.to_stdout(message, skip_eol, check_quiet=True)
286 def to_stdout(self, message, skip_eol=False, check_quiet=False):
287 """Print message to stdout if not in quiet mode."""
288 if self.params.get('logger'):
289 self.params['logger'].debug(message)
290 elif not check_quiet or not self.params.get('quiet', False):
291 message = self._bidi_workaround(message)
292 terminator = ['\n', ''][skip_eol]
293 output = message + terminator
295 write_string(output, self._screen_file)
297 def to_stderr(self, message):
298 """Print message to stderr."""
299 assert type(message) == type('')
300 if self.params.get('logger'):
301 self.params['logger'].error(message)
303 message = self._bidi_workaround(message)
304 output = message + '\n'
305 write_string(output, self._err_file)
307 def to_console_title(self, message):
308 if not self.params.get('consoletitle', False):
310 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
311 # c_wchar_p() might not be necessary if `message` is
312 # already of type unicode()
313 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
314 elif 'TERM' in os.environ:
315 write_string('\033]0;%s\007' % message, self._screen_file)
317 def save_console_title(self):
318 if not self.params.get('consoletitle', False):
320 if 'TERM' in os.environ:
321 # Save the title on stack
322 write_string('\033[22;0t', self._screen_file)
324 def restore_console_title(self):
325 if not self.params.get('consoletitle', False):
327 if 'TERM' in os.environ:
328 # Restore the title from stack
329 write_string('\033[23;0t', self._screen_file)
# NOTE(review): orphaned statement — this appears to be the body of a
# context-manager __enter__ whose `def` line is missing from this copy;
# confirm against upstream.
self.save_console_title()
335 def __exit__(self, *args):
336 self.restore_console_title()
338 if self.params.get('cookiefile') is not None:
339 self.cookiejar.save()
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    Depending on if the downloader has been configured to ignore
    download errors or not, this method may throw an exception or
    not when errors are found, after printing the message.

    tb, if given, is additional traceback information.
    """
    if message is not None:
        self.to_stderr(message)
    if self.params.get('verbose'):
        # NOTE(review): upstream wraps this in `if tb is None:` with
        # else-branches and a final self.to_stderr(tb); those lines are not
        # visible in this copy.
        if sys.exc_info()[0]: # if .trouble has been called from an except block
            # A wrapped exception (e.g. DownloadError) may carry the
            # original traceback in its exc_info attribute.
            if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
            tb += compat_str(traceback.format_exc())
            tb_data = traceback.format_list(traceback.extract_stack())
            tb = ''.join(tb_data)
    if not self.params.get('ignoreerrors', False):
        if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
            exc_info = sys.exc_info()[1].exc_info
            # NOTE(review): an `else:` selecting the plain sys.exc_info()
            # presumably precedes the next line in upstream.
            exc_info = sys.exc_info()
        raise DownloadError(message, exc_info)
    # Errors are being ignored: remember the failure in the exit code.
    self._download_retcode = 1
371 def report_warning(self, message):
373 Print the message to stderr, it will be prefixed with 'WARNING:'
374 If stderr is a tty file the 'WARNING:' will be colored
376 if self.params.get('logger') is not None:
377 self.params['logger'].warning(message)
379 if self._err_file.isatty() and os.name != 'nt':
380 _msg_header = '\033[0;33mWARNING:\033[0m'
382 _msg_header = 'WARNING:'
383 warning_message = '%s %s' % (_msg_header, message)
384 self.to_stderr(warning_message)
386 def report_error(self, message, tb=None):
388 Do the same as trouble, but prefixes the message with 'ERROR:', colored
389 in red if stderr is a tty file.
391 if self._err_file.isatty() and os.name != 'nt':
392 _msg_header = '\033[0;31mERROR:\033[0m'
394 _msg_header = 'ERROR:'
395 error_message = '%s %s' % (_msg_header, message)
396 self.trouble(error_message, tb)
398 def report_file_already_downloaded(self, file_name):
399 """Report file has already been fully downloaded."""
401 self.to_screen('[download] %s has already been downloaded' % file_name)
402 except UnicodeEncodeError:
403 self.to_screen('[download] The file has already been downloaded')
def prepare_filename(self, info_dict):
    """Generate the output filename."""
    # NOTE(review): upstream wraps this body in try/except (ValueError,
    # KeyError) and returns `filename` / None; the try: line, the
    # `autonumber_size = 5` default, parts of the sanitize lambda and both
    # returns are not visible in this copy.
    # Work on a copy so the caller's info_dict is not mutated.
    template_dict = dict(info_dict)

    template_dict['epoch'] = int(time.time())
    autonumber_size = self.params.get('autonumber_size')
    if autonumber_size is None:
    autonumber_templ = '%0' + str(autonumber_size) + 'd'
    template_dict['autonumber'] = autonumber_templ % self._num_downloads
    if template_dict.get('playlist_index') is not None:
        # Zero-pad the playlist index to keep lexicographic file order.
        template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
    # Synthesise a 'resolution' string from width/height when absent.
    if template_dict.get('resolution') is None:
        if template_dict.get('width') and template_dict.get('height'):
            template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
        elif template_dict.get('height'):
            template_dict['resolution'] = '%sp' % template_dict['height']
        elif template_dict.get('width'):
            template_dict['resolution'] = '?x%d' % template_dict['width']

    # NOTE(review): this lambda is truncated — the value argument and the
    # is_id keyword are missing in this copy.
    sanitize = lambda k, v: sanitize_filename(
        restricted=self.params.get('restrictfilenames'),
    template_dict = dict((k, sanitize(k, v))
                         for k, v in template_dict.items()
    # Missing template keys render as 'NA' instead of raising KeyError.
    template_dict = collections.defaultdict(lambda: 'NA', template_dict)

    tmpl = os.path.expanduser(self.params['outtmpl'])
    filename = tmpl % template_dict
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
def _match_entry(self, info_dict):
    """ Returns None iff the file should be downloaded """
    # Each filter below returns a human-readable skip reason (a string);
    # falling through means the entry passes all filters.
    # NOTE(review): the `if matchtitle:`, `if rejecttitle:`,
    # `if date is not None:` guards and the final `return None` are not
    # visible in this copy.
    video_title = info_dict.get('title', info_dict.get('id', 'video'))
    if 'title' in info_dict:
        # This can happen when we're just evaluating the playlist
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if not re.search(matchtitle, title, re.IGNORECASE):
            return '"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if re.search(rejecttitle, title, re.IGNORECASE):
            return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    date = info_dict.get('upload_date', None)
    dateRange = self.params.get('daterange', DateRange())
    if date not in dateRange:
        return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    view_count = info_dict.get('view_count', None)
    if view_count is not None:
        min_views = self.params.get('min_views')
        if min_views is not None and view_count < min_views:
            return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
        max_views = self.params.get('max_views')
        if max_views is not None and view_count > max_views:
            return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
    age_limit = self.params.get('age_limit')
    if age_limit is not None:
        if age_limit < info_dict.get('age_limit', 0):
            return 'Skipping "' + title + '" because it is age restricted'
    if self.in_download_archive(info_dict):
        return '%s has already been recorded in archive' % video_title
def add_extra_info(info_dict, extra_info):
    '''Set the keys from extra_info in info dict if they are missing'''
    # Existing keys win: only fill in entries that are not already present.
    for extra_key, extra_value in extra_info.items():
        if extra_key not in info_dict:
            info_dict[extra_key] = extra_value
def extract_info(self, url, download=True, ie_key=None, extra_info={},
    # NOTE(review): the `process=True):` continuation of this signature,
    # the docstring delimiters and much of the control flow (`for ie in
    # ies:`, `try:`, else-branches, break/return lines) are not visible in
    # this copy; the method cannot run as transcribed. Also note the
    # mutable default `extra_info={}` — upstream relies on never mutating
    # it; confirm.
    Returns a list with a dictionary for each video we find.
    If 'download', also downloads the videos.
    extra_info is a dict containing the extra values to add to each result
    # With an explicit ie_key only that extractor is tried; upstream
    # otherwise iterates self._ies.
    ies = [self.get_info_extractor(ie_key)]
    if not ie.suitable(url):
    self.report_warning('The program functionality for this site has been marked as broken, '
                        'and will probably not work.')
    ie_result = ie.extract(url)
    if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        '_type': 'compat_list',
        'entries': ie_result,
    # Tag the result with provenance information used later in templates.
    self.add_extra_info(ie_result,
        'extractor': ie.IE_NAME,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    return self.process_ie_result(ie_result, download, extra_info)
    except ExtractorError as de: # An error we somewhat expected
        self.report_error(compat_str(de), de.format_traceback())
    except MaxDownloadsReached:
    except Exception as e:
    if self.params.get('ignoreerrors', False):
        self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
    # Reached when no registered extractor accepts the URL.
    self.report_error('no suitable InfoExtractor for URL %s' % url)
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.
    """
    # NOTE(review): several structural lines are missing from this copy
    # (dict-literal openers/closers, else: branches, the playlistend=None
    # reset, `continue`, the `def _fixup(r):` header and the final
    # `return ie_result` statements) — it cannot run as transcribed.
    result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        def make_result(embedded_info):
            # Overlay selected fields of the embedded result on a copy of
            # the embedding result.
            # NOTE(review): upstream also deletes each field from the copy
            # before overlaying, and returns new_result; those lines are
            # missing here.
            new_result = ie_result.copy()
            for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                      'entries', 'ie_key', 'duration',
                      'subtitles', 'annotations', 'format',
                      'thumbnail', 'thumbnails'):
                if f in embedded_info:
                    new_result[f] = embedded_info[f]
        new_result = make_result(info)

        # A url_transparent result must not resolve to another
        # url_transparent one, or we would loop.
        assert new_result.get('_type') != 'url_transparent'
        if new_result.get('_type') == 'compat_list':
            new_result['entries'] = [
                make_result(e) for e in new_result['entries']]

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type == 'playlist':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        # playliststart is 1-based for the user; convert to 0-based slice.
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', None)
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            # NOTE(review): upstream sets playlistend = None here.

        if isinstance(ie_result['entries'], list):
            n_all_entries = len(ie_result['entries'])
            entries = ie_result['entries'][playliststart:playlistend]
            n_entries = len(entries)
            # NOTE(review): the self.to_screen( opener for these two lines
            # is not visible in this copy.
            "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
            (ie_result['extractor'], playlist, n_all_entries, n_entries))
        # NOTE(review): an else: branch (lazy PagedList case) presumably
        # starts here.
        assert isinstance(ie_result['entries'], PagedList)
        entries = ie_result['entries'].getslice(
            playliststart, playlistend)
        n_entries = len(entries)
        # NOTE(review): the self.to_screen( opener for these two lines is
        # not visible in this copy.
        "[%s] playlist %s: Downloading %d videos" %
        (ie_result['extractor'], playlist, n_entries))

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
            # NOTE(review): the `extra = {` opener for the following
            # provenance entries is not visible in this copy.
            'playlist': playlist,
            'playlist_index': i + playliststart,
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],

            reason = self._match_entry(entry)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                # NOTE(review): upstream `continue`s to the next entry here.

            # NOTE(review): the download/extra_info arguments of this call
            # are not visible in this copy.
            entry_result = self.process_ie_result(entry,
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
    elif result_type == 'compat_list':
        # NOTE(review): upstream defines a local `def _fixup(r):` helper
        # here; its header and the dict-literal braces are not visible.
        self.add_extra_info(r,
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
    # NOTE(review): the comprehension closer, returns, and the final
    # `else:` guarding this raise are not visible in this copy.
    raise Exception('Invalid result type: %s' % result_type)
def select_format(self, format_spec, available_formats):
    """Pick one entry of *available_formats* matching *format_spec*.

    available_formats is assumed sorted worst-to-best (upstream
    convention — TODO confirm at call sites). Supported specs:
    'best'/None, 'worst', 'bestaudio'/'worstaudio' (audio-only, i.e.
    vcodec == 'none'), 'bestvideo'/'worstvideo' (video-only, i.e.
    acodec == 'none'), a known extension, or a format_id. Returns the
    selected format dict, or None when nothing matches.

    (The previous copy had lost the list-comprehension openers, the
    `if …:` guards around the returns and the final else/matches block,
    leaving it syntactically invalid; this restores the full dispatch.)
    """
    if format_spec == 'best' or format_spec is None:
        return available_formats[-1]
    elif format_spec == 'worst':
        return available_formats[0]
    elif format_spec == 'bestaudio':
        audio_formats = [
            f for f in available_formats
            if f.get('vcodec') == 'none']
        if audio_formats:
            return audio_formats[-1]
    elif format_spec == 'worstaudio':
        audio_formats = [
            f for f in available_formats
            if f.get('vcodec') == 'none']
        if audio_formats:
            return audio_formats[0]
    elif format_spec == 'bestvideo':
        video_formats = [
            f for f in available_formats
            if f.get('acodec') == 'none']
        if video_formats:
            return video_formats[-1]
    elif format_spec == 'worstvideo':
        video_formats = [
            f for f in available_formats
            if f.get('acodec') == 'none']
        if video_formats:
            return video_formats[0]
    else:
        extensions = ['mp4', 'flv', 'webm', '3gp']
        # A bare extension selects by 'ext'; anything else by 'format_id'.
        if format_spec in extensions:
            filter_f = lambda f: f['ext'] == format_spec
        else:
            filter_f = lambda f: f['format_id'] == format_spec
        matches = list(filter(filter_f, available_formats))
        if matches:
            return matches[-1]
    # No matching format: callers treat None as "requested format not
    # available".
    return None
def process_video_result(self, info_dict, download=True):
    # Normalise a single resolved 'video' result, select the requested
    # format(s) and hand each off to process_info().
    # NOTE(review): multiple structural lines are missing from this copy
    # (`if download:` guards, else: branches, `if not formats:` before the
    # raise, `req_format = 'best'` default, break/return statements, the
    # selected_format dict opener) — it cannot run as transcribed.
    assert info_dict.get('_type', 'video') == 'video'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    # Derive upload_date (YYYYMMDD) from a numeric timestamp when only
    # the latter was extracted.
    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        upload_date = datetime.datetime.utcfromtimestamp(
            info_dict['timestamp'])
        info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

    # This extractors handle format selection themselves
    if info_dict['extractor'] in ['Youku']:
        # NOTE(review): upstream guards this with `if download:` and
        # returns info_dict afterwards.
        self.process_info(info_dict)

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    # NOTE(review): an else: presumably precedes the next line.
    formats = info_dict['formats']

    # NOTE(review): upstream checks `if not formats:` before this raise.
    raise ExtractorError('No video formats found!')

    # We check that all the formats have the format and format_id fields
    for i, format in enumerate(formats):
        if format.get('format_id') is None:
            format['format_id'] = compat_str(i)
        if format.get('format') is None:
            format['format'] = '{id} - {res}{note}'.format(
                id=format['format_id'],
                res=self.format_resolution(format),
                note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
        # Automatically determine file extension if missing
        if 'ext' not in format:
            format['ext'] = determine_ext(format['url'])

    format_limit = self.params.get('format_limit', None)
    # NOTE(review): upstream guards the truncation with `if format_limit:`.
    formats = list(takewhile_inclusive(
        lambda f: f['format_id'] != format_limit, formats

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # wich can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats', None):
        self.list_formats(info_dict)
        # NOTE(review): upstream returns here after listing.

    req_format = self.params.get('format')
    if req_format is None:
        # NOTE(review): upstream defaults req_format to 'best' here.
    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
    # NOTE(review): an else: around the split/loop below is presumably
    # missing here.
    # We can accept formats requested in the format: 34/5/best, we pick
    # the first that is available, starting from left
    req_formats = req_format.split('/')
    for rf in req_formats:
        if re.match(r'.+?\+.+?', rf) is not None:
            # Two formats have been requested like '137+139'
            format_1, format_2 = rf.split('+')
            formats_info = (self.select_format(format_1, formats),
                            self.select_format(format_2, formats))
            if all(formats_info):
                # NOTE(review): the `selected_format = {` opener for the
                # next entries is not visible in this copy.
                'requested_formats': formats_info,
                'ext': formats_info[0]['ext'],
        # NOTE(review): the else: branches selecting selected_format are
        # not visible around the next two lines.
        selected_format = None
        selected_format = self.select_format(rf, formats)
        if selected_format is not None:
            formats_to_download = [selected_format]
            # NOTE(review): upstream breaks out of the loop here.

    if not formats_to_download:
        # NOTE(review): the expected=True argument of this raise is not
        # visible in this copy.
        raise ExtractorError('requested format not available',

    # NOTE(review): upstream guards the rest with `if download:`.
    if len(formats_to_download) > 1:
        self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
    for format in formats_to_download:
        new_info = dict(info_dict)
        new_info.update(format)
        self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    # NOTE(review): upstream returns info_dict here.
def process_info(self, info_dict):
    """Process a single resolved IE result."""
    # Performs forced printing, sidecar-file writing (description,
    # annotations, subtitles, JSON, thumbnail) and the actual download.
    # NOTE(review): many lines are missing from this copy (try: openers,
    # else: branches, return/continue statements, the `def dl(...)` helper
    # header, success/downloaded initialisations) — see the inline notes;
    # it cannot run as transcribed.

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # Preserve the full title before truncating to a filesystem-safe length.
    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + '...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if not 'format' in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen('[download] ' + reason)
        # NOTE(review): upstream returns here.

    self._num_downloads += 1

    filename = self.prepare_filename(info_dict)

    # Forced printings (the --get-* / -j family of options).
    if self.params.get('forcetitle', False):
        self.to_stdout(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        self.to_stdout(info_dict['id'])
    if self.params.get('forceurl', False):
        # For RTMP URLs, also include the playpath
        self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        self.to_stdout(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        self.to_stdout(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    if self.params.get('forceformat', False):
        self.to_stdout(info_dict['format'])
    if self.params.get('forcejson', False):
        info_dict['_filename'] = filename
        self.to_stdout(json.dumps(info_dict))

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        # NOTE(review): upstream returns here; a `if filename is None:
        # return` guard follows. Neither is visible in this copy.

    # NOTE(review): the try: opener and the os.makedirs(dn) call are not
    # visible around the next lines.
    dn = os.path.dirname(encodeFilename(filename))
    if dn != '' and not os.path.exists(dn):
    except (OSError, IOError) as err:
        self.report_error('unable to create directory ' + compat_str(err))
        # NOTE(review): upstream returns here.

    if self.params.get('writedescription', False):
        descfn = filename + '.description'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        # NOTE(review): an else: / try: pair is presumably missing here.
            self.to_screen('[info] Writing video description to: ' + descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(info_dict['description'])
        except (KeyError, TypeError):
            self.report_warning('There\'s no description to write.')
        except (OSError, IOError):
            self.report_error('Cannot write description file ' + descfn)
            # NOTE(review): upstream returns here.

    if self.params.get('writeannotations', False):
        annofn = filename + '.annotations.xml'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        # NOTE(review): an else: / try: pair is presumably missing here.
            self.to_screen('[info] Writing video annotations to: ' + annofn)
            with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                annofile.write(info_dict['annotations'])
        except (KeyError, TypeError):
            self.report_warning('There are no annotations to write.')
        except (OSError, IOError):
            self.report_error('Cannot write annotations file: ' + annofn)

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
            # NOTE(review): a `if sub is None: continue` guard and a try:
            # are presumably missing here.
            sub_filename = subtitles_filename(filename, sub_lang, sub_format)
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
            # NOTE(review): an else: is presumably missing here.
                self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                    # NOTE(review): the subfile.write(sub) call is not
                    # visible in this copy.
            except (OSError, IOError):
                # NOTE(review): this message interpolates descfn — looks
                # like it should be sub_filename; confirm upstream.
                self.report_error('Cannot write subtitles file ' + descfn)
                # NOTE(review): upstream returns here.

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + '.info.json'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
        # NOTE(review): an else: is presumably missing here.
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
            # NOTE(review): a try: is presumably missing here.
                write_json_file(info_dict, encodeFilename(infofn))
            except (OSError, IOError):
                self.report_error('Cannot write metadata to JSON file ' + infofn)
                # NOTE(review): upstream returns here.

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
            thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail is already present' %
                               (info_dict['extractor'], info_dict['id']))
            # NOTE(review): an else: is presumably missing here.
                self.to_screen('[%s] %s: Downloading thumbnail ...' %
                               (info_dict['extractor'], info_dict['id']))
                # NOTE(review): a try: is presumably missing here.
                    uf = self.urlopen(info_dict['thumbnail'])
                    with open(thumb_filename, 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen('[%s] %s: Writing thumbnail to: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            # NOTE(review): upstream sets success = True here; the else: /
            # try: that follows, and the nested `def dl(name, info):`
            # helper header, are not visible in this copy.
            fd = get_suitable_downloader(info)(self, self.params)
            for ph in self._progress_hooks:
                fd.add_progress_hook(ph)
            return fd.download(name, info)
            if info_dict.get('requested_formats') is not None:
                # Multi-format request (e.g. '137+139'): download each part
                # and schedule an ffmpeg merge as a postprocessor.
                # NOTE(review): downloaded = [] / success = True
                # initialisations are not visible in this copy.
                merger = FFmpegMergerPP(self)
                if not merger._get_executable():
                    # NOTE(review): postprocessors = [] is presumably
                    # missing before this warning.
                    self.report_warning('You have requested multiple '
                                        'formats but ffmpeg or avconv are not installed.'
                                        ' The formats won\'t be merged')
                # NOTE(review): an else: is presumably missing here.
                    postprocessors = [merger]
                for f in info_dict['requested_formats']:
                    new_info = dict(info_dict)
                    # NOTE(review): new_info.update(f) is presumably
                    # missing here.
                    fname = self.prepare_filename(new_info)
                    fname = prepend_extension(fname, 'f%s' % f['format_id'])
                    downloaded.append(fname)
                    partial_success = dl(fname, new_info)
                    success = success and partial_success
                info_dict['__postprocessors'] = postprocessors
                info_dict['__files_to_merge'] = downloaded
            # NOTE(review): an else: (plain single-format download) is
            # presumably missing here.
                success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % str(err))
                # NOTE(review): upstream returns here.
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                # NOTE(review): upstream returns here.

            # NOTE(review): an `if success:` / try: pair is presumably
            # missing around post-processing.
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))
                # NOTE(review): upstream returns here.

    # Record the finished download so it is skipped on future runs.
    self.record_download_archive(info_dict)
1008 def download(self, url_list):
1009 """Download a given list of URLs."""
1010 if (len(url_list) > 1 and
1011 '%' not in self.params['outtmpl']
1012 and self.params.get('max_downloads') != 1):
1013 raise SameFileError(self.params['outtmpl'])
1015 for url in url_list:
1017 #It also downloads the videos
1018 self.extract_info(url)
1019 except UnavailableVideoError:
1020 self.report_error('unable to download video')
1021 except MaxDownloadsReached:
1022 self.to_screen('[info] Maximum number of downloaded files reached.')
1025 return self._download_retcode
1027 def download_with_info_file(self, info_filename):
1028 with io.open(info_filename, 'r', encoding='utf-8') as f:
1031 self.process_ie_result(info, download=True)
1032 except DownloadError:
1033 webpage_url = info.get('webpage_url')
1034 if webpage_url is not None:
1035 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1036 return self.download([webpage_url])
1039 return self._download_retcode
1041 def post_process(self, filename, ie_info):
1042 """Run all the postprocessors on the given file."""
1043 info = dict(ie_info)
1044 info['filepath'] = filename
1047 if ie_info.get('__postprocessors') is not None:
1048 pps_chain.extend(ie_info['__postprocessors'])
1049 pps_chain.extend(self._pps)
1050 for pp in pps_chain:
1052 keep_video_wish, new_info = pp.run(info)
1053 if keep_video_wish is not None:
1055 keep_video = keep_video_wish
1056 elif keep_video is None:
1057 # No clear decision yet, let IE decide
1058 keep_video = keep_video_wish
1059 except PostProcessingError as e:
1060 self.report_error(e.msg)
1061 if keep_video is False and not self.params.get('keepvideo', False):
1063 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1064 os.remove(encodeFilename(filename))
1065 except (IOError, OSError):
1066 self.report_warning('Unable to remove downloaded video file')
1068 def _make_archive_id(self, info_dict):
1069 # Future-proof against any change in case
1070 # and backwards compatibility with prior versions
1071 extractor = info_dict.get('extractor_key')
1072 if extractor is None:
1073 if 'id' in info_dict:
1074 extractor = info_dict.get('ie_key') # key in a playlist
1075 if extractor is None:
1076 return None # Incomplete video information
1077 return extractor.lower() + ' ' + info_dict['id']
1079 def in_download_archive(self, info_dict):
1080 fn = self.params.get('download_archive')
1084 vid_id = self._make_archive_id(info_dict)
1086 return False # Incomplete video information
1089 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1090 for line in archive_file:
1091 if line.strip() == vid_id:
1093 except IOError as ioe:
1094 if ioe.errno != errno.ENOENT:
1098 def record_download_archive(self, info_dict):
1099 fn = self.params.get('download_archive')
1102 vid_id = self._make_archive_id(info_dict)
1104 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1105 archive_file.write(vid_id + '\n')
1108 def format_resolution(format, default='unknown'):
1109 if format.get('vcodec') == 'none':
1111 if format.get('resolution') is not None:
1112 return format['resolution']
1113 if format.get('height') is not None:
1114 if format.get('width') is not None:
1115 res = '%sx%s' % (format['width'], format['height'])
1117 res = '%sp' % format['height']
1118 elif format.get('width') is not None:
1119 res = '?x%d' % format['width']
    def list_formats(self, info_dict):
        """Print a table of the formats available for this video.

        Uses info_dict['formats'] when present, otherwise treats
        info_dict itself as the single available format.  With more than
        one format, the first row is labelled '(worst)' and the last
        '(best)'.
        """
        def format_note(fdict):
            # Assemble a short human-readable note for one format dict:
            # extension support, free-form note, bitrates ('k' suffix),
            # container, codecs, sample rate and filesize, in that order.
            if fdict.get('ext') in ['f4f', 'f4m']:
                res += '(unsupported) '
            if fdict.get('format_note') is not None:
                res += fdict['format_note'] + ' '
            if fdict.get('tbr') is not None:
                res += '%4dk ' % fdict['tbr']
            if fdict.get('container') is not None:
                res += '%s container' % fdict['container']
            if (fdict.get('vcodec') is not None and
                    fdict.get('vcodec') != 'none'):
                res += fdict['vcodec']
                if fdict.get('vbr') is not None:
            elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            if fdict.get('vbr') is not None:
                res += '%4dk' % fdict['vbr']
            if fdict.get('acodec') is not None:
                if fdict['acodec'] == 'none':
                    res += '%-5s' % fdict['acodec']
            elif fdict.get('abr') is not None:
            if fdict.get('abr') is not None:
                res += '@%3dk' % fdict['abr']
            if fdict.get('asr') is not None:
                res += ' (%5dHz)' % fdict['asr']
            if fdict.get('filesize') is not None:
                res += format_bytes(fdict['filesize'])
        def line(format, idlen=20):
            # One fixed-width table row: format id, extension, resolution
            # and the note produced by format_note().
            return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
                format['format_id'],
                self.format_resolution(format),
                format_note(format),
        formats = info_dict.get('formats', [info_dict])
        # Widen the id column to the longest format id (at least as wide
        # as the 'format code' header).
        idlen = max(len('format code'),
                    max(len(f['format_id']) for f in formats))
        formats_s = [line(f, idlen) for f in formats]
        if len(formats) > 1:
            # Formats are ordered worst-first / best-last.
            formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
            formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
        header_line = line({
            'format_id': 'format code', 'ext': 'extension',
            'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
        self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                       (info_dict['id'], header_line, '\n'.join(formats_s)))
1191 def urlopen(self, req):
1192 """ Start an HTTP download """
1193 return self._opener.open(req, timeout=self._socket_timeout)
    def print_debug_header(self):
        """Write debugging information (version, git HEAD, Python and
        platform versions, proxy map) via write_string; no-op unless the
        'verbose' option is set."""
        if not self.params.get('verbose'):
        write_string('[debug] youtube-dl version ' + __version__ + '\n')
        # Best effort: when running from a git checkout, report the
        # abbreviated HEAD commit of the directory containing this file.
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            # Only print when the output looks like a hex commit id.
            if re.match('[0-9a-f]+', out):
                write_string('[debug] Git HEAD: ' + out + '\n')
        write_string('[debug] Python version %s - %s' %
                     (platform.python_version(), platform_name()) + '\n')
        # Merge the proxy settings of every opener handler that has any.
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
    def _setup_opener(self):
        """Build the urllib opener (cookie jar, proxies, HTTPS handling,
        traffic debugging) used for every request made by this
        YoutubeDL instance, and store it on self._opener."""
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout is 600 s (10 minutes).
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')
        if opts_cookiefile is None:
            # No cookie file requested: keep cookies in memory only.
            self.cookiejar = compat_cookiejar.CookieJar()
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
            # Load existing cookies only when the file is readable.
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()
        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        if opts_proxy is not None:
            if opts_proxy == '':
                # An explicit proxy applies to both http and https.
                proxies = {'http': opts_proxy, 'https': opts_proxy}
            # Fall back to the environment's proxy configuration.
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = compat_urllib_request.ProxyHandler(proxies)
        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(
            self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(debuglevel=debuglevel)
        opener = compat_urllib_request.build_opener(
            https_handler, proxy_handler, cookie_processor, ydlh)
        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener