2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
30 compat_urllib_request,
52 UnavailableVideoError,
59 from .extractor import get_info_extractor, gen_extractors
60 from .downloader import get_suitable_downloader
61 from .postprocessor import FFmpegMergerPP
62 from .version import __version__
65 class YoutubeDL(object):
68 YoutubeDL objects are the ones responsible of downloading the
69 actual video file and writing it to disk if the user has requested
70 it, among some other tasks. In most cases there should be one per
71 program. As, given a video URL, the downloader doesn't know how to
72 extract all the needed information, a task that InfoExtractors do, it
73 has to pass the URL to one of them.
75 For this, YoutubeDL objects have a method that allows
76 InfoExtractors to be registered in a given order. When it is passed
77 a URL, the YoutubeDL object handles it to the first InfoExtractor it
78 finds that reports being able to handle it. The InfoExtractor extracts
79 all the information about the video or videos the URL refers to, and
80 YoutubeDL processes the extracted information, possibly using a File
81 Downloader to download the video.
83 YoutubeDL objects accept a lot of parameters. In order not to saturate
84 the object constructor with arguments, it receives a dictionary of
85 options instead. These options are available through the params
86 attribute for the InfoExtractors to use. The YoutubeDL also
87 registers itself as the downloader in charge for the InfoExtractors
88 that are added to it, so this is a "mutual registration".
92 username: Username for authentication purposes.
93 password: Password for authentication purposes.
94 videopassword:   Password for accessing a video.
95 usenetrc: Use netrc for authentication instead.
96 verbose: Print additional info to stdout.
97 quiet: Do not print messages to stdout.
98 no_warnings: Do not print out anything for warnings.
99 forceurl: Force printing final URL.
100 forcetitle: Force printing title.
101 forceid: Force printing ID.
102 forcethumbnail: Force printing thumbnail URL.
103 forcedescription: Force printing description.
104 forcefilename: Force printing final filename.
105 forceduration: Force printing duration.
106 forcejson: Force printing info_dict as JSON.
107 simulate: Do not download the video files.
108 format: Video format code.
109 format_limit: Highest quality format to try.
110 outtmpl: Template for output names.
111 restrictfilenames: Do not allow "&" and spaces in file names
112 ignoreerrors: Do not stop on download errors.
113 nooverwrites: Prevent overwriting files.
114 playliststart: Playlist item to start at.
115 playlistend: Playlist item to end at.
116 matchtitle: Download only matching titles.
117 rejecttitle: Reject downloads for matching titles.
118 logger: Log messages to a logging.Logger instance.
119 logtostderr: Log messages to stderr instead of stdout.
120 writedescription: Write the video description to a .description file
121 writeinfojson: Write the video description to a .info.json file
122 writeannotations: Write the video annotations to a .annotations.xml file
123 writethumbnail: Write the thumbnail image to a file
124 writesubtitles: Write the video subtitles to a file
125 writeautomaticsub: Write the automatic subtitles to a file
126 allsubtitles: Downloads all the subtitles of the video
127 (requires writesubtitles or writeautomaticsub)
128 listsubtitles: Lists all available subtitles for the video
129 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
130 subtitleslangs: List of languages of the subtitles to download
131 keepvideo: Keep the video file after post-processing
132 daterange: A DateRange object, download only if the upload_date is in the range.
133 skip_download: Skip the actual download of the video file
134 cachedir: Location of the cache files in the filesystem.
135 None to disable filesystem cache.
136 noplaylist: Download single video instead of a playlist if in doubt.
137 age_limit: An integer representing the user's age in years.
138 Unsuitable videos for the given age are skipped.
139 min_views: An integer representing the minimum view count the video
140 must have in order to not be skipped.
141 Videos without view count information are always
142 downloaded. None for no limit.
143 max_views: An integer representing the maximum view count.
144 Videos that are more popular than that are not
146 Videos without view count information are always
147 downloaded. None for no limit.
148 download_archive: File name of a file where all downloads are recorded.
149 Videos already present in the file are not downloaded
151 cookiefile: File name where cookies should be read from and dumped to.
152 nocheckcertificate:Do not verify SSL certificates
153 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
154 At the moment, this is only supported by YouTube.
155 proxy: URL of the proxy server to use
156 socket_timeout: Time to wait for unresponsive hosts, in seconds
157 bidi_workaround: Work around buggy terminals without bidirectional text
158 support, using fribidi
159 debug_printtraffic:Print out sent and received HTTP traffic
160 include_ads: Download ads as well
161 default_search: Prepend this string if an input url is not valid.
162 'auto' for elaborate guessing
163 encoding: Use this encoding instead of the system-specified.
165 The following parameters are not used by YoutubeDL itself, they are used by
167 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
168 noresizebuffer, retries, continuedl, noprogress, consoletitle
170 The following options are used by the post processors:
171 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
172 otherwise prefer avconv.
178 _download_retcode = None
179 _num_downloads = None
def __init__(self, params=None):
    """Create a FileDownloader object with the given options.

    params: options dict (see the class docstring for the recognized keys).
    """
    # Cache of instantiated extractors, keyed by their ie_key().
    self._ies_instances = {}
    # Progress hooks forwarded to the file downloader (see add_progress_hook).
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Screen output goes to stderr instead of stdout when 'logtostderr' is set.
    self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
    self._err_file = sys.stderr

    if params.get('bidi_workaround', False):
            # Filter screen output through an external bidi program (bidiv,
            # falling back to fribidi) so RTL text renders correctly.
            # (try/else scaffolding around these lines is elided in this view)
            master, slave = pty.openpty()
            width = get_term_width()
                width_args = ['-w', str(width)]
                stdin=subprocess.PIPE,
                stderr=self._err_file)
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

    # NOTE(review): direct indexing — raises KeyError when 'restrictfilenames'
    # is absent from params; confirm callers always supply it.
    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params['restrictfilenames']):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
            # NOTE(review): 'charactes' typo in this user-facing message —
            # fix separately (runtime strings are left untouched here).
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all charactes. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    if '%(stitle)s' in self.params.get('outtmpl', ''):
        self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
def add_info_extractor(self, ie):
    """Add an InfoExtractor object to the end of the list."""
    # Index by key so get_info_extractor can find it again.
    self._ies_instances[ie.ie_key()] = ie
    # Mutual registration: the extractor gets a back-reference to us.
    ie.set_downloader(self)
def get_info_extractor(self, ie_key):
    """
    Get an instance of an IE with name ie_key, it will try to get one from
    the _ies list, if there's no instance it will create a new one and add
    it to the extractor list.
    """
    ie = self._ies_instances.get(ie_key)
        # (the `if ie is None:` guard around these lines is elided in this view)
        # Lazily instantiate the extractor class and register it.
        ie = get_info_extractor(ie_key)()
        self.add_info_extractor(ie)
def add_default_info_extractors(self):
    """Register every InfoExtractor produced by gen_extractors, in order."""
    for extractor in gen_extractors():
        self.add_info_extractor(extractor)
def add_post_processor(self, pp):
    """Add a PostProcessor object to the end of the chain."""
    # Mutual registration, mirroring add_info_extractor.
    pp.set_downloader(self)
268 def add_progress_hook(self, ph):
269 """Add the progress hook (currently only for the file downloader)"""
270 self._progress_hooks.append(ph)
def _bidi_workaround(self, message):
    # Pass *message* through the external bidi process started in __init__
    # and return the reordered text.
    if not hasattr(self, '_output_channel'):
        # (fallthrough body elided in this view — presumably returns
        # message unchanged when the workaround is inactive; confirm)
    assert hasattr(self, '_output_process')
    # Expects a text (unicode) string.
    assert type(message) == type('')
    line_count = message.count('\n') + 1
    self._output_process.stdin.write((message + '\n').encode('utf-8'))
    self._output_process.stdin.flush()
    # Read back exactly as many lines as were written.
    res = ''.join(self._output_channel.readline().decode('utf-8')
                  for _ in range(line_count))
    # Drop the trailing newline appended above.
    return res[:-len('\n')]
def to_screen(self, message, skip_eol=False):
    """Write *message* to the screen output, honoring quiet mode."""
    return self.to_stdout(message, skip_eol=skip_eol, check_quiet=True)
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Send *message* to the screen file (or the configured logger).

    When check_quiet is true the message is suppressed in quiet mode.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return
    text = self._bidi_workaround(message)
    write_string(text + ('' if skip_eol else '\n'), self._screen_file)
def to_stderr(self, message):
    """Print message to stderr."""
    assert type(message) == type('')
    if self.params.get('logger'):
        self.params['logger'].error(message)
    # (an else: branch is elided in this view)
        message = self._bidi_workaround(message)
        output = message + '\n'
        write_string(output, self._err_file)
def to_console_title(self, message):
    """Set the terminal/console window title, if the user enabled it."""
    if not self.params.get('consoletitle', False):
        # (early return elided in this view)
    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    elif 'TERM' in os.environ:
        # xterm escape sequence: set the window title.
        write_string('\033]0;%s\007' % message, self._screen_file)
def save_console_title(self):
    """Push the current terminal title onto the xterm title stack."""
    if not self.params.get('consoletitle', False):
        # (early return elided in this view)
    if 'TERM' in os.environ:
        # Save the title on stack
        write_string('\033[22;0t', self._screen_file)
def restore_console_title(self):
    """Pop the previously saved terminal title from the xterm title stack."""
    if not self.params.get('consoletitle', False):
        # (early return elided in this view)
    if 'TERM' in os.environ:
        # Restore the title from stack
        write_string('\033[23;0t', self._screen_file)
335 self.save_console_title()
def __exit__(self, *args):
    """Context-manager exit: restore the console title, persist cookies."""
    self.restore_console_title()

    cookie_path = self.params.get('cookiefile')
    if cookie_path is not None:
        self.cookiejar.save()
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    Depending on if the downloader has been configured to ignore
    download errors or not, this method may throw an exception or
    not when errors are found, after printing the message.

    tb, if given, is additional traceback information.
    """
    if message is not None:
        self.to_stderr(message)
    if self.params.get('verbose'):
        # (an `if tb is None:` guard and else-branches are elided in this view)
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                    # Prefer the wrapped exception's own traceback when present.
                    tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += compat_str(traceback.format_exc())
                # Not inside an except block: dump the current call stack instead.
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = ''.join(tb_data)
    if not self.params.get('ignoreerrors', False):
        if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
            exc_info = sys.exc_info()[1].exc_info
            # (else: branch elided in this view)
            exc_info = sys.exc_info()
        raise DownloadError(message, exc_info)
    # ignoreerrors mode: record failure in the eventual process exit code.
    self._download_retcode = 1
def report_warning(self, message):
    '''
    Print the message to stderr, it will be prefixed with 'WARNING:'
    If stderr is a tty file the 'WARNING:' will be colored
    '''
    if self.params.get('logger') is not None:
        self.params['logger'].warning(message)
        # (early return after delegating to the logger elided in this view)
    if self.params.get('no_warnings'):
        # (early return elided in this view)
    if self._err_file.isatty() and os.name != 'nt':
        # Yellow header on ANSI-capable terminals.
        _msg_header = '\033[0;33mWARNING:\033[0m'
        # (else: branch elided in this view)
        _msg_header = 'WARNING:'
    warning_message = '%s %s' % (_msg_header, message)
    self.to_stderr(warning_message)
def report_error(self, message, tb=None):
    '''
    Do the same as trouble, but prefixes the message with 'ERROR:', colored
    in red if stderr is a tty file.
    '''
    if self._err_file.isatty() and os.name != 'nt':
        # Red header on ANSI-capable terminals.
        _msg_header = '\033[0;31mERROR:\033[0m'
        # (else: branch elided in this view)
        _msg_header = 'ERROR:'
    error_message = '%s %s' % (_msg_header, message)
    self.trouble(error_message, tb)
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded."""
    # (a try: wrapper is elided in this view)
        self.to_screen('[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        # The filename may not be representable in the console encoding;
        # fall back to a message without it.
        self.to_screen('[download] The file has already been downloaded')
def prepare_filename(self, info_dict):
    """Generate the output filename."""
        # Work on a copy so the caller's info_dict is not mutated.
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            # (default assignment elided in this view)
        autonumber_templ = '%0' + str(autonumber_size) + 'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        if template_dict.get('playlist_index') is not None:
            # Zero-pad so filenames sort in playlist order.
            template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
        # Derive a human-readable 'resolution' field when the IE did not set one.
        if template_dict.get('resolution') is None:
            if template_dict.get('width') and template_dict.get('height'):
                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
            elif template_dict.get('height'):
                template_dict['resolution'] = '%sp' % template_dict['height']
            elif template_dict.get('width'):
                # NOTE(review): renders the *width* after '?x', although the
                # 'WxH' convention puts width first — confirm intended output.
                template_dict['resolution'] = '?x%d' % template_dict['width']

        sanitize = lambda k, v: sanitize_filename(
            restricted=self.params.get('restrictfilenames'),
        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items()
        # Any template field that is still missing renders as 'NA'.
        template_dict = collections.defaultdict(lambda: 'NA', template_dict)

        tmpl = os.path.expanduser(self.params['outtmpl'])
        filename = tmpl % template_dict
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
def _match_entry(self, info_dict):
    """ Returns None iff the file should be downloaded """
    # Otherwise returns a human-readable string explaining the skip reason.
    video_title = info_dict.get('title', info_dict.get('id', 'video'))
    if 'title' in info_dict:
        # This can happen when we're just evaluating the playlist
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
            if not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
            if re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    date = info_dict.get('upload_date', None)
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    view_count = info_dict.get('view_count', None)
    if view_count is not None:
        min_views = self.params.get('min_views')
        if min_views is not None and view_count < min_views:
            return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
        max_views = self.params.get('max_views')
        if max_views is not None and view_count > max_views:
            return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
    age_limit = self.params.get('age_limit')
    if age_limit is not None:
        if age_limit < info_dict.get('age_limit', 0):
            # NOTE(review): `title` is only bound inside the 'title' branch
            # above — this line can raise NameError for title-less entries
            # that carry an age_limit; confirm and fix separately.
            return 'Skipping "' + title + '" because it is age restricted'
    if self.in_download_archive(info_dict):
        return '%s has already been recorded in archive' % video_title
484 def add_extra_info(info_dict, extra_info):
485 '''Set the keys from extra_info in info dict if they are missing'''
486 for key, value in extra_info.items():
487 info_dict.setdefault(key, value)
# NOTE(review): the signature is truncated in this view; a `process`
# parameter exists (see the extract_info(..., process=False) call in
# process_ie_result). Also note the mutable default `extra_info={}` —
# only read/forwarded here, but fragile; confirm it is never mutated.
def extract_info(self, url, download=True, ie_key=None, extra_info={},
    '''
    Returns a list with a dictionary for each video we find.
    If 'download', also downloads the videos.
    extra_info is a dict containing the extra values to add to each result
    '''
        # When an explicit ie_key is given, only that extractor is tried.
        ies = [self.get_info_extractor(ie_key)]
        if not ie.suitable(url):
            # (continue elided in this view)
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                    '_type': 'compat_list',
                    'entries': ie_result,
            self.add_default_extra_info(ie_result, ie, url)
                return self.process_ie_result(ie_result, download, extra_info)
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
        except MaxDownloadsReached:
            # (re-raise elided in this view)
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
    # No extractor claimed the URL.
    self.report_error('no suitable InfoExtractor for URL %s' % url)
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in the standard provenance keys on an extractor result.

    ie_result: the dict returned by the extractor (modified in place,
               existing keys win via add_extra_info/setdefault).
    ie:        the InfoExtractor that produced it.
    url:       the URL the result was extracted from.
    """
    self.add_extra_info(ie_result, {
        'extractor': ie.IE_NAME,
        # 'webpage_url' is required downstream: process_ie_result reads
        # ie_result['webpage_url'] when building playlist/compat entries.
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    })
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.
    """
    result_type = ie_result.get('_type', 'video')  # If not given we suppose it's a video, support the default old system
    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        def make_result(embedded_info):
            # Overlay selected fields of the embedded result onto a copy
            # of the embedding result.
            new_result = ie_result.copy()
            for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                      'entries', 'ie_key', 'duration',
                      'subtitles', 'annotations', 'format',
                      'thumbnail', 'thumbnails'):
                if f in embedded_info:
                    new_result[f] = embedded_info[f]
            # (return new_result elided in this view)
        new_result = make_result(info)

        assert new_result.get('_type') != 'url_transparent'
        if new_result.get('_type') == 'compat_list':
            new_result['entries'] = [
                make_result(e) for e in new_result['entries']]

        # Recurse: the merged result may itself be a playlist/url.
        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type == 'playlist':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        # playliststart is 1-based in params, 0-based here.
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', None)
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            # (playlistend = None elided in this view)

        if isinstance(ie_result['entries'], list):
            n_all_entries = len(ie_result['entries'])
            entries = ie_result['entries'][playliststart:playlistend]
            n_entries = len(entries)
                "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
            # Lazily paged playlists: slice without materializing everything.
            assert isinstance(ie_result['entries'], PagedList)
            entries = ie_result['entries'].getslice(
                playliststart, playlistend)
            n_entries = len(entries)
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
                # Per-entry provenance, attached via extra_info below.
                'playlist': playlist,
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],

            reason = self._match_entry(entry)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                # (continue elided in this view)

            entry_result = self.process_ie_result(entry,
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
    elif result_type == 'compat_list':
            # Legacy list-of-dicts result: fix up each entry in place.
            self.add_extra_info(r,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        raise Exception('Invalid result type: %s' % result_type)
def select_format(self, format_spec, available_formats):
    # Pick one format dict out of available_formats according to format_spec.
    # 'best' maps to the last element and 'worst' to the first, so callers
    # presumably pass the list ordered worst-to-best — confirm.
    if format_spec == 'best' or format_spec is None:
        return available_formats[-1]
    elif format_spec == 'worst':
        return available_formats[0]
    elif format_spec == 'bestaudio':
        # Audio-only formats are those without a video codec.
            f for f in available_formats
            if f.get('vcodec') == 'none']
            return audio_formats[-1]
    elif format_spec == 'worstaudio':
            f for f in available_formats
            if f.get('vcodec') == 'none']
            return audio_formats[0]
    elif format_spec == 'bestvideo':
        # Video-only formats are those without an audio codec.
            f for f in available_formats
            if f.get('acodec') == 'none']
            return video_formats[-1]
    elif format_spec == 'worstvideo':
            f for f in available_formats
            if f.get('acodec') == 'none']
            return video_formats[0]
        # Fallback: match by extension, otherwise by exact format_id.
        extensions = ['mp4', 'flv', 'webm', '3gp']
        if format_spec in extensions:
            filter_f = lambda f: f['ext'] == format_spec
            filter_f = lambda f: f['format_id'] == format_spec
        matches = list(filter(filter_f, available_formats))
def process_video_result(self, info_dict, download=True):
    """Normalize a single-video result, select formats, and hand each
    selected format to process_info (which downloads it)."""
    assert info_dict.get('_type', 'video') == 'video'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    # Derive upload_date (YYYYMMDD) from a UNIX timestamp when absent.
    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        upload_date = datetime.datetime.utcfromtimestamp(
            info_dict['timestamp'])
        info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

    # These extractors handle format selection themselves
    if info_dict['extractor'] in ['Youku']:
            self.process_info(info_dict)

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
        # (else: branch elided in this view)
        formats = info_dict['formats']

        raise ExtractorError('No video formats found!')

    # We check that all the formats have the format and format_id fields
    for i, format in enumerate(formats):
        if format.get('format_id') is None:
            format['format_id'] = compat_str(i)
        if format.get('format') is None:
            format['format'] = '{id} - {res}{note}'.format(
                id=format['format_id'],
                res=self.format_resolution(format),
                note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
        # Automatically determine file extension if missing
        if 'ext' not in format:
            format['ext'] = determine_ext(format['url'])

    # --format-limit: keep formats up to and including the given id.
    format_limit = self.params.get('format_limit', None)
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats', None):
        self.list_formats(info_dict)
        # (return elided in this view)

    req_format = self.params.get('format')
    if req_format is None:
        # (default req_format assignment elided in this view)
    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
        # We can accept formats requested in the format: 34/5/best, we pick
        # the first that is available, starting from left
        req_formats = req_format.split('/')
        for rf in req_formats:
            if re.match(r'.+?\+.+?', rf) is not None:
                # Two formats have been requested like '137+139'
                format_1, format_2 = rf.split('+')
                formats_info = (self.select_format(format_1, formats),
                                self.select_format(format_2, formats))
                if all(formats_info):
                        # Merged download: both halves recorded for ffmpeg.
                        'requested_formats': formats_info,
                        'ext': formats_info[0]['ext'],
                    selected_format = None
                selected_format = self.select_format(rf, formats)
            if selected_format is not None:
                formats_to_download = [selected_format]
                # (break elided in this view)
    if not formats_to_download:
        raise ExtractorError('requested format not available',

    if len(formats_to_download) > 1:
        self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
    for format in formats_to_download:
        # Each selected format is downloaded as its own merged info dict.
        new_info = dict(info_dict)
        new_info.update(format)
        self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
def process_info(self, info_dict):
    """Process a single resolved IE result."""
    # Handles the --force-* printing, writes side files (description,
    # annotations, subtitles, info JSON, thumbnail), performs the actual
    # download, and records the entry in the download archive.
    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    info_dict['fulltitle'] = info_dict['title']
    # Truncate overly long titles so they fit in a filename.
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + '...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if not 'format' in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen('[download] ' + reason)
        # (return elided in this view)

    self._num_downloads += 1

    filename = self.prepare_filename(info_dict)

    # Forced printings (--get-title, --get-id, ...).
    if self.params.get('forcetitle', False):
        self.to_stdout(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        self.to_stdout(info_dict['id'])
    if self.params.get('forceurl', False):
        # For RTMP URLs, also include the playpath
        self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        self.to_stdout(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        self.to_stdout(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    if self.params.get('forceformat', False):
        self.to_stdout(info_dict['format'])
    if self.params.get('forcejson', False):
        info_dict['_filename'] = filename
        self.to_stdout(json.dumps(info_dict))

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        # (return elided in this view)

    # Ensure the target directory exists.
    # (a try: wrapper is elided in this view)
        dn = os.path.dirname(encodeFilename(filename))
        if dn != '' and not os.path.exists(dn):
    except (OSError, IOError) as err:
        self.report_error('unable to create directory ' + compat_str(err))

    if self.params.get('writedescription', False):
        descfn = filename + '.description'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (KeyError, TypeError):
                # 'description' key missing, or its value is None.
                self.report_warning('There\'s no description to write.')
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                # (return elided in this view)

    if self.params.get('writeannotations', False):
        annofn = filename + '.annotations.xml'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
            except (OSError, IOError):
                # NOTE(review): this message reports `descfn` (the description
                # filename), not the subtitle filename — confirm and fix.
                self.report_error('Cannot write subtitles file ' + descfn)

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + '.info.json'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                write_json_file(info_dict, encodeFilename(infofn))
            except (OSError, IOError):
                self.report_error('Cannot write metadata to JSON file ' + infofn)
                # (return elided in this view)

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
            thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail is already present' %
                               (info_dict['extractor'], info_dict['id']))
                self.to_screen('[%s] %s: Downloading thumbnail ...' %
                               (info_dict['extractor'], info_dict['id']))
                    uf = self.urlopen(info_dict['thumbnail'])
                    with open(thumb_filename, 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen('[%s] %s: Writing thumbnail to: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    # A failed thumbnail is only a warning, not fatal.
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
                # (enclosing `def dl(name, info):` helper header elided in this view)
                    # Dispatch to the downloader matching the info dict and
                    # forward our registered progress hooks to it.
                    fd = get_suitable_downloader(info)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    return fd.download(name, info)
                if info_dict.get('requested_formats') is not None:
                    # '137+139'-style request: download parts, merge with ffmpeg.
                    merger = FFmpegMergerPP(self)
                    if not merger._get_executable():
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged')
                        postprocessors = [merger]
                    for f in info_dict['requested_formats']:
                        new_info = dict(info_dict)
                        fname = self.prepare_filename(new_info)
                        # Tag each part with its format_id to keep them distinct.
                        fname = prepend_extension(fname, 'f%s' % f['format_id'])
                        downloaded.append(fname)
                        partial_success = dl(fname, new_info)
                        success = success and partial_success
                    info_dict['__postprocessors'] = postprocessors
                    info_dict['__files_to_merge'] = downloaded
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % str(err))
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))

                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))
                # (return elided in this view)

        self.record_download_archive(info_dict)
1015 def download(self, url_list):
1016 """Download a given list of URLs."""
1017 if (len(url_list) > 1 and
1018 '%' not in self.params['outtmpl']
1019 and self.params.get('max_downloads') != 1):
1020 raise SameFileError(self.params['outtmpl'])
1022 for url in url_list:
1024 #It also downloads the videos
1025 self.extract_info(url)
1026 except UnavailableVideoError:
1027 self.report_error('unable to download video')
1028 except MaxDownloadsReached:
1029 self.to_screen('[info] Maximum number of downloaded files reached.')
1032 return self._download_retcode
1034 def download_with_info_file(self, info_filename):
1035 with io.open(info_filename, 'r', encoding='utf-8') as f:
1038 self.process_ie_result(info, download=True)
1039 except DownloadError:
1040 webpage_url = info.get('webpage_url')
1041 if webpage_url is not None:
1042 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1043 return self.download([webpage_url])
1046 return self._download_retcode
1048 def post_process(self, filename, ie_info):
1049 """Run all the postprocessors on the given file."""
1050 info = dict(ie_info)
1051 info['filepath'] = filename
1054 if ie_info.get('__postprocessors') is not None:
1055 pps_chain.extend(ie_info['__postprocessors'])
1056 pps_chain.extend(self._pps)
1057 for pp in pps_chain:
1059 keep_video_wish, new_info = pp.run(info)
1060 if keep_video_wish is not None:
1062 keep_video = keep_video_wish
1063 elif keep_video is None:
1064 # No clear decision yet, let IE decide
1065 keep_video = keep_video_wish
1066 except PostProcessingError as e:
1067 self.report_error(e.msg)
1068 if keep_video is False and not self.params.get('keepvideo', False):
1070 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1071 os.remove(encodeFilename(filename))
1072 except (IOError, OSError):
1073 self.report_warning('Unable to remove downloaded video file')
1075 def _make_archive_id(self, info_dict):
1076 # Future-proof against any change in case
1077 # and backwards compatibility with prior versions
1078 extractor = info_dict.get('extractor_key')
1079 if extractor is None:
1080 if 'id' in info_dict:
1081 extractor = info_dict.get('ie_key') # key in a playlist
1082 if extractor is None:
1083 return None # Incomplete video information
1084 return extractor.lower() + ' ' + info_dict['id']
1086 def in_download_archive(self, info_dict):
1087 fn = self.params.get('download_archive')
1091 vid_id = self._make_archive_id(info_dict)
1093 return False # Incomplete video information
1096 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1097 for line in archive_file:
1098 if line.strip() == vid_id:
1100 except IOError as ioe:
1101 if ioe.errno != errno.ENOENT:
1105 def record_download_archive(self, info_dict):
1106 fn = self.params.get('download_archive')
1109 vid_id = self._make_archive_id(info_dict)
1111 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1112 archive_file.write(vid_id + '\n')
1115 def format_resolution(format, default='unknown'):
1116 if format.get('vcodec') == 'none':
1118 if format.get('resolution') is not None:
1119 return format['resolution']
1120 if format.get('height') is not None:
1121 if format.get('width') is not None:
1122 res = '%sx%s' % (format['width'], format['height'])
1124 res = '%sp' % format['height']
1125 elif format.get('width') is not None:
1126 res = '?x%d' % format['width']
1131 def list_formats(self, info_dict):
1132 def format_note(fdict):
1134 if fdict.get('ext') in ['f4f', 'f4m']:
1135 res += '(unsupported) '
1136 if fdict.get('format_note') is not None:
1137 res += fdict['format_note'] + ' '
1138 if fdict.get('tbr') is not None:
1139 res += '%4dk ' % fdict['tbr']
1140 if fdict.get('container') is not None:
1143 res += '%s container' % fdict['container']
1144 if (fdict.get('vcodec') is not None and
1145 fdict.get('vcodec') != 'none'):
1148 res += fdict['vcodec']
1149 if fdict.get('vbr') is not None:
1151 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1153 if fdict.get('vbr') is not None:
1154 res += '%4dk' % fdict['vbr']
1155 if fdict.get('acodec') is not None:
1158 if fdict['acodec'] == 'none':
1161 res += '%-5s' % fdict['acodec']
1162 elif fdict.get('abr') is not None:
1166 if fdict.get('abr') is not None:
1167 res += '@%3dk' % fdict['abr']
1168 if fdict.get('asr') is not None:
1169 res += ' (%5dHz)' % fdict['asr']
1170 if fdict.get('filesize') is not None:
1173 res += format_bytes(fdict['filesize'])
1176 def line(format, idlen=20):
1177 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1178 format['format_id'],
1180 self.format_resolution(format),
1181 format_note(format),
1184 formats = info_dict.get('formats', [info_dict])
1185 idlen = max(len('format code'),
1186 max(len(f['format_id']) for f in formats))
1187 formats_s = [line(f, idlen) for f in formats]
1188 if len(formats) > 1:
1189 formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
1190 formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
1192 header_line = line({
1193 'format_id': 'format code', 'ext': 'extension',
1194 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1195 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1196 (info_dict['id'], header_line, '\n'.join(formats_s)))
1198 def urlopen(self, req):
1199 """ Start an HTTP download """
1200 return self._opener.open(req, timeout=self._socket_timeout)
1202 def print_debug_header(self):
1203 if not self.params.get('verbose'):
1206 write_string('[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' %
1207 (locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, self.get_encoding()))
1208 write_string('[debug] youtube-dl version ' + __version__ + '\n')
1210 sp = subprocess.Popen(
1211 ['git', 'rev-parse', '--short', 'HEAD'],
1212 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1213 cwd=os.path.dirname(os.path.abspath(__file__)))
1214 out, err = sp.communicate()
1215 out = out.decode().strip()
1216 if re.match('[0-9a-f]+', out):
1217 write_string('[debug] Git HEAD: ' + out + '\n')
1223 write_string('[debug] Python version %s - %s' %
1224 (platform.python_version(), platform_name()) + '\n')
1227 for handler in self._opener.handlers:
1228 if hasattr(handler, 'proxies'):
1229 proxy_map.update(handler.proxies)
1230 write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1232 def _setup_opener(self):
1233 timeout_val = self.params.get('socket_timeout')
1234 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1236 opts_cookiefile = self.params.get('cookiefile')
1237 opts_proxy = self.params.get('proxy')
1239 if opts_cookiefile is None:
1240 self.cookiejar = compat_cookiejar.CookieJar()
1242 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1244 if os.access(opts_cookiefile, os.R_OK):
1245 self.cookiejar.load()
1247 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1249 if opts_proxy is not None:
1250 if opts_proxy == '':
1253 proxies = {'http': opts_proxy, 'https': opts_proxy}
1255 proxies = compat_urllib_request.getproxies()
1256 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1257 if 'http' in proxies and 'https' not in proxies:
1258 proxies['https'] = proxies['http']
1259 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1261 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1262 https_handler = make_HTTPS_handler(
1263 self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1264 ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1265 opener = compat_urllib_request.build_opener(
1266 https_handler, proxy_handler, cookie_processor, ydlh)
1267 # Delete the default user-agent header, which would otherwise apply in
1268 # cases where our custom HTTP handler doesn't come into play
1269 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1270 opener.addheaders = []
1271 self._opener = opener
1273 def encode(self, s):
1274 if isinstance(s, bytes):
1275 return s # Already encoded
1278 return s.encode(self.get_encoding())
1279 except UnicodeEncodeError as err:
1280 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1283 def get_encoding(self):
1284 encoding = self.params.get('encoding')
1285 if encoding is None:
1286 encoding = preferredencoding()