2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
30 compat_urllib_request,
52 UnavailableVideoError,
59 from .extractor import get_info_extractor, gen_extractors
60 from .downloader import get_suitable_downloader
61 from .postprocessor import FFmpegMergerPP
62 from .version import __version__
65 class YoutubeDL(object):
68 YoutubeDL objects are the ones responsible of downloading the
69 actual video file and writing it to disk if the user has requested
70 it, among some other tasks. In most cases there should be one per
71 program. As, given a video URL, the downloader doesn't know how to
72 extract all the needed information, task that InfoExtractors do, it
73 has to pass the URL to one of them.
75 For this, YoutubeDL objects have a method that allows
76 InfoExtractors to be registered in a given order. When it is passed
77 a URL, the YoutubeDL object handles it to the first InfoExtractor it
78 finds that reports being able to handle it. The InfoExtractor extracts
79 all the information about the video or videos the URL refers to, and
80 YoutubeDL processes the extracted information, possibly using a File
81 Downloader to download the video.
83 YoutubeDL objects accept a lot of parameters. In order not to saturate
84 the object constructor with arguments, it receives a dictionary of
85 options instead. These options are available through the params
86 attribute for the InfoExtractors to use. The YoutubeDL also
87 registers itself as the downloader in charge for the InfoExtractors
88 that are added to it, so this is a "mutual registration".
92 username: Username for authentication purposes.
93 password: Password for authentication purposes.
94 videopassword: Password for accessing a video.
95 usenetrc: Use netrc for authentication instead.
96 verbose: Print additional info to stdout.
97 quiet: Do not print messages to stdout.
98 no_warnings: Do not print out anything for warnings.
99 forceurl: Force printing final URL.
100 forcetitle: Force printing title.
101 forceid: Force printing ID.
102 forcethumbnail: Force printing thumbnail URL.
103 forcedescription: Force printing description.
104 forcefilename: Force printing final filename.
105 forceduration: Force printing duration.
106 forcejson: Force printing info_dict as JSON.
107 simulate: Do not download the video files.
108 format: Video format code.
109 format_limit: Highest quality format to try.
110 outtmpl: Template for output names.
111 restrictfilenames: Do not allow "&" and spaces in file names
112 ignoreerrors: Do not stop on download errors.
113 nooverwrites: Prevent overwriting files.
114 playliststart: Playlist item to start at.
115 playlistend: Playlist item to end at.
116 matchtitle: Download only matching titles.
117 rejecttitle: Reject downloads for matching titles.
118 logger: Log messages to a logging.Logger instance.
119 logtostderr: Log messages to stderr instead of stdout.
120 writedescription: Write the video description to a .description file
121 writeinfojson: Write the video description to a .info.json file
122 writeannotations: Write the video annotations to a .annotations.xml file
123 writethumbnail: Write the thumbnail image to a file
124 writesubtitles: Write the video subtitles to a file
125 writeautomaticsub: Write the automatic subtitles to a file
126 allsubtitles: Downloads all the subtitles of the video
127 (requires writesubtitles or writeautomaticsub)
128 listsubtitles: Lists all available subtitles for the video
129 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
130 subtitleslangs: List of languages of the subtitles to download
131 keepvideo: Keep the video file after post-processing
132 daterange: A DateRange object, download only if the upload_date is in the range.
133 skip_download: Skip the actual download of the video file
134 cachedir: Location of the cache files in the filesystem.
135 None to disable filesystem cache.
136 noplaylist: Download single video instead of a playlist if in doubt.
137 age_limit: An integer representing the user's age in years.
138 Unsuitable videos for the given age are skipped.
139 min_views: An integer representing the minimum view count the video
140 must have in order to not be skipped.
141 Videos without view count information are always
142 downloaded. None for no limit.
143 max_views: An integer representing the maximum view count.
144 Videos that are more popular than that are not
146 Videos without view count information are always
147 downloaded. None for no limit.
148 download_archive: File name of a file where all downloads are recorded.
149 Videos already present in the file are not downloaded
151 cookiefile: File name where cookies should be read from and dumped to.
152 nocheckcertificate:Do not verify SSL certificates
153 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
154 At the moment, this is only supported by YouTube.
155 proxy: URL of the proxy server to use
156 socket_timeout: Time to wait for unresponsive hosts, in seconds
157 bidi_workaround: Work around buggy terminals without bidirectional text
158 support, using fribidi
159 debug_printtraffic:Print out sent and received HTTP traffic
160 include_ads: Download ads as well
161 default_search: Prepend this string if an input url is not valid.
162 'auto' for elaborate guessing
163 encoding: Use this encoding instead of the system-specified.
165 The following parameters are not used by YoutubeDL itself, they are used by
167 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
168 noresizebuffer, retries, continuedl, noprogress, consoletitle
170 The following options are used by the post processors:
171 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
172 otherwise prefer avconv.
# Process exit status: __init__ resets it to 0; trouble() sets it to 1 when
# an error is recorded while ignoreerrors is enabled.
178 _download_retcode = None
# Count of files downloaded in this session; used e.g. for the
# %(autonumber)s output-template field (see prepare_filename()).
179 _num_downloads = None
# Constructor: stores the options dict, selects output streams, and sets up
# the optional bidi (right-to-left text) helper subprocess.
# NOTE(review): this listing elides several lines of the method (numbering
# jumps at 184-186, 188, 194-198, 201-203, 205, 207, 209, 212-213, 218,
# 220-222, 227, 232, 235-237); the text below is incomplete and does not
# parse as-is.
182 def __init__(self, params=None):
183 """Create a FileDownloader object with the given options."""
187 self._ies_instances = {}
189 self._progress_hooks = []
190 self._download_retcode = 0
191 self._num_downloads = 0
# logtostderr picks stderr (index 1) as the "screen" stream.
192 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
193 self._err_file = sys.stderr
196 if params.get('bidi_workaround', False):
# A pty plus a bidiv/fribidi child process reorders bidirectional text
# for terminals that cannot (see the bidi_workaround option above).
199 master, slave = pty.openpty()
200 width = get_term_width()
204 width_args = ['-w', str(width)]
206 stdin=subprocess.PIPE,
208 stderr=self._err_file)
210 self._output_process = subprocess.Popen(
211 ['bidiv'] + width_args, **sp_kwargs
# Fallback: 'fribidi' is tried when 'bidiv' is not available
# (elided except/try lines between 212 and 214).
214 self._output_process = subprocess.Popen(
215 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
216 self._output_channel = os.fdopen(master, 'rb')
217 except OSError as ose:
219 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
# Force --restrict-filenames when the filesystem encoding cannot encode
# arbitrary characters, to avoid Python 3 filesystem API errors.
# NOTE(review): the warning text below contains the typo "charactes";
# it is a runtime string and is left byte-identical here.
223 if (sys.version_info >= (3,) and sys.platform != 'win32' and
224 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
225 and not params['restrictfilenames']):
226 # On Python 3, the Unicode filesystem API will throw errors (#1474)
228 'Assuming --restrict-filenames since file system encoding '
229 'cannot encode all charactes. '
230 'Set the LC_ALL environment variable to fix this.')
231 self.params['restrictfilenames'] = True
# %(stitle)s is a deprecated output-template field.
233 if '%(stitle)s' in self.params.get('outtmpl', ''):
234 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
def add_info_extractor(self, ie):
    """Add an InfoExtractor object to the end of the list."""
    # NOTE(review): the append to the ordered extractor list was elided in
    # this listing (line 240); without it the "end of the list" contract is
    # not met. The attribute name _ies is assumed -- confirm against
    # __init__.
    self._ies.append(ie)
    self._ies_instances[ie.ie_key()] = ie   # cache by key for get_info_extractor()
    ie.set_downloader(self)                 # mutual registration (see class docstring)
def get_info_extractor(self, ie_key):
    """
    Get an instance of an IE with name ie_key, it will try to get one from
    the _ies list, if there's no instance it will create a new one and add
    it to the extractor list.
    """
    # NOTE(review): restored the elided docstring quotes, the None guard
    # and the final return (lines 245, 249, 251, 254 in this listing).
    ie = self._ies_instances.get(ie_key)
    if ie is None:
        ie = get_info_extractor(ie_key)()   # module-level factory, not this method
        self.add_info_extractor(ie)
    return ie
def add_default_info_extractors(self):
    """
    Add the InfoExtractors returned by gen_extractors to the end of the list
    """
    # NOTE(review): restored the elided docstring delimiters (lines 257/259).
    for ie in gen_extractors():
        self.add_info_extractor(ie)
def add_post_processor(self, pp):
    """Add a PostProcessor object to the end of the chain."""
    # NOTE(review): the append to the post-processor chain was elided in
    # this listing (line 265); the attribute name _pps is assumed --
    # confirm against __init__.
    self._pps.append(pp)
    pp.set_downloader(self)   # mutual registration, mirrors add_info_extractor()
def add_progress_hook(self, ph):
    """Register a progress hook; currently these are only passed on to the
    file downloader (see process_info)."""
    self._progress_hooks += [ph]
def _bidi_workaround(self, message):
    """Feed message through the bidi helper subprocess set up in __init__
    and return the reordered text; return it unchanged when the workaround
    is not active."""
    if not hasattr(self, '_output_channel'):
        # NOTE(review): this early return was elided in the listing (line
        # 274); without it the asserts below fire whenever the workaround
        # is disabled.
        return message

    assert hasattr(self, '_output_process')
    assert type(message) == type('')
    # One readline() per input line, so count them before writing.
    line_count = message.count('\n') + 1
    self._output_process.stdin.write((message + '\n').encode('utf-8'))
    self._output_process.stdin.flush()
    res = ''.join(self._output_channel.readline().decode('utf-8')
                  for _ in range(line_count))
    # Drop the trailing newline we appended above.
    return res[:-len('\n')]
def to_screen(self, message, skip_eol=False):
    """Write message to the screen stream unless quiet mode suppresses it."""
    return self.to_stdout(message, skip_eol=skip_eol, check_quiet=True)
def _write_string(self, s, out=None):
    """Low-level string output honouring the configured 'encoding' option."""
    enc = self.params.get('encoding')
    write_string(s, out=out, encoding=enc)
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to stdout if not in quiet mode."""
    # A configured logger takes precedence over direct console output.
    if self.params.get('logger'):
        self.params['logger'].debug(message)
        return
    # Guard clause: with check_quiet, quiet mode suppresses the output
    # (De Morgan of the original elif condition).
    if check_quiet and self.params.get('quiet', False):
        return
    message = self._bidi_workaround(message)
    output = message if skip_eol else message + '\n'
    self._write_string(output, self._screen_file)
def to_stderr(self, message):
    """Print message to stderr."""
    assert type(message) == type('')
    if self.params.get('logger'):
        self.params['logger'].error(message)
    else:
        # NOTE(review): this else was elided in the listing (line 308);
        # without it the message would be printed twice when a logger is
        # configured. Mirrors the logger-vs-console split in to_stdout().
        message = self._bidi_workaround(message)
        output = message + '\n'
        self._write_string(output, self._err_file)
def to_console_title(self, message):
    """Set the terminal/console window title to message (no-op unless the
    consoletitle option is enabled)."""
    if not self.params.get('consoletitle', False):
        # NOTE(review): restored the elided early return (line 315).
        return
    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    elif 'TERM' in os.environ:
        # xterm OSC 0 escape sequence sets the window/icon title.
        self._write_string('\033]0;%s\007' % message, self._screen_file)
def save_console_title(self):
    """Push the current terminal title onto the terminal's title stack
    (no-op unless the consoletitle option is enabled)."""
    if not self.params.get('consoletitle', False):
        # NOTE(review): restored the elided early return (line 325).
        return
    if 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
def restore_console_title(self):
    """Pop the terminal title saved by save_console_title (no-op unless the
    consoletitle option is enabled)."""
    if not self.params.get('consoletitle', False):
        # NOTE(review): restored the elided early return (line 332).
        return
    if 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
# NOTE(review): the enclosing 'def __enter__(self):' line (337) and the
# method's return statement are elided from this listing; only this
# context-manager-entry statement is visible.
338 self.save_console_title()
def __exit__(self, *args):
    """Context-manager exit: restore the console title and, when a cookie
    file is configured, persist the cookie jar."""
    self.restore_console_title()
    cookie_path = self.params.get('cookiefile')
    if cookie_path is not None:
        self.cookiejar.save()
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    Depending on if the downloader has been configured to ignore
    download errors or not, this method may throw an exception or
    not when errors are found, after printing the message.

    tb, if given, is additional traceback information.
    """
    # NOTE(review): restored elided control-flow lines (the 'if tb is
    # None:' guard, 'tb = '''' initialization, both 'else:' branches and
    # the 'self.to_stderr(tb)' call) implied by the surrounding structure.
    if message is not None:
        self.to_stderr(message)
    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                tb = ''
                if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                    tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += compat_str(traceback.format_exc())
            else:
                # Not inside an except block: show the current stack instead.
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = ''.join(tb_data)
        self.to_stderr(tb)
    if not self.params.get('ignoreerrors', False):
        # Prefer the wrapped exception's exc_info when available so the
        # DownloadError carries the original cause.
        if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
            exc_info = sys.exc_info()[1].exc_info
        else:
            exc_info = sys.exc_info()
        raise DownloadError(message, exc_info)
    # ignoreerrors: record failure in the retcode instead of raising.
    self._download_retcode = 1
def report_warning(self, message):
    """
    Print the message to stderr, it will be prefixed with 'WARNING:'
    If stderr is a tty file the 'WARNING:' will be colored
    """
    # NOTE(review): restored the elided 'else:'/'return' lines (384, 386,
    # 389); without them a configured logger would not short-circuit and
    # no_warnings would not suppress the output.
    if self.params.get('logger') is not None:
        self.params['logger'].warning(message)
    else:
        if self.params.get('no_warnings'):
            return
        # ANSI color only on a tty and not on Windows consoles.
        if self._err_file.isatty() and os.name != 'nt':
            _msg_header = '\033[0;33mWARNING:\033[0m'
        else:
            _msg_header = 'WARNING:'
        warning_message = '%s %s' % (_msg_header, message)
        self.to_stderr(warning_message)
def report_error(self, message, tb=None):
    """
    Do the same as trouble, but prefixes the message with 'ERROR:', colored
    in red if stderr is a tty file.
    """
    # NOTE(review): restored the elided docstring delimiters and the
    # 'else:' line (401), mirroring report_warning's structure.
    if self._err_file.isatty() and os.name != 'nt':
        _msg_header = '\033[0;31mERROR:\033[0m'
    else:
        _msg_header = 'ERROR:'
    error_message = '%s %s' % (_msg_header, message)
    self.trouble(error_message, tb)
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded."""
    # NOTE(review): the 'try:' line (408) was elided in this listing; an
    # 'except' without it is a syntax error, so it is restored here.
    try:
        self.to_screen('[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        # file_name may not be representable in the console encoding.
        self.to_screen('[download] The file has already been downloaded')
def prepare_filename(self, info_dict):
    """Generate the output filename from the outtmpl option and info_dict.

    Returns the filename, or None when the template is invalid (the error
    is reported via report_error rather than raised).
    """
    # NOTE(review): restored elided lines: the enclosing 'try:', the
    # autonumber_size default, the sanitize lambda/dict tails, and both
    # return statements -- confirm against the project history.
    try:
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        autonumber_templ = '%0' + str(autonumber_size) + 'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        if template_dict.get('playlist_index') is not None:
            template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
        # Synthesize a 'resolution' field from width/height when missing.
        if template_dict.get('resolution') is None:
            if template_dict.get('width') and template_dict.get('height'):
                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
            elif template_dict.get('height'):
                template_dict['resolution'] = '%sp' % template_dict['height']
            elif template_dict.get('width'):
                template_dict['resolution'] = '?x%d' % template_dict['width']

        sanitize = lambda k, v: sanitize_filename(
            compat_str(v),
            restricted=self.params.get('restrictfilenames'),
            is_id=(k == 'id'))
        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items()
                             if v is not None)
        # Unknown template fields render as 'NA' instead of raising KeyError.
        template_dict = collections.defaultdict(lambda: 'NA', template_dict)

        tmpl = os.path.expanduser(self.params['outtmpl'])
        filename = tmpl % template_dict
        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
def _match_entry(self, info_dict):
    """ Returns None iff the file should be downloaded """

    video_title = info_dict.get('title', info_dict.get('id', 'video'))
    if 'title' in info_dict:
        # This can happen when we're just evaluating the playlist
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle:
            if not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle:
            if re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    date = info_dict.get('upload_date', None)
    if date is not None:
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    view_count = info_dict.get('view_count', None)
    if view_count is not None:
        min_views = self.params.get('min_views')
        if min_views is not None and view_count < min_views:
            return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
        max_views = self.params.get('max_views')
        if max_views is not None and view_count > max_views:
            return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
    age_limit = self.params.get('age_limit')
    if age_limit is not None:
        if age_limit < info_dict.get('age_limit', 0):
            # BUG FIX: the original used `title`, which is unbound when
            # info_dict has no 'title' key (NameError); use video_title
            # like the other skip messages.
            return 'Skipping "' + video_title + '" because it is age restricted'
    if self.in_download_archive(info_dict):
        return '%s has already been recorded in archive' % video_title
    # NOTE(review): restored the elided guards ('if matchtitle:',
    # 'if rejecttitle:', 'if date is not None:') and this final return.
    return None
def add_extra_info(info_dict, extra_info):
    """Copy entries from extra_info into info_dict without overwriting
    keys that are already present."""
    for key in extra_info:
        if key not in info_dict:
            info_dict[key] = extra_info[key]
# Run the URL through the registered InfoExtractors and hand the result to
# process_ie_result().
# NOTE(review): this listing elides many lines of the method (numbering
# jumps at 493-494, 498-500, 502-505, 507-509, 512-513, 516, 519, 522,
# 524, 526-527, 530, 532, 536-539, 541); the text below is incomplete and
# does not parse as-is.
492 def extract_info(self, url, download=True, ie_key=None, extra_info={},
495 Returns a list with a dictionary for each video we find.
496 If 'download', also downloads the videos.
497 extra_info is a dict containing the extra values to add to each result
# With an explicit ie_key only that extractor is tried; the fallback that
# probes all registered extractors is elided here.
501 ies = [self.get_info_extractor(ie_key)]
506 if not ie.suitable(url):
510 self.report_warning('The program functionality for this site has been marked as broken, '
511 'and will probably not work.')
514 ie_result = ie.extract(url)
515 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
517 if isinstance(ie_result, list):
518 # Backwards compatibility: old IE result format
520 '_type': 'compat_list',
521 'entries': ie_result,
523 self.add_default_extra_info(ie_result, ie, url)
525 return self.process_ie_result(ie_result, download, extra_info)
# Error policy: expected extractor errors are reported; MaxDownloadsReached
# propagates; other exceptions are reported only with ignoreerrors.
528 except ExtractorError as de: # An error we somewhat expected
529 self.report_error(compat_str(de), de.format_traceback())
531 except MaxDownloadsReached:
533 except Exception as e:
534 if self.params.get('ignoreerrors', False):
535 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
540 self.report_error('no suitable InfoExtractor for URL %s' % url)
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in the standard extractor-related fields on ie_result without
    overwriting values the extractor already set (see add_extra_info)."""
    # NOTE(review): restored the elided 'webpage_url' entry (line 545) and
    # the closing '})' (line 548).
    self.add_extra_info(ie_result, {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    })
# Dispatch on the extractor result's '_type' and resolve it into concrete
# video results (recursing for url/url_transparent/playlist/compat_list).
# NOTE(review): this listing elides many lines of the method (e.g. 551,
# 554, 557-558, 567, 575, 582-583, 586, 588, 593, 600, 602, 607-608, 613,
# 616, 621, 624, 627, 634-635, 639-640, 642-643, 646, 648, 650, 655-656,
# 660-662); the text below is incomplete and does not parse as-is.
550 def process_ie_result(self, ie_result, download=True, extra_info={}):
552 Take the result of the ie(may be modified) and resolve all unresolved
553 references (URLs, playlist items).
555 It will also download the videos if 'download'.
556 Returns the resolved ie_result.
559 result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
560 if result_type == 'video':
561 self.add_extra_info(ie_result, extra_info)
562 return self.process_video_result(ie_result, download=download)
563 elif result_type == 'url':
564 # We have to add extra_info to the results because it may be
565 # contained in a playlist
566 return self.extract_info(ie_result['url'],
568 ie_key=ie_result.get('ie_key'),
569 extra_info=extra_info)
570 elif result_type == 'url_transparent':
571 # Use the information from the embedding page
572 info = self.extract_info(
573 ie_result['url'], ie_key=ie_result.get('ie_key'),
574 extra_info=extra_info, download=False, process=False)
576 def make_result(embedded_info):
577 new_result = ie_result.copy()
578 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
579 'entries', 'ie_key', 'duration',
580 'subtitles', 'annotations', 'format',
581 'thumbnail', 'thumbnails'):
584 if f in embedded_info:
585 new_result[f] = embedded_info[f]
587 new_result = make_result(info)
589 assert new_result.get('_type') != 'url_transparent'
590 if new_result.get('_type') == 'compat_list':
591 new_result['entries'] = [
592 make_result(e) for e in new_result['entries']]
594 return self.process_ie_result(
595 new_result, download=download, extra_info=extra_info)
596 elif result_type == 'playlist':
597 # We process each entry in the playlist
598 playlist = ie_result.get('title', None) or ie_result.get('id', None)
599 self.to_screen('[download] Downloading playlist: %s' % playlist)
601 playlist_results = []
# playliststart is 1-based in params, converted to a 0-based slice index.
603 playliststart = self.params.get('playliststart', 1) - 1
604 playlistend = self.params.get('playlistend', None)
605 # For backwards compatibility, interpret -1 as whole list
606 if playlistend == -1:
# Entries may be either a plain list or a lazily-evaluated PagedList.
609 if isinstance(ie_result['entries'], list):
610 n_all_entries = len(ie_result['entries'])
611 entries = ie_result['entries'][playliststart:playlistend]
612 n_entries = len(entries)
614 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
615 (ie_result['extractor'], playlist, n_all_entries, n_entries))
617 assert isinstance(ie_result['entries'], PagedList)
618 entries = ie_result['entries'].getslice(
619 playliststart, playlistend)
620 n_entries = len(entries)
622 "[%s] playlist %s: Downloading %d videos" %
623 (ie_result['extractor'], playlist, n_entries))
625 for i, entry in enumerate(entries, 1):
626 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
628 'playlist': playlist,
629 'playlist_index': i + playliststart,
630 'extractor': ie_result['extractor'],
631 'webpage_url': ie_result['webpage_url'],
632 'webpage_url_basename': url_basename(ie_result['webpage_url']),
633 'extractor_key': ie_result['extractor_key'],
636 reason = self._match_entry(entry)
637 if reason is not None:
638 self.to_screen('[download] ' + reason)
641 entry_result = self.process_ie_result(entry,
644 playlist_results.append(entry_result)
645 ie_result['entries'] = playlist_results
647 elif result_type == 'compat_list':
649 self.add_extra_info(r,
651 'extractor': ie_result['extractor'],
652 'webpage_url': ie_result['webpage_url'],
653 'webpage_url_basename': url_basename(ie_result['webpage_url']),
654 'extractor_key': ie_result['extractor_key'],
657 ie_result['entries'] = [
658 self.process_ie_result(_fixup(r), download, extra_info)
659 for r in ie_result['entries']
663 raise Exception('Invalid result type: %s' % result_type)
def select_format(self, format_spec, available_formats):
    """Pick one format dict from available_formats per format_spec.

    available_formats is ordered worst-to-best (hence [-1] for 'best' and
    [0] for 'worst'). Returns None when nothing matches.
    """
    # NOTE(review): restored elided lines: the list-comprehension heads,
    # the emptiness guards, the trailing 'else:' branch and the final
    # match/return logic -- confirm against the project history.
    if format_spec == 'best' or format_spec is None:
        return available_formats[-1]
    elif format_spec == 'worst':
        return available_formats[0]
    elif format_spec == 'bestaudio':
        audio_formats = [
            f for f in available_formats
            if f.get('vcodec') == 'none']
        if audio_formats:
            return audio_formats[-1]
    elif format_spec == 'worstaudio':
        audio_formats = [
            f for f in available_formats
            if f.get('vcodec') == 'none']
        if audio_formats:
            return audio_formats[0]
    elif format_spec == 'bestvideo':
        video_formats = [
            f for f in available_formats
            if f.get('acodec') == 'none']
        if video_formats:
            return video_formats[-1]
    elif format_spec == 'worstvideo':
        video_formats = [
            f for f in available_formats
            if f.get('acodec') == 'none']
        if video_formats:
            return video_formats[0]
    else:
        # A bare extension selects by 'ext'; anything else by 'format_id'.
        extensions = ['mp4', 'flv', 'webm', '3gp']
        if format_spec in extensions:
            filter_f = lambda f: f['ext'] == format_spec
        else:
            filter_f = lambda f: f['format_id'] == format_spec
        matches = list(filter(filter_f, available_formats))
        if matches:
            return matches[-1]
    return None
# Validate a single 'video' result, normalize its fields, apply format
# selection, and hand each selected format to process_info().
# NOTE(review): this listing elides many lines of the method (e.g. 707,
# 712, 717, 720, 725, 728, 730-731, 736, 738-739, 741, 746, 754, 758,
# 760, 763-764, 766, 775-776, 779, 784, 795, 797, 799-800, 802, 806,
# 809-811); the text below is incomplete and does not parse as-is.
705 def process_video_result(self, info_dict, download=True):
706 assert info_dict.get('_type', 'video') == 'video'
708 if 'id' not in info_dict:
709 raise ExtractorError('Missing "id" field in extractor result')
710 if 'title' not in info_dict:
711 raise ExtractorError('Missing "title" field in extractor result')
713 if 'playlist' not in info_dict:
714 # It isn't part of a playlist
715 info_dict['playlist'] = None
716 info_dict['playlist_index'] = None
718 if 'display_id' not in info_dict and 'id' in info_dict:
719 info_dict['display_id'] = info_dict['id']
# Derive upload_date (YYYYMMDD) from a Unix timestamp when absent.
721 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
722 upload_date = datetime.datetime.utcfromtimestamp(
723 info_dict['timestamp'])
724 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
726 # These extractors handle format selection themselves
727 if info_dict['extractor'] in ['Youku']:
729 self.process_info(info_dict)
732 # We now pick which formats have to be downloaded
733 if info_dict.get('formats') is None:
734 # There's only one format available
735 formats = [info_dict]
737 formats = info_dict['formats']
740 raise ExtractorError('No video formats found!')
742 # We check that all the formats have the format and format_id fields
743 for i, format in enumerate(formats):
744 if 'url' not in format:
745 raise ExtractorError('Missing "url" key in result (index %d)' % i)
747 if format.get('format_id') is None:
748 format['format_id'] = compat_str(i)
749 if format.get('format') is None:
750 format['format'] = '{id} - {res}{note}'.format(
751 id=format['format_id'],
752 res=self.format_resolution(format),
753 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
755 # Automatically determine file extension if missing
756 if 'ext' not in format:
757 format['ext'] = determine_ext(format['url']).lower()
# format_limit truncates the (worst-to-best) list at the given format_id.
759 format_limit = self.params.get('format_limit', None)
761 formats = list(takewhile_inclusive(
762 lambda f: f['format_id'] != format_limit, formats
765 # TODO Central sorting goes here
767 if formats[0] is not info_dict:
768 # only set the 'formats' fields if the original info_dict list them
769 # otherwise we end up with a circular reference, the first (and unique)
770 # element in the 'formats' field in info_dict is info_dict itself,
771 # which can't be exported to json
772 info_dict['formats'] = formats
773 if self.params.get('listformats', None):
774 self.list_formats(info_dict)
777 req_format = self.params.get('format')
778 if req_format is None:
780 formats_to_download = []
781 # The -1 is for supporting YoutubeIE
782 if req_format in ('-1', 'all'):
783 formats_to_download = formats
785 # We can accept formats requested in the format: 34/5/best, we pick
786 # the first that is available, starting from left
787 req_formats = req_format.split('/')
788 for rf in req_formats:
789 if re.match(r'.+?\+.+?', rf) is not None:
790 # Two formats have been requested like '137+139'
791 format_1, format_2 = rf.split('+')
792 formats_info = (self.select_format(format_1, formats),
793 self.select_format(format_2, formats))
794 if all(formats_info):
796 'requested_formats': formats_info,
798 'ext': formats_info[0]['ext'],
801 selected_format = None
803 selected_format = self.select_format(rf, formats)
804 if selected_format is not None:
805 formats_to_download = [selected_format]
807 if not formats_to_download:
808 raise ExtractorError('requested format not available',
812 if len(formats_to_download) > 1:
813 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
814 for format in formats_to_download:
815 new_info = dict(info_dict)
816 new_info.update(format)
817 self.process_info(new_info)
818 # We update the info dict with the best quality format (backwards compatibility)
819 info_dict.update(formats_to_download[-1])
822 def process_info(self, info_dict):
823 """Process a single resolved IE result."""
825 assert info_dict.get('_type', 'video') == 'video'
827 max_downloads = self.params.get('max_downloads')
828 if max_downloads is not None:
829 if self._num_downloads >= int(max_downloads):
830 raise MaxDownloadsReached()
832 info_dict['fulltitle'] = info_dict['title']
833 if len(info_dict['title']) > 200:
834 info_dict['title'] = info_dict['title'][:197] + '...'
836 # Keep for backwards compatibility
837 info_dict['stitle'] = info_dict['title']
839 if not 'format' in info_dict:
840 info_dict['format'] = info_dict['ext']
842 reason = self._match_entry(info_dict)
843 if reason is not None:
844 self.to_screen('[download] ' + reason)
847 self._num_downloads += 1
849 filename = self.prepare_filename(info_dict)
852 if self.params.get('forcetitle', False):
853 self.to_stdout(info_dict['fulltitle'])
854 if self.params.get('forceid', False):
855 self.to_stdout(info_dict['id'])
856 if self.params.get('forceurl', False):
857 # For RTMP URLs, also include the playpath
858 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
859 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
860 self.to_stdout(info_dict['thumbnail'])
861 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
862 self.to_stdout(info_dict['description'])
863 if self.params.get('forcefilename', False) and filename is not None:
864 self.to_stdout(filename)
865 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
866 self.to_stdout(formatSeconds(info_dict['duration']))
867 if self.params.get('forceformat', False):
868 self.to_stdout(info_dict['format'])
869 if self.params.get('forcejson', False):
870 info_dict['_filename'] = filename
871 self.to_stdout(json.dumps(info_dict))
873 # Do nothing else if in simulate mode
874 if self.params.get('simulate', False):
881 dn = os.path.dirname(encodeFilename(filename))
882 if dn and not os.path.exists(dn):
884 except (OSError, IOError) as err:
885 self.report_error('unable to create directory ' + compat_str(err))
888 if self.params.get('writedescription', False):
889 descfn = filename + '.description'
890 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
891 self.to_screen('[info] Video description is already present')
894 self.to_screen('[info] Writing video description to: ' + descfn)
895 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
896 descfile.write(info_dict['description'])
897 except (KeyError, TypeError):
898 self.report_warning('There\'s no description to write.')
899 except (OSError, IOError):
900 self.report_error('Cannot write description file ' + descfn)
903 if self.params.get('writeannotations', False):
904 annofn = filename + '.annotations.xml'
905 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
906 self.to_screen('[info] Video annotations are already present')
909 self.to_screen('[info] Writing video annotations to: ' + annofn)
910 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
911 annofile.write(info_dict['annotations'])
912 except (KeyError, TypeError):
913 self.report_warning('There are no annotations to write.')
914 except (OSError, IOError):
915 self.report_error('Cannot write annotations file: ' + annofn)
918 subtitles_are_requested = any([self.params.get('writesubtitles', False),
919 self.params.get('writeautomaticsub')])
921 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
922 # subtitles download errors are already managed as troubles in relevant IE
923 # that way it will silently go on when used with unsupporting IE
924 subtitles = info_dict['subtitles']
925 sub_format = self.params.get('subtitlesformat', 'srt')
926 for sub_lang in subtitles.keys():
927 sub = subtitles[sub_lang]
931 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
932 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
933 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
935 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
936 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
938 except (OSError, IOError):
939 self.report_error('Cannot write subtitles file ' + descfn)
942 if self.params.get('writeinfojson', False):
943 infofn = os.path.splitext(filename)[0] + '.info.json'
944 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
945 self.to_screen('[info] Video description metadata is already present')
947 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
949 write_json_file(info_dict, encodeFilename(infofn))
950 except (OSError, IOError):
951 self.report_error('Cannot write metadata to JSON file ' + infofn)
954 if self.params.get('writethumbnail', False):
955 if info_dict.get('thumbnail') is not None:
956 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
957 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
958 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
959 self.to_screen('[%s] %s: Thumbnail is already present' %
960 (info_dict['extractor'], info_dict['id']))
962 self.to_screen('[%s] %s: Downloading thumbnail ...' %
963 (info_dict['extractor'], info_dict['id']))
965 uf = self.urlopen(info_dict['thumbnail'])
966 with open(thumb_filename, 'wb') as thumbf:
967 shutil.copyfileobj(uf, thumbf)
968 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
969 (info_dict['extractor'], info_dict['id'], thumb_filename))
970 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
971 self.report_warning('Unable to download thumbnail "%s": %s' %
972 (info_dict['thumbnail'], compat_str(err)))
974 if not self.params.get('skip_download', False):
975 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
980 fd = get_suitable_downloader(info)(self, self.params)
981 for ph in self._progress_hooks:
982 fd.add_progress_hook(ph)
983 return fd.download(name, info)
984 if info_dict.get('requested_formats') is not None:
987 merger = FFmpegMergerPP(self)
988 if not merger._get_executable():
990 self.report_warning('You have requested multiple '
991 'formats but ffmpeg or avconv are not installed.'
992 ' The formats won\'t be merged')
994 postprocessors = [merger]
995 for f in info_dict['requested_formats']:
996 new_info = dict(info_dict)
998 fname = self.prepare_filename(new_info)
999 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1000 downloaded.append(fname)
1001 partial_success = dl(fname, new_info)
1002 success = success and partial_success
1003 info_dict['__postprocessors'] = postprocessors
1004 info_dict['__files_to_merge'] = downloaded
1006 # Just a single file
1007 success = dl(filename, info_dict)
1008 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1009 self.report_error('unable to download video data: %s' % str(err))
1011 except (OSError, IOError) as err:
1012 raise UnavailableVideoError(err)
1013 except (ContentTooShortError, ) as err:
1014 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1019 self.post_process(filename, info_dict)
1020 except (PostProcessingError) as err:
1021 self.report_error('postprocessing: %s' % str(err))
1024 self.record_download_archive(info_dict)
    def download(self, url_list):
        """Download a given list of URLs.

        Each URL is handed to extract_info(), which (as a side effect)
        also performs the download.  Returns self._download_retcode,
        the accumulated process return code.
        """
        # Refuse to run when several URLs would all be written to one fixed
        # output file: more than one URL, no '%' placeholder in the output
        # template, and not limited to a single download.
        if (len(url_list) > 1 and
                '%' not in self.params['outtmpl']
                and self.params.get('max_downloads') != 1):
            raise SameFileError(self.params['outtmpl'])

        for url in url_list:
                # extract_info also downloads the videos
                self.extract_info(url)
            except UnavailableVideoError:
                self.report_error('unable to download video')
            except MaxDownloadsReached:
                # --max-downloads limit hit: report and stop processing URLs.
                self.to_screen('[info] Maximum number of downloaded files reached.')
        return self._download_retcode
    def download_with_info_file(self, info_filename):
        """Download a video from a previously saved .info.json file.

        Reads the JSON info dict from *info_filename* and processes it.
        If processing fails with a DownloadError, falls back to a fresh
        extraction from the stored 'webpage_url' (when available).
        Returns self._download_retcode.
        """
        with io.open(info_filename, 'r', encoding='utf-8') as f:
            self.process_ie_result(info, download=True)
        except DownloadError:
            # The saved info may be stale (expired URLs); retry from the page.
            webpage_url = info.get('webpage_url')
            if webpage_url is not None:
                self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
                return self.download([webpage_url])
        return self._download_retcode
    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file."""
        # Work on a copy so the caller's info dict is not mutated.
        info = dict(ie_info)
        info['filepath'] = filename
        # Per-download postprocessors (e.g. the ffmpeg merger stored under
        # '__postprocessors') run before the globally registered self._pps.
        if ie_info.get('__postprocessors') is not None:
            pps_chain.extend(ie_info['__postprocessors'])
        pps_chain.extend(self._pps)
        for pp in pps_chain:
                keep_video_wish, new_info = pp.run(info)
                if keep_video_wish is not None:
                        keep_video = keep_video_wish
                    elif keep_video is None:
                        # No clear decision yet, let IE decide
                        keep_video = keep_video_wish
            except PostProcessingError as e:
                # A failing postprocessor is reported but does not abort the
                # remaining postprocessors.
                self.report_error(e.msg)
        # Delete the original file only when a postprocessor asked for it
        # and the user did not pass -k/--keep-video.
        if keep_video is False and not self.params.get('keepvideo', False):
                self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
                os.remove(encodeFilename(filename))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded video file')
1086 def _make_archive_id(self, info_dict):
1087 # Future-proof against any change in case
1088 # and backwards compatibility with prior versions
1089 extractor = info_dict.get('extractor_key')
1090 if extractor is None:
1091 if 'id' in info_dict:
1092 extractor = info_dict.get('ie_key') # key in a playlist
1093 if extractor is None:
1094 return None # Incomplete video information
1095 return extractor.lower() + ' ' + info_dict['id']
    def in_download_archive(self, info_dict):
        """Return True if this video is already recorded in the download
        archive file (--download-archive), False otherwise."""
        fn = self.params.get('download_archive')
        vid_id = self._make_archive_id(info_dict)
            return False  # Incomplete video information
            # One archive id per line; compare against the stripped line.
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    if line.strip() == vid_id:
        except IOError as ioe:
            # A missing archive file just means nothing was recorded yet;
            # any other I/O error is unexpected.
            if ioe.errno != errno.ENOENT:
    def record_download_archive(self, info_dict):
        """Append this video's archive id to the download archive file
        (--download-archive) so it is skipped on future runs."""
        fn = self.params.get('download_archive')
        vid_id = self._make_archive_id(info_dict)
        # locked_file serializes concurrent writers appending to the archive.
        with locked_file(fn, 'a', encoding='utf-8') as archive_file:
            archive_file.write(vid_id + '\n')
    def format_resolution(format, default='unknown'):
        """Return a human-readable resolution string for a format dict.

        Preference order: an explicit 'resolution' value, then 'WxH' when
        both dimensions are known, then 'Hp' (height only), then '?xW'
        (width only).  NOTE(review): called as self.format_resolution()
        elsewhere in this class, so presumably decorated as a staticmethod
        outside this excerpt -- confirm.  The parameter name 'format'
        shadows the builtin but is kept for interface compatibility.
        """
        # Audio-only formats carry no video resolution.
        if format.get('vcodec') == 'none':
        if format.get('resolution') is not None:
            # Trust an explicitly provided resolution string.
            return format['resolution']
        if format.get('height') is not None:
            if format.get('width') is not None:
                res = '%sx%s' % (format['width'], format['height'])
                res = '%sp' % format['height']
        elif format.get('width') is not None:
            res = '?x%d' % format['width']
    def list_formats(self, info_dict):
        """Print a table of the available formats for a video.

        One row per entry in info_dict['formats'] (falling back to the
        info dict itself), with columns: format id, resolution, and a
        free-form note assembled by format_note() below.
        """
        def format_note(fdict):
            # Build a short human-readable summary of one format dict:
            # container, codecs, bitrates, sample rate and file size.
            if fdict.get('ext') in ['f4f', 'f4m']:
                res += '(unsupported) '
            if fdict.get('format_note') is not None:
                res += fdict['format_note'] + ' '
            if fdict.get('tbr') is not None:
                res += '%4dk ' % fdict['tbr']
            if fdict.get('container') is not None:
                res += '%s container' % fdict['container']
            if (fdict.get('vcodec') is not None and
                    fdict.get('vcodec') != 'none'):
                res += fdict['vcodec']
                if fdict.get('vbr') is not None:
            elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
                if fdict.get('vbr') is not None:
                    res += '%4dk' % fdict['vbr']
            if fdict.get('acodec') is not None:
                if fdict['acodec'] == 'none':
                    res += '%-5s' % fdict['acodec']
            elif fdict.get('abr') is not None:
            if fdict.get('abr') is not None:
                res += '@%3dk' % fdict['abr']
            if fdict.get('asr') is not None:
                res += ' (%5dHz)' % fdict['asr']
            if fdict.get('filesize') is not None:
                res += format_bytes(fdict['filesize'])

        def line(format, idlen=20):
            # Render one table row; idlen sizes the format-id column.
            return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
                format['format_id'],
                self.format_resolution(format),
                format_note(format),
        formats = info_dict.get('formats', [info_dict])
        # The id column must fit the widest format_id (and the header).
        idlen = max(len('format code'),
                    max(len(f['format_id']) for f in formats))
        formats_s = [line(f, idlen) for f in formats]
        # Formats are assumed sorted worst-first; tag the two extremes.
        if len(formats) > 1:
            formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
            formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

        header_line = line({
            'format_id': 'format code', 'ext': 'extension',
            'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
        self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                       (info_dict['id'], header_line, '\n'.join(formats_s)))
1209 def urlopen(self, req):
1210 """ Start an HTTP download """
1211 return self._opener.open(req, timeout=self._socket_timeout)
    def print_debug_header(self):
        """Write diagnostic information -- encodings, youtube-dl version,
        git revision (best effort), Python version and proxy map -- when
        --verbose is enabled; otherwise do nothing."""
        if not self.params.get('verbose'):
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                sys.stdout.encoding,
                self.get_encoding()),
        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        # Best effort: report the git revision when running from a checkout.
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        # Only print when the output actually looks like a commit hash.
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: ' + out + '\n')
        self._write_string('[debug] Python version %s - %s' %
                           (platform.python_version(), platform_name()) + '\n')
        # Merge the proxy settings from every opener handler that has any.
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
    def _setup_opener(self):
        """Build the urllib opener used for all HTTP(S) requests.

        Configures the socket timeout, cookie jar (in-memory or Mozilla
        file-backed via --cookies), proxy handling and the custom HTTPS /
        YoutubeDL handlers, then stores the result in self._opener.
        """
        timeout_val = self.params.get('socket_timeout')
        # Default to a 600s timeout when the user did not set one.
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # No --cookies file: keep cookies in memory only.
            self.cookiejar = compat_cookiejar.CookieJar()
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
            # Load existing cookies only when the file is readable.
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        if opts_proxy is not None:
            # --proxy '' means: disable proxying entirely.
            if opts_proxy == '':
                proxies = {'http': opts_proxy, 'https': opts_proxy}
            # No explicit proxy: fall back to the environment settings.
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = compat_urllib_request.ProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(
            self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(debuglevel=debuglevel)
        opener = compat_urllib_request.build_opener(
            https_handler, proxy_handler, cookie_processor, ydlh)
        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
    def encode(self, s):
        """Encode text *s* using the configured output encoding
        (see get_encoding()); bytes input is returned unchanged.

        On failure the UnicodeEncodeError's reason is extended with a
        hint about the --encoding option before being propagated.
        """
        if isinstance(s, bytes):
            return s  # Already encoded

            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1301 def get_encoding(self):
1302 encoding = self.params.get('encoding')
1303 if encoding is None:
1304 encoding = preferredencoding()