2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
30 compat_urllib_request,
54 UnavailableVideoError,
61 from .cache import Cache
62 from .extractor import get_info_extractor, gen_extractors
63 from .downloader import get_suitable_downloader
64 from .postprocessor import FFmpegMergerPP
65 from .version import __version__
68 class YoutubeDL(object):
71 YoutubeDL objects are the ones responsible for downloading the
72 actual video file and writing it to disk if the user has requested
73 it, among some other tasks. In most cases there should be one per
74 program. As, given a video URL, the downloader doesn't know how to
75 extract all the needed information, task that InfoExtractors do, it
76 has to pass the URL to one of them.
78 For this, YoutubeDL objects have a method that allows
79 InfoExtractors to be registered in a given order. When it is passed
80 a URL, the YoutubeDL object hands it to the first InfoExtractor it
81 finds that reports being able to handle it. The InfoExtractor extracts
82 all the information about the video or videos the URL refers to, and
83 YoutubeDL process the extracted information, possibly using a File
84 Downloader to download the video.
86 YoutubeDL objects accept a lot of parameters. In order not to saturate
87 the object constructor with arguments, it receives a dictionary of
88 options instead. These options are available through the params
89 attribute for the InfoExtractors to use. The YoutubeDL also
90 registers itself as the downloader in charge for the InfoExtractors
91 that are added to it, so this is a "mutual registration".
95 username: Username for authentication purposes.
96 password: Password for authentication purposes.
97 videopassword: Password for accessing a video.
98 usenetrc: Use netrc for authentication instead.
99 verbose: Print additional info to stdout.
100 quiet: Do not print messages to stdout.
101 no_warnings: Do not print out anything for warnings.
102 forceurl: Force printing final URL.
103 forcetitle: Force printing title.
104 forceid: Force printing ID.
105 forcethumbnail: Force printing thumbnail URL.
106 forcedescription: Force printing description.
107 forcefilename: Force printing final filename.
108 forceduration: Force printing duration.
109 forcejson: Force printing info_dict as JSON.
110 simulate: Do not download the video files.
111 format: Video format code.
112 format_limit: Highest quality format to try.
113 outtmpl: Template for output names.
114 restrictfilenames: Do not allow "&" and spaces in file names
115 ignoreerrors: Do not stop on download errors.
116 nooverwrites: Prevent overwriting files.
117 playliststart: Playlist item to start at.
118 playlistend: Playlist item to end at.
119 matchtitle: Download only matching titles.
120 rejecttitle: Reject downloads for matching titles.
121 logger: Log messages to a logging.Logger instance.
122 logtostderr: Log messages to stderr instead of stdout.
123 writedescription: Write the video description to a .description file
124 writeinfojson: Write the video description to a .info.json file
125 writeannotations: Write the video annotations to a .annotations.xml file
126 writethumbnail: Write the thumbnail image to a file
127 writesubtitles: Write the video subtitles to a file
128 writeautomaticsub: Write the automatic subtitles to a file
129 allsubtitles: Downloads all the subtitles of the video
130 (requires writesubtitles or writeautomaticsub)
131 listsubtitles: Lists all available subtitles for the video
132 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
133 subtitleslangs: List of languages of the subtitles to download
134 keepvideo: Keep the video file after post-processing
135 daterange: A DateRange object, download only if the upload_date is in the range.
136 skip_download: Skip the actual download of the video file
137 cachedir: Location of the cache files in the filesystem.
138 False to disable filesystem cache.
139 noplaylist: Download single video instead of a playlist if in doubt.
140 age_limit: An integer representing the user's age in years.
141 Unsuitable videos for the given age are skipped.
142 min_views: An integer representing the minimum view count the video
143 must have in order to not be skipped.
144 Videos without view count information are always
145 downloaded. None for no limit.
146 max_views: An integer representing the maximum view count.
147 Videos that are more popular than that are not
149 Videos without view count information are always
150 downloaded. None for no limit.
151 download_archive: File name of a file where all downloads are recorded.
152 Videos already present in the file are not downloaded
154 cookiefile: File name where cookies should be read from and dumped to.
155 nocheckcertificate:Do not verify SSL certificates
156 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
157 At the moment, this is only supported by YouTube.
158 proxy: URL of the proxy server to use
159 socket_timeout: Time to wait for unresponsive hosts, in seconds
160 bidi_workaround: Work around buggy terminals without bidirectional text
161 support, using fribidi
162 debug_printtraffic:Print out sent and received HTTP traffic
163 include_ads: Download ads as well
164 default_search: Prepend this string if an input url is not valid.
165 'auto' for elaborate guessing
166 encoding: Use this encoding instead of the system-specified.
167 extract_flat: Do not resolve URLs, return the immediate result.
168 Pass in 'in_playlist' to only show this behavior for
171 The following parameters are not used by YoutubeDL itself, they are used by
173 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
174 noresizebuffer, retries, continuedl, noprogress, consoletitle
176 The following options are used by the post processors:
177 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
178 otherwise prefer avconv.
179 exec_cmd: Arbitrary command to run after downloading
# Session-wide bookkeeping, re-initialised to concrete values in __init__():
# _download_retcode becomes the process exit status (set to 1 in trouble()),
# _num_downloads counts completed downloads (used e.g. by %(autonumber)s).
185 _download_retcode = None
186 _num_downloads = None
189 def __init__(self, params=None):
190 """Create a FileDownloader object with the given options."""
# NOTE(review): this copy appears elided -- several original lines (e.g.
# the initialisation of self.params and the try/except framing around the
# bidi pipe setup) are missing; verify against upstream before editing.
194 self._ies_instances = {}
196 self._progress_hooks = []
197 self._download_retcode = 0
198 self._num_downloads = 0
# 'logtostderr' selects index 1 (stderr) of this two-element list.
199 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
200 self._err_file = sys.stderr
202 self.cache = Cache(self)
204 if params.get('bidi_workaround', False):
# Route screen output through an external bidi filter ('bidiv', with a
# fallback to 'fribidi' below), connected through a pseudo-terminal.
207 master, slave = pty.openpty()
208 width = get_term_width()
212 width_args = ['-w', str(width)]
214 stdin=subprocess.PIPE,
216 stderr=self._err_file)
218 self._output_process = subprocess.Popen(
219 ['bidiv'] + width_args, **sp_kwargs
222 self._output_process = subprocess.Popen(
223 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
# Read the filter's reordered output back via the pty master side.
224 self._output_channel = os.fdopen(master, 'rb')
225 except OSError as ose:
227 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
# Auto-enable --restrict-filenames when the filesystem encoding cannot
# represent arbitrary Unicode titles (Python 3, non-Windows only).
231 if (sys.version_info >= (3,) and sys.platform != 'win32' and
232 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
233 and not params.get('restrictfilenames', False)):
234 # On Python 3, the Unicode filesystem API will throw errors (#1474)
236 'Assuming --restrict-filenames since file system encoding '
237 'cannot encode all characters. '
238 'Set the LC_ALL environment variable to fix this.')
239 self.params['restrictfilenames'] = True
# %(stitle)s was an older sanitised-title template field; warn on use.
241 if '%(stitle)s' in self.params.get('outtmpl', ''):
242 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
246 def add_info_extractor(self, ie):
247 """Add an InfoExtractor object to the end of the list."""
# NOTE(review): a line appears elided here (presumably appending ie to the
# ordered extractor list) -- verify against upstream.
249 self._ies_instances[ie.ie_key()] = ie
# Mutual registration: the extractor gets a back-reference to this object.
250 ie.set_downloader(self)
252 def get_info_extractor(self, ie_key):
# NOTE(review): the docstring delimiters, the 'ie is None' guard and the
# final 'return ie' appear elided in this copy.
254 Get an instance of an IE with name ie_key, it will try to get one from
255 the _ies list, if there's no instance it will create a new one and add
256 it to the extractor list.
# Lazily instantiate and register the extractor on first request.
258 ie = self._ies_instances.get(ie_key)
260 ie = get_info_extractor(ie_key)()
261 self.add_info_extractor(ie)
def add_default_info_extractors(self):
    """Register every extractor produced by gen_extractors(), in order."""
    for extractor in gen_extractors():
        self.add_info_extractor(extractor)
271 def add_post_processor(self, pp):
272 """Add a PostProcessor object to the end of the chain."""
# NOTE(review): the line appending pp to the post-processor chain appears
# elided here -- verify against upstream.
274 pp.set_downloader(self)
def add_progress_hook(self, ph):
    """Register a progress hook (currently only used by the file downloader)."""
    self._progress_hooks += [ph]
280 def _bidi_workaround(self, message):
# Filter *message* through the external bidi process set up in __init__
# and return the reordered text.
# NOTE(review): the early 'return message' for the workaround-disabled
# case appears elided after this guard.
281 if not hasattr(self, '_output_channel'):
284 assert hasattr(self, '_output_process')
285 assert isinstance(message, compat_str)
286 line_count = message.count('\n') + 1
287 self._output_process.stdin.write((message + '\n').encode('utf-8'))
288 self._output_process.stdin.flush()
# Read back exactly as many lines as were written.
289 res = ''.join(self._output_channel.readline().decode('utf-8')
290 for _ in range(line_count))
# Drop the trailing newline matching the one appended above.
291 return res[:-len('\n')]
def to_screen(self, message, skip_eol=False):
    """Print *message* to the screen, suppressed when 'quiet' is set."""
    return self.to_stdout(message, skip_eol=skip_eol, check_quiet=True)
def _write_string(self, s, out=None):
    """Write *s* to *out* using the encoding configured in self.params."""
    encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=encoding)
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print a message to stdout (or hand it to the configured logger).

    When check_quiet is true and the 'quiet' option is set, the message
    is suppressed entirely.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    # not (check_quiet and quiet)  <=>  not check_quiet or not quiet
    suppressed = check_quiet and self.params.get('quiet', False)
    if not suppressed:
        message = self._bidi_workaround(message)
        terminator = '' if skip_eol else '\n'
        self._write_string(message + terminator, self._screen_file)
311 def to_stderr(self, message):
312 """Print message to stderr."""
313 assert isinstance(message, compat_str)
314 if self.params.get('logger'):
315 self.params['logger'].error(message)
# NOTE(review): an 'else:' introducing the direct-to-stderr branch appears
# elided here.
317 message = self._bidi_workaround(message)
318 output = message + '\n'
319 self._write_string(output, self._err_file)
321 def to_console_title(self, message):
# Set the terminal/console window title to *message*.
# NOTE(review): an early 'return' after this guard appears elided.
322 if not self.params.get('consoletitle', False):
324 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
325 # c_wchar_p() might not be necessary if `message` is
326 # already of type unicode()
327 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
328 elif 'TERM' in os.environ:
# xterm OSC 0 escape sequence: set icon name and window title.
329 self._write_string('\033]0;%s\007' % message, self._screen_file)
331 def save_console_title(self):
# Push the current terminal title onto the xterm title stack so that
# restore_console_title() can pop it later.
# NOTE(review): an early 'return' after this guard appears elided.
332 if not self.params.get('consoletitle', False):
334 if 'TERM' in os.environ:
335 # Save the title on stack
336 self._write_string('\033[22;0t', self._screen_file)
338 def restore_console_title(self):
# Pop the terminal title saved by save_console_title().
# NOTE(review): an early 'return' after this guard appears elided.
339 if not self.params.get('consoletitle', False):
341 if 'TERM' in os.environ:
342 # Restore the title from stack
343 self._write_string('\033[23;0t', self._screen_file)
346 self.save_console_title()
def __exit__(self, *args):
    """Leave the context manager: restore the console title, then persist
    cookies if a cookie file was configured."""
    self.restore_console_title()
    cookie_path = self.params.get('cookiefile')
    if cookie_path is not None:
        self.cookiejar.save()
355 def trouble(self, message=None, tb=None):
356 """Determine action to take when a download problem appears.
358 Depending on if the downloader has been configured to ignore
359 download errors or not, this method may throw an exception or
360 not when errors are found, after printing the message.
362 tb, if given, is additional traceback information.
# NOTE(review): several lines appear elided (the docstring delimiters,
# the 'if tb is None:' guard, the 'tb = '' ' initialiser, an 'else:' and
# the call printing tb) -- verify against upstream.
364 if message is not None:
365 self.to_stderr(message)
366 if self.params.get('verbose'):
368 if sys.exc_info()[0]: # if .trouble has been called from an except block
# The pending exception may wrap another one via an 'exc_info'
# attribute; include the wrapped traceback first.
370 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
371 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
372 tb += compat_str(traceback.format_exc())
# Not inside an except block: fall back to the current call stack.
374 tb_data = traceback.format_list(traceback.extract_stack())
375 tb = ''.join(tb_data)
377 if not self.params.get('ignoreerrors', False):
378 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
379 exc_info = sys.exc_info()[1].exc_info
381 exc_info = sys.exc_info()
382 raise DownloadError(message, exc_info)
# With ignoreerrors set, record failure in the final exit code instead.
383 self._download_retcode = 1
385 def report_warning(self, message):
387 Print the message to stderr, it will be prefixed with 'WARNING:'
388 If stderr is a tty file the 'WARNING:' will be colored
# NOTE(review): the docstring delimiters, the 'return' statements after
# the logger/no_warnings branches, and an 'else:' appear elided.
390 if self.params.get('logger') is not None:
391 self.params['logger'].warning(message)
393 if self.params.get('no_warnings'):
# ANSI yellow header for interactive POSIX terminals; plain otherwise.
395 if self._err_file.isatty() and os.name != 'nt':
396 _msg_header = '\033[0;33mWARNING:\033[0m'
398 _msg_header = 'WARNING:'
399 warning_message = '%s %s' % (_msg_header, message)
400 self.to_stderr(warning_message)
402 def report_error(self, message, tb=None):
404 Do the same as trouble, but prefixes the message with 'ERROR:', colored
405 in red if stderr is a tty file.
# NOTE(review): the docstring delimiters and an 'else:' before the plain
# header appear elided.
# ANSI red header for interactive POSIX terminals; plain otherwise.
407 if self._err_file.isatty() and os.name != 'nt':
408 _msg_header = '\033[0;31mERROR:\033[0m'
410 _msg_header = 'ERROR:'
411 error_message = '%s %s' % (_msg_header, message)
412 self.trouble(error_message, tb)
414 def report_file_already_downloaded(self, file_name):
415 """Report file has already been fully downloaded."""
# NOTE(review): the 'try:' opening this block appears elided; the except
# below guards against console encodings unable to render the file name.
417 self.to_screen('[download] %s has already been downloaded' % file_name)
418 except UnicodeEncodeError:
419 self.to_screen('[download] The file has already been downloaded')
421 def prepare_filename(self, info_dict):
422 """Generate the output filename."""
# NOTE(review): several lines appear elided (the enclosing 'try:', the
# autonumber default, the sanitize lambda body, the comprehension close
# and the final 'return filename') -- verify against upstream.
424 template_dict = dict(info_dict)
426 template_dict['epoch'] = int(time.time())
427 autonumber_size = self.params.get('autonumber_size')
428 if autonumber_size is None:
430 autonumber_templ = '%0' + str(autonumber_size) + 'd'
431 template_dict['autonumber'] = autonumber_templ % self._num_downloads
# Zero-pad the playlist index to the width of the playlist length.
432 if template_dict.get('playlist_index') is not None:
433 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
# Derive a human-readable resolution string when not provided.
434 if template_dict.get('resolution') is None:
435 if template_dict.get('width') and template_dict.get('height'):
436 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
437 elif template_dict.get('height'):
438 template_dict['resolution'] = '%sp' % template_dict['height']
439 elif template_dict.get('width'):
# NOTE(review): '?x%d' renders the width in the height slot (e.g.
# "?x1280"); '%dx?' looks intended -- confirm against upstream.
440 template_dict['resolution'] = '?x%d' % template_dict['width']
442 sanitize = lambda k, v: sanitize_filename(
444 restricted=self.params.get('restrictfilenames'),
446 template_dict = dict((k, sanitize(k, v))
447 for k, v in template_dict.items()
# Unknown template fields expand to the literal string 'NA'.
449 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
451 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
452 tmpl = os.path.expanduser(outtmpl)
453 filename = tmpl % template_dict
455 except ValueError as err:
456 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
459 def _match_entry(self, info_dict):
460 """ Returns None iff the file should be downloaded """
# Each filter below returns a human-readable skip reason string when the
# entry must be skipped.
# NOTE(review): several guard lines (e.g. 'if matchtitle:',
# 'if rejecttitle:', an early continue for a missing age limit and the
# final 'return None') appear elided in this copy.
462 video_title = info_dict.get('title', info_dict.get('id', 'video'))
463 if 'title' in info_dict:
464 # This can happen when we're just evaluating the playlist
465 title = info_dict['title']
466 matchtitle = self.params.get('matchtitle', False)
468 if not re.search(matchtitle, title, re.IGNORECASE):
469 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
470 rejecttitle = self.params.get('rejecttitle', False)
472 if re.search(rejecttitle, title, re.IGNORECASE):
473 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
474 date = info_dict.get('upload_date', None)
476 dateRange = self.params.get('daterange', DateRange())
477 if date not in dateRange:
478 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
# View-count filters only apply when the extractor reported a count.
479 view_count = info_dict.get('view_count', None)
480 if view_count is not None:
481 min_views = self.params.get('min_views')
482 if min_views is not None and view_count < min_views:
483 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
484 max_views = self.params.get('max_views')
485 if max_views is not None and view_count > max_views:
486 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
487 age_limit = self.params.get('age_limit')
488 if age_limit is not None:
489 actual_age_limit = info_dict.get('age_limit')
490 if actual_age_limit is None:
492 if age_limit < actual_age_limit:
493 return 'Skipping "' + title + '" because it is age restricted'
494 if self.in_download_archive(info_dict):
495 return '%s has already been recorded in archive' % video_title
# NOTE(review): callers invoke this as self.add_extra_info(...) with two
# arguments while the signature has no 'self'; a @staticmethod decorator
# appears to have been elided immediately above this definition.
499 def add_extra_info(info_dict, extra_info):
500 '''Set the keys from extra_info in info dict if they are missing'''
501 for key, value in extra_info.items():
502 info_dict.setdefault(key, value)
# NOTE(review): mutable default argument extra_info={} -- safe only if it
# is never mutated; a None sentinel would be more robust. The signature
# also continues on an elided line (a 'process' keyword is passed by
# process_ie_result), and the docstring delimiters, loop header, try:,
# and several returns/continues are missing from this copy.
504 def extract_info(self, url, download=True, ie_key=None, extra_info={},
507 Returns a list with a dictionary for each video we find.
508 If 'download', also downloads the videos.
509 extra_info is a dict containing the extra values to add to each result
513 ies = [self.get_info_extractor(ie_key)]
518 if not ie.suitable(url):
522 self.report_warning('The program functionality for this site has been marked as broken, '
523 'and will probably not work.')
526 ie_result = ie.extract(url)
527 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
529 if isinstance(ie_result, list):
530 # Backwards compatibility: old IE result format
532 '_type': 'compat_list',
533 'entries': ie_result,
535 self.add_default_extra_info(ie_result, ie, url)
537 return self.process_ie_result(ie_result, download, extra_info)
540 except ExtractorError as de: # An error we somewhat expected
541 self.report_error(compat_str(de), de.format_traceback())
543 except MaxDownloadsReached:
545 except Exception as e:
# With ignoreerrors, report and continue; otherwise the error propagates.
546 if self.params.get('ignoreerrors', False):
547 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
552 self.report_error('no suitable InfoExtractor for URL %s' % url)
554 def add_default_extra_info(self, ie_result, ie, url):
# Attach extractor provenance fields without overwriting values the
# extractor already set (add_extra_info uses setdefault).
# NOTE(review): a 'webpage_url' entry and the closing '})' appear elided
# from this dict literal.
555 self.add_extra_info(ie_result, {
556 'extractor': ie.IE_NAME,
558 'webpage_url_basename': url_basename(url),
559 'extractor_key': ie.ie_key(),
# NOTE(review): mutable default argument extra_info={}; also, this copy is
# heavily elided (docstring delimiters, several returns, the playlist
# 'extra' dict opener, the _fixup definition, and list/paren closers are
# missing) -- verify against upstream before editing.
562 def process_ie_result(self, ie_result, download=True, extra_info={}):
564 Take the result of the ie(may be modified) and resolve all unresolved
565 references (URLs, playlist items).
567 It will also download the videos if 'download'.
568 Returns the resolved ie_result.
# Dispatch on the result's '_type' tag; plain videos are the default.
571 result_type = ie_result.get('_type', 'video')
573 if result_type in ('url', 'url_transparent'):
574 extract_flat = self.params.get('extract_flat', False)
575 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
576 extract_flat is True):
577 self.add_extra_info(ie_result, extra_info)
578 if self.params.get('forcejson', False):
579 self.to_stdout(json.dumps(ie_result))
582 if result_type == 'video':
583 self.add_extra_info(ie_result, extra_info)
584 return self.process_video_result(ie_result, download=download)
585 elif result_type == 'url':
586 # We have to add extra_info to the results because it may be
587 # contained in a playlist
588 return self.extract_info(ie_result['url'],
590 ie_key=ie_result.get('ie_key'),
591 extra_info=extra_info)
592 elif result_type == 'url_transparent':
593 # Use the information from the embedding page
594 info = self.extract_info(
595 ie_result['url'], ie_key=ie_result.get('ie_key'),
596 extra_info=extra_info, download=False, process=False)
# Merge selected fields of the embedded result over a copy of the
# embedding result.
598 def make_result(embedded_info):
599 new_result = ie_result.copy()
600 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
601 'entries', 'ie_key', 'duration',
602 'subtitles', 'annotations', 'format',
603 'thumbnail', 'thumbnails'):
606 if f in embedded_info:
607 new_result[f] = embedded_info[f]
609 new_result = make_result(info)
611 assert new_result.get('_type') != 'url_transparent'
612 if new_result.get('_type') == 'compat_list':
613 new_result['entries'] = [
614 make_result(e) for e in new_result['entries']]
616 return self.process_ie_result(
617 new_result, download=download, extra_info=extra_info)
618 elif result_type == 'playlist':
619 # We process each entry in the playlist
620 playlist = ie_result.get('title', None) or ie_result.get('id', None)
621 self.to_screen('[download] Downloading playlist: %s' % playlist)
623 playlist_results = []
# playliststart is 1-based in params, 0-based internally.
625 playliststart = self.params.get('playliststart', 1) - 1
626 playlistend = self.params.get('playlistend', None)
627 # For backwards compatibility, interpret -1 as whole list
628 if playlistend == -1:
631 if isinstance(ie_result['entries'], list):
632 n_all_entries = len(ie_result['entries'])
633 entries = ie_result['entries'][playliststart:playlistend]
634 n_entries = len(entries)
636 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
637 (ie_result['extractor'], playlist, n_all_entries, n_entries))
639 assert isinstance(ie_result['entries'], PagedList)
640 entries = ie_result['entries'].getslice(
641 playliststart, playlistend)
642 n_entries = len(entries)
644 "[%s] playlist %s: Downloading %d videos" %
645 (ie_result['extractor'], playlist, n_entries))
647 for i, entry in enumerate(entries, 1):
648 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
650 'n_entries': n_entries,
651 'playlist': playlist,
652 'playlist_index': i + playliststart,
653 'extractor': ie_result['extractor'],
654 'webpage_url': ie_result['webpage_url'],
655 'webpage_url_basename': url_basename(ie_result['webpage_url']),
656 'extractor_key': ie_result['extractor_key'],
659 reason = self._match_entry(entry)
660 if reason is not None:
661 self.to_screen('[download] ' + reason)
664 entry_result = self.process_ie_result(entry,
667 playlist_results.append(entry_result)
668 ie_result['entries'] = playlist_results
670 elif result_type == 'compat_list':
672 self.add_extra_info(r,
674 'extractor': ie_result['extractor'],
675 'webpage_url': ie_result['webpage_url'],
676 'webpage_url_basename': url_basename(ie_result['webpage_url']),
677 'extractor_key': ie_result['extractor_key'],
680 ie_result['entries'] = [
681 self.process_ie_result(_fixup(r), download, extra_info)
682 for r in ie_result['entries']
686 raise Exception('Invalid result type: %s' % result_type)
688 def select_format(self, format_spec, available_formats):
# Pick a single format dict from available_formats; the [-1]/[0]
# indexing for best/worst implies the list is assumed sorted
# worst-to-best -- TODO confirm.
# NOTE(review): the comprehension openers ('audio_formats = [', etc.),
# the 'if audio_formats:'/'if video_formats:' guards, the 'else:' before
# the format_id filter and the final matches/return lines appear elided.
689 if format_spec == 'best' or format_spec is None:
690 return available_formats[-1]
691 elif format_spec == 'worst':
692 return available_formats[0]
693 elif format_spec == 'bestaudio':
695 f for f in available_formats
696 if f.get('vcodec') == 'none']
698 return audio_formats[-1]
699 elif format_spec == 'worstaudio':
701 f for f in available_formats
702 if f.get('vcodec') == 'none']
704 return audio_formats[0]
705 elif format_spec == 'bestvideo':
707 f for f in available_formats
708 if f.get('acodec') == 'none']
710 return video_formats[-1]
711 elif format_spec == 'worstvideo':
713 f for f in available_formats
714 if f.get('acodec') == 'none']
716 return video_formats[0]
# A bare container extension matches on 'ext'; anything else is treated
# as an exact format_id.
718 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
719 if format_spec in extensions:
720 filter_f = lambda f: f['ext'] == format_spec
722 filter_f = lambda f: f['format_id'] == format_spec
723 matches = list(filter(filter_f, available_formats))
728 def process_video_result(self, info_dict, download=True):
# Normalise a single-video info dict (thumbnails, dates, formats),
# select the requested format(s) and hand each off to process_info().
# NOTE(review): this copy is heavily elided (several 'else:'/'return'
# lines, a thumbnail loop header, closing parens and the format-merge
# dict opener are missing) -- verify against upstream before editing.
729 assert info_dict.get('_type', 'video') == 'video'
731 if 'id' not in info_dict:
732 raise ExtractorError('Missing "id" field in extractor result')
733 if 'title' not in info_dict:
734 raise ExtractorError('Missing "title" field in extractor result')
736 if 'playlist' not in info_dict:
737 # It isn't part of a playlist
738 info_dict['playlist'] = None
739 info_dict['playlist_index'] = None
# Sort thumbnails by size so the last entry is the largest.
741 thumbnails = info_dict.get('thumbnails')
743 thumbnails.sort(key=lambda t: (
744 t.get('width'), t.get('height'), t.get('url')))
746 if 'width' in t and 'height' in t:
747 t['resolution'] = '%dx%d' % (t['width'], t['height'])
749 if thumbnails and 'thumbnail' not in info_dict:
750 info_dict['thumbnail'] = thumbnails[-1]['url']
752 if 'display_id' not in info_dict and 'id' in info_dict:
753 info_dict['display_id'] = info_dict['id']
# Derive upload_date (YYYYMMDD) from a POSIX timestamp when missing.
755 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
756 upload_date = datetime.datetime.utcfromtimestamp(
757 info_dict['timestamp'])
758 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
760 # These extractors handle format selection themselves
761 if info_dict['extractor'] in ['Youku']:
763 self.process_info(info_dict)
766 # We now pick which formats have to be downloaded
767 if info_dict.get('formats') is None:
768 # There's only one format available
769 formats = [info_dict]
771 formats = info_dict['formats']
774 raise ExtractorError('No video formats found!')
776 # We check that all the formats have the format and format_id fields
777 for i, format in enumerate(formats):
778 if 'url' not in format:
779 raise ExtractorError('Missing "url" key in result (index %d)' % i)
781 if format.get('format_id') is None:
782 format['format_id'] = compat_str(i)
783 if format.get('format') is None:
784 format['format'] = '{id} - {res}{note}'.format(
785 id=format['format_id'],
786 res=self.format_resolution(format),
787 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
789 # Automatically determine file extension if missing
790 if 'ext' not in format:
791 format['ext'] = determine_ext(format['url']).lower()
# format_limit truncates the list just past the named format.
793 format_limit = self.params.get('format_limit', None)
795 formats = list(takewhile_inclusive(
796 lambda f: f['format_id'] != format_limit, formats
799 # TODO Central sorting goes here
801 if formats[0] is not info_dict:
802 # only set the 'formats' fields if the original info_dict list them
803 # otherwise we end up with a circular reference, the first (and unique)
804 # element in the 'formats' field in info_dict is info_dict itself,
805 # which can't be exported to json
806 info_dict['formats'] = formats
807 if self.params.get('listformats', None):
808 self.list_formats(info_dict)
811 req_format = self.params.get('format')
812 if req_format is None:
814 formats_to_download = []
815 # The -1 is for supporting YoutubeIE
816 if req_format in ('-1', 'all'):
817 formats_to_download = formats
819 for rfstr in req_format.split(','):
820 # We can accept formats requested in the format: 34/5/best, we pick
821 # the first that is available, starting from left
822 req_formats = rfstr.split('/')
823 for rf in req_formats:
824 if re.match(r'.+?\+.+?', rf) is not None:
825 # Two formats have been requested like '137+139'
826 format_1, format_2 = rf.split('+')
827 formats_info = (self.select_format(format_1, formats),
828 self.select_format(format_2, formats))
829 if all(formats_info):
831 'requested_formats': formats_info,
833 'ext': formats_info[0]['ext'],
836 selected_format = None
838 selected_format = self.select_format(rf, formats)
839 if selected_format is not None:
840 formats_to_download.append(selected_format)
842 if not formats_to_download:
843 raise ExtractorError('requested format not available',
# Download every selected format; each gets its own merged info dict.
847 if len(formats_to_download) > 1:
848 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
849 for format in formats_to_download:
850 new_info = dict(info_dict)
851 new_info.update(format)
852 self.process_info(new_info)
853 # We update the info dict with the best quality format (backwards compatibility)
854 info_dict.update(formats_to_download[-1])
857 def process_info(self, info_dict):
858 """Process a single resolved IE result."""
860 assert info_dict.get('_type', 'video') == 'video'
862 max_downloads = self.params.get('max_downloads')
863 if max_downloads is not None:
864 if self._num_downloads >= int(max_downloads):
865 raise MaxDownloadsReached()
867 info_dict['fulltitle'] = info_dict['title']
868 if len(info_dict['title']) > 200:
869 info_dict['title'] = info_dict['title'][:197] + '...'
871 # Keep for backwards compatibility
872 info_dict['stitle'] = info_dict['title']
874 if 'format' not in info_dict:
875 info_dict['format'] = info_dict['ext']
877 reason = self._match_entry(info_dict)
878 if reason is not None:
879 self.to_screen('[download] ' + reason)
882 self._num_downloads += 1
884 filename = self.prepare_filename(info_dict)
887 if self.params.get('forcetitle', False):
888 self.to_stdout(info_dict['fulltitle'])
889 if self.params.get('forceid', False):
890 self.to_stdout(info_dict['id'])
891 if self.params.get('forceurl', False):
892 # For RTMP URLs, also include the playpath
893 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
894 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
895 self.to_stdout(info_dict['thumbnail'])
896 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
897 self.to_stdout(info_dict['description'])
898 if self.params.get('forcefilename', False) and filename is not None:
899 self.to_stdout(filename)
900 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
901 self.to_stdout(formatSeconds(info_dict['duration']))
902 if self.params.get('forceformat', False):
903 self.to_stdout(info_dict['format'])
904 if self.params.get('forcejson', False):
905 info_dict['_filename'] = filename
906 self.to_stdout(json.dumps(info_dict))
908 # Do nothing else if in simulate mode
909 if self.params.get('simulate', False):
916 dn = os.path.dirname(encodeFilename(filename))
917 if dn and not os.path.exists(dn):
919 except (OSError, IOError) as err:
920 self.report_error('unable to create directory ' + compat_str(err))
923 if self.params.get('writedescription', False):
924 descfn = filename + '.description'
925 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
926 self.to_screen('[info] Video description is already present')
929 self.to_screen('[info] Writing video description to: ' + descfn)
930 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
931 descfile.write(info_dict['description'])
932 except (KeyError, TypeError):
933 self.report_warning('There\'s no description to write.')
934 except (OSError, IOError):
935 self.report_error('Cannot write description file ' + descfn)
938 if self.params.get('writeannotations', False):
939 annofn = filename + '.annotations.xml'
940 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
941 self.to_screen('[info] Video annotations are already present')
944 self.to_screen('[info] Writing video annotations to: ' + annofn)
945 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
946 annofile.write(info_dict['annotations'])
947 except (KeyError, TypeError):
948 self.report_warning('There are no annotations to write.')
949 except (OSError, IOError):
950 self.report_error('Cannot write annotations file: ' + annofn)
953 subtitles_are_requested = any([self.params.get('writesubtitles', False),
954 self.params.get('writeautomaticsub')])
956 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
957 # subtitles download errors are already managed as troubles in relevant IE
958 # that way it will silently go on when used with unsupporting IE
959 subtitles = info_dict['subtitles']
960 sub_format = self.params.get('subtitlesformat', 'srt')
961 for sub_lang in subtitles.keys():
962 sub = subtitles[sub_lang]
966 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
967 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
968 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
970 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
971 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
973 except (OSError, IOError):
974 self.report_error('Cannot write subtitles file ' + sub_filename)
977 if self.params.get('writeinfojson', False):
978 infofn = os.path.splitext(filename)[0] + '.info.json'
979 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
980 self.to_screen('[info] Video description metadata is already present')
982 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
984 write_json_file(info_dict, encodeFilename(infofn))
985 except (OSError, IOError):
986 self.report_error('Cannot write metadata to JSON file ' + infofn)
989 if self.params.get('writethumbnail', False):
990 if info_dict.get('thumbnail') is not None:
991 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
992 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
993 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
994 self.to_screen('[%s] %s: Thumbnail is already present' %
995 (info_dict['extractor'], info_dict['id']))
997 self.to_screen('[%s] %s: Downloading thumbnail ...' %
998 (info_dict['extractor'], info_dict['id']))
1000 uf = self.urlopen(info_dict['thumbnail'])
1001 with open(thumb_filename, 'wb') as thumbf:
1002 shutil.copyfileobj(uf, thumbf)
1003 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1004 (info_dict['extractor'], info_dict['id'], thumb_filename))
1005 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1006 self.report_warning('Unable to download thumbnail "%s": %s' %
1007 (info_dict['thumbnail'], compat_str(err)))
1009 if not self.params.get('skip_download', False):
1010 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1015 fd = get_suitable_downloader(info)(self, self.params)
1016 for ph in self._progress_hooks:
1017 fd.add_progress_hook(ph)
1018 if self.params.get('verbose'):
1019 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1020 return fd.download(name, info)
1021 if info_dict.get('requested_formats') is not None:
1024 merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1025 if not merger._get_executable():
1027 self.report_warning('You have requested multiple '
1028 'formats but ffmpeg or avconv are not installed.'
1029 ' The formats won\'t be merged')
1031 postprocessors = [merger]
1032 for f in info_dict['requested_formats']:
1033 new_info = dict(info_dict)
1035 fname = self.prepare_filename(new_info)
1036 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1037 downloaded.append(fname)
1038 partial_success = dl(fname, new_info)
1039 success = success and partial_success
1040 info_dict['__postprocessors'] = postprocessors
1041 info_dict['__files_to_merge'] = downloaded
1043 # Just a single file
1044 success = dl(filename, info_dict)
1045 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1046 self.report_error('unable to download video data: %s' % str(err))
1048 except (OSError, IOError) as err:
1049 raise UnavailableVideoError(err)
1050 except (ContentTooShortError, ) as err:
1051 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1056 self.post_process(filename, info_dict)
1057 except (PostProcessingError) as err:
1058 self.report_error('postprocessing: %s' % str(err))
1061 self.record_download_archive(info_dict)
1063 def download(self, url_list):
1064 """Download a given list of URLs."""
1065 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1066 if (len(url_list) > 1 and
1068 and self.params.get('max_downloads') != 1):
1069 raise SameFileError(outtmpl)
1071 for url in url_list:
1073 #It also downloads the videos
1074 self.extract_info(url)
1075 except UnavailableVideoError:
1076 self.report_error('unable to download video')
1077 except MaxDownloadsReached:
1078 self.to_screen('[info] Maximum number of downloaded files reached.')
1081 return self._download_retcode
1083 def download_with_info_file(self, info_filename):
1084 with io.open(info_filename, 'r', encoding='utf-8') as f:
1087 self.process_ie_result(info, download=True)
1088 except DownloadError:
1089 webpage_url = info.get('webpage_url')
1090 if webpage_url is not None:
1091 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1092 return self.download([webpage_url])
1095 return self._download_retcode
1097 def post_process(self, filename, ie_info):
1098 """Run all the postprocessors on the given file."""
1099 info = dict(ie_info)
1100 info['filepath'] = filename
1103 if ie_info.get('__postprocessors') is not None:
1104 pps_chain.extend(ie_info['__postprocessors'])
1105 pps_chain.extend(self._pps)
1106 for pp in pps_chain:
1108 keep_video_wish, new_info = pp.run(info)
1109 if keep_video_wish is not None:
1111 keep_video = keep_video_wish
1112 elif keep_video is None:
1113 # No clear decision yet, let IE decide
1114 keep_video = keep_video_wish
1115 except PostProcessingError as e:
1116 self.report_error(e.msg)
1117 if keep_video is False and not self.params.get('keepvideo', False):
1119 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1120 os.remove(encodeFilename(filename))
1121 except (IOError, OSError):
1122 self.report_warning('Unable to remove downloaded video file')
1124 def _make_archive_id(self, info_dict):
1125 # Future-proof against any change in case
1126 # and backwards compatibility with prior versions
1127 extractor = info_dict.get('extractor_key')
1128 if extractor is None:
1129 if 'id' in info_dict:
1130 extractor = info_dict.get('ie_key') # key in a playlist
1131 if extractor is None:
1132 return None # Incomplete video information
1133 return extractor.lower() + ' ' + info_dict['id']
1135 def in_download_archive(self, info_dict):
1136 fn = self.params.get('download_archive')
1140 vid_id = self._make_archive_id(info_dict)
1142 return False # Incomplete video information
1145 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1146 for line in archive_file:
1147 if line.strip() == vid_id:
1149 except IOError as ioe:
1150 if ioe.errno != errno.ENOENT:
1154 def record_download_archive(self, info_dict):
1155 fn = self.params.get('download_archive')
1158 vid_id = self._make_archive_id(info_dict)
1160 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1161 archive_file.write(vid_id + '\n')
1164 def format_resolution(format, default='unknown'):
1165 if format.get('vcodec') == 'none':
1167 if format.get('resolution') is not None:
1168 return format['resolution']
1169 if format.get('height') is not None:
1170 if format.get('width') is not None:
1171 res = '%sx%s' % (format['width'], format['height'])
1173 res = '%sp' % format['height']
1174 elif format.get('width') is not None:
1175 res = '?x%d' % format['width']
1180 def _format_note(self, fdict):
1182 if fdict.get('ext') in ['f4f', 'f4m']:
1183 res += '(unsupported) '
1184 if fdict.get('format_note') is not None:
1185 res += fdict['format_note'] + ' '
1186 if fdict.get('tbr') is not None:
1187 res += '%4dk ' % fdict['tbr']
1188 if fdict.get('container') is not None:
1191 res += '%s container' % fdict['container']
1192 if (fdict.get('vcodec') is not None and
1193 fdict.get('vcodec') != 'none'):
1196 res += fdict['vcodec']
1197 if fdict.get('vbr') is not None:
1199 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1201 if fdict.get('vbr') is not None:
1202 res += '%4dk' % fdict['vbr']
1203 if fdict.get('acodec') is not None:
1206 if fdict['acodec'] == 'none':
1209 res += '%-5s' % fdict['acodec']
1210 elif fdict.get('abr') is not None:
1214 if fdict.get('abr') is not None:
1215 res += '@%3dk' % fdict['abr']
1216 if fdict.get('asr') is not None:
1217 res += ' (%5dHz)' % fdict['asr']
1218 if fdict.get('filesize') is not None:
1221 res += format_bytes(fdict['filesize'])
1222 elif fdict.get('filesize_approx') is not None:
1225 res += '~' + format_bytes(fdict['filesize_approx'])
1228 def list_formats(self, info_dict):
1229 def line(format, idlen=20):
1230 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1231 format['format_id'],
1233 self.format_resolution(format),
1234 self._format_note(format),
1237 formats = info_dict.get('formats', [info_dict])
1238 idlen = max(len('format code'),
1239 max(len(f['format_id']) for f in formats))
1240 formats_s = [line(f, idlen) for f in formats]
1241 if len(formats) > 1:
1242 formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1243 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1245 header_line = line({
1246 'format_id': 'format code', 'ext': 'extension',
1247 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1248 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1249 (info_dict['id'], header_line, '\n'.join(formats_s)))
1251 def urlopen(self, req):
1252 """ Start an HTTP download """
1254 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1255 # always respected by websites, some tend to give out URLs with non percent-encoded
1256 # non-ASCII characters (see telemb.py, ard.py [#3412])
1257 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1258 # To work around aforementioned issue we will replace request's original URL with
1259 # percent-encoded one
1260 req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1261 url = req if req_is_string else req.get_full_url()
1262 url_escaped = escape_url(url)
1264 # Substitute URL if any change after escaping
1265 if url != url_escaped:
1269 req = compat_urllib_request.Request(
1270 url_escaped, data=req.data, headers=req.headers,
1271 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1273 return self._opener.open(req, timeout=self._socket_timeout)
1275 def print_debug_header(self):
1276 if not self.params.get('verbose'):
1279 if type('') is not compat_str:
1280 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1281 self.report_warning(
1282 'Your Python is broken! Update to a newer and supported version')
1285 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1286 locale.getpreferredencoding(),
1287 sys.getfilesystemencoding(),
1288 sys.stdout.encoding,
1289 self.get_encoding()))
1290 write_string(encoding_str, encoding=None)
1292 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1294 sp = subprocess.Popen(
1295 ['git', 'rev-parse', '--short', 'HEAD'],
1296 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1297 cwd=os.path.dirname(os.path.abspath(__file__)))
1298 out, err = sp.communicate()
1299 out = out.decode().strip()
1300 if re.match('[0-9a-f]+', out):
1301 self._write_string('[debug] Git HEAD: ' + out + '\n')
1307 self._write_string('[debug] Python version %s - %s' %
1308 (platform.python_version(), platform_name()) + '\n')
1311 for handler in self._opener.handlers:
1312 if hasattr(handler, 'proxies'):
1313 proxy_map.update(handler.proxies)
1314 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1316 def _setup_opener(self):
1317 timeout_val = self.params.get('socket_timeout')
1318 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1320 opts_cookiefile = self.params.get('cookiefile')
1321 opts_proxy = self.params.get('proxy')
1323 if opts_cookiefile is None:
1324 self.cookiejar = compat_cookiejar.CookieJar()
1326 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1328 if os.access(opts_cookiefile, os.R_OK):
1329 self.cookiejar.load()
1331 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1333 if opts_proxy is not None:
1334 if opts_proxy == '':
1337 proxies = {'http': opts_proxy, 'https': opts_proxy}
1339 proxies = compat_urllib_request.getproxies()
1340 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1341 if 'http' in proxies and 'https' not in proxies:
1342 proxies['https'] = proxies['http']
1343 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1345 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1346 https_handler = make_HTTPS_handler(
1347 self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1348 ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1349 opener = compat_urllib_request.build_opener(
1350 https_handler, proxy_handler, cookie_processor, ydlh)
1351 # Delete the default user-agent header, which would otherwise apply in
1352 # cases where our custom HTTP handler doesn't come into play
1353 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1354 opener.addheaders = []
1355 self._opener = opener
1357 def encode(self, s):
1358 if isinstance(s, bytes):
1359 return s # Already encoded
1362 return s.encode(self.get_encoding())
1363 except UnicodeEncodeError as err:
1364 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1367 def get_encoding(self):
1368 encoding = self.params.get('encoding')
1369 if encoding is None:
1370 encoding = preferredencoding()