2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
34 compat_urllib_request,
61 UnavailableVideoError,
71 from .cache import Cache
72 from .extractor import get_info_extractor, gen_extractors
73 from .downloader import get_suitable_downloader
74 from .downloader.rtmp import rtmpdump_version
75 from .postprocessor import (
77 FFmpegFixupStretchedPP,
82 from .version import __version__
class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object handles it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL process the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceduration:     Force printing duration.
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    simulate:          Do not download the video files.
    format:            Video format code. See options.py for more information.
    format_limit:      Highest quality format to try.
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    nooverwrites:      Prevent overwriting files.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    playlistreverse:   Download playlist items in reverse order.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
    cookiefile:        File name where cookies should be read from and dumped to.
    nocheckcertificate:Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    proxy:             URL of the proxy server to use
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
    postprocessors:    A list of dictionaries, each with an entry
                       * key: The name of the postprocessor. See
                         youtube_dl/postprocessor/__init__.py for a list.
                       as well as any further keyword arguments for the
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * filename: The final filename
                       * status: One of "downloading" and "finished"

                       The dict may also have some of the following entries:

                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * tmpfilename: The filename we're currently writing to
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    merge_output_format: Extension to use when merging formats.
    fixup:             Automatically correct known faults of the file.
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    (Experimental) Client-side IP address to bind to.
    call_home:         Boolean, true iff we are allowed to contact the
                       youtube-dl servers for debugging.
    sleep_interval:    Number of seconds to sleep before each download.

    The following parameters are not used by YoutubeDL itself, they are used by
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle

    The following options are used by the post processors:
    prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
                       otherwise prefer avconv.
    exec_cmd:          Arbitrary command to run after downloading
    """

    # Per-run counters; replaced with real values in __init__.
    _download_retcode = None
    _num_downloads = None
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options."""
        # NOTE(review): several original lines are elided in this excerpt;
        # gaps are annotated below and the surviving code is left untouched.
        self._ies_instances = {}
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Screen messages go to stderr when 'logtostderr' is set.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.cache = Cache(self)

        if params.get('bidi_workaround', False):
            # Route screen output through an external bidi filter (bidiv or
            # fribidi) over a pty so RTL text renders on buggy terminals.
            master, slave = pty.openpty()
            width = get_term_width()
            width_args = ['-w', str(width)]
            # NOTE(review): the 'try:' and the sp_kwargs dict head are elided.
            stdin=subprocess.PIPE,
            stderr=self._err_file)
            self._output_process = subprocess.Popen(
                ['bidiv'] + width_args, **sp_kwargs
            # NOTE(review): fallback branch head (bidiv missing) elided here.
            self._output_process = subprocess.Popen(
                ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # Best-effort: missing fribidi only disables the workaround.
            self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            # NOTE(review): the 'self.report_warning(' call head is elided here.
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if '%(stitle)s' in self.params.get('outtmpl', ''):
            self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

        self.print_debug_header()
        self.add_default_info_extractors()

        # Instantiate and attach the configured postprocessors.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            # NOTE(review): removal of the 'key' entry from pp_def is elided.
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)
    def warn_if_short_id(self, argv):
        """Warn when a dash-leading 11-char YouTube ID was likely consumed by
        the option parser, suggesting the '--'-separated command line."""
        # short YouTube ID starting with dash?
        # NOTE(review): the 'idxs = [' opener is elided in this excerpt.
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        # NOTE(review): the guard and 'correct_argv = (' head are elided here.
            [a for i, a in enumerate(argv) if i not in idxs] +
            ['--'] + [argv[i] for i in idxs]
        # NOTE(review): the 'self.report_warning(' call head is elided here.
            'Long argument string detected. '
            'Use -- to separate parameters and URLs, like this:\n%s\n' %
            args_to_str(correct_argv))
    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        # NOTE(review): the line appending ie to the ordered extractor list is
        # elided in this excerpt; only the keyed cache and back-reference remain.
        self._ies_instances[ie.ie_key()] = ie
        ie.set_downloader(self)
335 def get_info_extractor(self, ie_key):
337 Get an instance of an IE with name ie_key, it will try to get one from
338 the _ies list, if there's no instance it will create a new one and add
339 it to the extractor list.
341 ie = self._ies_instances.get(ie_key)
343 ie = get_info_extractor(ie_key)()
344 self.add_info_extractor(ie)
    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractors():
            self.add_info_extractor(ie)
    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        # NOTE(review): the line appending pp to the processor chain is elided
        # in this excerpt; only the back-reference wiring survives.
        pp.set_downloader(self)
359 def add_progress_hook(self, ph):
360 """Add the progress hook (currently only for the file downloader)"""
361 self._progress_hooks.append(ph)
    def _bidi_workaround(self, message):
        # Filter *message* through the external bidi process started in
        # __init__ and return the reshaped text (trailing newline stripped).
        if not hasattr(self, '_output_channel'):
            # NOTE(review): the early 'return message' fallback (workaround
            # disabled) is elided in this excerpt.

        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        # Read back exactly as many reshaped lines as were written.
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        return res[:-len('\n')]
376 def to_screen(self, message, skip_eol=False):
377 """Print message to stdout if not in quiet mode."""
378 return self.to_stdout(message, skip_eol, check_quiet=True)
380 def _write_string(self, s, out=None):
381 write_string(s, out=out, encoding=self.params.get('encoding'))
383 def to_stdout(self, message, skip_eol=False, check_quiet=False):
384 """Print message to stdout if not in quiet mode."""
385 if self.params.get('logger'):
386 self.params['logger'].debug(message)
387 elif not check_quiet or not self.params.get('quiet', False):
388 message = self._bidi_workaround(message)
389 terminator = ['\n', ''][skip_eol]
390 output = message + terminator
392 self._write_string(output, self._screen_file)
394 def to_stderr(self, message):
395 """Print message to stderr."""
396 assert isinstance(message, compat_str)
397 if self.params.get('logger'):
398 self.params['logger'].error(message)
400 message = self._bidi_workaround(message)
401 output = message + '\n'
402 self._write_string(output, self._err_file)
404 def to_console_title(self, message):
405 if not self.params.get('consoletitle', False):
407 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
408 # c_wchar_p() might not be necessary if `message` is
409 # already of type unicode()
410 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
411 elif 'TERM' in os.environ:
412 self._write_string('\033]0;%s\007' % message, self._screen_file)
414 def save_console_title(self):
415 if not self.params.get('consoletitle', False):
417 if 'TERM' in os.environ:
418 # Save the title on stack
419 self._write_string('\033[22;0t', self._screen_file)
421 def restore_console_title(self):
422 if not self.params.get('consoletitle', False):
424 if 'TERM' in os.environ:
425 # Restore the title from stack
426 self._write_string('\033[23;0t', self._screen_file)
429 self.save_console_title()
432 def __exit__(self, *args):
433 self.restore_console_title()
435 if self.params.get('cookiefile') is not None:
436 self.cookiejar.save()
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            # NOTE(review): the 'if tb is None:' guard is elided in this excerpt.
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                # NOTE(review): the 'tb = ' initialisation is elided here.
                if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                    tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += compat_str(traceback.format_exc())
            # NOTE(review): an 'else:' branch head is elided here.
            tb_data = traceback.format_list(traceback.extract_stack())
            tb = ''.join(tb_data)
            # NOTE(review): the 'self.to_stderr(tb)' line is elided here.
        if not self.params.get('ignoreerrors', False):
            # Prefer the wrapped extractor exception's exc_info when present.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            # NOTE(review): an 'else:' branch head is elided here.
            exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
468 def report_warning(self, message):
470 Print the message to stderr, it will be prefixed with 'WARNING:'
471 If stderr is a tty file the 'WARNING:' will be colored
473 if self.params.get('logger') is not None:
474 self.params['logger'].warning(message)
476 if self.params.get('no_warnings'):
478 if self._err_file.isatty() and os.name != 'nt':
479 _msg_header = '\033[0;33mWARNING:\033[0m'
481 _msg_header = 'WARNING:'
482 warning_message = '%s %s' % (_msg_header, message)
483 self.to_stderr(warning_message)
485 def report_error(self, message, tb=None):
487 Do the same as trouble, but prefixes the message with 'ERROR:', colored
488 in red if stderr is a tty file.
490 if self._err_file.isatty() and os.name != 'nt':
491 _msg_header = '\033[0;31mERROR:\033[0m'
493 _msg_header = 'ERROR:'
494 error_message = '%s %s' % (_msg_header, message)
495 self.trouble(error_message, tb)
497 def report_file_already_downloaded(self, file_name):
498 """Report file has already been fully downloaded."""
500 self.to_screen('[download] %s has already been downloaded' % file_name)
501 except UnicodeEncodeError:
502 self.to_screen('[download] The file has already been downloaded')
    def prepare_filename(self, info_dict):
        """Generate the output filename."""
        # NOTE(review): the enclosing 'try:' is elided in this excerpt.
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            # NOTE(review): the default width assignment is elided here.
        autonumber_templ = '%0' + str(autonumber_size) + 'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        if template_dict.get('playlist_index') is not None:
            # Zero-pad the index to the width of the playlist length.
            template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
        if template_dict.get('resolution') is None:
            if template_dict.get('width') and template_dict.get('height'):
                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
            elif template_dict.get('height'):
                template_dict['resolution'] = '%sp' % template_dict['height']
            elif template_dict.get('width'):
                template_dict['resolution'] = '?x%d' % template_dict['width']

        # NOTE(review): parts of the sanitize lambda and the value filter are
        # elided in this excerpt.
        sanitize = lambda k, v: sanitize_filename(
            restricted=self.params.get('restrictfilenames'),
        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items()
        # Missing template fields render as 'NA' instead of raising KeyError.
        template_dict = collections.defaultdict(lambda: 'NA', template_dict)

        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
        tmpl = compat_expanduser(outtmpl)
        filename = tmpl % template_dict
        # NOTE(review): the 'return filename' line is elided here.
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
    def _match_entry(self, info_dict):
        """ Returns None iff the file should be downloaded """

        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            # NOTE(review): the 'if matchtitle:' guard is elided in this excerpt.
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            # NOTE(review): the 'if rejecttitle:' guard is elided here.
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date', None)
        # NOTE(review): the 'if date is not None:' guard is elided here.
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count', None)
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % title
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title
        # NOTE(review): the trailing 'return None' is elided in this excerpt.
577 def add_extra_info(info_dict, extra_info):
578 '''Set the keys from extra_info in info dict if they are missing'''
579 for key, value in extra_info.items():
580 info_dict.setdefault(key, value)
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
        # NOTE(review): the rest of the signature ('process=True):') and the
        # docstring opener are elided in this excerpt.
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        # NOTE(review): the 'if ie_key:' guard is elided here.
        ies = [self.get_info_extractor(ie_key)]
        # NOTE(review): the 'for ie in ies:' loop head is elided here.
        if not ie.suitable(url):
            # NOTE(review): 'continue' and the working-check are elided here.
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')
            # NOTE(review): the 'try:' line is elided here.
            ie_result = ie.extract(url)
            if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                # NOTE(review): the wrapping dict head is elided here.
                '_type': 'compat_list',
                'entries': ie_result,
            self.add_default_extra_info(ie_result, ie, url)
            return self.process_ie_result(ie_result, download, extra_info)
        except ExtractorError as de: # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
        except MaxDownloadsReached:
            # NOTE(review): the re-raise line is elided here.
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
        # Reached when no registered extractor accepted the URL.
        self.report_error('no suitable InfoExtractor for URL %s' % url)
632 def add_default_extra_info(self, ie_result, ie, url):
633 self.add_extra_info(ie_result, {
634 'extractor': ie.IE_NAME,
636 'webpage_url_basename': url_basename(url),
637 'extractor_key': ie.ie_key(),
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                # NOTE(review): the 'return ie_result' line is elided here.

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            # NOTE(review): the 'download,' argument line is elided below.
            return self.extract_info(ie_result['url'],
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            assert new_result.get('_type') != 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                # NOTE(review): the 'playlistend = None' line is elided here.

            ie_entries = ie_result['entries']
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                # NOTE(review): the 'self.to_screen(' call head is elided here.
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                entries = ie_entries.getslice(
                    playliststart, playlistend)
                n_entries = len(entries)
                # NOTE(review): the 'self.to_screen(' call head is elided here.
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))
            # NOTE(review): the 'else:' (iterable entries) branch head is elided.
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                # NOTE(review): the 'self.to_screen(' call head is elided here.
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # NOTE(review): the 'extra = {' dict head is elided here.
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],

                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    # NOTE(review): the 'continue' line is elided here.

                # NOTE(review): the keyword-argument lines of this call are elided.
                entry_result = self.process_ie_result(entry,
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            # NOTE(review): the 'return ie_result' line is elided here.
        elif result_type == 'compat_list':
            # NOTE(review): the 'self.report_warning(' call head is elided here.
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))
            # NOTE(review): the '_fixup(r)' helper definition head is elided here.
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],

            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
        # NOTE(review): the closing ']', 'return ie_result' and 'else:' lines
        # are elided here.
            raise Exception('Invalid result type: %s' % result_type)
    def _apply_format_filter(self, format_spec, available_formats):
        " Returns a tuple of the remaining format_spec and filtered formats "
        # NOTE(review): the OPERATORS mapping (comparison string -> operator
        # function) is elided in this excerpt.
        operator_rex = re.compile(r'''(?x)\s*\[
            (?P<key>width|height|tbr|abr|vbr|filesize)
            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.search(format_spec)
        # NOTE(review): the 'if not m:' guard is elided here.
            raise ValueError('Invalid format specification %r' % format_spec)

        # NOTE(review): the 'try:' line is elided here.
        comparison_value = int(m.group('value'))
        # NOTE(review): the 'except ValueError:' line is elided here; the
        # fallback parses human-readable sizes such as '500KiB'.
        comparison_value = parse_filesize(m.group('value'))
        if comparison_value is None:
            comparison_value = parse_filesize(m.group('value') + 'B')
        if comparison_value is None:
            # NOTE(review): the 'raise ValueError(' head is elided here.
            'Invalid value %r in format specification %r' % (
                m.group('value'), format_spec))
        op = OPERATORS[m.group('op')]

        # NOTE(review): the 'def _filter(f):' helper head is elided here.
            actual_value = f.get(m.group('key'))
            if actual_value is None:
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
        new_formats = [f for f in available_formats if _filter(f)]

        # Strip the consumed '[...]' suffix; an empty remainder means 'best'.
        new_format_spec = format_spec[:-len(m.group(0))]
        if not new_format_spec:
            new_format_spec = 'best'

        return (new_format_spec, new_formats)
    def select_format(self, format_spec, available_formats):
        # Resolve a single format specifier (e.g. 'best', 'worstaudio',
        # 'mp4', a format_id, optionally with '[...]' filters) to one format.
        while format_spec.endswith(']'):
            format_spec, available_formats = self._apply_format_filter(
                format_spec, available_formats)
        if not available_formats:
            # NOTE(review): the 'return None' line is elided in this excerpt.

        if format_spec == 'best' or format_spec is None:
            return available_formats[-1]
        elif format_spec == 'worst':
            return available_formats[0]
        elif format_spec == 'bestaudio':
            # NOTE(review): the 'audio_formats = [' opener is elided here.
                f for f in available_formats
                if f.get('vcodec') == 'none']
            # NOTE(review): the 'if audio_formats:' guard is elided here.
                return audio_formats[-1]
        elif format_spec == 'worstaudio':
            # NOTE(review): the 'audio_formats = [' opener is elided here.
                f for f in available_formats
                if f.get('vcodec') == 'none']
            # NOTE(review): the 'if audio_formats:' guard is elided here.
                return audio_formats[0]
        elif format_spec == 'bestvideo':
            # NOTE(review): the 'video_formats = [' opener is elided here.
                f for f in available_formats
                if f.get('acodec') == 'none']
            # NOTE(review): the 'if video_formats:' guard is elided here.
                return video_formats[-1]
        elif format_spec == 'worstvideo':
            # NOTE(review): the 'video_formats = [' opener is elided here.
                f for f in available_formats
                if f.get('acodec') == 'none']
            # NOTE(review): the 'if video_formats:' guard is elided here.
                return video_formats[0]
        # NOTE(review): an 'else:' branch head is elided here.
            extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
            if format_spec in extensions:
                filter_f = lambda f: f['ext'] == format_spec
            # NOTE(review): an 'else:' (match by format_id) head is elided here.
                filter_f = lambda f: f['format_id'] == format_spec
            matches = list(filter(filter_f, available_formats))
            # NOTE(review): the 'if matches: return matches[-1]' tail is elided.
    def process_video_result(self, info_dict, download=True):
        # Normalise a single-video result, pick the requested format(s) and
        # hand each one to process_info().
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        thumbnails = info_dict.get('thumbnails')
        # NOTE(review): the 'if thumbnails:' guard is elided in this excerpt.
            thumbnails.sort(key=lambda t: (
                t.get('width'), t.get('height'), t.get('url')))
            # NOTE(review): the 'for t in thumbnails:' loop head is elided here.
                if 'width' in t and 'height' in t:
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])

        if thumbnails and 'thumbnail' not in info_dict:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around negative timestamps in Windows
            # (see http://bugs.python.org/issue1646728)
            if info_dict['timestamp'] < 0 and os.name == 'nt':
                info_dict['timestamp'] = 0
            upload_date = datetime.datetime.utcfromtimestamp(
                info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

        # These extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
            # NOTE(review): the 'if download:' guard is elided here.
            self.process_info(info_dict)
            # NOTE(review): the 'return info_dict' line is elided here.

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        # NOTE(review): the 'else:' branch head is elided here.
            formats = info_dict['formats']

        # NOTE(review): the 'if not formats:' guard is elided here.
            raise ExtractorError('No video formats found!')

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()

        format_limit = self.params.get('format_limit', None)
        # NOTE(review): the 'if format_limit:' guard is elided here.
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats', None):
            self.list_formats(info_dict)
            # NOTE(review): the early 'return' line is elided here.

        req_format = self.params.get('format')
        if req_format is None:
            # NOTE(review): the default assignment (req_format = 'best') is elided.
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
        # NOTE(review): the 'else:' branch head is elided here.
            for rfstr in req_format.split(','):
                # We can accept formats requested in the format: 34/5/best, we pick
                # the first that is available, starting from left
                req_formats = rfstr.split('/')
                for rf in req_formats:
                    if re.match(r'.+?\+.+?', rf) is not None:
                        # Two formats have been requested like '137+139'
                        format_1, format_2 = rf.split('+')
                        formats_info = (self.select_format(format_1, formats),
                                        self.select_format(format_2, formats))
                        if all(formats_info):
                            # The first format must contain the video and the
                            if formats_info[0].get('vcodec') == 'none':
                                self.report_error('The first format must '
                                                  'contain the video, try using '
                                                  '"-f %s+%s"' % (format_2, format_1))
                            # NOTE(review): the 'else:' / 'output_ext = (' lines
                            # are elided here.
                                formats_info[0]['ext']
                                if self.params.get('merge_output_format') is None
                                else self.params['merge_output_format'])
                            # NOTE(review): the merged-format dict head is elided.
                                'requested_formats': formats_info,
                                'ext': formats_info[0]['ext'],
                                'width': formats_info[0].get('width'),
                                'height': formats_info[0].get('height'),
                                'resolution': formats_info[0].get('resolution'),
                                'fps': formats_info[0].get('fps'),
                                'vcodec': formats_info[0].get('vcodec'),
                                'vbr': formats_info[0].get('vbr'),
                                'stretched_ratio': formats_info[0].get('stretched_ratio'),
                                'acodec': formats_info[1].get('acodec'),
                                'abr': formats_info[1].get('abr'),
                            selected_format = None
                    # NOTE(review): the 'else:' (single format) head is elided.
                        selected_format = self.select_format(rf, formats)
                    if selected_format is not None:
                        formats_to_download.append(selected_format)
                        # NOTE(review): the 'break' line is elided here.
        if not formats_to_download:
            raise ExtractorError('requested format not available',

        # NOTE(review): the 'expected=True)' tail and the 'if download:' guard
        # are elided here.
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Handles forced metadata printing, writing of side files
        (description, annotations, subtitles, info JSON, thumbnail),
        the actual download (including downloading and merging separate
        video+audio formats), post-download fixups, post-processing and
        recording the entry in the download archive.
        """

        assert info_dict.get('_type', 'video') == 'video'

        # Abort once the --max-downloads limit has been reached
        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()

        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            # Truncate overly long titles so generated filenames stay usable
            info_dict['title'] = info_dict['title'][:197] + '...'

        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']

        if 'format' not in info_dict:
            # Fall back to the extension when no format description is given
            info_dict['format'] = info_dict['ext']

        # --match-title/--dateafter-style filters; a non-None reason means skip
        reason = self._match_entry(info_dict)
        if reason is not None:
            self.to_screen('[download] ' + reason)

        self._num_downloads += 1

        filename = self.prepare_filename(info_dict)

        # Forced printings (--get-title, --get-id, --get-url, ...)
        if self.params.get('forcetitle', False):
            self.to_stdout(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            if info_dict.get('requested_formats') is not None:
                for f in info_dict['requested_formats']:
                    self.to_stdout(f['url'] + f.get('play_path', ''))
                # For RTMP URLs, also include the playpath
                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
            info_dict['_filename'] = filename
            self.to_stdout(json.dumps(info_dict))
        if self.params.get('dump_single_json', False):
            info_dict['_filename'] = filename

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):

        if filename is None:

            dn = os.path.dirname(encodeFilename(filename))
            if dn and not os.path.exists(dn):
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + compat_str(err))

        # Write the description to a .description side file
        if self.params.get('writedescription', False):
            descfn = filename + '.description'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
            elif info_dict.get('description') is None:
                self.report_warning('There\'s no description to write.')
                    self.to_screen('[info] Writing video description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(info_dict['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write description file ' + descfn)

        # Write the annotations to an .annotations.xml side file
        if self.params.get('writeannotations', False):
            annofn = filename + '.annotations.xml'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['subtitles']
            sub_format = self.params.get('subtitlesformat', 'srt')
            for sub_lang in subtitles.keys():
                sub = subtitles[sub_lang]
                    sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                    if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                        self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                        self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                except (OSError, IOError):
                    self.report_error('Cannot write subtitles file ' + sub_filename)

        # Dump the full metadata as an .info.json side file
        if self.params.get('writeinfojson', False):
            infofn = os.path.splitext(filename)[0] + '.info.json'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video description metadata is already present')
                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                    write_json_file(info_dict, infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write metadata to JSON file ' + infofn)

        # Fetch and store the thumbnail next to the video file
        if self.params.get('writethumbnail', False):
            if info_dict.get('thumbnail') is not None:
                thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
                thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                    self.to_screen('[%s] %s: Thumbnail is already present' %
                                   (info_dict['extractor'], info_dict['id']))
                    self.to_screen('[%s] %s: Downloading thumbnail ...' %
                                   (info_dict['extractor'], info_dict['id']))
                        uf = self.urlopen(info_dict['thumbnail'])
                        with open(thumb_filename, 'wb') as thumbf:
                            shutil.copyfileobj(uf, thumbf)
                        self.to_screen('[%s] %s: Writing thumbnail to: %s' %
                                       (info_dict['extractor'], info_dict['id'], thumb_filename))
                    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                        self.report_warning('Unable to download thumbnail "%s": %s' %
                                            (info_dict['thumbnail'], compat_str(err)))

        if not self.params.get('skip_download', False):
                    fd = get_suitable_downloader(info, self.params)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)
                if info_dict.get('requested_formats') is not None:
                    # Separate video+audio formats: download each part, then merge
                    merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
                    if not merger._executable:
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged')
                        postprocessors = [merger]
                    for f in info_dict['requested_formats']:
                        new_info = dict(info_dict)
                        fname = self.prepare_filename(new_info)
                        fname = prepend_extension(fname, 'f%s' % f['format_id'])
                        downloaded.append(fname)
                        partial_success = dl(fname, new_info)
                        success = success and partial_success
                    info_dict['__postprocessors'] = postprocessors
                    info_dict['__files_to_merge'] = downloaded
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % str(err))
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))

                # Post-download fixups (aspect ratio, DASH m4a container)
                fixup_policy = self.params.get('fixup')
                if fixup_policy is None:
                    fixup_policy = 'detect_or_warn'

                stretched_ratio = info_dict.get('stretched_ratio')
                if stretched_ratio is not None and stretched_ratio != 1:
                    if fixup_policy == 'warn':
                        self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                            info_dict['id'], stretched_ratio))
                    elif fixup_policy == 'detect_or_warn':
                        stretched_pp = FFmpegFixupStretchedPP(self)
                        if stretched_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(stretched_pp)
                            self.report_warning(
                                '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
                                    info_dict['id'], stretched_ratio))
                        assert fixup_policy in ('ignore', 'never')

                if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
                    if fixup_policy == 'warn':
                        self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM4aPP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                            self.report_warning(
                                '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
                        assert fixup_policy in ('ignore', 'never')

                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))
        self.record_download_archive(info_dict)
    def download(self, url_list):
        """Download a given list of URLs.

        Returns the process return code (self._download_retcode).
        Raises SameFileError when several URLs would be written to one
        fixed output file.
        """
        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
        if (len(url_list) > 1 and
                and self.params.get('max_downloads') != 1):
            raise SameFileError(outtmpl)

        for url in url_list:
                # It also downloads the videos
                res = self.extract_info(url)
            except UnavailableVideoError:
                self.report_error('unable to download video')
            except MaxDownloadsReached:
                # Stop processing further URLs once the limit is hit
                self.to_screen('[info] Maximum number of downloaded files reached.')
                if self.params.get('dump_single_json', False):
                    self.to_stdout(json.dumps(res))

        return self._download_retcode
    def download_with_info_file(self, info_filename):
        """Download using a previously dumped .info.json file.

        Falls back to re-extracting from the webpage URL when processing
        the stored info dict fails with a DownloadError.
        """
        with io.open(info_filename, 'r', encoding='utf-8') as f:
            self.process_ie_result(info, download=True)
        except DownloadError:
            # The stored info may be stale; retry from the original page URL
            webpage_url = info.get('webpage_url')
            if webpage_url is not None:
                self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
                return self.download([webpage_url])
        return self._download_retcode
    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file.

        Per-download postprocessors (info dict key '__postprocessors')
        run before the globally registered ones (self._pps).  The
        original file is deleted afterwards unless a postprocessor asked
        to keep it or --keep-video was given.
        """
        info = dict(ie_info)
        info['filepath'] = filename
        if ie_info.get('__postprocessors') is not None:
            pps_chain.extend(ie_info['__postprocessors'])
        pps_chain.extend(self._pps)
        for pp in pps_chain:
            old_filename = info['filepath']
                keep_video_wish, info = pp.run(info)
                if keep_video_wish is not None:
                        keep_video = keep_video_wish
                    elif keep_video is None:
                        # No clear decision yet, let IE decide
                        keep_video = keep_video_wish
            except PostProcessingError as e:
                self.report_error(e.msg)
        if keep_video is False and not self.params.get('keepvideo', False):
                self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
                os.remove(encodeFilename(old_filename))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded video file')
1332 def _make_archive_id(self, info_dict):
1333 # Future-proof against any change in case
1334 # and backwards compatibility with prior versions
1335 extractor = info_dict.get('extractor_key')
1336 if extractor is None:
1337 if 'id' in info_dict:
1338 extractor = info_dict.get('ie_key') # key in a playlist
1339 if extractor is None:
1340 return None # Incomplete video information
1341 return extractor.lower() + ' ' + info_dict['id']
    def in_download_archive(self, info_dict):
        """Check whether this video is already listed in --download-archive."""
        fn = self.params.get('download_archive')
        vid_id = self._make_archive_id(info_dict)
            return False  # Incomplete video information
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    if line.strip() == vid_id:
        except IOError as ioe:
            # A missing archive file just means nothing was recorded yet
            if ioe.errno != errno.ENOENT:
    def record_download_archive(self, info_dict):
        """Append this video's archive id to the --download-archive file."""
        fn = self.params.get('download_archive')
        vid_id = self._make_archive_id(info_dict)
        # locked_file serializes concurrent writers to the archive
        with locked_file(fn, 'a', encoding='utf-8') as archive_file:
            archive_file.write(vid_id + '\n')
    def format_resolution(format, default='unknown'):
        # Return a human-readable resolution string for a format dict:
        # an explicit 'resolution' value, "WxH", "Hp", "?xW", or `default`.
        if format.get('vcodec') == 'none':
        if format.get('resolution') is not None:
            # Prefer the resolution string supplied by the extractor
            return format['resolution']
        if format.get('height') is not None:
            if format.get('width') is not None:
                res = '%sx%s' % (format['width'], format['height'])
                # Height only: use the conventional "720p"-style notation
                res = '%sp' % format['height']
        elif format.get('width') is not None:
            res = '?x%d' % format['width']
    def _format_note(self, fdict):
        """Build the human-readable "note" column for a format dict.

        Accumulates container, video codec/bitrate, fps, audio
        codec/bitrate/sample rate and (approximate) filesize into a
        single comma-separated string.
        """
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            res += ', %sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            # 'none' acodec means a video-only format
            if fdict['acodec'] == 'none':
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            # '~' marks an estimated size
            res += '~' + format_bytes(fdict['filesize_approx'])
    def list_formats(self, info_dict):
        """Print a table of the available formats (--list-formats)."""
        # One table row; idlen sizes the "format code" column
        def line(format, idlen=20):
            return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
                format['format_id'],
                self.format_resolution(format),
                self._format_note(format),
        formats = info_dict.get('formats', [info_dict])
        # Column must fit the header as well as the longest format id
        idlen = max(len('format code'),
                    max(len(f['format_id']) for f in formats))
            line(f, idlen) for f in formats
            if f.get('preference') is None or f['preference'] >= -1000]
        if len(formats) > 1:
            # Formats are sorted worst-to-best; tag the two extremes
            formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
            formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'

        header_line = line({
            'format_id': 'format code', 'ext': 'extension',
            'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
        self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                       (info_dict['id'], header_line, '\n'.join(formats_s)))
    def urlopen(self, req):
        """ Start an HTTP download """
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # `req` may be either a plain URL string or a Request object
        req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
        url = req if req_is_string else req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            # Rebuild the Request object, preserving data and headers
            req = compat_urllib_request.Request(
                url_escaped, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

        return self._opener.open(req, timeout=self._socket_timeout)
    def print_debug_header(self):
        """Write the verbose-mode debug header (versions, encodings, proxies)."""
        if not self.params.get('verbose'):

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
            # Best effort: report the git revision when running from a checkout
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        # Versions of external helper programs (ffmpeg/avconv, rtmpdump)
        exe_versions = FFmpegPostProcessor.get_versions()
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_str = ', '.join(
            for exe, v in sorted(exe_versions.items())
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # Opt-in: contact yt-dl.org to report IP and check for updates
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
    def _setup_opener(self):
        """Build the urllib opener (cookies, proxies, HTTPS handler) used by urlopen()."""
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout is 600 seconds when not configured
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # In-memory cookies only
            self.cookiejar = compat_cookiejar.CookieJar()
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        if opts_proxy is not None:
            if opts_proxy == '':
                proxies = {'http': opts_proxy, 'https': opts_proxy}
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = compat_urllib_request.ProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        opener = compat_urllib_request.build_opener(
            https_handler, proxy_handler, cookie_processor, ydlh)
        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
    def encode(self, s):
        """Encode text to bytes using the configured output encoding."""
        if isinstance(s, bytes):
            return s  # Already encoded

            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            # Enrich the error message with a hint before re-raising
            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1602 def get_encoding(self):
1603 encoding = self.params.get('encoding')
1604 if encoding is None:
1605 encoding = preferredencoding()