2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
34 compat_urllib_request,
61 UnavailableVideoError,
71 from .cache import Cache
72 from .extractor import get_info_extractor, gen_extractors
73 from .downloader import get_suitable_downloader
74 from .downloader.rtmp import rtmpdump_version
75 from .postprocessor import (
77 FFmpegFixupStretchedPP,
82 from .version import __version__
class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible of downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object handles it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL process the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username: Username for authentication purposes.
    password: Password for authentication purposes.
    videopassword: Password for access a video.
    usenetrc: Use netrc for authentication instead.
    verbose: Print additional info to stdout.
    quiet: Do not print messages to stdout.
    no_warnings: Do not print out anything for warnings.
    forceurl: Force printing final URL.
    forcetitle: Force printing title.
    forceid: Force printing ID.
    forcethumbnail: Force printing thumbnail URL.
    forcedescription: Force printing description.
    forcefilename: Force printing final filename.
    forceduration: Force printing duration.
    forcejson: Force printing info_dict as JSON.
    dump_single_json: Force printing the info_dict of the whole playlist
    (or video) as a single JSON line.
    simulate: Do not download the video files.
    format: Video format code. See options.py for more information.
    format_limit: Highest quality format to try.
    outtmpl: Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors: Do not stop on download errors.
    nooverwrites: Prevent overwriting files.
    playliststart: Playlist item to start at.
    playlistend: Playlist item to end at.
    playlistreverse: Download playlist items in reverse order.
    matchtitle: Download only matching titles.
    rejecttitle: Reject downloads for matching titles.
    logger: Log messages to a logging.Logger instance.
    logtostderr: Log messages to stderr instead of stdout.
    writedescription: Write the video description to a .description file
    writeinfojson: Write the video description to a .info.json file
    writeannotations: Write the video annotations to a .annotations.xml file
    writethumbnail: Write the thumbnail image to a file
    writesubtitles: Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
    allsubtitles: Downloads all the subtitles of the video
    (requires writesubtitles or writeautomaticsub)
    listsubtitles: Lists all available subtitles for the video
    subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslangs: List of languages of the subtitles to download
    keepvideo: Keep the video file after post-processing
    daterange: A DateRange object, download only if the upload_date is in the range.
    skip_download: Skip the actual download of the video file
    cachedir: Location of the cache files in the filesystem.
    False to disable filesystem cache.
    noplaylist: Download single video instead of a playlist if in doubt.
    age_limit: An integer representing the user's age in years.
    Unsuitable videos for the given age are skipped.
    min_views: An integer representing the minimum view count the video
    must have in order to not be skipped.
    Videos without view count information are always
    downloaded. None for no limit.
    max_views: An integer representing the maximum view count.
    Videos that are more popular than that are not
    Videos without view count information are always
    downloaded. None for no limit.
    download_archive: File name of a file where all downloads are recorded.
    Videos already present in the file are not downloaded
    cookiefile: File name where cookies should be read from and dumped to.
    nocheckcertificate:Do not verify SSL certificates
    prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
    At the moment, this is only supported by YouTube.
    proxy: URL of the proxy server to use
    socket_timeout: Time to wait for unresponsive hosts, in seconds
    bidi_workaround: Work around buggy terminals without bidirectional text
    support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    include_ads: Download ads as well
    default_search: Prepend this string if an input url is not valid.
    'auto' for elaborate guessing
    encoding: Use this encoding instead of the system-specified.
    extract_flat: Do not resolve URLs, return the immediate result.
    Pass in 'in_playlist' to only show this behavior for
    postprocessors: A list of dictionaries, each with an entry
    * key: The name of the postprocessor. See
    youtube_dl/postprocessor/__init__.py for a list.
    as well as any further keyword arguments for the
    progress_hooks: A list of functions that get called on download
    progress, with a dictionary with the entries
    * filename: The final filename
    * status: One of "downloading" and "finished"
    The dict may also have some of the following entries:
    * downloaded_bytes: Bytes on disk
    * total_bytes: Size of the whole file, None if unknown
    * tmpfilename: The filename we're currently writing to
    * eta: The estimated time in seconds, None if unknown
    * speed: The download speed in bytes/second, None if
    Progress hooks are guaranteed to be called at least once
    (with status "finished") if the download is successful.
    merge_output_format: Extension to use when merging formats.
    fixup: Automatically correct known faults of the file.
    - "never": do nothing
    - "warn": only emit a warning
    - "detect_or_warn": check whether we can do anything
    about it, warn otherwise (default)
    source_address: (Experimental) Client-side IP address to bind to.
    call_home: Boolean, true iff we are allowed to contact the
    youtube-dl servers for debugging.
    sleep_interval: Number of seconds to sleep before each download.
    external_downloader: Executable of the external downloader to call.

    The following parameters are not used by YoutubeDL itself, they are used by
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle

    The following options are used by the post processors:
    prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
    otherwise prefer avconv.
    exec_cmd: Arbitrary command to run after downloading
    """

    # NOTE(review): several option descriptions appear truncated in this view
    # of the file; the list above may be incomplete.

    # Exit code of the last batch of downloads (reset to 0 in __init__).
    _download_retcode = None
    # Number of files downloaded so far; feeds the %(autonumber)s template.
    _num_downloads = None
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options."""
        # NOTE(review): this view of the source is missing interior lines
        # (the `params` default handling, self._ies/_pps initialisation,
        # the bidi try/except scaffolding, `sp_kwargs = dict(` and the
        # `if auto_init:` guard); the code below is annotated as-is.
        self._ies_instances = {}
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # "Screen" output goes to stderr instead of stdout under --logtostderr.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.cache = Cache(self)

        if params.get('bidi_workaround', False):
                # Feed output through an external bidi reordering tool via a pty.
                master, slave = pty.openpty()
                width = get_term_width()
                    width_args = ['-w', str(width)]
                    stdin=subprocess.PIPE,
                    stderr=self._err_file)
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    # Fallback when 'bidiv' is unavailable.
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                # Reordered text is read back from the pty master side.
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

        # Auto-enable --restrict-filenames when the filesystem encoding cannot
        # represent arbitrary characters (Python 3, non-Windows only).
        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if '%(stitle)s' in self.params.get('outtmpl', ''):
            self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

            # Runs when auto_init is on (guard line missing from this view).
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate the postprocessors requested via params['postprocessors'].
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        # Register externally supplied progress callbacks.
        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)
314 def warn_if_short_id(self, argv):
315 # short YouTube ID starting with dash?
317 i for i, a in enumerate(argv)
318 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
322 [a for i, a in enumerate(argv) if i not in idxs] +
323 ['--'] + [argv[i] for i in idxs]
326 'Long argument string detected. '
327 'Use -- to separate parameters and URLs, like this:\n%s\n' %
328 args_to_str(correct_argv))
330 def add_info_extractor(self, ie):
331 """Add an InfoExtractor object to the end of the list."""
333 self._ies_instances[ie.ie_key()] = ie
334 ie.set_downloader(self)
336 def get_info_extractor(self, ie_key):
338 Get an instance of an IE with name ie_key, it will try to get one from
339 the _ies list, if there's no instance it will create a new one and add
340 it to the extractor list.
342 ie = self._ies_instances.get(ie_key)
344 ie = get_info_extractor(ie_key)()
345 self.add_info_extractor(ie)
348 def add_default_info_extractors(self):
350 Add the InfoExtractors returned by gen_extractors to the end of the list
352 for ie in gen_extractors():
353 self.add_info_extractor(ie)
355 def add_post_processor(self, pp):
356 """Add a PostProcessor object to the end of the chain."""
358 pp.set_downloader(self)
360 def add_progress_hook(self, ph):
361 """Add the progress hook (currently only for the file downloader)"""
362 self._progress_hooks.append(ph)
364 def _bidi_workaround(self, message):
365 if not hasattr(self, '_output_channel'):
368 assert hasattr(self, '_output_process')
369 assert isinstance(message, compat_str)
370 line_count = message.count('\n') + 1
371 self._output_process.stdin.write((message + '\n').encode('utf-8'))
372 self._output_process.stdin.flush()
373 res = ''.join(self._output_channel.readline().decode('utf-8')
374 for _ in range(line_count))
375 return res[:-len('\n')]
377 def to_screen(self, message, skip_eol=False):
378 """Print message to stdout if not in quiet mode."""
379 return self.to_stdout(message, skip_eol, check_quiet=True)
381 def _write_string(self, s, out=None):
382 write_string(s, out=out, encoding=self.params.get('encoding'))
384 def to_stdout(self, message, skip_eol=False, check_quiet=False):
385 """Print message to stdout if not in quiet mode."""
386 if self.params.get('logger'):
387 self.params['logger'].debug(message)
388 elif not check_quiet or not self.params.get('quiet', False):
389 message = self._bidi_workaround(message)
390 terminator = ['\n', ''][skip_eol]
391 output = message + terminator
393 self._write_string(output, self._screen_file)
395 def to_stderr(self, message):
396 """Print message to stderr."""
397 assert isinstance(message, compat_str)
398 if self.params.get('logger'):
399 self.params['logger'].error(message)
401 message = self._bidi_workaround(message)
402 output = message + '\n'
403 self._write_string(output, self._err_file)
405 def to_console_title(self, message):
406 if not self.params.get('consoletitle', False):
408 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
409 # c_wchar_p() might not be necessary if `message` is
410 # already of type unicode()
411 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
412 elif 'TERM' in os.environ:
413 self._write_string('\033]0;%s\007' % message, self._screen_file)
415 def save_console_title(self):
416 if not self.params.get('consoletitle', False):
418 if 'TERM' in os.environ:
419 # Save the title on stack
420 self._write_string('\033[22;0t', self._screen_file)
422 def restore_console_title(self):
423 if not self.params.get('consoletitle', False):
425 if 'TERM' in os.environ:
426 # Restore the title from stack
427 self._write_string('\033[23;0t', self._screen_file)
430 self.save_console_title()
433 def __exit__(self, *args):
434 self.restore_console_title()
436 if self.params.get('cookiefile') is not None:
437 self.cookiejar.save()
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        # NOTE(review): this view of the source is missing interior lines
        # (the `if tb is None:` scaffold, `tb = ''`, two `else:` lines and
        # `self.to_stderr(tb)`); annotated as-is.
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
                if sys.exc_info()[0]: # if .trouble has been called from an except block
                    # Prefer the wrapped exception's own exc_info when present.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                    # No active exception: dump the current stack instead.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
        if not self.params.get('ignoreerrors', False):
            # Re-raise as DownloadError, preserving the most specific exc_info.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
469 def report_warning(self, message):
471 Print the message to stderr, it will be prefixed with 'WARNING:'
472 If stderr is a tty file the 'WARNING:' will be colored
474 if self.params.get('logger') is not None:
475 self.params['logger'].warning(message)
477 if self.params.get('no_warnings'):
479 if self._err_file.isatty() and os.name != 'nt':
480 _msg_header = '\033[0;33mWARNING:\033[0m'
482 _msg_header = 'WARNING:'
483 warning_message = '%s %s' % (_msg_header, message)
484 self.to_stderr(warning_message)
486 def report_error(self, message, tb=None):
488 Do the same as trouble, but prefixes the message with 'ERROR:', colored
489 in red if stderr is a tty file.
491 if self._err_file.isatty() and os.name != 'nt':
492 _msg_header = '\033[0;31mERROR:\033[0m'
494 _msg_header = 'ERROR:'
495 error_message = '%s %s' % (_msg_header, message)
496 self.trouble(error_message, tb)
498 def report_file_already_downloaded(self, file_name):
499 """Report file has already been fully downloaded."""
501 self.to_screen('[download] %s has already been downloaded' % file_name)
502 except UnicodeEncodeError:
503 self.to_screen('[download] The file has already been downloaded')
    def prepare_filename(self, info_dict):
        """Generate the output filename."""
        # NOTE(review): this view of the source is missing interior lines
        # (the `try:` opener, the autonumber_size default, parts of the
        # sanitize lambda/dict-comprehension and the `return` statements);
        # indentation reconstructed, code annotated as-is.
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
            autonumber_templ = '%0' + str(autonumber_size) + 'd'
            template_dict['autonumber'] = autonumber_templ % self._num_downloads
            if template_dict.get('playlist_index') is not None:
                # Zero-pad the index to the width of the playlist length.
                template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
            if template_dict.get('resolution') is None:
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    # NOTE(review): '?x%d' formats the *width* after the 'x',
                    # which looks swapped ('%dx?' would be width-first) —
                    # confirm against upstream before changing.
                    template_dict['resolution'] = '?x%d' % template_dict['width']

            sanitize = lambda k, v: sanitize_filename(
                restricted=self.params.get('restrictfilenames'),
            template_dict = dict((k, sanitize(k, v))
                                 for k, v in template_dict.items()
            # Any field missing from the dict formats as the literal 'NA'.
            template_dict = collections.defaultdict(lambda: 'NA', template_dict)

            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
            tmpl = compat_expanduser(outtmpl)
            filename = tmpl % template_dict
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
    def _match_entry(self, info_dict):
        """ Returns None iff the file should be downloaded """
        # A non-None return value is a human-readable skip reason.
        # NOTE(review): this view is missing the `if matchtitle:`,
        # `if rejecttitle:` and `if date is not None:` guard lines and the
        # final `return None`; annotated as-is.

        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date', None)
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count', None)
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        # NOTE(review): `title` is referenced here but is only bound when
        # 'title' is in info_dict — presumably a latent upstream bug; confirm.
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % title
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title
578 def add_extra_info(info_dict, extra_info):
579 '''Set the keys from extra_info in info dict if they are missing'''
580 for key, value in extra_info.items():
581 info_dict.setdefault(key, value)
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
        # NOTE(review): the continuation of the signature (presumably
        # `process=True):`), the docstring delimiters, and the for/else and
        # try scaffolding are missing from this view; annotated as-is.
        # NOTE(review): `extra_info={}` is a mutable default argument —
        # shared across calls if ever mutated; confirm it is treated as
        # read-only before changing.
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
            # A specific extractor was requested by key.
            ies = [self.get_info_extractor(ie_key)]
            if not ie.suitable(url):
                self.report_warning('The program functionality for this site has been marked as broken, '
                'and will probably not work.')
                ie_result = ie.extract(url)
                if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                        '_type': 'compat_list',
                        'entries': ie_result,
                self.add_default_extra_info(ie_result, ie, url)
                    return self.process_ie_result(ie_result, download, extra_info)
            except ExtractorError as de: # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
            except MaxDownloadsReached:
            except Exception as e:
                # Only swallow unexpected errors when --ignore-errors is set.
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
            self.report_error('no suitable InfoExtractor for URL %s' % url)
633 def add_default_extra_info(self, ie_result, ie, url):
634 self.add_extra_info(ie_result, {
635 'extractor': ie.IE_NAME,
637 'webpage_url_basename': url_basename(url),
638 'extractor_key': ie.ie_key(),
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        # NOTE(review): many structural lines (returns, `else:`, dict/list
        # openers and closers, the `_fixup` helper definition) are missing
        # from this view; indentation reconstructed, code annotated as-is.
        # NOTE(review): `extra_info={}` is a mutable default argument.

        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            extract_flat = self.params.get('extract_flat', False)
            # With --flat-playlist, do not resolve nested playlist entries.
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Non-None fields of the embedding result win over the embedded
            # page, except '_type' and 'url' which must come from the target.
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            assert new_result.get('_type') != 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # playliststart is 1-based in params, 0-based internally.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:

            ie_entries = ie_result['entries']
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                # Lazily paged playlists: fetch only the requested slice.
                entries = ie_entries.getslice(
                    playliststart, playlistend)
                n_entries = len(entries)
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))
                # Generic iterable fallback (generator of entries).
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                    # Bookkeeping propagated into each playlist entry.
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],

                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen('[download] ' + reason)

                entry_result = self.process_ie_result(entry,
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
        elif result_type == 'compat_list':
            # Deprecated bare-list extractor output.
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            raise Exception('Invalid result type: %s' % result_type)
    def _apply_format_filter(self, format_spec, available_formats):
        " Returns a tuple of the remaining format_spec and filtered formats "
        # NOTE(review): this view is missing the OPERATORS mapping (token ->
        # comparison function), the `\]$` regex line, the `if not m:` guard,
        # the try/except around int() and the `def _filter(f):` header;
        # annotated as-is.
        operator_rex = re.compile(r'''(?x)\s*\[
            (?P<key>width|height|tbr|abr|vbr|filesize)
            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.search(format_spec)
            raise ValueError('Invalid format specification %r' % format_spec)

            comparison_value = int(m.group('value'))
            # Non-integer values: try parsing as a human-readable filesize,
            # first verbatim, then with a 'B' suffix appended.
            comparison_value = parse_filesize(m.group('value'))
            if comparison_value is None:
                comparison_value = parse_filesize(m.group('value') + 'B')
            if comparison_value is None:
                    'Invalid value %r in format specification %r' % (
                        m.group('value'), format_spec))
        op = OPERATORS[m.group('op')]

            actual_value = f.get(m.group('key'))
            if actual_value is None:
                # A trailing '?' in the filter keeps formats lacking the field.
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
        new_formats = [f for f in available_formats if _filter(f)]

        # Strip the consumed trailing '[...]' filter from the spec.
        new_format_spec = format_spec[:-len(m.group(0))]
        if not new_format_spec:
            new_format_spec = 'best'

        return (new_format_spec, new_formats)
    def select_format(self, format_spec, available_formats):
        # Resolve one format selector ('best', 'worstaudio', an extension,
        # or a format_id) against the available formats.
        # NOTE(review): several lines (`return None`, the audio/video list
        # openers, `if audio_formats:`-style guards and two `else:` lines)
        # are missing from this view; annotated as-is.
        while format_spec.endswith(']'):
            # Peel trailing '[key<op>value]' filters off the selector first.
            format_spec, available_formats = self._apply_format_filter(
                format_spec, available_formats)
        if not available_formats:

        # Formats are assumed sorted worst-to-best: [-1] best, [0] worst.
        if format_spec == 'best' or format_spec is None:
            return available_formats[-1]
        elif format_spec == 'worst':
            return available_formats[0]
        elif format_spec == 'bestaudio':
                f for f in available_formats
                if f.get('vcodec') == 'none']
                return audio_formats[-1]
        elif format_spec == 'worstaudio':
                f for f in available_formats
                if f.get('vcodec') == 'none']
                return audio_formats[0]
        elif format_spec == 'bestvideo':
                f for f in available_formats
                if f.get('acodec') == 'none']
                return video_formats[-1]
        elif format_spec == 'worstvideo':
                f for f in available_formats
                if f.get('acodec') == 'none']
                return video_formats[0]
            # Fallback: match by extension if the spec is a known extension,
            # otherwise by exact format_id.
            extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
            if format_spec in extensions:
                filter_f = lambda f: f['ext'] == format_spec
                filter_f = lambda f: f['format_id'] == format_spec
            matches = list(filter(filter_f, available_formats))
    def process_video_result(self, info_dict, download=True):
        # Validate and normalise a single extracted video result, resolve the
        # requested format selection, and hand each chosen format to
        # process_info().
        # NOTE(review): many structural lines (`else:`, returns, closing
        # brackets, `if download:` guards, `selected_format = {` opener)
        # are missing from this view; indentation reconstructed, code
        # annotated as-is.
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        thumbnails = info_dict.get('thumbnails')
            # Sorted worst-to-best so [-1] below picks the largest thumbnail.
            thumbnails.sort(key=lambda t: (
                t.get('width'), t.get('height'), t.get('url')))
                if 'width' in t and 'height' in t:
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])

        if thumbnails and 'thumbnail' not in info_dict:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around negative timestamps in Windows
            # (see http://bugs.python.org/issue1646728)
            if info_dict['timestamp'] < 0 and os.name == 'nt':
                info_dict['timestamp'] = 0
            upload_date = datetime.datetime.utcfromtimestamp(
                info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

        # These extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
            self.process_info(info_dict)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
            formats = info_dict['formats']

            raise ExtractorError('No video formats found!')

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()

        # --max-quality: drop everything above the given format_id.
        format_limit = self.params.get('format_limit', None)
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats', None):
            self.list_formats(info_dict)

        req_format = self.params.get('format')
        if req_format is None:
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
            for rfstr in req_format.split(','):
                # We can accept formats requested in the format: 34/5/best, we pick
                # the first that is available, starting from left
                req_formats = rfstr.split('/')
                for rf in req_formats:
                    if re.match(r'.+?\+.+?', rf) is not None:
                        # Two formats have been requested like '137+139'
                        format_1, format_2 = rf.split('+')
                        formats_info = (self.select_format(format_1, formats),
                                        self.select_format(format_2, formats))
                        if all(formats_info):
                            # The first format must contain the video and the
                            if formats_info[0].get('vcodec') == 'none':
                                self.report_error('The first format must '
                                                  'contain the video, try using '
                                                  '"-f %s+%s"' % (format_2, format_1))
                                # Merged output extension: first format's ext
                                # unless --merge-output-format overrides it.
                                formats_info[0]['ext']
                                if self.params.get('merge_output_format') is None
                                else self.params['merge_output_format'])
                                'requested_formats': formats_info,
                                'ext': formats_info[0]['ext'],
                                'width': formats_info[0].get('width'),
                                'height': formats_info[0].get('height'),
                                'resolution': formats_info[0].get('resolution'),
                                'fps': formats_info[0].get('fps'),
                                'vcodec': formats_info[0].get('vcodec'),
                                'vbr': formats_info[0].get('vbr'),
                                'stretched_ratio': formats_info[0].get('stretched_ratio'),
                                'acodec': formats_info[1].get('acodec'),
                                'abr': formats_info[1].get('abr'),
                            selected_format = None
                        selected_format = self.select_format(rf, formats)
                    if selected_format is not None:
                        formats_to_download.append(selected_format)

        if not formats_to_download:
            raise ExtractorError('requested format not available',

            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Runs the whole per-video pipeline: forced stdout printing,
        filename preparation, writing of side files (description,
        annotations, subtitles, info JSON, thumbnail), the actual
        download (including multi-format download/merge), optional
        ffmpeg fixups, postprocessing, and download-archive recording.
        """

        assert info_dict.get('_type', 'video') == 'video'

        # Enforce --max-downloads by raising once the counter reaches the cap.
        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()

        # Overlong titles are truncated to 200 chars ('...'-terminated);
        # the untruncated title stays available under 'fulltitle'.
        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            info_dict['title'] = info_dict['title'][:197] + '...'

        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        # _match_entry returns a human-readable reason to skip this video,
        # or None to proceed with it.
        reason = self._match_entry(info_dict)
        if reason is not None:
            self.to_screen('[download] ' + reason)

        self._num_downloads += 1

        filename = self.prepare_filename(info_dict)

        # Forced printings: each --force-*/--get-* option dumps one field
        # of the info dict to stdout.
        if self.params.get('forcetitle', False):
            self.to_stdout(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            if info_dict.get('requested_formats') is not None:
                for f in info_dict['requested_formats']:
                    self.to_stdout(f['url'] + f.get('play_path', ''))
                # For RTMP URLs, also include the playpath
                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
            info_dict['_filename'] = filename
            self.to_stdout(json.dumps(info_dict))
        if self.params.get('dump_single_json', False):
            info_dict['_filename'] = filename

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):

        if filename is None:

            # Create the destination directory tree before writing anything.
            dn = os.path.dirname(encodeFilename(filename))
            if dn and not os.path.exists(dn):
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + compat_str(err))

        # --write-description: dump the description to a .description side file.
        if self.params.get('writedescription', False):
            descfn = filename + '.description'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
            elif info_dict.get('description') is None:
                self.report_warning('There\'s no description to write.')
                    self.to_screen('[info] Writing video description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(info_dict['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write description file ' + descfn)

        # --write-annotations: dump annotation XML to an .annotations.xml file.
        if self.params.get('writeannotations', False):
            annofn = filename + '.annotations.xml'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    # 'annotations' missing or not writable as text.
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)

        # Subtitles: written when either --write-sub or --write-auto-sub is set.
        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['subtitles']
            sub_format = self.params.get('subtitlesformat', 'srt')
            for sub_lang in subtitles.keys():
                sub = subtitles[sub_lang]
                    sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                    if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                        self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                        self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                except (OSError, IOError):
                    self.report_error('Cannot write subtitles file ' + sub_filename)

        # --write-info-json: dump the whole info dict next to the video file.
        if self.params.get('writeinfojson', False):
            infofn = os.path.splitext(filename)[0] + '.info.json'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video description metadata is already present')
                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                    write_json_file(info_dict, infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write metadata to JSON file ' + infofn)

        # --write-thumbnail: fetch the thumbnail URL and store it alongside
        # the video, using the URL's extension (default 'jpg').
        if self.params.get('writethumbnail', False):
            if info_dict.get('thumbnail') is not None:
                thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
                thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                    self.to_screen('[%s] %s: Thumbnail is already present' %
                                   (info_dict['extractor'], info_dict['id']))
                    self.to_screen('[%s] %s: Downloading thumbnail ...' %
                                   (info_dict['extractor'], info_dict['id']))
                        uf = self.urlopen(info_dict['thumbnail'])
                        with open(thumb_filename, 'wb') as thumbf:
                            shutil.copyfileobj(uf, thumbf)
                        self.to_screen('[%s] %s: Writing thumbnail to: %s' %
                                       (info_dict['extractor'], info_dict['id'], thumb_filename))
                    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                        # Thumbnail failure is non-fatal: warn and carry on.
                        self.report_warning('Unable to download thumbnail "%s": %s' %
                                            (info_dict['thumbnail'], compat_str(err)))

        if not self.params.get('skip_download', False):
                    # Pick a FileDownloader suited to this info dict (RTMP,
                    # HLS, plain HTTP, ...) and hand it the download.
                    fd = get_suitable_downloader(info, self.params)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)
                if info_dict.get('requested_formats') is not None:
                    # Multiple formats requested (e.g. '137+139'): download
                    # each to its own 'f<format_id>' file, then merge.
                    merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
                    if not merger._executable:
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged')
                        postprocessors = [merger]
                    for f in info_dict['requested_formats']:
                        new_info = dict(info_dict)
                        fname = self.prepare_filename(new_info)
                        fname = prepend_extension(fname, 'f%s' % f['format_id'])
                        downloaded.append(fname)
                        partial_success = dl(fname, new_info)
                        success = success and partial_success
                    info_dict['__postprocessors'] = postprocessors
                    info_dict['__files_to_merge'] = downloaded
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % str(err))
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))

                # Post-download fixups; policy is one of
                # 'ignore'/'never'/'warn'/'detect_or_warn' (the default).
                fixup_policy = self.params.get('fixup')
                if fixup_policy is None:
                    fixup_policy = 'detect_or_warn'

                # Non-uniform pixel aspect ratio: warn, or queue the
                # ffmpeg-based stretch fixup when it is available.
                stretched_ratio = info_dict.get('stretched_ratio')
                if stretched_ratio is not None and stretched_ratio != 1:
                    if fixup_policy == 'warn':
                        self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                            info_dict['id'], stretched_ratio))
                    elif fixup_policy == 'detect_or_warn':
                        stretched_pp = FFmpegFixupStretchedPP(self)
                        if stretched_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(stretched_pp)
                            self.report_warning(
                                '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
                                    info_dict['id'], stretched_ratio))
                        assert fixup_policy in ('ignore', 'never')

                # DASH m4a container: same warn-or-queue-fixup scheme.
                if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
                    if fixup_policy == 'warn':
                        self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM4aPP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                            self.report_warning(
                                '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
                        assert fixup_policy in ('ignore', 'never')

                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error('postprocessing: %s' % str(err))
        self.record_download_archive(info_dict)
    def download(self, url_list):
        """Download a given list of URLs.

        Returns the accumulated process return code
        (self._download_retcode).
        """
        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
        # Refuse a template-free output name for multiple downloads:
        # every video would be written to the same file.
        if (len(url_list) > 1 and
                and self.params.get('max_downloads') != 1):
            raise SameFileError(outtmpl)

        for url in url_list:
                # It also downloads the videos
                res = self.extract_info(url)
            except UnavailableVideoError:
                self.report_error('unable to download video')
            except MaxDownloadsReached:
                self.to_screen('[info] Maximum number of downloaded files reached.')
                if self.params.get('dump_single_json', False):
                    self.to_stdout(json.dumps(res))

        return self._download_retcode
    def download_with_info_file(self, info_filename):
        """Download using a previously written .info.json file
        (--load-info), bypassing fresh extraction."""
        with io.open(info_filename, 'r', encoding='utf-8') as f:
            self.process_ie_result(info, download=True)
        except DownloadError:
            # The stored info may be stale (expired URLs); fall back to
            # re-extracting from the original webpage URL if we have one.
            webpage_url = info.get('webpage_url')
            if webpage_url is not None:
                self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
                return self.download([webpage_url])
        return self._download_retcode
    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file.

        Per-video postprocessors stashed under '__postprocessors'
        (e.g. merger/fixups) run before the globally registered ones
        (self._pps). Afterwards, the original file is deleted when the
        postprocessors voted to discard it and --keep-video is not set.
        """
        info = dict(ie_info)
        info['filepath'] = filename
        if ie_info.get('__postprocessors') is not None:
            pps_chain.extend(ie_info['__postprocessors'])
        pps_chain.extend(self._pps)
        for pp in pps_chain:
                old_filename = info['filepath']
                # pp.run returns (keep_video_wish, possibly-updated info).
                keep_video_wish, info = pp.run(info)
                if keep_video_wish is not None:
                        keep_video = keep_video_wish
                    elif keep_video is None:
                        # No clear decision yet, let IE decide
                        keep_video = keep_video_wish
            except PostProcessingError as e:
                self.report_error(e.msg)
        if keep_video is False and not self.params.get('keepvideo', False):
                self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
                os.remove(encodeFilename(old_filename))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded video file')
1333 def _make_archive_id(self, info_dict):
1334 # Future-proof against any change in case
1335 # and backwards compatibility with prior versions
1336 extractor = info_dict.get('extractor_key')
1337 if extractor is None:
1338 if 'id' in info_dict:
1339 extractor = info_dict.get('ie_key') # key in a playlist
1340 if extractor is None:
1341 return None # Incomplete video information
1342 return extractor.lower() + ' ' + info_dict['id']
    def in_download_archive(self, info_dict):
        """Return whether this video's archive id is already recorded
        in the --download-archive file."""
        fn = self.params.get('download_archive')

        vid_id = self._make_archive_id(info_dict)
            return False  # Incomplete video information

            # One archive id per line; compare against the stripped line.
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    if line.strip() == vid_id:
        except IOError as ioe:
            # A missing archive file just means nothing was recorded yet;
            # any other I/O error is propagated.
            if ioe.errno != errno.ENOENT:
    def record_download_archive(self, info_dict):
        """Append this video's archive id to the --download-archive file
        (one id per line)."""
        fn = self.params.get('download_archive')
        vid_id = self._make_archive_id(info_dict)
        with locked_file(fn, 'a', encoding='utf-8') as archive_file:
            archive_file.write(vid_id + '\n')
    def format_resolution(format, default='unknown'):
        """Return a short resolution string for a format dict: the
        explicit 'resolution' field when set, else 'WxH' / 'Hp' / '?xW'
        from whichever of width/height is known."""
        if format.get('vcodec') == 'none':
        if format.get('resolution') is not None:
            return format['resolution']
        if format.get('height') is not None:
            if format.get('width') is not None:
                res = '%sx%s' % (format['width'], format['height'])
                res = '%sp' % format['height']
        elif format.get('width') is not None:
            res = '?x%d' % format['width']
    def _format_note(self, fdict):
        """Build the free-form 'note' column shown by --list-formats:
        format note, bitrates, container, codecs, fps, sample rate and
        (possibly approximate) filesize, appended as available."""
        # f4f/f4m (Adobe HDS) fragments are flagged as unsupported.
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            res += '%s container' % fdict['container']
        # Video codec / bitrate part.
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            res += ', %sfps' % fdict['fps']
        # Audio codec / bitrate / sample-rate part; acodec == 'none'
        # marks a video-only format.
        if fdict.get('acodec') is not None:
            if fdict['acodec'] == 'none':
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        # Exact filesize when known, otherwise the '~'-prefixed estimate.
        if fdict.get('filesize') is not None:
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            res += '~' + format_bytes(fdict['filesize_approx'])
    def list_formats(self, info_dict):
        """Print the table of available formats for this video
        (--list-formats)."""
        def line(format, idlen=20):
            # One table row: format code, extension, resolution, note.
            return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
                format['format_id'],
                self.format_resolution(format),
                self._format_note(format),

        formats = info_dict.get('formats', [info_dict])
        # Width of the 'format code' column: the widest id, at least as
        # wide as the header text itself.
        idlen = max(len('format code'),
                    max(len(f['format_id']) for f in formats))
            line(f, idlen) for f in formats
            if f.get('preference') is None or f['preference'] >= -1000]
        # First/last rows get '(worst)'/'(best)' markers — assumes the
        # formats list is sorted worst-first; TODO confirm upstream sorting.
        if len(formats) > 1:
            formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
            formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'

        header_line = line({
            'format_id': 'format code', 'ext': 'extension',
            'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
        self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                       (info_dict['id'], header_line, '\n'.join(formats_s)))
    def urlopen(self, req):
        """ Start an HTTP download.

        Accepts either a URL string or a urllib Request object and opens
        it through the configured opener (self._opener), applying the
        configured socket timeout.
        """
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
        url = req if req_is_string else req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
                # Rebuild the Request around the escaped URL, preserving
                # data, headers and origin information.
                req = compat_urllib_request.Request(
                    url_escaped, data=req.data, headers=req.headers,
                    origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

        return self._opener.open(req, timeout=self._socket_timeout)
    def print_debug_header(self):
        """Print the '[debug] ...' header (versions, encodings, proxies)
        emitted in --verbose mode; no-op otherwise."""
        if not self.params.get('verbose'):

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # Report the four encodings that matter for console/file output.
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
            # Best effort: show the git revision when running from a checkout.
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        # Versions of the external programs we may invoke.
        exe_versions = FFmpegPostProcessor.get_versions()
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_str = ', '.join(
            for exe, v in sorted(exe_versions.items())
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect the proxies configured on the opener's handlers.
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        # --call-home: ask yt-dl.org for our public IP and the latest
        # released version, warning when we are outdated.
        if self.params.get('call_home', False):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
    def _setup_opener(self):
        """Build the urllib opener used for all HTTP(S) requests:
        cookie jar, proxy handling, HTTPS handler and our custom
        YoutubeDLHandler, stored on self._opener."""
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout: 600 seconds when not configured.
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        # In-memory cookie jar by default; a Mozilla-format file-backed
        # jar when --cookies was given (loaded only if readable).
        if opts_cookiefile is None:
            self.cookiejar = compat_cookiejar.CookieJar()
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        # --proxy '' disables proxying; otherwise the given proxy is used
        # for both http and https; with no option the environment applies.
        if opts_proxy is not None:
            if opts_proxy == '':
                proxies = {'http': opts_proxy, 'https': opts_proxy}
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = compat_urllib_request.ProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        opener = compat_urllib_request.build_opener(
            https_handler, proxy_handler, cookie_processor, ydlh)
        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
    def encode(self, s):
        """Encode text to bytes using the configured output encoding;
        bytes input passes through unchanged."""
        if isinstance(s, bytes):
            return s  # Already encoded

            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            # Enrich the error with a hint before it propagates.
            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1603 def get_encoding(self):
1604 encoding = self.params.get('encoding')
1605 if encoding is None:
1606 encoding = preferredencoding()