2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
34 compat_urllib_request,
62 UnavailableVideoError,
72 from .cache import Cache
73 from .extractor import get_info_extractor, gen_extractors
74 from .downloader import get_suitable_downloader
75 from .downloader.rtmp import rtmpdump_version
76 from .postprocessor import (
78 FFmpegFixupStretchedPP,
83 from .version import __version__
class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible of downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object handles it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL process the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceduration:     Force printing duration.
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    simulate:          Do not download the video files.
    format:            Video format code. See options.py for more information.
    format_limit:      Highest quality format to try.
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    nooverwrites:      Prevent overwriting files.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    playlistreverse:   Download playlist items in reverse order.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
    cookiefile:        File name where cookies should be read from and dumped to.
    nocheckcertificate:Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    proxy:             URL of the proxy server to use
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
    postprocessors:    A list of dictionaries, each with an entry
                       * key:  The name of the postprocessor. See
                               youtube_dl/postprocessor/__init__.py for a list.
                       as well as any further keyword arguments for the
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * filename: The final filename
                       * status: One of "downloading" and "finished"

                       The dict may also have some of the following entries:

                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * tmpfilename: The filename we're currently writing to
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    merge_output_format: Extension to use when merging formats.
    fixup:             Automatically correct known faults of the file.
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    (Experimental) Client-side IP address to bind to.
    call_home:         Boolean, true iff we are allowed to contact the
                       youtube-dl servers for debugging.
    sleep_interval:    Number of seconds to sleep before each download.
    external_downloader:  Executable of the external downloader to call.

    The following parameters are not used by YoutubeDL itself, they are used by
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle

    The following options are used by the post processors:
    prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
                       otherwise prefer avconv.
    exec_cmd:          Arbitrary command to run after downloading
    """

    # Accumulated exit code for the whole run; set to 1 by trouble() on a
    # non-fatal error, initialized to 0 in __init__.
    _download_retcode = None
    # Count of files downloaded this session; feeds %(autonumber)s in
    # prepare_filename().
    _num_downloads = None
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options."""
        self._ies_instances = {}
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Route screen messages to stderr when 'logtostderr' is set.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.cache = Cache(self)

        if params.get('bidi_workaround', False):
            # Pipe our output through an external bidi filter (bidiv, with a
            # fribidi fallback) for terminals lacking bidirectional-text
            # support.  (Some setup lines are not visible in this excerpt.)
                master, slave = pty.openpty()
                width = get_term_width()
                width_args = ['-w', str(width)]
                    stdin=subprocess.PIPE,
                    stderr=self._err_file)
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    # Fallback filter when bidiv is unavailable.
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

        # Force --restrict-filenames when the filesystem encoding cannot
        # represent arbitrary characters (Python 3 would raise, see #1474).
        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if '%(stitle)s' in self.params.get('outtmpl', ''):
            self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate configured postprocessors: 'key' selects the PP class,
        # the remaining dict entries are forwarded as keyword arguments.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)
    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
                # Suggest moving dash-leading IDs after a '--' separator so
                # they are not mistaken for command-line options.
                [a for i, a in enumerate(argv) if i not in idxs] +
                ['--'] + [argv[i] for i in idxs]
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s\n' %
                args_to_str(correct_argv))
    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        # Remember one instance per IE key so get_info_extractor() can reuse it.
        self._ies_instances[ie.ie_key()] = ie
        ie.set_downloader(self)
    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key, it will try to get one from
        the _ies list, if there's no instance it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
            # Lazily instantiate and register the extractor on a cache miss.
            # (The `if ie is None:` guard line is not visible in this excerpt.)
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractors():
            self.add_info_extractor(ie)
    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        # Give the postprocessor a back-reference to this downloader.
        pp.set_downloader(self)
361 def add_progress_hook(self, ph):
362 """Add the progress hook (currently only for the file downloader)"""
363 self._progress_hooks.append(ph)
    def _bidi_workaround(self, message):
        # Only active when __init__ set up the bidi subprocess pipe.
        if not hasattr(self, '_output_channel'):
            # (early-return body not visible in this excerpt)

        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        # Feed the message through the external bidi filter and read back the
        # same number of lines it produces.
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        # Strip the trailing newline appended above.
        return res[:-len('\n')]
378 def to_screen(self, message, skip_eol=False):
379 """Print message to stdout if not in quiet mode."""
380 return self.to_stdout(message, skip_eol, check_quiet=True)
382 def _write_string(self, s, out=None):
383 write_string(s, out=out, encoding=self.params.get('encoding'))
385 def to_stdout(self, message, skip_eol=False, check_quiet=False):
386 """Print message to stdout if not in quiet mode."""
387 if self.params.get('logger'):
388 self.params['logger'].debug(message)
389 elif not check_quiet or not self.params.get('quiet', False):
390 message = self._bidi_workaround(message)
391 terminator = ['\n', ''][skip_eol]
392 output = message + terminator
394 self._write_string(output, self._screen_file)
396 def to_stderr(self, message):
397 """Print message to stderr."""
398 assert isinstance(message, compat_str)
399 if self.params.get('logger'):
400 self.params['logger'].error(message)
402 message = self._bidi_workaround(message)
403 output = message + '\n'
404 self._write_string(output, self._err_file)
    def to_console_title(self, message):
        # No-op unless the user enabled the 'consoletitle' option.
        if not self.params.get('consoletitle', False):
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # xterm OSC 0 escape sequence: set window/icon title.
            self._write_string('\033]0;%s\007' % message, self._screen_file)
    def save_console_title(self):
        """Push the current terminal title (xterm title stack), if enabled."""
        if not self.params.get('consoletitle', False):
        if 'TERM' in os.environ:
            # Save the title on stack
            self._write_string('\033[22;0t', self._screen_file)

    def restore_console_title(self):
        """Pop the previously saved terminal title (xterm title stack)."""
        if not self.params.get('consoletitle', False):
        if 'TERM' in os.environ:
            # Restore the title from stack
            self._write_string('\033[23;0t', self._screen_file)
        self.save_console_title()  # body of __enter__ (context-manager entry); its def line is not visible in this excerpt
434 def __exit__(self, *args):
435 self.restore_console_title()
437 if self.params.get('cookiefile') is not None:
438 self.cookiejar.save()
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                # Prefer the traceback wrapped inside the active exception
                # (e.g. ExtractorError.exc_info) when one is available.
                if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                    tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += compat_str(traceback.format_exc())
                # Not called from an except block: dump the current stack.
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = ''.join(tb_data)
        # Unless errors are ignored, escalate to a DownloadError carrying the
        # most specific exc_info we can find.
        if not self.params.get('ignoreerrors', False):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
    def report_warning(self, message):
        """
        Print the message to stderr, it will be prefixed with 'WARNING:'
        If stderr is a tty file the 'WARNING:' will be colored
        """
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
            if self.params.get('no_warnings'):
            # ANSI yellow prefix on POSIX ttys; plain prefix elsewhere.
            if self._err_file.isatty() and os.name != 'nt':
                _msg_header = '\033[0;33mWARNING:\033[0m'
                _msg_header = 'WARNING:'
            warning_message = '%s %s' % (_msg_header, message)
            self.to_stderr(warning_message)
    def report_error(self, message, tb=None):
        """
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        """
        # ANSI red prefix on POSIX ttys; plain 'ERROR:' elsewhere.
        if self._err_file.isatty() and os.name != 'nt':
            _msg_header = '\033[0;31mERROR:\033[0m'
            _msg_header = 'ERROR:'
        error_message = '%s %s' % (_msg_header, message)
        self.trouble(error_message, tb)
    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            # Fall back to a message without the (unencodable) filename.
            self.to_screen('[download] The file has already been downloaded')
    def prepare_filename(self, info_dict):
        """Generate the output filename."""
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
            # e.g. '%05d': zero-padded per-session sequence number.
            autonumber_templ = '%0' + str(autonumber_size) + 'd'
            template_dict['autonumber'] = autonumber_templ % self._num_downloads
            if template_dict.get('playlist_index') is not None:
                # Pad the playlist index to the width of the playlist size.
                template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
            # Derive a human-readable 'resolution' when the extractor did not
            # provide one.
            if template_dict.get('resolution') is None:
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    template_dict['resolution'] = '?x%d' % template_dict['width']

            # Sanitize every field so the rendered template is a safe filename.
            sanitize = lambda k, v: sanitize_filename(
                restricted=self.params.get('restrictfilenames'),
            template_dict = dict((k, sanitize(k, v))
                                 for k, v in template_dict.items()
            # Fields missing from the template render as 'NA' instead of
            # raising KeyError.
            template_dict = collections.defaultdict(lambda: 'NA', template_dict)

            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
            tmpl = compat_expanduser(outtmpl)
            filename = tmpl % template_dict
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
    def _match_entry(self, info_dict):
        """ Returns None iff the file should be downloaded """

        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date', None)
            # NOTE(review): default DateRange() presumably matches any date —
            # verify against the DateRange implementation.
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count', None)
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % title
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title
579 def add_extra_info(info_dict, extra_info):
580 '''Set the keys from extra_info in info dict if they are missing'''
581 for key, value in extra_info.items():
582 info_dict.setdefault(key, value)
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
        """
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        """
        # NOTE(review): extra_info uses a mutable default argument, which is
        # shared across calls — callers must not mutate it.
            # A specific extractor was requested by key.
            ies = [self.get_info_extractor(ie_key)]

            if not ie.suitable(url):
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')
                ie_result = ie.extract(url)
                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                        '_type': 'compat_list',
                        'entries': ie_result,
                self.add_default_extra_info(ie_result, ie, url)
                    return self.process_ie_result(ie_result, download, extra_info)
            except ExtractorError as de:  # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
            except MaxDownloadsReached:
            except Exception as e:
                # Unexpected errors are only reported (not re-raised) when
                # 'ignoreerrors' is set.
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
        self.report_error('no suitable InfoExtractor for URL %s' % url)
    def add_default_extra_info(self, ie_result, ie, url):
        # Stamp the result with the extractor that produced it and the source
        # URL (without overwriting fields the extractor already set).
        self.add_extra_info(ie_result, {
            'extractor': ie.IE_NAME,
            'webpage_url_basename': url_basename(url),
            'extractor_key': ie.ie_key(),
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """

        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            extract_flat = self.params.get('extract_flat', False)
            # In flat-extraction mode URL references are not resolved further.
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Non-None fields from the embedding result win over the target's,
            # except '_type' and 'url', which describe the reference itself.
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            assert new_result.get('_type') != 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:

            ie_entries = ie_result['entries']
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                # Paged lists are sliced lazily to avoid fetching every page.
                entries = ie_entries.getslice(
                    playliststart, playlistend)
                n_entries = len(entries)
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))
                # Any other iterable is consumed with islice.
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],

                # Skip entries rejected by the title/date/view-count filters.
                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen('[download] ' + reason)

                entry_result = self.process_ie_result(entry,
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
        elif result_type == 'compat_list':
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
            # Re-process each legacy entry after fixing it up.
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            raise Exception('Invalid result type: %s' % result_type)
    def _apply_format_filter(self, format_spec, available_formats):
        " Returns a tuple of the remaining format_spec and filtered formats "

        # Parse a trailing '[key OP value]' filter, e.g. '[height<=480]'.
        operator_rex = re.compile(r'''(?x)\s*\[
            (?P<key>width|height|tbr|abr|vbr|filesize)
            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.search(format_spec)
            raise ValueError('Invalid format specification %r' % format_spec)

            comparison_value = int(m.group('value'))
            # Not a plain integer: try parsing it as a size ('50k', '2MiB', ...)
            comparison_value = parse_filesize(m.group('value'))
            if comparison_value is None:
                comparison_value = parse_filesize(m.group('value') + 'B')
            if comparison_value is None:
                    'Invalid value %r in format specification %r' % (
                        m.group('value'), format_spec))
        op = OPERATORS[m.group('op')]

            actual_value = f.get(m.group('key'))
            if actual_value is None:
                # A '?' suffix keeps formats that lack the filtered key.
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
        new_formats = [f for f in available_formats if _filter(f)]

        # Strip the consumed filter; an empty remainder defaults to 'best'.
        new_format_spec = format_spec[:-len(m.group(0))]
        if not new_format_spec:
            new_format_spec = 'best'

        return (new_format_spec, new_formats)
    def select_format(self, format_spec, available_formats):
        # Consume any trailing '[...]' filters before interpreting the spec.
        while format_spec.endswith(']'):
            format_spec, available_formats = self._apply_format_filter(
                format_spec, available_formats)
        if not available_formats:

        # 'best' picks the last entry, i.e. the list is treated as sorted
        # from worst to best quality.
        if format_spec == 'best' or format_spec is None:
            return available_formats[-1]
        elif format_spec == 'worst':
            return available_formats[0]
        elif format_spec == 'bestaudio':
                f for f in available_formats
                if f.get('vcodec') == 'none']
                return audio_formats[-1]
        elif format_spec == 'worstaudio':
                f for f in available_formats
                if f.get('vcodec') == 'none']
                return audio_formats[0]
        elif format_spec == 'bestvideo':
                f for f in available_formats
                if f.get('acodec') == 'none']
                return video_formats[-1]
        elif format_spec == 'worstvideo':
                f for f in available_formats
                if f.get('acodec') == 'none']
                return video_formats[0]
            # Otherwise the spec is either a file extension or a format_id.
            extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
            if format_spec in extensions:
                filter_f = lambda f: f['ext'] == format_spec
                filter_f = lambda f: f['format_id'] == format_spec
            matches = list(filter(filter_f, available_formats))
    def _calc_headers(self, info_dict):
        # Start from the global default headers, then layer on per-format
        # headers and the matching cookies.
        res = std_headers.copy()

        add_headers = info_dict.get('http_headers')
            res.update(add_headers)

        cookies = self._calc_cookies(info_dict)
            res['Cookie'] = cookies
    def _calc_cookies(self, info_dict):
        # Minimal stand-in implementing just enough of urllib's Request API
        # for cookiejar.add_cookie_header() to populate a Cookie header.
        class _PseudoRequest(object):
            def __init__(self, url):
                self.unverifiable = False

            def add_unredirected_header(self, k, v):

            def get_full_url(self):

            def is_unverifiable(self):
                return self.unverifiable

            def has_header(self, h):
                return h in self.headers

        pr = _PseudoRequest(info_dict['url'])
        self.cookiejar.add_cookie_header(pr)
        return pr.headers.get('Cookie')
    def process_video_result(self, info_dict, download=True):
        """Normalize a single video result, pick formats and trigger download."""
        assert info_dict.get('_type', 'video') == 'video'

        # Extractor results must carry at least an id and a title.
        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        thumbnails = info_dict.get('thumbnails')
            # Sorted ascending, so the last entry is the largest thumbnail.
            thumbnails.sort(key=lambda t: (
                t.get('width'), t.get('height'), t.get('url')))
                if 'width' in t and 'height' in t:
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])

        if thumbnails and 'thumbnail' not in info_dict:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around negative timestamps in Windows
            # (see http://bugs.python.org/issue1646728)
            if info_dict['timestamp'] < 0 and os.name == 'nt':
                info_dict['timestamp'] = 0
            upload_date = datetime.datetime.utcfromtimestamp(
                info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

        # These extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
                self.process_info(info_dict)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
            formats = info_dict['formats']

            raise ExtractorError('No video formats found!')

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()
            # Add HTTP headers, so that external programs can use them from the
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)

        # Drop formats above the user's quality ceiling ('format_limit').
        format_limit = self.params.get('format_limit', None)
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats', None):
            self.list_formats(info_dict)

        req_format = self.params.get('format')
        if req_format is None:
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
            # Comma-separated groups; each group is a '/'-separated fallback
            # chain, e.g. '34/5/best'.
            for rfstr in req_format.split(','):
                # We can accept formats requested in the format: 34/5/best, we pick
                # the first that is available, starting from left
                req_formats = rfstr.split('/')
                for rf in req_formats:
                    if re.match(r'.+?\+.+?', rf) is not None:
                        # Two formats have been requested like '137+139'
                        format_1, format_2 = rf.split('+')
                        formats_info = (self.select_format(format_1, formats),
                                        self.select_format(format_2, formats))
                        if all(formats_info):
                            # The first format must contain the video and the
                            if formats_info[0].get('vcodec') == 'none':
                                self.report_error('The first format must '
                                                  'contain the video, try using '
                                                  '"-f %s+%s"' % (format_2, format_1))
                                formats_info[0]['ext']
                                if self.params.get('merge_output_format') is None
                                else self.params['merge_output_format'])
                                'requested_formats': formats_info,
                                # Video attributes come from the first format,
                                # audio attributes from the second.
                                'ext': formats_info[0]['ext'],
                                'width': formats_info[0].get('width'),
                                'height': formats_info[0].get('height'),
                                'resolution': formats_info[0].get('resolution'),
                                'fps': formats_info[0].get('fps'),
                                'vcodec': formats_info[0].get('vcodec'),
                                'vbr': formats_info[0].get('vbr'),
                                'stretched_ratio': formats_info[0].get('stretched_ratio'),
                                'acodec': formats_info[1].get('acodec'),
                                'abr': formats_info[1].get('abr'),
                            selected_format = None
                        selected_format = self.select_format(rf, formats)
                    if selected_format is not None:
                        formats_to_download.append(selected_format)

        if not formats_to_download:
            raise ExtractorError('requested format not available',

            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
1064 def process_info(self, info_dict):
1065 """Process a single resolved IE result."""
# NOTE(review): this is a numbered listing with gaps (1066, 1068, 1073, ...);
# several structural lines of the original (try:/else:/return) are not
# visible here, so the control flow below is incomplete as shown.
1067 assert info_dict.get('_type', 'video') == 'video'
# Enforce --max-downloads before doing any work on this entry.
1069 max_downloads = self.params.get('max_downloads')
1070 if max_downloads is not None:
1071 if self._num_downloads >= int(max_downloads):
1072 raise MaxDownloadsReached()
# Keep the untruncated title in 'fulltitle'; cap 'title' at 200 chars.
1074 info_dict['fulltitle'] = info_dict['title']
1075 if len(info_dict['title']) > 200:
1076 info_dict['title'] = info_dict['title'][:197] + '...'
1078 # Keep for backwards compatibility
1079 info_dict['stitle'] = info_dict['title']
1081 if 'format' not in info_dict:
1082 info_dict['format'] = info_dict['ext']
# _match_entry returns a human-readable skip reason, or None to proceed.
1084 reason = self._match_entry(info_dict)
1085 if reason is not None:
1086 self.to_screen('[download] ' + reason)
1089 self._num_downloads += 1
1091 filename = self.prepare_filename(info_dict)
# --force* options: print the requested fields to stdout.
1094 if self.params.get('forcetitle', False):
1095 self.to_stdout(info_dict['fulltitle'])
1096 if self.params.get('forceid', False):
1097 self.to_stdout(info_dict['id'])
1098 if self.params.get('forceurl', False):
1099 if info_dict.get('requested_formats') is not None:
1100 for f in info_dict['requested_formats']:
1101 self.to_stdout(f['url'] + f.get('play_path', ''))
1103 # For RTMP URLs, also include the playpath
1104 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1105 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1106 self.to_stdout(info_dict['thumbnail'])
1107 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1108 self.to_stdout(info_dict['description'])
1109 if self.params.get('forcefilename', False) and filename is not None:
1110 self.to_stdout(filename)
1111 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1112 self.to_stdout(formatSeconds(info_dict['duration']))
1113 if self.params.get('forceformat', False):
1114 self.to_stdout(info_dict['format'])
1115 if self.params.get('forcejson', False):
1116 info_dict['_filename'] = filename
1117 self.to_stdout(json.dumps(info_dict))
1118 if self.params.get('dump_single_json', False):
1119 info_dict['_filename'] = filename
1121 # Do nothing else if in simulate mode
1122 if self.params.get('simulate', False):
1125 if filename is None:
# Create the target directory if needed (the try:/makedirs lines are
# elided here; only the error path is visible).
1129 dn = os.path.dirname(encodeFilename(filename))
1130 if dn and not os.path.exists(dn):
1132 except (OSError, IOError) as err:
1133 self.report_error('unable to create directory ' + compat_str(err))
# Optional side files (description, annotations, subtitles, info JSON,
# thumbnail), each honouring --no-overwrites via 'nooverwrites'.
1136 if self.params.get('writedescription', False):
1137 descfn = filename + '.description'
1138 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1139 self.to_screen('[info] Video description is already present')
1140 elif info_dict.get('description') is None:
1141 self.report_warning('There\'s no description to write.')
1144 self.to_screen('[info] Writing video description to: ' + descfn)
1145 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1146 descfile.write(info_dict['description'])
1147 except (OSError, IOError):
1148 self.report_error('Cannot write description file ' + descfn)
1151 if self.params.get('writeannotations', False):
1152 annofn = filename + '.annotations.xml'
1153 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1154 self.to_screen('[info] Video annotations are already present')
1157 self.to_screen('[info] Writing video annotations to: ' + annofn)
1158 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1159 annofile.write(info_dict['annotations'])
# KeyError/TypeError here means the 'annotations' field is absent/None.
1160 except (KeyError, TypeError):
1161 self.report_warning('There are no annotations to write.')
1162 except (OSError, IOError):
1163 self.report_error('Cannot write annotations file: ' + annofn)
1166 subtitles_are_requested = any([self.params.get('writesubtitles', False),
1167 self.params.get('writeautomaticsub')])
1169 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1170 # subtitles download errors are already managed as troubles in relevant IE
1171 # that way it will silently go on when used with unsupporting IE
1172 subtitles = info_dict['subtitles']
1173 sub_format = self.params.get('subtitlesformat', 'srt')
1174 for sub_lang in subtitles.keys():
1175 sub = subtitles[sub_lang]
1179 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1180 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
# NOTE(review): 'already_present' (underscore) looks like a typo in this
# user-facing message; left untouched since it is a runtime string.
1181 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1183 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1184 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1186 except (OSError, IOError):
1187 self.report_error('Cannot write subtitles file ' + sub_filename)
1190 if self.params.get('writeinfojson', False):
1191 infofn = os.path.splitext(filename)[0] + '.info.json'
1192 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1193 self.to_screen('[info] Video description metadata is already present')
1195 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1197 write_json_file(info_dict, infofn)
1198 except (OSError, IOError):
1199 self.report_error('Cannot write metadata to JSON file ' + infofn)
1202 if self.params.get('writethumbnail', False):
1203 if info_dict.get('thumbnail') is not None:
1204 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
1205 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
1206 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1207 self.to_screen('[%s] %s: Thumbnail is already present' %
1208 (info_dict['extractor'], info_dict['id']))
1210 self.to_screen('[%s] %s: Downloading thumbnail ...' %
1211 (info_dict['extractor'], info_dict['id']))
1213 uf = self.urlopen(info_dict['thumbnail'])
1214 with open(thumb_filename, 'wb') as thumbf:
1215 shutil.copyfileobj(uf, thumbf)
1216 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1217 (info_dict['extractor'], info_dict['id'], thumb_filename))
1218 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1219 self.report_warning('Unable to download thumbnail "%s": %s' %
1220 (info_dict['thumbnail'], compat_str(err)))
# Actual media download, delegated to a FileDownloader chosen per format
# by get_suitable_downloader (the enclosing 'dl' helper def is elided).
1222 if not self.params.get('skip_download', False):
1225 fd = get_suitable_downloader(info, self.params)(self, self.params)
1226 for ph in self._progress_hooks:
1227 fd.add_progress_hook(ph)
1228 if self.params.get('verbose'):
1229 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1230 return fd.download(name, info)
1231 if info_dict.get('requested_formats') is not None:
# Multiple formats requested ('137+139'): download each part under an
# 'f<format_id>' suffix, then merge them with ffmpeg/avconv.
1234 merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1235 if not merger._executable:
1237 self.report_warning('You have requested multiple '
1238 'formats but ffmpeg or avconv are not installed.'
1239 ' The formats won\'t be merged')
1241 postprocessors = [merger]
1242 for f in info_dict['requested_formats']:
1243 new_info = dict(info_dict)
1245 fname = self.prepare_filename(new_info)
1246 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1247 downloaded.append(fname)
1248 partial_success = dl(fname, new_info)
1249 success = success and partial_success
1250 info_dict['__postprocessors'] = postprocessors
1251 info_dict['__files_to_merge'] = downloaded
1253 # Just a single file
1254 success = dl(filename, info_dict)
1255 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1256 self.report_error('unable to download video data: %s' % str(err))
1258 except (OSError, IOError) as err:
1259 raise UnavailableVideoError(err)
1260 except (ContentTooShortError, ) as err:
1261 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
# Post-download fixups, controlled by --fixup (default 'detect_or_warn'):
# queue an ffmpeg-based fixer when available, otherwise warn.
1266 fixup_policy = self.params.get('fixup')
1267 if fixup_policy is None:
1268 fixup_policy = 'detect_or_warn'
1270 stretched_ratio = info_dict.get('stretched_ratio')
1271 if stretched_ratio is not None and stretched_ratio != 1:
1272 if fixup_policy == 'warn':
1273 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1274 info_dict['id'], stretched_ratio))
1275 elif fixup_policy == 'detect_or_warn':
1276 stretched_pp = FFmpegFixupStretchedPP(self)
1277 if stretched_pp.available:
1278 info_dict.setdefault('__postprocessors', [])
1279 info_dict['__postprocessors'].append(stretched_pp)
1281 self.report_warning(
1282 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1283 info_dict['id'], stretched_ratio))
1285 assert fixup_policy in ('ignore', 'never')
1287 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1288 if fixup_policy == 'warn':
1289 self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1291 elif fixup_policy == 'detect_or_warn':
1292 fixup_pp = FFmpegFixupM4aPP(self)
1293 if fixup_pp.available:
1294 info_dict.setdefault('__postprocessors', [])
1295 info_dict['__postprocessors'].append(fixup_pp)
1297 self.report_warning(
1298 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1301 assert fixup_policy in ('ignore', 'never')
# Run postprocessors, then record the video in --download-archive.
1304 self.post_process(filename, info_dict)
1305 except (PostProcessingError) as err:
1306 self.report_error('postprocessing: %s' % str(err))
1308 self.record_download_archive(info_dict)
1310 def download(self, url_list):
1311 """Download a given list of URLs."""
1312 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
# Refuse to write several videos to one fixed output file; the middle of
# the condition (line 1314) is elided in this listing.
1313 if (len(url_list) > 1 and
1315 and self.params.get('max_downloads') != 1):
1316 raise SameFileError(outtmpl)
# extract_info both resolves and (by default) downloads each URL; errors
# are reported per-URL so the loop continues (try: line elided).
1318 for url in url_list:
1320 # It also downloads the videos
1321 res = self.extract_info(url)
1322 except UnavailableVideoError:
1323 self.report_error('unable to download video')
1324 except MaxDownloadsReached:
1325 self.to_screen('[info] Maximum number of downloaded files reached.')
# With --dump-single-json, emit the aggregated result as one JSON document.
1328 if self.params.get('dump_single_json', False):
1329 self.to_stdout(json.dumps(res))
1331 return self._download_retcode
1333 def download_with_info_file(self, info_filename):
# Re-run processing/download from a previously written --write-info-json
# file (the json.load / try: lines 1335-1336 are elided in this listing);
# on DownloadError, retry from the recorded webpage_url instead.
1334 with io.open(info_filename, 'r', encoding='utf-8') as f:
1337 self.process_ie_result(info, download=True)
1338 except DownloadError:
1339 webpage_url = info.get('webpage_url')
1340 if webpage_url is not None:
1341 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1342 return self.download([webpage_url])
1345 return self._download_retcode
1347 def post_process(self, filename, ie_info):
1348 """Run all the postprocessors on the given file."""
# Work on a copy so the caller's info dict is not mutated by PPs.
1349 info = dict(ie_info)
1350 info['filepath'] = filename
# Per-video postprocessors ('__postprocessors') run before the global
# self._pps chain (pps_chain initialisation line 1351 is elided).
1352 if ie_info.get('__postprocessors') is not None:
1353 pps_chain.extend(ie_info['__postprocessors'])
1354 pps_chain.extend(self._pps)
1355 for pp in pps_chain:
1357 old_filename = info['filepath']
# pp.run() returns (keep_video_wish, updated_info); an explicit wish
# overrides, otherwise the first PP to express one decides.
1359 keep_video_wish, info = pp.run(info)
1360 if keep_video_wish is not None:
1362 keep_video = keep_video_wish
1363 elif keep_video is None:
1364 # No clear decision yet, let IE decide
1365 keep_video = keep_video_wish
1366 except PostProcessingError as e:
1367 self.report_error(e.msg)
# Delete the source file unless -k/--keepvideo or a PP asked to keep it.
1368 if keep_video is False and not self.params.get('keepvideo', False):
1370 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1371 os.remove(encodeFilename(old_filename))
1372 except (IOError, OSError):
1373 self.report_warning('Unable to remove downloaded video file')
1375 def _make_archive_id(self, info_dict):
1376 # Future-proof against any change in case
1377 # and backwards compatibility with prior versions
1378 extractor = info_dict.get('extractor_key')
1379 if extractor is None:
1380 if 'id' in info_dict:
1381 extractor = info_dict.get('ie_key') # key in a playlist
1382 if extractor is None:
1383 return None # Incomplete video information
1384 return extractor.lower() + ' ' + info_dict['id']
1386 def in_download_archive(self, info_dict):
# Return whether this video's archive id is already recorded in the
# --download-archive file (the early-exit guards at lines 1388-1390 and
# the try:/return True lines are elided in this listing).
1387 fn = self.params.get('download_archive')
1391 vid_id = self._make_archive_id(info_dict)
1393 return False  # Incomplete video information
# locked_file serialises access with concurrent writers of the archive.
1396 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1397 for line in archive_file:
1398 if line.strip() == vid_id:
# A missing archive file (ENOENT) simply means "not downloaded yet";
# any other IOError is re-raised (raise line elided).
1400 except IOError as ioe:
1401 if ioe.errno != errno.ENOENT:
1405 def record_download_archive(self, info_dict):
# Append this video's '<extractor> <id>' token to the --download-archive
# file (the guard asserts at lines 1407-1410 are elided in this listing).
1406 fn = self.params.get('download_archive')
1409 vid_id = self._make_archive_id(info_dict)
# locked_file serialises concurrent appenders to the archive file.
1411 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1412 archive_file.write(vid_id + '\n')
1415 def format_resolution(format, default='unknown'):
# Human-readable resolution for a format dict: an explicit 'resolution'
# wins, then 'WxH', then 'Hp' or '?xW' from whichever dimension is known.
# (The 'vcodec == none' return and the final else/return lines are elided
# in this listing; presumably a staticmethod — decorator line not visible.)
1416 if format.get('vcodec') == 'none':
1418 if format.get('resolution') is not None:
1419 return format['resolution']
1420 if format.get('height') is not None:
1421 if format.get('width') is not None:
1422 res = '%sx%s' % (format['width'], format['height'])
1424 res = '%sp' % format['height']
1425 elif format.get('width') is not None:
1426 res = '?x%d' % format['width']
1431 def _format_note(self, fdict):
# Build the free-text 'note' column for --list-formats from whatever
# metadata the format dict carries (codec names, bitrates, fps, filesize).
# The 'res' initialiser and several branch lines are elided in this listing.
1433 if fdict.get('ext') in ['f4f', 'f4m']:
1434 res += '(unsupported) '
1435 if fdict.get('format_note') is not None:
1436 res += fdict['format_note'] + ' '
1437 if fdict.get('tbr') is not None:
1438 res += '%4dk ' % fdict['tbr']
1439 if fdict.get('container') is not None:
1442 res += '%s container' % fdict['container']
1443 if (fdict.get('vcodec') is not None and
1444 fdict.get('vcodec') != 'none'):
1447 res += fdict['vcodec']
1448 if fdict.get('vbr') is not None:
1450 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1452 if fdict.get('vbr') is not None:
1453 res += '%4dk' % fdict['vbr']
1454 if fdict.get('fps') is not None:
1455 res += ', %sfps' % fdict['fps']
1456 if fdict.get('acodec') is not None:
1459 if fdict['acodec'] == 'none':
1462 res += '%-5s' % fdict['acodec']
1463 elif fdict.get('abr') is not None:
1467 if fdict.get('abr') is not None:
1468 res += '@%3dk' % fdict['abr']
1469 if fdict.get('asr') is not None:
1470 res += ' (%5dHz)' % fdict['asr']
1471 if fdict.get('filesize') is not None:
1474 res += format_bytes(fdict['filesize'])
# '~' marks an approximate size when only 'filesize_approx' is known.
1475 elif fdict.get('filesize_approx') is not None:
1478 res += '~' + format_bytes(fdict['filesize_approx'])
1481 def list_formats(self, info_dict):
# Print a table of the formats available for this video (--list-formats).
# 'line' renders one row: format code, extension, resolution, note.
1482 def line(format, idlen=20):
1483 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1484 format['format_id'],
1486 self.format_resolution(format),
1487 self._format_note(format),
# Fall back to a single pseudo-format when no 'formats' list is present.
1490 formats = info_dict.get('formats', [info_dict])
# Column width adapts to the longest format id (header included).
1491 idlen = max(len('format code'),
1492 max(len(f['format_id']) for f in formats))
# Formats with preference < -1000 are hidden from the listing.
1494 line(f, idlen) for f in formats
1495 if f.get('preference') is None or f['preference'] >= -1000]
# Formats are listed worst-first; tag the two extremes.
1496 if len(formats) > 1:
1497 formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1498 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1500 header_line = line({
1501 'format_id': 'format code', 'ext': 'extension',
1502 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1503 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1504 (info_dict['id'], header_line, '\n'.join(formats_s)))
1506 def urlopen(self, req):
1507 """ Start an HTTP download """
# Accepts either a URL string or a Request object; both paths are
# escaped before being handed to the shared opener.
1509 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1510 # always respected by websites, some tend to give out URLs with non percent-encoded
1511 # non-ASCII characters (see telemb.py, ard.py [#3412])
1512 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1513 # To work around aforementioned issue we will replace request's original URL with
1514 # percent-encoded one
1515 req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1516 url = req if req_is_string else req.get_full_url()
1517 url_escaped = escape_url(url)
1519 # Substitute URL if any change after escaping
# (the string branch at lines 1521-1523 is elided in this listing;
# the visible branch rebuilds a Request object around the escaped URL)
1520 if url != url_escaped:
1524 req = compat_urllib_request.Request(
1525 url_escaped, data=req.data, headers=req.headers,
1526 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1528 return self._opener.open(req, timeout=self._socket_timeout)
1530 def print_debug_header(self):
# Emit the '[debug] ...' banner shown with --verbose: encodings, version,
# git HEAD, Python/platform, exe versions, proxy map, and optionally
# (--call-home) the public IP and an update check. Several try:/return
# lines are elided in this listing.
1531 if not self.params.get('verbose'):
1534 if type('') is not compat_str:
1535 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1536 self.report_warning(
1537 'Your Python is broken! Update to a newer and supported version')
1539 stdout_encoding = getattr(
1540 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1542 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1543 locale.getpreferredencoding(),
1544 sys.getfilesystemencoding(),
1546 self.get_encoding()))
1547 write_string(encoding_str, encoding=None)
1549 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
# Best-effort git revision of a source checkout (failures presumably
# swallowed — the surrounding try/except lines are elided).
1551 sp = subprocess.Popen(
1552 ['git', 'rev-parse', '--short', 'HEAD'],
1553 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1554 cwd=os.path.dirname(os.path.abspath(__file__)))
1555 out, err = sp.communicate()
1556 out = out.decode().strip()
1557 if re.match('[0-9a-f]+', out):
1558 self._write_string('[debug] Git HEAD: ' + out + '\n')
1564 self._write_string('[debug] Python version %s - %s\n' % (
1565 platform.python_version(), platform_name()))
1567 exe_versions = FFmpegPostProcessor.get_versions()
1568 exe_versions['rtmpdump'] = rtmpdump_version()
1569 exe_str = ', '.join(
1571 for exe, v in sorted(exe_versions.items())
1576 self._write_string('[debug] exe versions: %s\n' % exe_str)
# Collect proxies from every opener handler that exposes a 'proxies' map.
1579 for handler in self._opener.handlers:
1580 if hasattr(handler, 'proxies'):
1581 proxy_map.update(handler.proxies)
1582 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
# --call-home: report public IP and compare against the latest release.
1584 if self.params.get('call_home', False):
1585 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1586 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1587 latest_version = self.urlopen(
1588 'https://yt-dl.org/latest/version').read().decode('utf-8')
1589 if version_tuple(latest_version) > version_tuple(__version__):
1590 self.report_warning(
1591 'You are using an outdated version (newest version: %s)! '
1592 'See https://yt-dl.org/update if you need help updating.' %
1595 def _setup_opener(self):
# Build the shared urllib opener: socket timeout, cookie jar (optionally
# persisted via --cookies), proxy handling and the custom HTTPS/YDL
# handlers. Several else: branches are elided in this listing.
1596 timeout_val = self.params.get('socket_timeout')
# Default socket timeout is 600 seconds (10 minutes).
1597 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1599 opts_cookiefile = self.params.get('cookiefile')
1600 opts_proxy = self.params.get('proxy')
# No --cookies file: in-memory jar; otherwise a Mozilla-format jar,
# loaded from disk when the file is readable.
1602 if opts_cookiefile is None:
1603 self.cookiejar = compat_cookiejar.CookieJar()
1605 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1607 if os.access(opts_cookiefile, os.R_OK):
1608 self.cookiejar.load()
1610 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
# --proxy '' disables proxying; a non-empty value is used for both
# http and https; unset falls back to the environment proxies.
1612 if opts_proxy is not None:
1613 if opts_proxy == '':
1616 proxies = {'http': opts_proxy, 'https': opts_proxy}
1618 proxies = compat_urllib_request.getproxies()
1619 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1620 if 'http' in proxies and 'https' not in proxies:
1621 proxies['https'] = proxies['http']
1622 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1624 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1625 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1626 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1627 opener = compat_urllib_request.build_opener(
1628 https_handler, proxy_handler, cookie_processor, ydlh)
1629 # Delete the default user-agent header, which would otherwise apply in
1630 # cases where our custom HTTP handler doesn't come into play
1631 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1632 opener.addheaders = []
1633 self._opener = opener
1635 def encode(self, s):
# Encode text to the configured output encoding; bytes pass through
# unchanged. (The try:/raise lines around the encode call are elided
# in this listing.)
1636 if isinstance(s, bytes):
1637 return s  # Already encoded
1640 return s.encode(self.get_encoding())
# Enrich the UnicodeEncodeError with a hint about --encoding before
# it propagates.
1641 except UnicodeEncodeError as err:
1642 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1645 def get_encoding(self):
1646 encoding = self.params.get('encoding')
1647 if encoding is None:
1648 encoding = preferredencoding()