2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
34 compat_urllib_request,
63 UnavailableVideoError,
73 from .cache import Cache
74 from .extractor import get_info_extractor, gen_extractors
75 from .downloader import get_suitable_downloader
76 from .downloader.rtmp import rtmpdump_version
77 from .postprocessor import (
79 FFmpegFixupStretchedPP,
84 from .version import __version__
87 class YoutubeDL(object):
90 YoutubeDL objects are the ones responsible of downloading the
91 actual video file and writing it to disk if the user has requested
92 it, among some other tasks. In most cases there should be one per
93 program. Given a video URL, the downloader doesn't know how to
94 extract all the needed information (that is the InfoExtractors' task),
95 so it has to pass the URL to one of them.
97 For this, YoutubeDL objects have a method that allows
98 InfoExtractors to be registered in a given order. When it is passed
99 a URL, the YoutubeDL object handles it to the first InfoExtractor it
100 finds that reports being able to handle it. The InfoExtractor extracts
101 all the information about the video or videos the URL refers to, and
102 YoutubeDL process the extracted information, possibly using a File
103 Downloader to download the video.
105 YoutubeDL objects accept a lot of parameters. In order not to saturate
106 the object constructor with arguments, it receives a dictionary of
107 options instead. These options are available through the params
108 attribute for the InfoExtractors to use. The YoutubeDL also
109 registers itself as the downloader in charge for the InfoExtractors
110 that are added to it, so this is a "mutual registration".
114 username: Username for authentication purposes.
115 password: Password for authentication purposes.
116 videopassword: Password for accessing a video.
117 usenetrc: Use netrc for authentication instead.
118 verbose: Print additional info to stdout.
119 quiet: Do not print messages to stdout.
120 no_warnings: Do not print out anything for warnings.
121 forceurl: Force printing final URL.
122 forcetitle: Force printing title.
123 forceid: Force printing ID.
124 forcethumbnail: Force printing thumbnail URL.
125 forcedescription: Force printing description.
126 forcefilename: Force printing final filename.
127 forceduration: Force printing duration.
128 forcejson: Force printing info_dict as JSON.
129 dump_single_json: Force printing the info_dict of the whole playlist
130 (or video) as a single JSON line.
131 simulate: Do not download the video files.
132 format: Video format code. See options.py for more information.
133 format_limit: Highest quality format to try.
134 outtmpl: Template for output names.
135 restrictfilenames: Do not allow "&" and spaces in file names
136 ignoreerrors: Do not stop on download errors.
137 nooverwrites: Prevent overwriting files.
138 playliststart: Playlist item to start at.
139 playlistend: Playlist item to end at.
140 playlistreverse: Download playlist items in reverse order.
141 matchtitle: Download only matching titles.
142 rejecttitle: Reject downloads for matching titles.
143 logger: Log messages to a logging.Logger instance.
144 logtostderr: Log messages to stderr instead of stdout.
145 writedescription: Write the video description to a .description file
146 writeinfojson: Write the video description to a .info.json file
147 writeannotations: Write the video annotations to a .annotations.xml file
148 writethumbnail: Write the thumbnail image to a file
149 write_all_thumbnails: Write all thumbnail formats to files
150 writesubtitles: Write the video subtitles to a file
151 writeautomaticsub: Write the automatic subtitles to a file
152 allsubtitles: Downloads all the subtitles of the video
153 (requires writesubtitles or writeautomaticsub)
154 listsubtitles: Lists all available subtitles for the video
155 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
156 subtitleslangs: List of languages of the subtitles to download
157 keepvideo: Keep the video file after post-processing
158 daterange: A DateRange object, download only if the upload_date is in the range.
159 skip_download: Skip the actual download of the video file
160 cachedir: Location of the cache files in the filesystem.
161 False to disable filesystem cache.
162 noplaylist: Download single video instead of a playlist if in doubt.
163 age_limit: An integer representing the user's age in years.
164 Unsuitable videos for the given age are skipped.
165 min_views: An integer representing the minimum view count the video
166 must have in order to not be skipped.
167 Videos without view count information are always
168 downloaded. None for no limit.
169 max_views: An integer representing the maximum view count.
170 Videos that are more popular than that are not
172 Videos without view count information are always
173 downloaded. None for no limit.
174 download_archive: File name of a file where all downloads are recorded.
175 Videos already present in the file are not downloaded
177 cookiefile: File name where cookies should be read from and dumped to.
178 nocheckcertificate:Do not verify SSL certificates
179 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
180 At the moment, this is only supported by YouTube.
181 proxy: URL of the proxy server to use
182 socket_timeout: Time to wait for unresponsive hosts, in seconds
183 bidi_workaround: Work around buggy terminals without bidirectional text
184 support, using fribidi
185 debug_printtraffic:Print out sent and received HTTP traffic
186 include_ads: Download ads as well
187 default_search: Prepend this string if an input url is not valid.
188 'auto' for elaborate guessing
189 encoding: Use this encoding instead of the system-specified.
190 extract_flat: Do not resolve URLs, return the immediate result.
191 Pass in 'in_playlist' to only show this behavior for
193 postprocessors: A list of dictionaries, each with an entry
194 * key: The name of the postprocessor. See
195 youtube_dl/postprocessor/__init__.py for a list.
196 as well as any further keyword arguments for the
198 progress_hooks: A list of functions that get called on download
199 progress, with a dictionary with the entries
200 * filename: The final filename
201 * status: One of "downloading" and "finished"
203 The dict may also have some of the following entries:
205 * downloaded_bytes: Bytes on disk
206 * total_bytes: Size of the whole file, None if unknown
207 * tmpfilename: The filename we're currently writing to
208 * eta: The estimated time in seconds, None if unknown
209 * speed: The download speed in bytes/second, None if
212 Progress hooks are guaranteed to be called at least once
213 (with status "finished") if the download is successful.
214 merge_output_format: Extension to use when merging formats.
215 fixup: Automatically correct known faults of the file.
217 - "never": do nothing
218 - "warn": only emit a warning
219 - "detect_or_warn": check whether we can do anything
220 about it, warn otherwise (default)
221 source_address: (Experimental) Client-side IP address to bind to.
222 call_home: Boolean, true iff we are allowed to contact the
223 youtube-dl servers for debugging.
224 sleep_interval: Number of seconds to sleep before each download.
225 external_downloader: Executable of the external downloader to call.
226 listformats: Print an overview of available video formats and exit.
227 list_thumbnails: Print a table of all thumbnails and exit.
230 The following parameters are not used by YoutubeDL itself, they are used by
232 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
233 noresizebuffer, retries, continuedl, noprogress, consoletitle
235 The following options are used by the post processors:
236 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
237 otherwise prefer avconv.
238 exec_cmd: Arbitrary command to run after downloading
244 _download_retcode = None
245 _num_downloads = None
248 def __init__(self, params=None, auto_init=True):
249 """Create a FileDownloader object with the given options."""
253 self._ies_instances = {}
255 self._progress_hooks = []
256 self._download_retcode = 0
257 self._num_downloads = 0
258 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
259 self._err_file = sys.stderr
261 self.cache = Cache(self)
263 if params.get('bidi_workaround', False):
266 master, slave = pty.openpty()
267 width = get_term_width()
271 width_args = ['-w', str(width)]
273 stdin=subprocess.PIPE,
275 stderr=self._err_file)
277 self._output_process = subprocess.Popen(
278 ['bidiv'] + width_args, **sp_kwargs
281 self._output_process = subprocess.Popen(
282 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
283 self._output_channel = os.fdopen(master, 'rb')
284 except OSError as ose:
286 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
290 if (sys.version_info >= (3,) and sys.platform != 'win32' and
291 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
292 and not params.get('restrictfilenames', False)):
293 # On Python 3, the Unicode filesystem API will throw errors (#1474)
295 'Assuming --restrict-filenames since file system encoding '
296 'cannot encode all characters. '
297 'Set the LC_ALL environment variable to fix this.')
298 self.params['restrictfilenames'] = True
300 if '%(stitle)s' in self.params.get('outtmpl', ''):
301 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
306 self.print_debug_header()
307 self.add_default_info_extractors()
309 for pp_def_raw in self.params.get('postprocessors', []):
310 pp_class = get_postprocessor(pp_def_raw['key'])
311 pp_def = dict(pp_def_raw)
313 pp = pp_class(self, **compat_kwargs(pp_def))
314 self.add_post_processor(pp)
316 for ph in self.params.get('progress_hooks', []):
317 self.add_progress_hook(ph)
319 def warn_if_short_id(self, argv):
320 # short YouTube ID starting with dash?
# Collect argv indices that look like bare 11-character YouTube IDs that
# begin with '-', which option parsers would misread as flags.
322 i for i, a in enumerate(argv)
323 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
# Suggest a corrected command line with '--' separating options from IDs.
327 [a for i, a in enumerate(argv) if i not in idxs] +
328 ['--'] + [argv[i] for i in idxs]
331 'Long argument string detected. '
332 'Use -- to separate parameters and URLs, like this:\n%s\n' %
333 args_to_str(correct_argv))
335 def add_info_extractor(self, ie):
336 """Add an InfoExtractor object to the end of the list."""
# Index the extractor by its key and hand it a back-reference to this
# downloader ("mutual registration" -- see the class docstring).
338 self._ies_instances[ie.ie_key()] = ie
339 ie.set_downloader(self)
341 def get_info_extractor(self, ie_key):
# Return the cached IE instance for ie_key, instantiating and registering
# a fresh one on first use.
343 Get an instance of an IE with name ie_key, it will try to get one from
344 the _ies list, if there's no instance it will create a new one and add
345 it to the extractor list.
347 ie = self._ies_instances.get(ie_key)
# (elided guard) the module-level get_info_extractor returns the IE class.
349 ie = get_info_extractor(ie_key)()
350 self.add_info_extractor(ie)
353 def add_default_info_extractors(self):
# Register every extractor produced by gen_extractors(), in order.
355 Add the InfoExtractors returned by gen_extractors to the end of the list
357 for ie in gen_extractors():
358 self.add_info_extractor(ie)
360 def add_post_processor(self, pp):
361 """Add a PostProcessor object to the end of the chain."""
# The PP gets a back-reference so it can report through this downloader.
363 pp.set_downloader(self)
365 def add_progress_hook(self, ph):
366 """Add the progress hook (currently only for the file downloader)"""
367 self._progress_hooks.append(ph)
369 def _bidi_workaround(self, message):
# Fast path: if the bidi subprocess was never set up, presumably the
# message is returned unchanged on the elided line -- TODO confirm.
370 if not hasattr(self, '_output_channel'):
373 assert hasattr(self, '_output_process')
374 assert isinstance(message, compat_str)
# Feed the message through the external bidi filter and read back exactly
# as many lines as were written.
375 line_count = message.count('\n') + 1
376 self._output_process.stdin.write((message + '\n').encode('utf-8'))
377 self._output_process.stdin.flush()
378 res = ''.join(self._output_channel.readline().decode('utf-8')
379 for _ in range(line_count))
# Strip the trailing newline that was appended above.
380 return res[:-len('\n')]
382 def to_screen(self, message, skip_eol=False):
383 """Print message to stdout if not in quiet mode."""
384 return self.to_stdout(message, skip_eol, check_quiet=True)
def _write_string(self, s, out=None):
    """Write *s* to *out*, honouring the user-selected output encoding."""
    enc = self.params.get('encoding')
    write_string(s, out=out, encoding=enc)
389 def to_stdout(self, message, skip_eol=False, check_quiet=False):
390 """Print message to stdout if not in quiet mode."""
391 if self.params.get('logger'):
392 self.params['logger'].debug(message)
393 elif not check_quiet or not self.params.get('quiet', False):
394 message = self._bidi_workaround(message)
395 terminator = ['\n', ''][skip_eol]
396 output = message + terminator
398 self._write_string(output, self._screen_file)
400 def to_stderr(self, message):
401 """Print message to stderr."""
402 assert isinstance(message, compat_str)
403 if self.params.get('logger'):
404 self.params['logger'].error(message)
# Otherwise (elided 'else:' branch) write directly to the error stream.
406 message = self._bidi_workaround(message)
407 output = message + '\n'
408 self._write_string(output, self._err_file)
410 def to_console_title(self, message):
# Set the terminal/console window title; no-op unless 'consoletitle' is on
# (the elided line after the guard presumably returns early).
411 if not self.params.get('consoletitle', False):
413 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
414 # c_wchar_p() might not be necessary if `message` is
415 # already of type unicode()
416 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
# On POSIX terminals use the xterm "set window title" escape sequence.
417 elif 'TERM' in os.environ:
418 self._write_string('\033]0;%s\007' % message, self._screen_file)
420 def save_console_title(self):
# No-op unless 'consoletitle' is enabled (elided early return after guard).
421 if not self.params.get('consoletitle', False):
423 if 'TERM' in os.environ:
424 # Save the title on stack
# xterm escape: push the current window title onto the terminal's stack.
425 self._write_string('\033[22;0t', self._screen_file)
427 def restore_console_title(self):
# Counterpart of save_console_title; no-op unless 'consoletitle' is on.
428 if not self.params.get('consoletitle', False):
430 if 'TERM' in os.environ:
431 # Restore the title from stack
# xterm escape: pop the previously saved window title.
432 self._write_string('\033[23;0t', self._screen_file)
# NOTE(review): this is the body of __enter__ -- its 'def' line (and the
# 'return self') are elided in this listing.
435 self.save_console_title()
438 def __exit__(self, *args):
439 self.restore_console_title()
441 if self.params.get('cookiefile') is not None:
442 self.cookiejar.save()
444 def trouble(self, message=None, tb=None):
445 """Determine action to take when a download problem appears.
447 Depending on if the downloader has been configured to ignore
448 download errors or not, this method may throw an exception or
449 not when errors are found, after printing the message.
451 tb, if given, is additional traceback information.
453 if message is not None:
454 self.to_stderr(message)
# In verbose mode, build a traceback string: prefer the wrapped extractor
# exception's exc_info when present, else the current exception / stack.
455 if self.params.get('verbose'):
457 if sys.exc_info()[0]: # if .trouble has been called from an except block
459 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
460 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
461 tb += compat_str(traceback.format_exc())
463 tb_data = traceback.format_list(traceback.extract_stack())
464 tb = ''.join(tb_data)
# Unless errors are ignored, re-raise as DownloadError, preserving the
# original exc_info when the caught exception carries one.
466 if not self.params.get('ignoreerrors', False):
467 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
468 exc_info = sys.exc_info()[1].exc_info
470 exc_info = sys.exc_info()
471 raise DownloadError(message, exc_info)
# Errors ignored: remember a non-zero process exit code instead.
472 self._download_retcode = 1
474 def report_warning(self, message):
476 Print the message to stderr, it will be prefixed with 'WARNING:'
477 If stderr is a tty file the 'WARNING:' will be colored
# A user-supplied logger takes precedence (elided 'return' follows).
479 if self.params.get('logger') is not None:
480 self.params['logger'].warning(message)
# 'no_warnings' suppresses the message entirely (elided 'return' follows).
482 if self.params.get('no_warnings'):
# ANSI yellow prefix on POSIX ttys only; plain text elsewhere.
484 if self._err_file.isatty() and os.name != 'nt':
485 _msg_header = '\033[0;33mWARNING:\033[0m'
487 _msg_header = 'WARNING:'
488 warning_message = '%s %s' % (_msg_header, message)
489 self.to_stderr(warning_message)
491 def report_error(self, message, tb=None):
493 Do the same as trouble, but prefixes the message with 'ERROR:', colored
494 in red if stderr is a tty file.
# ANSI red prefix on POSIX ttys only; plain text elsewhere.
496 if self._err_file.isatty() and os.name != 'nt':
497 _msg_header = '\033[0;31mERROR:\033[0m'
499 _msg_header = 'ERROR:'
500 error_message = '%s %s' % (_msg_header, message)
# Delegates raising / retcode handling to trouble().
501 self.trouble(error_message, tb)
503 def report_file_already_downloaded(self, file_name):
504 """Report file has already been fully downloaded."""
# (elided 'try:') print the specific name; fall back to a generic message
# when file_name cannot be encoded for the output stream.
506 self.to_screen('[download] %s has already been downloaded' % file_name)
507 except UnicodeEncodeError:
508 self.to_screen('[download] The file has already been downloaded')
510 def prepare_filename(self, info_dict):
511 """Generate the output filename."""
# Work on a copy of info_dict, augmented with template-only fields
# (epoch, autonumber, zero-padded playlist_index, resolution).
513 template_dict = dict(info_dict)
515 template_dict['epoch'] = int(time.time())
516 autonumber_size = self.params.get('autonumber_size')
517 if autonumber_size is None:
519 autonumber_templ = '%0' + str(autonumber_size) + 'd'
520 template_dict['autonumber'] = autonumber_templ % self._num_downloads
# Pad playlist_index to the width of the total entry count.
521 if template_dict.get('playlist_index') is not None:
522 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
523 if template_dict.get('resolution') is None:
524 if template_dict.get('width') and template_dict.get('height'):
525 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
526 elif template_dict.get('height'):
527 template_dict['resolution'] = '%sp' % template_dict['height']
528 elif template_dict.get('width'):
# NOTE(review): '?x%d' renders the width in the height slot ("?x<width>");
# '%dx?' looks intended here -- verify against upstream.
529 template_dict['resolution'] = '?x%d' % template_dict['width']
# Sanitize every value for filesystem use (restricted mode optional).
531 sanitize = lambda k, v: sanitize_filename(
533 restricted=self.params.get('restrictfilenames'),
535 template_dict = dict((k, sanitize(k, v))
536 for k, v in template_dict.items()
# Any key missing from the template dict renders as the literal 'NA'.
538 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
540 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
541 tmpl = compat_expanduser(outtmpl)
542 filename = tmpl % template_dict
544 except ValueError as err:
545 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
548 def _match_entry(self, info_dict):
549 """ Returns None iff the file should be downloaded """
# Each check below returns a human-readable skip reason string; falling
# through every check means "download it".
551 video_title = info_dict.get('title', info_dict.get('id', 'video'))
552 if 'title' in info_dict:
553 # This can happen when we're just evaluating the playlist
554 title = info_dict['title']
# Title allow/deny patterns are case-insensitive regexes.
555 matchtitle = self.params.get('matchtitle', False)
557 if not re.search(matchtitle, title, re.IGNORECASE):
558 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
559 rejecttitle = self.params.get('rejecttitle', False)
561 if re.search(rejecttitle, title, re.IGNORECASE):
562 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
# Upload-date window check (DateRange() with no args accepts everything).
563 date = info_dict.get('upload_date', None)
565 dateRange = self.params.get('daterange', DateRange())
566 if date not in dateRange:
567 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
# View-count bounds only apply when the extractor reported a count.
568 view_count = info_dict.get('view_count', None)
569 if view_count is not None:
570 min_views = self.params.get('min_views')
571 if min_views is not None and view_count < min_views:
572 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
573 max_views = self.params.get('max_views')
574 if max_views is not None and view_count > max_views:
575 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
576 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
577 return 'Skipping "%s" because it is age restricted' % title
578 if self.in_download_archive(info_dict):
579 return '%s has already been recorded in archive' % video_title
583 def add_extra_info(info_dict, extra_info):
584 '''Set the keys from extra_info in info dict if they are missing'''
585 for key, value in extra_info.items():
586 info_dict.setdefault(key, value)
# NOTE(review): extra_info={} is a mutable default argument; the code only
# reads it, but replacing it with a None sentinel would be safer upstream.
588 def extract_info(self, url, download=True, ie_key=None, extra_info={},
591 Returns a list with a dictionary for each video we find.
592 If 'download', also downloads the videos.
593 extra_info is a dict containing the extra values to add to each result
# With an explicit ie_key only that extractor is tried; otherwise (elided
# branch) presumably all registered IEs are iterated -- TODO confirm.
597 ies = [self.get_info_extractor(ie_key)]
602 if not ie.suitable(url):
606 self.report_warning('The program functionality for this site has been marked as broken, '
607 'and will probably not work.')
610 ie_result = ie.extract(url)
611 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
# Wrap bare-list results in the legacy 'compat_list' envelope.
613 if isinstance(ie_result, list):
614 # Backwards compatibility: old IE result format
616 '_type': 'compat_list',
617 'entries': ie_result,
619 self.add_default_extra_info(ie_result, ie, url)
621 return self.process_ie_result(ie_result, download, extra_info)
# Expected extractor failures are reported; MaxDownloadsReached propagates
# (elided 'raise'); other exceptions are only swallowed with ignoreerrors.
624 except ExtractorError as de: # An error we somewhat expected
625 self.report_error(compat_str(de), de.format_traceback())
627 except MaxDownloadsReached:
629 except Exception as e:
630 if self.params.get('ignoreerrors', False):
631 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
636 self.report_error('no suitable InfoExtractor for URL %s' % url)
638 def add_default_extra_info(self, ie_result, ie, url):
# Attach extractor provenance fields to a result without overwriting any
# keys the extractor already set (delegates to add_extra_info).
639 self.add_extra_info(ie_result, {
640 'extractor': ie.IE_NAME,
642 'webpage_url_basename': url_basename(url),
643 'extractor_key': ie.ie_key(),
# NOTE(review): listing truncated -- lines are elided throughout this method,
# so several statements/calls below appear without their openings or closers.
646 def process_ie_result(self, ie_result, download=True, extra_info={}):
648 Take the result of the ie(may be modified) and resolve all unresolved
649 references (URLs, playlist items).
651 It will also download the videos if 'download'.
652 Returns the resolved ie_result.
655 result_type = ie_result.get('_type', 'video')
# extract_flat mode: return url/url_transparent results unresolved.
657 if result_type in ('url', 'url_transparent'):
658 extract_flat = self.params.get('extract_flat', False)
659 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
660 extract_flat is True):
661 if self.params.get('forcejson', False):
662 self.to_stdout(json.dumps(ie_result))
665 if result_type == 'video':
666 self.add_extra_info(ie_result, extra_info)
667 return self.process_video_result(ie_result, download=download)
668 elif result_type == 'url':
669 # We have to add extra_info to the results because it may be
670 # contained in a playlist
671 return self.extract_info(ie_result['url'],
673 ie_key=ie_result.get('ie_key'),
674 extra_info=extra_info)
675 elif result_type == 'url_transparent':
676 # Use the information from the embedding page
677 info = self.extract_info(
678 ie_result['url'], ie_key=ie_result.get('ie_key'),
679 extra_info=extra_info, download=False, process=False)
# Non-None fields from the embedding result override the target's,
# except '_type' and 'url' which must come from the resolved info.
681 force_properties = dict(
682 (k, v) for k, v in ie_result.items() if v is not None)
683 for f in ('_type', 'url'):
684 if f in force_properties:
685 del force_properties[f]
686 new_result = info.copy()
687 new_result.update(force_properties)
689 assert new_result.get('_type') != 'url_transparent'
691 return self.process_ie_result(
692 new_result, download=download, extra_info=extra_info)
693 elif result_type == 'playlist' or result_type == 'multi_video':
694 # We process each entry in the playlist
695 playlist = ie_result.get('title', None) or ie_result.get('id', None)
696 self.to_screen('[download] Downloading playlist: %s' % playlist)
698 playlist_results = []
# playliststart is 1-based in params, converted to a 0-based slice start.
700 playliststart = self.params.get('playliststart', 1) - 1
701 playlistend = self.params.get('playlistend', None)
702 # For backwards compatibility, interpret -1 as whole list
703 if playlistend == -1:
# Entries may be a concrete list, a lazily-paged PagedList, or any
# other iterable (sliced with itertools.islice).
706 ie_entries = ie_result['entries']
707 if isinstance(ie_entries, list):
708 n_all_entries = len(ie_entries)
709 entries = ie_entries[playliststart:playlistend]
710 n_entries = len(entries)
712 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
713 (ie_result['extractor'], playlist, n_all_entries, n_entries))
714 elif isinstance(ie_entries, PagedList):
715 entries = ie_entries.getslice(
716 playliststart, playlistend)
717 n_entries = len(entries)
719 "[%s] playlist %s: Downloading %d videos" %
720 (ie_result['extractor'], playlist, n_entries))
722 entries = list(itertools.islice(
723 ie_entries, playliststart, playlistend))
724 n_entries = len(entries)
726 "[%s] playlist %s: Downloading %d videos" %
727 (ie_result['extractor'], playlist, n_entries))
729 if self.params.get('playlistreverse', False):
730 entries = entries[::-1]
# Process each entry, tagging it with playlist context; entries that
# fail _match_entry are skipped with a printed reason.
732 for i, entry in enumerate(entries, 1):
733 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
735 'n_entries': n_entries,
736 'playlist': playlist,
737 'playlist_id': ie_result.get('id'),
738 'playlist_title': ie_result.get('title'),
739 'playlist_index': i + playliststart,
740 'extractor': ie_result['extractor'],
741 'webpage_url': ie_result['webpage_url'],
742 'webpage_url_basename': url_basename(ie_result['webpage_url']),
743 'extractor_key': ie_result['extractor_key'],
746 reason = self._match_entry(entry)
747 if reason is not None:
748 self.to_screen('[download] ' + reason)
751 entry_result = self.process_ie_result(entry,
754 playlist_results.append(entry_result)
755 ie_result['entries'] = playlist_results
# Legacy result format from unconverted extractors: warn and adapt.
757 elif result_type == 'compat_list':
759 'Extractor %s returned a compat_list result. '
760 'It needs to be updated.' % ie_result.get('extractor'))
766 'extractor': ie_result['extractor'],
767 'webpage_url': ie_result['webpage_url'],
768 'webpage_url_basename': url_basename(ie_result['webpage_url']),
769 'extractor_key': ie_result['extractor_key'],
773 ie_result['entries'] = [
774 self.process_ie_result(_fixup(r), download, extra_info)
775 for r in ie_result['entries']
779 raise Exception('Invalid result type: %s' % result_type)
781 def _apply_format_filter(self, format_spec, available_formats):
782 " Returns a tuple of the remaining format_spec and filtered formats "
# Parses one trailing "[key op value]" filter (e.g. "[height<=480]");
# the OPERATORS mapping of op -> comparison function is elided from this
# listing (defined on the missing lines above the regex).
792 operator_rex = re.compile(r'''(?x)\s*\[
793 (?P<key>width|height|tbr|abr|vbr|filesize)
794 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
795 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
797 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
798 m = operator_rex.search(format_spec)
800 raise ValueError('Invalid format specification %r' % format_spec)
# Value is parsed as an int first; on failure, as a human filesize
# ('500K', '2MiB'), optionally retried with an implicit 'B' suffix.
803 comparison_value = int(m.group('value'))
805 comparison_value = parse_filesize(m.group('value'))
806 if comparison_value is None:
807 comparison_value = parse_filesize(m.group('value') + 'B')
808 if comparison_value is None:
810 'Invalid value %r in format specification %r' % (
811 m.group('value'), format_spec))
812 op = OPERATORS[m.group('op')]
# Formats missing the key pass only if the '?' (none-inclusive) flag is set.
815 actual_value = f.get(m.group('key'))
816 if actual_value is None:
817 return m.group('none_inclusive')
818 return op(actual_value, comparison_value)
819 new_formats = [f for f in available_formats if _filter(f)]
# Strip the consumed "[...]" suffix; an empty remainder means 'best'.
821 new_format_spec = format_spec[:-len(m.group(0))]
822 if not new_format_spec:
823 new_format_spec = 'best'
825 return (new_format_spec, new_formats)
827 def select_format(self, format_spec, available_formats):
# Peel off trailing "[...]" filters before applying the selector keyword.
828 while format_spec.endswith(']'):
829 format_spec, available_formats = self._apply_format_filter(
830 format_spec, available_formats)
831 if not available_formats:
# Formats are assumed ordered worst-to-best: 'best' is the last element.
834 if format_spec == 'best' or format_spec is None:
835 return available_formats[-1]
836 elif format_spec == 'worst':
837 return available_formats[0]
# Audio-only formats are those with vcodec == 'none'.
838 elif format_spec == 'bestaudio':
840 f for f in available_formats
841 if f.get('vcodec') == 'none']
843 return audio_formats[-1]
844 elif format_spec == 'worstaudio':
846 f for f in available_formats
847 if f.get('vcodec') == 'none']
849 return audio_formats[0]
# Video-only formats are those with acodec == 'none'.
850 elif format_spec == 'bestvideo':
852 f for f in available_formats
853 if f.get('acodec') == 'none']
855 return video_formats[-1]
856 elif format_spec == 'worstvideo':
858 f for f in available_formats
859 if f.get('acodec') == 'none']
861 return video_formats[0]
# Otherwise the spec is either a known extension or an exact format_id;
# the final return of the match is on lines elided from this listing.
863 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
864 if format_spec in extensions:
865 filter_f = lambda f: f['ext'] == format_spec
867 filter_f = lambda f: f['format_id'] == format_spec
868 matches = list(filter(filter_f, available_formats))
873 def _calc_headers(self, info_dict):
# Build the effective HTTP headers for one download: global std_headers,
# overlaid with the format's 'http_headers', plus a Cookie header computed
# from the cookiejar (guards and the final return are elided here).
874 res = std_headers.copy()
876 add_headers = info_dict.get('http_headers')
878 res.update(add_headers)
880 cookies = self._calc_cookies(info_dict)
882 res['Cookie'] = cookies
886 def _calc_cookies(self, info_dict):
# Minimal stand-in implementing just enough of the urllib Request
# interface for cookiejar.add_cookie_header() to operate on.
887 class _PseudoRequest(object):
888 def __init__(self, url):
891 self.unverifiable = False
893 def add_unredirected_header(self, k, v):
896 def get_full_url(self):
899 def is_unverifiable(self):
900 return self.unverifiable
902 def has_header(self, h):
903 return h in self.headers
# Let the jar populate headers on the pseudo-request, then read back the
# Cookie header it produced (None if no cookie matched).
905 pr = _PseudoRequest(info_dict['url'])
906 self.cookiejar.add_cookie_header(pr)
907 return pr.headers.get('Cookie')
# NOTE(review): listing truncated -- lines are elided throughout this method,
# so several statements below appear without their openings or closers.
909 def process_video_result(self, info_dict, download=True):
# Validate mandatory extractor fields before any processing.
910 assert info_dict.get('_type', 'video') == 'video'
912 if 'id' not in info_dict:
913 raise ExtractorError('Missing "id" field in extractor result')
914 if 'title' not in info_dict:
915 raise ExtractorError('Missing "title" field in extractor result')
917 if 'playlist' not in info_dict:
918 # It isn't part of a playlist
919 info_dict['playlist'] = None
920 info_dict['playlist_index'] = None
# Normalize thumbnails: wrap a single 'thumbnail' URL, sort worst-to-best,
# derive 'resolution' strings, and expose the best one as 'thumbnail'.
922 thumbnails = info_dict.get('thumbnails')
923 if thumbnails is None:
924 thumbnail = info_dict.get('thumbnail')
926 thumbnails = [{'url': thumbnail}]
928 thumbnails.sort(key=lambda t: (
929 t.get('preference'), t.get('width'), t.get('height'),
930 t.get('id'), t.get('url')))
932 if 'width' in t and 'height' in t:
933 t['resolution'] = '%dx%d' % (t['width'], t['height'])
935 if thumbnails and 'thumbnail' not in info_dict:
936 info_dict['thumbnail'] = thumbnails[-1]['url']
938 if 'display_id' not in info_dict and 'id' in info_dict:
939 info_dict['display_id'] = info_dict['id']
# Derive upload_date (YYYYMMDD) from a numeric timestamp when absent.
941 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
942 # Working around negative timestamps in Windows
943 # (see http://bugs.python.org/issue1646728)
944 if info_dict['timestamp'] < 0 and os.name == 'nt':
945 info_dict['timestamp'] = 0
946 upload_date = datetime.datetime.utcfromtimestamp(
947 info_dict['timestamp'])
948 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
950 # This extractors handle format selection themselves
951 if info_dict['extractor'] in ['Youku']:
953 self.process_info(info_dict)
956 # We now pick which formats have to be downloaded
957 if info_dict.get('formats') is None:
958 # There's only one format available
959 formats = [info_dict]
961 formats = info_dict['formats']
964 raise ExtractorError('No video formats found!')
966 # We check that all the formats have the format and format_id fields
967 for i, format in enumerate(formats):
968 if 'url' not in format:
969 raise ExtractorError('Missing "url" key in result (index %d)' % i)
# Backfill format_id / human-readable 'format' label / 'ext'.
971 if format.get('format_id') is None:
972 format['format_id'] = compat_str(i)
973 if format.get('format') is None:
974 format['format'] = '{id} - {res}{note}'.format(
975 id=format['format_id'],
976 res=self.format_resolution(format),
977 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
979 # Automatically determine file extension if missing
980 if 'ext' not in format:
981 format['ext'] = determine_ext(format['url']).lower()
982 # Add HTTP headers, so that external programs can use them from the
984 full_format_info = info_dict.copy()
985 full_format_info.update(format)
986 format['http_headers'] = self._calc_headers(full_format_info)
# 'format_limit' truncates the list after the named format (inclusive).
988 format_limit = self.params.get('format_limit', None)
990 formats = list(takewhile_inclusive(
991 lambda f: f['format_id'] != format_limit, formats
994 # TODO Central sorting goes here
996 if formats[0] is not info_dict:
997 # only set the 'formats' fields if the original info_dict list them
998 # otherwise we end up with a circular reference, the first (and unique)
999 # element in the 'formats' field in info_dict is info_dict itself,
1000 # wich can't be exported to json
1001 info_dict['formats'] = formats
1002 if self.params.get('listformats'):
1003 self.list_formats(info_dict)
1005 if self.params.get('list_thumbnails'):
1006 self.list_thumbnails(info_dict)
# Resolve the user's format request ('-f'): comma-separated groups, each
# a '/'-separated preference list, with 'a+b' meaning merge video+audio.
1009 req_format = self.params.get('format')
1010 if req_format is None:
1012 formats_to_download = []
1013 # The -1 is for supporting YoutubeIE
1014 if req_format in ('-1', 'all'):
1015 formats_to_download = formats
1017 for rfstr in req_format.split(','):
1018 # We can accept formats requested in the format: 34/5/best, we pick
1019 # the first that is available, starting from left
1020 req_formats = rfstr.split('/')
1021 for rf in req_formats:
1022 if re.match(r'.+?\+.+?', rf) is not None:
1023 # Two formats have been requested like '137+139'
1024 format_1, format_2 = rf.split('+')
1025 formats_info = (self.select_format(format_1, formats),
1026 self.select_format(format_2, formats))
1027 if all(formats_info):
1028 # The first format must contain the video and the
1030 if formats_info[0].get('vcodec') == 'none':
1031 self.report_error('The first format must '
1032 'contain the video, try using '
1033 '"-f %s+%s"' % (format_2, format_1))
# Merged output extension: video ext unless overridden by option.
1036 formats_info[0]['ext']
1037 if self.params.get('merge_output_format') is None
1038 else self.params['merge_output_format'])
1040 'requested_formats': formats_info,
1042 'ext': formats_info[0]['ext'],
1043 'width': formats_info[0].get('width'),
1044 'height': formats_info[0].get('height'),
1045 'resolution': formats_info[0].get('resolution'),
1046 'fps': formats_info[0].get('fps'),
1047 'vcodec': formats_info[0].get('vcodec'),
1048 'vbr': formats_info[0].get('vbr'),
1049 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1050 'acodec': formats_info[1].get('acodec'),
1051 'abr': formats_info[1].get('abr'),
1055 selected_format = None
1057 selected_format = self.select_format(rf, formats)
1058 if selected_format is not None:
1059 formats_to_download.append(selected_format)
1061 if not formats_to_download:
1062 raise ExtractorError('requested format not available',
# Download every selected format; each run gets its own merged info dict.
1066 if len(formats_to_download) > 1:
1067 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1068 for format in formats_to_download:
1069 new_info = dict(info_dict)
1070 new_info.update(format)
1071 self.process_info(new_info)
1072 # We update the info dict with the best quality format (backwards compatibility)
1073 info_dict.update(formats_to_download[-1])
1076 def process_info(self, info_dict):
1077 """Process a single resolved IE result."""
1079 assert info_dict.get('_type', 'video') == 'video'
1081 max_downloads = self.params.get('max_downloads')
1082 if max_downloads is not None:
1083 if self._num_downloads >= int(max_downloads):
1084 raise MaxDownloadsReached()
1086 info_dict['fulltitle'] = info_dict['title']
1087 if len(info_dict['title']) > 200:
1088 info_dict['title'] = info_dict['title'][:197] + '...'
1090 # Keep for backwards compatibility
1091 info_dict['stitle'] = info_dict['title']
1093 if 'format' not in info_dict:
1094 info_dict['format'] = info_dict['ext']
1096 reason = self._match_entry(info_dict)
1097 if reason is not None:
1098 self.to_screen('[download] ' + reason)
1101 self._num_downloads += 1
1103 filename = self.prepare_filename(info_dict)
1106 if self.params.get('forcetitle', False):
1107 self.to_stdout(info_dict['fulltitle'])
1108 if self.params.get('forceid', False):
1109 self.to_stdout(info_dict['id'])
1110 if self.params.get('forceurl', False):
1111 if info_dict.get('requested_formats') is not None:
1112 for f in info_dict['requested_formats']:
1113 self.to_stdout(f['url'] + f.get('play_path', ''))
1115 # For RTMP URLs, also include the playpath
1116 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1117 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1118 self.to_stdout(info_dict['thumbnail'])
1119 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1120 self.to_stdout(info_dict['description'])
1121 if self.params.get('forcefilename', False) and filename is not None:
1122 self.to_stdout(filename)
1123 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1124 self.to_stdout(formatSeconds(info_dict['duration']))
1125 if self.params.get('forceformat', False):
1126 self.to_stdout(info_dict['format'])
1127 if self.params.get('forcejson', False):
1128 info_dict['_filename'] = filename
1129 self.to_stdout(json.dumps(info_dict))
1130 if self.params.get('dump_single_json', False):
1131 info_dict['_filename'] = filename
1133 # Do nothing else if in simulate mode
1134 if self.params.get('simulate', False):
1137 if filename is None:
1141 dn = os.path.dirname(encodeFilename(filename))
1142 if dn and not os.path.exists(dn):
1144 except (OSError, IOError) as err:
1145 self.report_error('unable to create directory ' + compat_str(err))
1148 if self.params.get('writedescription', False):
1149 descfn = filename + '.description'
1150 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1151 self.to_screen('[info] Video description is already present')
1152 elif info_dict.get('description') is None:
1153 self.report_warning('There\'s no description to write.')
1156 self.to_screen('[info] Writing video description to: ' + descfn)
1157 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1158 descfile.write(info_dict['description'])
1159 except (OSError, IOError):
1160 self.report_error('Cannot write description file ' + descfn)
1163 if self.params.get('writeannotations', False):
1164 annofn = filename + '.annotations.xml'
1165 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1166 self.to_screen('[info] Video annotations are already present')
1169 self.to_screen('[info] Writing video annotations to: ' + annofn)
1170 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1171 annofile.write(info_dict['annotations'])
1172 except (KeyError, TypeError):
1173 self.report_warning('There are no annotations to write.')
1174 except (OSError, IOError):
1175 self.report_error('Cannot write annotations file: ' + annofn)
1178 subtitles_are_requested = any([self.params.get('writesubtitles', False),
1179 self.params.get('writeautomaticsub')])
1181 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1182 # subtitles download errors are already managed as troubles in relevant IE
1183 # that way it will silently go on when used with unsupporting IE
1184 subtitles = info_dict['subtitles']
1185 sub_format = self.params.get('subtitlesformat', 'srt')
1186 for sub_lang in subtitles.keys():
1187 sub = subtitles[sub_lang]
1191 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1192 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1193 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1195 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1196 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1198 except (OSError, IOError):
1199 self.report_error('Cannot write subtitles file ' + sub_filename)
1202 if self.params.get('writeinfojson', False):
1203 infofn = os.path.splitext(filename)[0] + '.info.json'
1204 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1205 self.to_screen('[info] Video description metadata is already present')
1207 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1209 write_json_file(info_dict, infofn)
1210 except (OSError, IOError):
1211 self.report_error('Cannot write metadata to JSON file ' + infofn)
1214 self._write_thumbnails(info_dict, filename)
1216 if not self.params.get('skip_download', False):
1219 fd = get_suitable_downloader(info, self.params)(self, self.params)
1220 for ph in self._progress_hooks:
1221 fd.add_progress_hook(ph)
1222 if self.params.get('verbose'):
1223 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1224 return fd.download(name, info)
1225 if info_dict.get('requested_formats') is not None:
1228 merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1229 if not merger._executable:
1231 self.report_warning('You have requested multiple '
1232 'formats but ffmpeg or avconv are not installed.'
1233 ' The formats won\'t be merged')
1235 postprocessors = [merger]
1236 for f in info_dict['requested_formats']:
1237 new_info = dict(info_dict)
1239 fname = self.prepare_filename(new_info)
1240 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1241 downloaded.append(fname)
1242 partial_success = dl(fname, new_info)
1243 success = success and partial_success
1244 info_dict['__postprocessors'] = postprocessors
1245 info_dict['__files_to_merge'] = downloaded
1247 # Just a single file
1248 success = dl(filename, info_dict)
1249 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1250 self.report_error('unable to download video data: %s' % str(err))
1252 except (OSError, IOError) as err:
1253 raise UnavailableVideoError(err)
1254 except (ContentTooShortError, ) as err:
1255 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1260 fixup_policy = self.params.get('fixup')
1261 if fixup_policy is None:
1262 fixup_policy = 'detect_or_warn'
1264 stretched_ratio = info_dict.get('stretched_ratio')
1265 if stretched_ratio is not None and stretched_ratio != 1:
1266 if fixup_policy == 'warn':
1267 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1268 info_dict['id'], stretched_ratio))
1269 elif fixup_policy == 'detect_or_warn':
1270 stretched_pp = FFmpegFixupStretchedPP(self)
1271 if stretched_pp.available:
1272 info_dict.setdefault('__postprocessors', [])
1273 info_dict['__postprocessors'].append(stretched_pp)
1275 self.report_warning(
1276 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1277 info_dict['id'], stretched_ratio))
1279 assert fixup_policy in ('ignore', 'never')
1281 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1282 if fixup_policy == 'warn':
1283 self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1285 elif fixup_policy == 'detect_or_warn':
1286 fixup_pp = FFmpegFixupM4aPP(self)
1287 if fixup_pp.available:
1288 info_dict.setdefault('__postprocessors', [])
1289 info_dict['__postprocessors'].append(fixup_pp)
1291 self.report_warning(
1292 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1295 assert fixup_policy in ('ignore', 'never')
1298 self.post_process(filename, info_dict)
1299 except (PostProcessingError) as err:
1300 self.report_error('postprocessing: %s' % str(err))
1302 self.record_download_archive(info_dict)
1304 def download(self, url_list):
1305 """Download a given list of URLs."""
1306 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1307 if (len(url_list) > 1 and
1309 and self.params.get('max_downloads') != 1):
1310 raise SameFileError(outtmpl)
1312 for url in url_list:
1314 # It also downloads the videos
1315 res = self.extract_info(url)
1316 except UnavailableVideoError:
1317 self.report_error('unable to download video')
1318 except MaxDownloadsReached:
1319 self.to_screen('[info] Maximum number of downloaded files reached.')
1322 if self.params.get('dump_single_json', False):
1323 self.to_stdout(json.dumps(res))
1325 return self._download_retcode
1327 def download_with_info_file(self, info_filename):
1328 with io.open(info_filename, 'r', encoding='utf-8') as f:
1331 self.process_ie_result(info, download=True)
1332 except DownloadError:
1333 webpage_url = info.get('webpage_url')
1334 if webpage_url is not None:
1335 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1336 return self.download([webpage_url])
1339 return self._download_retcode
1341 def post_process(self, filename, ie_info):
1342 """Run all the postprocessors on the given file."""
1343 info = dict(ie_info)
1344 info['filepath'] = filename
1346 if ie_info.get('__postprocessors') is not None:
1347 pps_chain.extend(ie_info['__postprocessors'])
1348 pps_chain.extend(self._pps)
1349 for pp in pps_chain:
1351 old_filename = info['filepath']
1353 keep_video_wish, info = pp.run(info)
1354 if keep_video_wish is not None:
1356 keep_video = keep_video_wish
1357 elif keep_video is None:
1358 # No clear decision yet, let IE decide
1359 keep_video = keep_video_wish
1360 except PostProcessingError as e:
1361 self.report_error(e.msg)
1362 if keep_video is False and not self.params.get('keepvideo', False):
1364 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1365 os.remove(encodeFilename(old_filename))
1366 except (IOError, OSError):
1367 self.report_warning('Unable to remove downloaded video file')
1369 def _make_archive_id(self, info_dict):
1370 # Future-proof against any change in case
1371 # and backwards compatibility with prior versions
1372 extractor = info_dict.get('extractor_key')
1373 if extractor is None:
1374 if 'id' in info_dict:
1375 extractor = info_dict.get('ie_key') # key in a playlist
1376 if extractor is None:
1377 return None # Incomplete video information
1378 return extractor.lower() + ' ' + info_dict['id']
1380 def in_download_archive(self, info_dict):
1381 fn = self.params.get('download_archive')
1385 vid_id = self._make_archive_id(info_dict)
1387 return False # Incomplete video information
1390 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1391 for line in archive_file:
1392 if line.strip() == vid_id:
1394 except IOError as ioe:
1395 if ioe.errno != errno.ENOENT:
1399 def record_download_archive(self, info_dict):
1400 fn = self.params.get('download_archive')
1403 vid_id = self._make_archive_id(info_dict)
1405 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1406 archive_file.write(vid_id + '\n')
1409 def format_resolution(format, default='unknown'):
1410 if format.get('vcodec') == 'none':
1412 if format.get('resolution') is not None:
1413 return format['resolution']
1414 if format.get('height') is not None:
1415 if format.get('width') is not None:
1416 res = '%sx%s' % (format['width'], format['height'])
1418 res = '%sp' % format['height']
1419 elif format.get('width') is not None:
1420 res = '?x%d' % format['width']
1425 def _format_note(self, fdict):
1427 if fdict.get('ext') in ['f4f', 'f4m']:
1428 res += '(unsupported) '
1429 if fdict.get('format_note') is not None:
1430 res += fdict['format_note'] + ' '
1431 if fdict.get('tbr') is not None:
1432 res += '%4dk ' % fdict['tbr']
1433 if fdict.get('container') is not None:
1436 res += '%s container' % fdict['container']
1437 if (fdict.get('vcodec') is not None and
1438 fdict.get('vcodec') != 'none'):
1441 res += fdict['vcodec']
1442 if fdict.get('vbr') is not None:
1444 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1446 if fdict.get('vbr') is not None:
1447 res += '%4dk' % fdict['vbr']
1448 if fdict.get('fps') is not None:
1449 res += ', %sfps' % fdict['fps']
1450 if fdict.get('acodec') is not None:
1453 if fdict['acodec'] == 'none':
1456 res += '%-5s' % fdict['acodec']
1457 elif fdict.get('abr') is not None:
1461 if fdict.get('abr') is not None:
1462 res += '@%3dk' % fdict['abr']
1463 if fdict.get('asr') is not None:
1464 res += ' (%5dHz)' % fdict['asr']
1465 if fdict.get('filesize') is not None:
1468 res += format_bytes(fdict['filesize'])
1469 elif fdict.get('filesize_approx') is not None:
1472 res += '~' + format_bytes(fdict['filesize_approx'])
1475 def list_formats(self, info_dict):
1476 def line(format, idlen=20):
1477 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1478 format['format_id'],
1480 self.format_resolution(format),
1481 self._format_note(format),
1484 formats = info_dict.get('formats', [info_dict])
1485 idlen = max(len('format code'),
1486 max(len(f['format_id']) for f in formats))
1488 line(f, idlen) for f in formats
1489 if f.get('preference') is None or f['preference'] >= -1000]
1490 if len(formats) > 1:
1491 formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1492 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1494 header_line = line({
1495 'format_id': 'format code', 'ext': 'extension',
1496 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1498 '[info] Available formats for %s:\n%s\n%s' %
1499 (info_dict['id'], header_line, '\n'.join(formats_s)))
1501 def list_thumbnails(self, info_dict):
1502 thumbnails = info_dict.get('thumbnails')
1504 tn_url = info_dict.get('thumbnail')
1506 thumbnails = [{'id': '0', 'url': tn_url}]
1509 '[info] No thumbnails present for %s' % info_dict['id'])
1513 '[info] Thumbnails for %s:' % info_dict['id'])
1514 self.to_screen(render_table(
1515 ['ID', 'width', 'height', 'URL'],
1516 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1518 def urlopen(self, req):
1519 """ Start an HTTP download """
1521 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1522 # always respected by websites, some tend to give out URLs with non percent-encoded
1523 # non-ASCII characters (see telemb.py, ard.py [#3412])
1524 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1525 # To work around aforementioned issue we will replace request's original URL with
1526 # percent-encoded one
1527 req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1528 url = req if req_is_string else req.get_full_url()
1529 url_escaped = escape_url(url)
1531 # Substitute URL if any change after escaping
1532 if url != url_escaped:
1536 req = compat_urllib_request.Request(
1537 url_escaped, data=req.data, headers=req.headers,
1538 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1540 return self._opener.open(req, timeout=self._socket_timeout)
1542 def print_debug_header(self):
1543 if not self.params.get('verbose'):
1546 if type('') is not compat_str:
1547 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1548 self.report_warning(
1549 'Your Python is broken! Update to a newer and supported version')
1551 stdout_encoding = getattr(
1552 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1554 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1555 locale.getpreferredencoding(),
1556 sys.getfilesystemencoding(),
1558 self.get_encoding()))
1559 write_string(encoding_str, encoding=None)
1561 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1563 sp = subprocess.Popen(
1564 ['git', 'rev-parse', '--short', 'HEAD'],
1565 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1566 cwd=os.path.dirname(os.path.abspath(__file__)))
1567 out, err = sp.communicate()
1568 out = out.decode().strip()
1569 if re.match('[0-9a-f]+', out):
1570 self._write_string('[debug] Git HEAD: ' + out + '\n')
1576 self._write_string('[debug] Python version %s - %s\n' % (
1577 platform.python_version(), platform_name()))
1579 exe_versions = FFmpegPostProcessor.get_versions()
1580 exe_versions['rtmpdump'] = rtmpdump_version()
1581 exe_str = ', '.join(
1583 for exe, v in sorted(exe_versions.items())
1588 self._write_string('[debug] exe versions: %s\n' % exe_str)
1591 for handler in self._opener.handlers:
1592 if hasattr(handler, 'proxies'):
1593 proxy_map.update(handler.proxies)
1594 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1596 if self.params.get('call_home', False):
1597 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1598 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1599 latest_version = self.urlopen(
1600 'https://yt-dl.org/latest/version').read().decode('utf-8')
1601 if version_tuple(latest_version) > version_tuple(__version__):
1602 self.report_warning(
1603 'You are using an outdated version (newest version: %s)! '
1604 'See https://yt-dl.org/update if you need help updating.' %
1607 def _setup_opener(self):
1608 timeout_val = self.params.get('socket_timeout')
1609 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1611 opts_cookiefile = self.params.get('cookiefile')
1612 opts_proxy = self.params.get('proxy')
1614 if opts_cookiefile is None:
1615 self.cookiejar = compat_cookiejar.CookieJar()
1617 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1619 if os.access(opts_cookiefile, os.R_OK):
1620 self.cookiejar.load()
1622 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1624 if opts_proxy is not None:
1625 if opts_proxy == '':
1628 proxies = {'http': opts_proxy, 'https': opts_proxy}
1630 proxies = compat_urllib_request.getproxies()
1631 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1632 if 'http' in proxies and 'https' not in proxies:
1633 proxies['https'] = proxies['http']
1634 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1636 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1637 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1638 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1639 opener = compat_urllib_request.build_opener(
1640 https_handler, proxy_handler, cookie_processor, ydlh)
1641 # Delete the default user-agent header, which would otherwise apply in
1642 # cases where our custom HTTP handler doesn't come into play
1643 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1644 opener.addheaders = []
1645 self._opener = opener
1647 def encode(self, s):
1648 if isinstance(s, bytes):
1649 return s # Already encoded
1652 return s.encode(self.get_encoding())
1653 except UnicodeEncodeError as err:
1654 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1657 def get_encoding(self):
1658 encoding = self.params.get('encoding')
1659 if encoding is None:
1660 encoding = preferredencoding()
1663 def _write_thumbnails(self, info_dict, filename):
1664 if self.params.get('writethumbnail', False):
1665 thumbnails = info_dict.get('thumbnails')
1667 thumbnails = [thumbnails[-1]]
1668 elif self.params.get('write_all_thumbnails', False):
1669 thumbnails = info_dict.get('thumbnails')
1674 # No thumbnails present, so return immediately
1677 for t in thumbnails:
1678 thumb_ext = determine_ext(t['url'], 'jpg')
1679 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1680 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1681 thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1683 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1684 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1685 (info_dict['extractor'], info_dict['id'], thumb_display_id))
1687 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1688 (info_dict['extractor'], info_dict['id'], thumb_display_id))
1690 uf = self.urlopen(t['url'])
1691 with open(thumb_filename, 'wb') as thumbf:
1692 shutil.copyfileobj(uf, thumbf)
1693 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1694 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1695 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1696 self.report_warning('Unable to download thumbnail "%s": %s' %
1697 (t['url'], compat_str(err)))