2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
35 compat_urllib_request,
64 UnavailableVideoError,
74 from .cache import Cache
75 from .extractor import get_info_extractor, gen_extractors
76 from .downloader import get_suitable_downloader
77 from .downloader.rtmp import rtmpdump_version
78 from .postprocessor import (
80 FFmpegFixupStretchedPP,
85 from .version import __version__
88 class YoutubeDL(object):
91 YoutubeDL objects are the ones responsible for downloading the
92 actual video file and writing it to disk if the user has requested
93 it, among some other tasks. In most cases there should be one per
94 program. As, given a video URL, the downloader doesn't know how to
95 extract all the needed information, task that InfoExtractors do, it
96 has to pass the URL to one of them.
98 For this, YoutubeDL objects have a method that allows
99 InfoExtractors to be registered in a given order. When it is passed
100 a URL, the YoutubeDL object hands it to the first InfoExtractor it
101 finds that reports being able to handle it. The InfoExtractor extracts
102 all the information about the video or videos the URL refers to, and
103 YoutubeDL processes the extracted information, possibly using a File
104 Downloader to download the video.
106 YoutubeDL objects accept a lot of parameters. In order not to saturate
107 the object constructor with arguments, it receives a dictionary of
108 options instead. These options are available through the params
109 attribute for the InfoExtractors to use. The YoutubeDL also
110 registers itself as the downloader in charge for the InfoExtractors
111 that are added to it, so this is a "mutual registration".
115 username: Username for authentication purposes.
116 password: Password for authentication purposes.
117 videopassword: Password for accessing a video.
118 usenetrc: Use netrc for authentication instead.
119 verbose: Print additional info to stdout.
120 quiet: Do not print messages to stdout.
121 no_warnings: Do not print out anything for warnings.
122 forceurl: Force printing final URL.
123 forcetitle: Force printing title.
124 forceid: Force printing ID.
125 forcethumbnail: Force printing thumbnail URL.
126 forcedescription: Force printing description.
127 forcefilename: Force printing final filename.
128 forceduration: Force printing duration.
129 forcejson: Force printing info_dict as JSON.
130 dump_single_json: Force printing the info_dict of the whole playlist
131 (or video) as a single JSON line.
132 simulate: Do not download the video files.
133 format: Video format code. See options.py for more information.
134 format_limit: Highest quality format to try.
135 outtmpl: Template for output names.
136 restrictfilenames: Do not allow "&" and spaces in file names
137 ignoreerrors: Do not stop on download errors.
138 nooverwrites: Prevent overwriting files.
139 playliststart: Playlist item to start at.
140 playlistend: Playlist item to end at.
141 playlist_items: Specific indices of playlist to download.
142 playlistreverse: Download playlist items in reverse order.
143 matchtitle: Download only matching titles.
144 rejecttitle: Reject downloads for matching titles.
145 logger: Log messages to a logging.Logger instance.
146 logtostderr: Log messages to stderr instead of stdout.
147 writedescription: Write the video description to a .description file
148 writeinfojson: Write the video description to a .info.json file
149 writeannotations: Write the video annotations to a .annotations.xml file
150 writethumbnail: Write the thumbnail image to a file
151 write_all_thumbnails: Write all thumbnail formats to files
152 writesubtitles: Write the video subtitles to a file
153 writeautomaticsub: Write the automatic subtitles to a file
154 allsubtitles: Downloads all the subtitles of the video
155 (requires writesubtitles or writeautomaticsub)
156 listsubtitles: Lists all available subtitles for the video
157 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
158 subtitleslangs: List of languages of the subtitles to download
159 keepvideo: Keep the video file after post-processing
160 daterange: A DateRange object, download only if the upload_date is in the range.
161 skip_download: Skip the actual download of the video file
162 cachedir: Location of the cache files in the filesystem.
163 False to disable filesystem cache.
164 noplaylist: Download single video instead of a playlist if in doubt.
165 age_limit: An integer representing the user's age in years.
166 Unsuitable videos for the given age are skipped.
167 min_views: An integer representing the minimum view count the video
168 must have in order to not be skipped.
169 Videos without view count information are always
170 downloaded. None for no limit.
171 max_views: An integer representing the maximum view count.
172 Videos that are more popular than that are not
174 Videos without view count information are always
175 downloaded. None for no limit.
176 download_archive: File name of a file where all downloads are recorded.
177 Videos already present in the file are not downloaded
179 cookiefile: File name where cookies should be read from and dumped to.
180 nocheckcertificate:Do not verify SSL certificates
181 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
182 At the moment, this is only supported by YouTube.
183 proxy: URL of the proxy server to use
184 socket_timeout: Time to wait for unresponsive hosts, in seconds
185 bidi_workaround: Work around buggy terminals without bidirectional text
186 support, using fribidi
187 debug_printtraffic:Print out sent and received HTTP traffic
188 include_ads: Download ads as well
189 default_search: Prepend this string if an input url is not valid.
190 'auto' for elaborate guessing
191 encoding: Use this encoding instead of the system-specified.
192 extract_flat: Do not resolve URLs, return the immediate result.
193 Pass in 'in_playlist' to only show this behavior for
195 postprocessors: A list of dictionaries, each with an entry
196 * key: The name of the postprocessor. See
197 youtube_dl/postprocessor/__init__.py for a list.
198 as well as any further keyword arguments for the
200 progress_hooks: A list of functions that get called on download
201 progress, with a dictionary with the entries
202 * status: One of "downloading" and "finished".
203 Check this first and ignore unknown values.
205 If status is one of "downloading" or "finished", the
206 following properties may also be present:
207 * filename: The final filename (always present)
208 * downloaded_bytes: Bytes on disk
209 * total_bytes: Size of the whole file, None if unknown
210 * tmpfilename: The filename we're currently writing to
211 * eta: The estimated time in seconds, None if unknown
212 * speed: The download speed in bytes/second, None if
215 Progress hooks are guaranteed to be called at least once
216 (with status "finished") if the download is successful.
217 merge_output_format: Extension to use when merging formats.
218 fixup: Automatically correct known faults of the file.
220 - "never": do nothing
221 - "warn": only emit a warning
222 - "detect_or_warn": check whether we can do anything
223 about it, warn otherwise (default)
224 source_address: (Experimental) Client-side IP address to bind to.
225 call_home: Boolean, true iff we are allowed to contact the
226 youtube-dl servers for debugging.
227 sleep_interval: Number of seconds to sleep before each download.
228 listformats: Print an overview of available video formats and exit.
229 list_thumbnails: Print a table of all thumbnails and exit.
230 match_filter: A function that gets called with the info_dict of
232 If it returns a message, the video is ignored.
233 If it returns None, the video is downloaded.
234 match_filter_func in utils.py is one example for this.
235 no_color: Do not emit color codes in output.
237 The following options determine which downloader is picked:
238 external_downloader: Executable of the external downloader to call.
239 None or unset for standard (built-in) downloader.
240 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
242 The following parameters are not used by YoutubeDL itself, they are used by
244 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
245 noresizebuffer, retries, continuedl, noprogress, consoletitle,
248 The following options are used by the post processors:
249 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
250 otherwise prefer avconv.
251 exec_cmd: Arbitrary command to run after downloading
257 _download_retcode = None
258 _num_downloads = None
261 def __init__(self, params=None, auto_init=True):
262 """Create a FileDownloader object with the given options."""
266 self._ies_instances = {}
268 self._progress_hooks = []
269 self._download_retcode = 0
270 self._num_downloads = 0
271 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
272 self._err_file = sys.stderr
274 self.cache = Cache(self)
276 if params.get('bidi_workaround', False):
279 master, slave = pty.openpty()
280 width = get_term_width()
284 width_args = ['-w', str(width)]
286 stdin=subprocess.PIPE,
288 stderr=self._err_file)
290 self._output_process = subprocess.Popen(
291 ['bidiv'] + width_args, **sp_kwargs
294 self._output_process = subprocess.Popen(
295 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
296 self._output_channel = os.fdopen(master, 'rb')
297 except OSError as ose:
299 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
303 if (sys.version_info >= (3,) and sys.platform != 'win32' and
304 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
305 and not params.get('restrictfilenames', False)):
306 # On Python 3, the Unicode filesystem API will throw errors (#1474)
308 'Assuming --restrict-filenames since file system encoding '
309 'cannot encode all characters. '
310 'Set the LC_ALL environment variable to fix this.')
311 self.params['restrictfilenames'] = True
313 if '%(stitle)s' in self.params.get('outtmpl', ''):
314 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
319 self.print_debug_header()
320 self.add_default_info_extractors()
322 for pp_def_raw in self.params.get('postprocessors', []):
323 pp_class = get_postprocessor(pp_def_raw['key'])
324 pp_def = dict(pp_def_raw)
326 pp = pp_class(self, **compat_kwargs(pp_def))
327 self.add_post_processor(pp)
329 for ph in self.params.get('progress_hooks', []):
330 self.add_progress_hook(ph)
332 def warn_if_short_id(self, argv):
333 # short YouTube ID starting with dash?
335 i for i, a in enumerate(argv)
336 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
340 [a for i, a in enumerate(argv) if i not in idxs] +
341 ['--'] + [argv[i] for i in idxs]
344 'Long argument string detected. '
345 'Use -- to separate parameters and URLs, like this:\n%s\n' %
346 args_to_str(correct_argv))
348 def add_info_extractor(self, ie):
349 """Add an InfoExtractor object to the end of the list."""
351 self._ies_instances[ie.ie_key()] = ie
352 ie.set_downloader(self)
354 def get_info_extractor(self, ie_key):
356 Get an instance of an IE with name ie_key, it will try to get one from
357 the _ies list, if there's no instance it will create a new one and add
358 it to the extractor list.
360 ie = self._ies_instances.get(ie_key)
362 ie = get_info_extractor(ie_key)()
363 self.add_info_extractor(ie)
366 def add_default_info_extractors(self):
368 Add the InfoExtractors returned by gen_extractors to the end of the list
370 for ie in gen_extractors():
371 self.add_info_extractor(ie)
373 def add_post_processor(self, pp):
374 """Add a PostProcessor object to the end of the chain."""
376 pp.set_downloader(self)
378 def add_progress_hook(self, ph):
379 """Add the progress hook (currently only for the file downloader)"""
380 self._progress_hooks.append(ph)
382 def _bidi_workaround(self, message):
383 if not hasattr(self, '_output_channel'):
386 assert hasattr(self, '_output_process')
387 assert isinstance(message, compat_str)
388 line_count = message.count('\n') + 1
389 self._output_process.stdin.write((message + '\n').encode('utf-8'))
390 self._output_process.stdin.flush()
391 res = ''.join(self._output_channel.readline().decode('utf-8')
392 for _ in range(line_count))
393 return res[:-len('\n')]
395 def to_screen(self, message, skip_eol=False):
396 """Print message to stdout if not in quiet mode."""
397 return self.to_stdout(message, skip_eol, check_quiet=True)
399 def _write_string(self, s, out=None):
400 write_string(s, out=out, encoding=self.params.get('encoding'))
402 def to_stdout(self, message, skip_eol=False, check_quiet=False):
403 """Print message to stdout if not in quiet mode."""
404 if self.params.get('logger'):
405 self.params['logger'].debug(message)
406 elif not check_quiet or not self.params.get('quiet', False):
407 message = self._bidi_workaround(message)
408 terminator = ['\n', ''][skip_eol]
409 output = message + terminator
411 self._write_string(output, self._screen_file)
413 def to_stderr(self, message):
414 """Print message to stderr."""
415 assert isinstance(message, compat_str)
416 if self.params.get('logger'):
417 self.params['logger'].error(message)
419 message = self._bidi_workaround(message)
420 output = message + '\n'
421 self._write_string(output, self._err_file)
423 def to_console_title(self, message):
424 if not self.params.get('consoletitle', False):
426 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
427 # c_wchar_p() might not be necessary if `message` is
428 # already of type unicode()
429 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
430 elif 'TERM' in os.environ:
431 self._write_string('\033]0;%s\007' % message, self._screen_file)
433 def save_console_title(self):
434 if not self.params.get('consoletitle', False):
436 if 'TERM' in os.environ:
437 # Save the title on stack
438 self._write_string('\033[22;0t', self._screen_file)
440 def restore_console_title(self):
441 if not self.params.get('consoletitle', False):
443 if 'TERM' in os.environ:
444 # Restore the title from stack
445 self._write_string('\033[23;0t', self._screen_file)
448 self.save_console_title()
451 def __exit__(self, *args):
452 self.restore_console_title()
454 if self.params.get('cookiefile') is not None:
455 self.cookiejar.save()
457 def trouble(self, message=None, tb=None):
458 """Determine action to take when a download problem appears.
460 Depending on if the downloader has been configured to ignore
461 download errors or not, this method may throw an exception or
462 not when errors are found, after printing the message.
464 tb, if given, is additional traceback information.
466 if message is not None:
467 self.to_stderr(message)
468 if self.params.get('verbose'):
470 if sys.exc_info()[0]: # if .trouble has been called from an except block
472 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
473 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
474 tb += compat_str(traceback.format_exc())
476 tb_data = traceback.format_list(traceback.extract_stack())
477 tb = ''.join(tb_data)
479 if not self.params.get('ignoreerrors', False):
480 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
481 exc_info = sys.exc_info()[1].exc_info
483 exc_info = sys.exc_info()
484 raise DownloadError(message, exc_info)
485 self._download_retcode = 1
487 def report_warning(self, message):
489 Print the message to stderr, it will be prefixed with 'WARNING:'
490 If stderr is a tty file the 'WARNING:' will be colored
492 if self.params.get('logger') is not None:
493 self.params['logger'].warning(message)
495 if self.params.get('no_warnings'):
497 if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
498 _msg_header = '\033[0;33mWARNING:\033[0m'
500 _msg_header = 'WARNING:'
501 warning_message = '%s %s' % (_msg_header, message)
502 self.to_stderr(warning_message)
504 def report_error(self, message, tb=None):
506 Do the same as trouble, but prefixes the message with 'ERROR:', colored
507 in red if stderr is a tty file.
509 if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
510 _msg_header = '\033[0;31mERROR:\033[0m'
512 _msg_header = 'ERROR:'
513 error_message = '%s %s' % (_msg_header, message)
514 self.trouble(error_message, tb)
516 def report_file_already_downloaded(self, file_name):
517 """Report file has already been fully downloaded."""
519 self.to_screen('[download] %s has already been downloaded' % file_name)
520 except UnicodeEncodeError:
521 self.to_screen('[download] The file has already been downloaded')
523 def prepare_filename(self, info_dict):
524 """Generate the output filename."""
526 template_dict = dict(info_dict)
528 template_dict['epoch'] = int(time.time())
529 autonumber_size = self.params.get('autonumber_size')
530 if autonumber_size is None:
532 autonumber_templ = '%0' + str(autonumber_size) + 'd'
533 template_dict['autonumber'] = autonumber_templ % self._num_downloads
534 if template_dict.get('playlist_index') is not None:
535 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
536 if template_dict.get('resolution') is None:
537 if template_dict.get('width') and template_dict.get('height'):
538 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
539 elif template_dict.get('height'):
540 template_dict['resolution'] = '%sp' % template_dict['height']
541 elif template_dict.get('width'):
542 template_dict['resolution'] = '?x%d' % template_dict['width']
544 sanitize = lambda k, v: sanitize_filename(
546 restricted=self.params.get('restrictfilenames'),
548 template_dict = dict((k, sanitize(k, v))
549 for k, v in template_dict.items()
551 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
553 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
554 tmpl = compat_expanduser(outtmpl)
555 filename = tmpl % template_dict
556 # Temporary fix for #4787
557 # 'Treat' all problem characters by passing filename through preferredencoding
558 # to workaround encoding issues with subprocess on python2 @ Windows
559 if sys.version_info < (3, 0) and sys.platform == 'win32':
560 filename = encodeFilename(filename, True).decode(preferredencoding())
562 except ValueError as err:
563 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
566 def _match_entry(self, info_dict, incomplete):
567 """ Returns None iff the file should be downloaded """
569 video_title = info_dict.get('title', info_dict.get('id', 'video'))
570 if 'title' in info_dict:
571 # This can happen when we're just evaluating the playlist
572 title = info_dict['title']
573 matchtitle = self.params.get('matchtitle', False)
575 if not re.search(matchtitle, title, re.IGNORECASE):
576 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
577 rejecttitle = self.params.get('rejecttitle', False)
579 if re.search(rejecttitle, title, re.IGNORECASE):
580 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
581 date = info_dict.get('upload_date', None)
583 dateRange = self.params.get('daterange', DateRange())
584 if date not in dateRange:
585 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
586 view_count = info_dict.get('view_count', None)
587 if view_count is not None:
588 min_views = self.params.get('min_views')
589 if min_views is not None and view_count < min_views:
590 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
591 max_views = self.params.get('max_views')
592 if max_views is not None and view_count > max_views:
593 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
594 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
595 return 'Skipping "%s" because it is age restricted' % video_title
596 if self.in_download_archive(info_dict):
597 return '%s has already been recorded in archive' % video_title
600 match_filter = self.params.get('match_filter')
601 if match_filter is not None:
602 ret = match_filter(info_dict)
609 def add_extra_info(info_dict, extra_info):
610 '''Set the keys from extra_info in info dict if they are missing'''
611 for key, value in extra_info.items():
612 info_dict.setdefault(key, value)
614 def extract_info(self, url, download=True, ie_key=None, extra_info={},
617 Returns a list with a dictionary for each video we find.
618 If 'download', also downloads the videos.
619 extra_info is a dict containing the extra values to add to each result
623 ies = [self.get_info_extractor(ie_key)]
628 if not ie.suitable(url):
632 self.report_warning('The program functionality for this site has been marked as broken, '
633 'and will probably not work.')
636 ie_result = ie.extract(url)
637 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
639 if isinstance(ie_result, list):
640 # Backwards compatibility: old IE result format
642 '_type': 'compat_list',
643 'entries': ie_result,
645 self.add_default_extra_info(ie_result, ie, url)
647 return self.process_ie_result(ie_result, download, extra_info)
650 except ExtractorError as de: # An error we somewhat expected
651 self.report_error(compat_str(de), de.format_traceback())
653 except MaxDownloadsReached:
655 except Exception as e:
656 if self.params.get('ignoreerrors', False):
657 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
662 self.report_error('no suitable InfoExtractor for URL %s' % url)
664 def add_default_extra_info(self, ie_result, ie, url):
665 self.add_extra_info(ie_result, {
666 'extractor': ie.IE_NAME,
668 'webpage_url_basename': url_basename(url),
669 'extractor_key': ie.ie_key(),
672 def process_ie_result(self, ie_result, download=True, extra_info={}):
674 Take the result of the ie(may be modified) and resolve all unresolved
675 references (URLs, playlist items).
677 It will also download the videos if 'download'.
678 Returns the resolved ie_result.
681 result_type = ie_result.get('_type', 'video')
683 if result_type in ('url', 'url_transparent'):
684 extract_flat = self.params.get('extract_flat', False)
685 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
686 extract_flat is True):
687 if self.params.get('forcejson', False):
688 self.to_stdout(json.dumps(ie_result))
691 if result_type == 'video':
692 self.add_extra_info(ie_result, extra_info)
693 return self.process_video_result(ie_result, download=download)
694 elif result_type == 'url':
695 # We have to add extra_info to the results because it may be
696 # contained in a playlist
697 return self.extract_info(ie_result['url'],
699 ie_key=ie_result.get('ie_key'),
700 extra_info=extra_info)
701 elif result_type == 'url_transparent':
702 # Use the information from the embedding page
703 info = self.extract_info(
704 ie_result['url'], ie_key=ie_result.get('ie_key'),
705 extra_info=extra_info, download=False, process=False)
707 force_properties = dict(
708 (k, v) for k, v in ie_result.items() if v is not None)
709 for f in ('_type', 'url'):
710 if f in force_properties:
711 del force_properties[f]
712 new_result = info.copy()
713 new_result.update(force_properties)
715 assert new_result.get('_type') != 'url_transparent'
717 return self.process_ie_result(
718 new_result, download=download, extra_info=extra_info)
719 elif result_type == 'playlist' or result_type == 'multi_video':
720 # We process each entry in the playlist
721 playlist = ie_result.get('title', None) or ie_result.get('id', None)
722 self.to_screen('[download] Downloading playlist: %s' % playlist)
724 playlist_results = []
726 playliststart = self.params.get('playliststart', 1) - 1
727 playlistend = self.params.get('playlistend', None)
728 # For backwards compatibility, interpret -1 as whole list
729 if playlistend == -1:
732 playlistitems_str = self.params.get('playlist_items', None)
734 if playlistitems_str is not None:
735 def iter_playlistitems(format):
736 for string_segment in format.split(','):
737 if '-' in string_segment:
738 start, end = string_segment.split('-')
739 for item in range(int(start), int(end) + 1):
742 yield int(string_segment)
743 playlistitems = iter_playlistitems(playlistitems_str)
745 ie_entries = ie_result['entries']
746 if isinstance(ie_entries, list):
747 n_all_entries = len(ie_entries)
749 entries = [ie_entries[i - 1] for i in playlistitems]
751 entries = ie_entries[playliststart:playlistend]
752 n_entries = len(entries)
754 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
755 (ie_result['extractor'], playlist, n_all_entries, n_entries))
756 elif isinstance(ie_entries, PagedList):
759 for item in playlistitems:
760 entries.extend(ie_entries.getslice(
764 entries = ie_entries.getslice(
765 playliststart, playlistend)
766 n_entries = len(entries)
768 "[%s] playlist %s: Downloading %d videos" %
769 (ie_result['extractor'], playlist, n_entries))
772 entry_list = list(ie_entries)
773 entries = [entry_list[i - 1] for i in playlistitems]
775 entries = list(itertools.islice(
776 ie_entries, playliststart, playlistend))
777 n_entries = len(entries)
779 "[%s] playlist %s: Downloading %d videos" %
780 (ie_result['extractor'], playlist, n_entries))
782 if self.params.get('playlistreverse', False):
783 entries = entries[::-1]
785 for i, entry in enumerate(entries, 1):
786 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
788 'n_entries': n_entries,
789 'playlist': playlist,
790 'playlist_id': ie_result.get('id'),
791 'playlist_title': ie_result.get('title'),
792 'playlist_index': i + playliststart,
793 'extractor': ie_result['extractor'],
794 'webpage_url': ie_result['webpage_url'],
795 'webpage_url_basename': url_basename(ie_result['webpage_url']),
796 'extractor_key': ie_result['extractor_key'],
799 reason = self._match_entry(entry, incomplete=True)
800 if reason is not None:
801 self.to_screen('[download] ' + reason)
804 entry_result = self.process_ie_result(entry,
807 playlist_results.append(entry_result)
808 ie_result['entries'] = playlist_results
810 elif result_type == 'compat_list':
812 'Extractor %s returned a compat_list result. '
813 'It needs to be updated.' % ie_result.get('extractor'))
819 'extractor': ie_result['extractor'],
820 'webpage_url': ie_result['webpage_url'],
821 'webpage_url_basename': url_basename(ie_result['webpage_url']),
822 'extractor_key': ie_result['extractor_key'],
826 ie_result['entries'] = [
827 self.process_ie_result(_fixup(r), download, extra_info)
828 for r in ie_result['entries']
832 raise Exception('Invalid result type: %s' % result_type)
834 def _apply_format_filter(self, format_spec, available_formats):
835 " Returns a tuple of the remaining format_spec and filtered formats "
845 operator_rex = re.compile(r'''(?x)\s*\[
846 (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
847 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
848 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
850 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
851 m = operator_rex.search(format_spec)
854 comparison_value = int(m.group('value'))
856 comparison_value = parse_filesize(m.group('value'))
857 if comparison_value is None:
858 comparison_value = parse_filesize(m.group('value') + 'B')
859 if comparison_value is None:
861 'Invalid value %r in format specification %r' % (
862 m.group('value'), format_spec))
863 op = OPERATORS[m.group('op')]
870 str_operator_rex = re.compile(r'''(?x)\s*\[
871 \s*(?P<key>ext|acodec|vcodec|container|protocol)
872 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
873 \s*(?P<value>[a-zA-Z0-9_-]+)
875 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
876 m = str_operator_rex.search(format_spec)
878 comparison_value = m.group('value')
879 op = STR_OPERATORS[m.group('op')]
882 raise ValueError('Invalid format specification %r' % format_spec)
885 actual_value = f.get(m.group('key'))
886 if actual_value is None:
887 return m.group('none_inclusive')
888 return op(actual_value, comparison_value)
889 new_formats = [f for f in available_formats if _filter(f)]
891 new_format_spec = format_spec[:-len(m.group(0))]
892 if not new_format_spec:
893 new_format_spec = 'best'
895 return (new_format_spec, new_formats)
897 def select_format(self, format_spec, available_formats):
898 while format_spec.endswith(']'):
899 format_spec, available_formats = self._apply_format_filter(
900 format_spec, available_formats)
901 if not available_formats:
904 if format_spec == 'best' or format_spec is None:
905 return available_formats[-1]
906 elif format_spec == 'worst':
907 return available_formats[0]
908 elif format_spec == 'bestaudio':
910 f for f in available_formats
911 if f.get('vcodec') == 'none']
913 return audio_formats[-1]
914 elif format_spec == 'worstaudio':
916 f for f in available_formats
917 if f.get('vcodec') == 'none']
919 return audio_formats[0]
920 elif format_spec == 'bestvideo':
922 f for f in available_formats
923 if f.get('acodec') == 'none']
925 return video_formats[-1]
926 elif format_spec == 'worstvideo':
928 f for f in available_formats
929 if f.get('acodec') == 'none']
931 return video_formats[0]
933 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
934 if format_spec in extensions:
935 filter_f = lambda f: f['ext'] == format_spec
937 filter_f = lambda f: f['format_id'] == format_spec
938 matches = list(filter(filter_f, available_formats))
943 def _calc_headers(self, info_dict):
944 res = std_headers.copy()
946 add_headers = info_dict.get('http_headers')
948 res.update(add_headers)
950 cookies = self._calc_cookies(info_dict)
952 res['Cookie'] = cookies
956 def _calc_cookies(self, info_dict):
957 class _PseudoRequest(object):
958 def __init__(self, url):
961 self.unverifiable = False
963 def add_unredirected_header(self, k, v):
966 def get_full_url(self):
969 def is_unverifiable(self):
970 return self.unverifiable
972 def has_header(self, h):
973 return h in self.headers
975 def get_header(self, h, default=None):
976 return self.headers.get(h, default)
978 pr = _PseudoRequest(info_dict['url'])
979 self.cookiejar.add_cookie_header(pr)
980 return pr.headers.get('Cookie')
# Validate and normalise a single extractor result of type 'video', then
# resolve the user's format request and hand each selected format to
# process_info(). NOTE(review): this listing appears elided (original line
# numbers jump); comments describe only what the visible lines show.
982 def process_video_result(self, info_dict, download=True):
983 assert info_dict.get('_type', 'video') == 'video'
# 'id' and 'title' are hard-required fields of an extractor result.
985 if 'id' not in info_dict:
986 raise ExtractorError('Missing "id" field in extractor result')
987 if 'title' not in info_dict:
988 raise ExtractorError('Missing "title" field in extractor result')
990 if 'playlist' not in info_dict:
991 # It isn't part of a playlist
992 info_dict['playlist'] = None
993 info_dict['playlist_index'] = None
# Normalise thumbnails: a lone 'thumbnail' URL becomes a one-element
# 'thumbnails' list, then the list is sorted ascending by preference/size.
995 thumbnails = info_dict.get('thumbnails')
996 if thumbnails is None:
997 thumbnail = info_dict.get('thumbnail')
999 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1001 thumbnails.sort(key=lambda t: (
1002 t.get('preference'), t.get('width'), t.get('height'),
1003 t.get('id'), t.get('url')))
1004 for i, t in enumerate(thumbnails):
1005 if 'width' in t and 'height' in t:
1006 t['resolution'] = '%dx%d' % (t['width'], t['height'])
1007 if t.get('id') is None:
# The sort above is ascending, so the last entry is the best thumbnail.
1010 if thumbnails and 'thumbnail' not in info_dict:
1011 info_dict['thumbnail'] = thumbnails[-1]['url']
1013 if 'display_id' not in info_dict and 'id' in info_dict:
1014 info_dict['display_id'] = info_dict['id']
# Derive upload_date (YYYYMMDD, UTC) from the numeric timestamp if absent.
1016 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1017 # Working around negative timestamps in Windows
1018 # (see http://bugs.python.org/issue1646728)
1019 if info_dict['timestamp'] < 0 and os.name == 'nt':
1020 info_dict['timestamp'] = 0
1021 upload_date = datetime.datetime.utcfromtimestamp(
1022 info_dict['timestamp'])
1023 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1025 # This extractors handle format selection themselves
1026 if info_dict['extractor'] in ['Youku']:
1028 self.process_info(info_dict)
1031 # We now pick which formats have to be downloaded
1032 if info_dict.get('formats') is None:
1033 # There's only one format available
1034 formats = [info_dict]
1036 formats = info_dict['formats']
1039 raise ExtractorError('No video formats found!')
1041 # We check that all the formats have the format and format_id fields
1042 for i, format in enumerate(formats):
1043 if 'url' not in format:
1044 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1046 if format.get('format_id') is None:
# Fall back to the list index as the format_id.
1047 format['format_id'] = compat_str(i)
1048 if format.get('format') is None:
1049 format['format'] = '{id} - {res}{note}'.format(
1050 id=format['format_id'],
1051 res=self.format_resolution(format),
1052 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1054 # Automatically determine file extension if missing
1055 if 'ext' not in format:
1056 format['ext'] = determine_ext(format['url']).lower()
1057 # Add HTTP headers, so that external programs can use them from the
1059 full_format_info = info_dict.copy()
1060 full_format_info.update(format)
1061 format['http_headers'] = self._calc_headers(full_format_info)
# 'format_limit' truncates the list after the named format (inclusive).
1063 format_limit = self.params.get('format_limit', None)
1065 formats = list(takewhile_inclusive(
1066 lambda f: f['format_id'] != format_limit, formats
1069 # TODO Central sorting goes here
1071 if formats[0] is not info_dict:
1072 # only set the 'formats' fields if the original info_dict list them
1073 # otherwise we end up with a circular reference, the first (and unique)
1074 # element in the 'formats' field in info_dict is info_dict itself,
1075 # wich can't be exported to json
1076 info_dict['formats'] = formats
1077 if self.params.get('listformats'):
1078 self.list_formats(info_dict)
1080 if self.params.get('list_thumbnails'):
1081 self.list_thumbnails(info_dict)
# Resolve --format: ',' separates multiple requested downloads, '/' is a
# first-available fallback chain, and 'A+B' requests a video+audio merge.
1084 req_format = self.params.get('format')
1085 if req_format is None:
1087 formats_to_download = []
1088 # The -1 is for supporting YoutubeIE
1089 if req_format in ('-1', 'all'):
1090 formats_to_download = formats
1092 for rfstr in req_format.split(','):
1093 # We can accept formats requested in the format: 34/5/best, we pick
1094 # the first that is available, starting from left
1095 req_formats = rfstr.split('/')
1096 for rf in req_formats:
1097 if re.match(r'.+?\+.+?', rf) is not None:
1098 # Two formats have been requested like '137+139'
1099 format_1, format_2 = rf.split('+')
1100 formats_info = (self.select_format(format_1, formats),
1101 self.select_format(format_2, formats))
1102 if all(formats_info):
1103 # The first format must contain the video and the
1105 if formats_info[0].get('vcodec') == 'none':
1106 self.report_error('The first format must '
1107 'contain the video, try using '
1108 '"-f %s+%s"' % (format_2, format_1))
1111 formats_info[0]['ext']
1112 if self.params.get('merge_output_format') is None
1113 else self.params['merge_output_format'])
# The merged pseudo-format takes video attributes from the first
# component and audio attributes from the second.
1115 'requested_formats': formats_info,
1116 'format': '%s+%s' % (formats_info[0].get('format'),
1117 formats_info[1].get('format')),
1118 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1119 formats_info[1].get('format_id')),
1120 'width': formats_info[0].get('width'),
1121 'height': formats_info[0].get('height'),
1122 'resolution': formats_info[0].get('resolution'),
1123 'fps': formats_info[0].get('fps'),
1124 'vcodec': formats_info[0].get('vcodec'),
1125 'vbr': formats_info[0].get('vbr'),
1126 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1127 'acodec': formats_info[1].get('acodec'),
1128 'abr': formats_info[1].get('abr'),
1132 selected_format = None
1134 selected_format = self.select_format(rf, formats)
1135 if selected_format is not None:
1136 formats_to_download.append(selected_format)
1138 if not formats_to_download:
1139 raise ExtractorError('requested format not available',
# Each selected format is merged into a copy of info_dict and processed
# individually; the last one also updates info_dict for backwards compat.
1143 if len(formats_to_download) > 1:
1144 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1145 for format in formats_to_download:
1146 new_info = dict(info_dict)
1147 new_info.update(format)
1148 self.process_info(new_info)
1149 # We update the info dict with the best quality format (backwards compatibility)
1150 info_dict.update(formats_to_download[-1])
# NOTE(review): this listing appears elided (original line numbers jump);
# comments describe only what the visible lines show.
1153 def process_info(self, info_dict):
1154 """Process a single resolved IE result."""
1156 assert info_dict.get('_type', 'video') == 'video'
# Enforce --max-downloads before doing any work.
1158 max_downloads = self.params.get('max_downloads')
1159 if max_downloads is not None:
1160 if self._num_downloads >= int(max_downloads):
1161 raise MaxDownloadsReached()
# Preserve the untruncated title, then cap the working title at 200 chars.
1163 info_dict['fulltitle'] = info_dict['title']
1164 if len(info_dict['title']) > 200:
1165 info_dict['title'] = info_dict['title'][:197] + '...'
1167 # Keep for backwards compatibility
1168 info_dict['stitle'] = info_dict['title']
1170 if 'format' not in info_dict:
1171 info_dict['format'] = info_dict['ext']
# _match_entry returns a human-readable skip reason, or None to proceed.
1173 reason = self._match_entry(info_dict, incomplete=False)
1174 if reason is not None:
1175 self.to_screen('[download] ' + reason)
1178 self._num_downloads += 1
1180 info_dict['_filename'] = filename = self.prepare_filename(info_dict)
# --force-* options: print the requested field(s) to stdout.
1183 if self.params.get('forcetitle', False):
1184 self.to_stdout(info_dict['fulltitle'])
1185 if self.params.get('forceid', False):
1186 self.to_stdout(info_dict['id'])
1187 if self.params.get('forceurl', False):
1188 if info_dict.get('requested_formats') is not None:
1189 for f in info_dict['requested_formats']:
1190 self.to_stdout(f['url'] + f.get('play_path', ''))
1192 # For RTMP URLs, also include the playpath
1193 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1194 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1195 self.to_stdout(info_dict['thumbnail'])
1196 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1197 self.to_stdout(info_dict['description'])
1198 if self.params.get('forcefilename', False) and filename is not None:
1199 self.to_stdout(filename)
1200 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1201 self.to_stdout(formatSeconds(info_dict['duration']))
1202 if self.params.get('forceformat', False):
1203 self.to_stdout(info_dict['format'])
1204 if self.params.get('forcejson', False):
1205 self.to_stdout(json.dumps(info_dict))
1207 # Do nothing else if in simulate mode
1208 if self.params.get('simulate', False):
1211 if filename is None:
# Ensure the target directory exists before writing anything.
1215 dn = os.path.dirname(encodeFilename(filename))
1216 if dn and not os.path.exists(dn):
1218 except (OSError, IOError) as err:
1219 self.report_error('unable to create directory ' + compat_str(err))
# Optional sidecar files: description, annotations, subtitles, info JSON.
1222 if self.params.get('writedescription', False):
1223 descfn = filename + '.description'
1224 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1225 self.to_screen('[info] Video description is already present')
1226 elif info_dict.get('description') is None:
1227 self.report_warning('There\'s no description to write.')
1230 self.to_screen('[info] Writing video description to: ' + descfn)
1231 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1232 descfile.write(info_dict['description'])
1233 except (OSError, IOError):
1234 self.report_error('Cannot write description file ' + descfn)
1237 if self.params.get('writeannotations', False):
1238 annofn = filename + '.annotations.xml'
1239 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1240 self.to_screen('[info] Video annotations are already present')
1243 self.to_screen('[info] Writing video annotations to: ' + annofn)
1244 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1245 annofile.write(info_dict['annotations'])
1246 except (KeyError, TypeError):
1247 self.report_warning('There are no annotations to write.')
1248 except (OSError, IOError):
1249 self.report_error('Cannot write annotations file: ' + annofn)
1252 subtitles_are_requested = any([self.params.get('writesubtitles', False),
1253 self.params.get('writeautomaticsub')])
1255 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1256 # subtitles download errors are already managed as troubles in relevant IE
1257 # that way it will silently go on when used with unsupporting IE
1258 subtitles = info_dict['subtitles']
1259 sub_format = self.params.get('subtitlesformat', 'srt')
1260 for sub_lang in subtitles.keys():
1261 sub = subtitles[sub_lang]
1265 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1266 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1267 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1269 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1270 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1272 except (OSError, IOError):
1273 self.report_error('Cannot write subtitles file ' + sub_filename)
1276 if self.params.get('writeinfojson', False):
1277 infofn = os.path.splitext(filename)[0] + '.info.json'
1278 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1279 self.to_screen('[info] Video description metadata is already present')
1281 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1283 write_json_file(info_dict, infofn)
1284 except (OSError, IOError):
1285 self.report_error('Cannot write metadata to JSON file ' + infofn)
1288 self._write_thumbnails(info_dict, filename)
# Actual download. dl() picks a suitable FileDownloader for the info dict
# and attaches the registered progress hooks.
1290 if not self.params.get('skip_download', False):
1293 fd = get_suitable_downloader(info, self.params)(self, self.params)
1294 for ph in self._progress_hooks:
1295 fd.add_progress_hook(ph)
1296 if self.params.get('verbose'):
1297 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1298 return fd.download(name, info)
# Multi-format request: download each component to its own 'f<id>' file
# and queue an FFmpegMergerPP to merge them afterwards.
1300 if info_dict.get('requested_formats') is not None:
1303 merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1304 if not merger.available:
1306 self.report_warning('You have requested multiple '
1307 'formats but ffmpeg or avconv are not installed.'
1308 ' The formats won\'t be merged')
1310 postprocessors = [merger]
1311 for f in info_dict['requested_formats']:
1312 new_info = dict(info_dict)
1314 fname = self.prepare_filename(new_info)
1315 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1316 downloaded.append(fname)
1317 partial_success = dl(fname, new_info)
1318 success = success and partial_success
1319 info_dict['__postprocessors'] = postprocessors
1320 info_dict['__files_to_merge'] = downloaded
1322 # Just a single file
1323 success = dl(filename, info_dict)
1324 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1325 self.report_error('unable to download video data: %s' % str(err))
1327 except (OSError, IOError) as err:
1328 raise UnavailableVideoError(err)
1329 except (ContentTooShortError, ) as err:
1330 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
# Post-download fixups (aspect ratio, DASH m4a container), controlled by
# the 'fixup' param: 'warn', 'detect_or_warn' (default), 'ignore'/'never'.
1335 fixup_policy = self.params.get('fixup')
1336 if fixup_policy is None:
1337 fixup_policy = 'detect_or_warn'
1339 stretched_ratio = info_dict.get('stretched_ratio')
1340 if stretched_ratio is not None and stretched_ratio != 1:
1341 if fixup_policy == 'warn':
1342 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1343 info_dict['id'], stretched_ratio))
1344 elif fixup_policy == 'detect_or_warn':
1345 stretched_pp = FFmpegFixupStretchedPP(self)
1346 if stretched_pp.available:
1347 info_dict.setdefault('__postprocessors', [])
1348 info_dict['__postprocessors'].append(stretched_pp)
1350 self.report_warning(
1351 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1352 info_dict['id'], stretched_ratio))
1354 assert fixup_policy in ('ignore', 'never')
1356 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1357 if fixup_policy == 'warn':
1358 self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1360 elif fixup_policy == 'detect_or_warn':
1361 fixup_pp = FFmpegFixupM4aPP(self)
1362 if fixup_pp.available:
1363 info_dict.setdefault('__postprocessors', [])
1364 info_dict['__postprocessors'].append(fixup_pp)
1366 self.report_warning(
1367 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1370 assert fixup_policy in ('ignore', 'never')
1373 self.post_process(filename, info_dict)
1374 except (PostProcessingError) as err:
1375 self.report_error('postprocessing: %s' % str(err))
# Record the finished download in the --download-archive file.
1377 self.record_download_archive(info_dict)
1379 def download(self, url_list):
1380 """Download a given list of URLs."""
1381 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
# Multiple URLs rendered to one fixed output name would overwrite each
# other; refuse unless at most one download can happen.
1382 if (len(url_list) > 1 and
1384 and self.params.get('max_downloads') != 1):
1385 raise SameFileError(outtmpl)
1387 for url in url_list:
1389 # It also downloads the videos
1390 res = self.extract_info(url)
1391 except UnavailableVideoError:
1392 self.report_error('unable to download video')
1393 except MaxDownloadsReached:
1394 self.to_screen('[info] Maximum number of downloaded files reached.')
1397 if self.params.get('dump_single_json', False):
1398 self.to_stdout(json.dumps(res))
# Accumulated return code of all downloads (0 on full success).
1400 return self._download_retcode
# Re-run processing from a previously dumped .info.json file instead of
# extracting the URL again.
1402 def download_with_info_file(self, info_filename):
1403 with io.open(info_filename, 'r', encoding='utf-8') as f:
1406 self.process_ie_result(info, download=True)
1407 except DownloadError:
# The stored info may be stale (expired media URLs); fall back to a
# fresh extraction of the recorded webpage URL when available.
1408 webpage_url = info.get('webpage_url')
1409 if webpage_url is not None:
1410 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1411 return self.download([webpage_url])
1414 return self._download_retcode
1416 def post_process(self, filename, ie_info):
1417 """Run all the postprocessors on the given file."""
1418 info = dict(ie_info)
1419 info['filepath'] = filename
# Per-download postprocessors (e.g. the merger) run before the globally
# registered ones in self._pps.
1421 if ie_info.get('__postprocessors') is not None:
1422 pps_chain.extend(ie_info['__postprocessors'])
1423 pps_chain.extend(self._pps)
1424 for pp in pps_chain:
1426 old_filename = info['filepath']
# Each PP returns (keep_video_wish, updated_info); a PP may replace
# 'filepath' when it produces a new file.
1428 keep_video_wish, info = pp.run(info)
1429 if keep_video_wish is not None:
1431 keep_video = keep_video_wish
1432 elif keep_video is None:
1433 # No clear decision yet, let IE decide
1434 keep_video = keep_video_wish
1435 except PostProcessingError as e:
1436 self.report_error(e.msg)
# Delete the intermediate file only when a PP asked for it and the user
# did not pass --keep-video.
1437 if keep_video is False and not self.params.get('keepvideo', False):
1439 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1440 os.remove(encodeFilename(old_filename))
1441 except (IOError, OSError):
1442 self.report_warning('Unable to remove downloaded video file')
1444 def _make_archive_id(self, info_dict):
1445 # Future-proof against any change in case
1446 # and backwards compatibility with prior versions
1447 extractor = info_dict.get('extractor_key')
1448 if extractor is None:
1449 if 'id' in info_dict:
1450 extractor = info_dict.get('ie_key') # key in a playlist
1451 if extractor is None:
1452 return None # Incomplete video information
1453 return extractor.lower() + ' ' + info_dict['id']
# Return True when the video is already recorded in the --download-archive
# file. Best-effort: a missing archive file simply means "not downloaded".
1455 def in_download_archive(self, info_dict):
1456 fn = self.params.get('download_archive')
1460 vid_id = self._make_archive_id(info_dict)
1462 return False # Incomplete video information
# One archive entry per line; compare against the stripped line.
1465 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1466 for line in archive_file:
1467 if line.strip() == vid_id:
1469 except IOError as ioe:
# ENOENT (archive does not exist yet) is expected and ignored.
1470 if ioe.errno != errno.ENOENT:
# Append this video's archive id to the --download-archive file so future
# runs can skip it.
1474 def record_download_archive(self, info_dict):
1475 fn = self.params.get('download_archive')
1478 vid_id = self._make_archive_id(info_dict)
# locked_file guards against concurrent youtube-dl instances appending.
1480 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1481 archive_file.write(vid_id + '\n')
# Render a human-readable resolution string for a format dict:
# an explicit 'resolution' wins, else 'WxH', 'Hp', or '?xW' depending on
# which of width/height are known.
1484 def format_resolution(format, default='unknown'):
1485 if format.get('vcodec') == 'none':
1487 if format.get('resolution') is not None:
1488 return format['resolution']
1489 if format.get('height') is not None:
1490 if format.get('width') is not None:
1491 res = '%sx%s' % (format['width'], format['height'])
1493 res = '%sp' % format['height']
1494 elif format.get('width') is not None:
1495 res = '?x%d' % format['width']
# Build the free-form "note" column for --list-formats: bitrates, codecs,
# container, fps, sample rate and (approximate) file size, appended in a
# fixed order into one string.
1500 def _format_note(self, fdict):
1502 if fdict.get('ext') in ['f4f', 'f4m']:
1503 res += '(unsupported) '
1504 if fdict.get('format_note') is not None:
1505 res += fdict['format_note'] + ' '
1506 if fdict.get('tbr') is not None:
1507 res += '%4dk ' % fdict['tbr']
1508 if fdict.get('container') is not None:
1511 res += '%s container' % fdict['container']
# Video part: codec, then video bitrate and fps.
1512 if (fdict.get('vcodec') is not None and
1513 fdict.get('vcodec') != 'none'):
1516 res += fdict['vcodec']
1517 if fdict.get('vbr') is not None:
1519 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1521 if fdict.get('vbr') is not None:
1522 res += '%4dk' % fdict['vbr']
1523 if fdict.get('fps') is not None:
1524 res += ', %sfps' % fdict['fps']
# Audio part: codec, audio bitrate and sample rate.
1525 if fdict.get('acodec') is not None:
1528 if fdict['acodec'] == 'none':
1531 res += '%-5s' % fdict['acodec']
1532 elif fdict.get('abr') is not None:
1536 if fdict.get('abr') is not None:
1537 res += '@%3dk' % fdict['abr']
1538 if fdict.get('asr') is not None:
1539 res += ' (%5dHz)' % fdict['asr']
1540 if fdict.get('filesize') is not None:
1543 res += format_bytes(fdict['filesize'])
1544 elif fdict.get('filesize_approx') is not None:
# '~' marks an approximate size.
1547 res += '~' + format_bytes(fdict['filesize_approx'])
# Print the --list-formats table: one fixed-width row per format plus a
# header row, with the best format marked '(best)'.
1550 def list_formats(self, info_dict):
# Local helper: render one table row; idlen sizes the format-id column.
1551 def line(format, idlen=20):
1552 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1553 format['format_id'],
1555 self.format_resolution(format),
1556 self._format_note(format),
1559 formats = info_dict.get('formats', [info_dict])
# Column is at least as wide as the header text 'format code'.
1560 idlen = max(len('format code'),
1561 max(len(f['format_id']) for f in formats))
# Formats with very low preference (< -1000) are hidden from the listing.
1563 line(f, idlen) for f in formats
1564 if f.get('preference') is None or f['preference'] >= -1000]
1565 if len(formats) > 1:
# The last (highest-sorted) format is the best one.
1566 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1568 header_line = line({
1569 'format_id': 'format code', 'ext': 'extension',
1570 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1572 '[info] Available formats for %s:\n%s\n%s' %
1573 (info_dict['id'], header_line, '\n'.join(formats_s)))
# Print the --list-thumbnails table for a video.
1575 def list_thumbnails(self, info_dict):
1576 thumbnails = info_dict.get('thumbnails')
# Fall back to the single 'thumbnail' URL as pseudo-entry id '0'.
1578 tn_url = info_dict.get('thumbnail')
1580 thumbnails = [{'id': '0', 'url': tn_url}]
1583 '[info] No thumbnails present for %s' % info_dict['id'])
1587 '[info] Thumbnails for %s:' % info_dict['id'])
1588 self.to_screen(render_table(
1589 ['ID', 'width', 'height', 'URL'],
1590 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1592 def urlopen(self, req):
1593 """ Start an HTTP download """
1595 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1596 # always respected by websites, some tend to give out URLs with non percent-encoded
1597 # non-ASCII characters (see telemb.py, ard.py [#3412])
1598 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1599 # To work around aforementioned issue we will replace request's original URL with
1600 # percent-encoded one
# 'req' may be either a URL string or a Request object; handle both.
1601 req_is_string = isinstance(req, compat_basestring)
1602 url = req if req_is_string else req.get_full_url()
1603 url_escaped = escape_url(url)
1605 # Substitute URL if any change after escaping
1606 if url != url_escaped:
# Rebuild the Request with the escaped URL, preserving data, headers
# and redirect/verifiability metadata.
1610 req = compat_urllib_request.Request(
1611 url_escaped, data=req.data, headers=req.headers,
1612 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1614 return self._opener.open(req, timeout=self._socket_timeout)
# Print the '[debug] ...' banner shown in --verbose mode: encodings,
# version, git revision, Python/platform, helper-binary versions, proxy
# map, and (with --call-home) public IP and update check.
1616 def print_debug_header(self):
1617 if not self.params.get('verbose'):
# On Python 2, str literals should be unicode here thanks to
# unicode_literals; if not, the interpreter build is broken.
1620 if type('') is not compat_str:
1621 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1622 self.report_warning(
1623 'Your Python is broken! Update to a newer and supported version')
1625 stdout_encoding = getattr(
1626 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1628 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1629 locale.getpreferredencoding(),
1630 sys.getfilesystemencoding(),
1632 self.get_encoding()))
1633 write_string(encoding_str, encoding=None)
1635 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
# Best-effort: report the git revision when running from a checkout.
1637 sp = subprocess.Popen(
1638 ['git', 'rev-parse', '--short', 'HEAD'],
1639 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1640 cwd=os.path.dirname(os.path.abspath(__file__)))
1641 out, err = sp.communicate()
1642 out = out.decode().strip()
1643 if re.match('[0-9a-f]+', out):
1644 self._write_string('[debug] Git HEAD: ' + out + '\n')
1650 self._write_string('[debug] Python version %s - %s\n' % (
1651 platform.python_version(), platform_name()))
# Versions of external helpers (ffmpeg/avconv family plus rtmpdump).
1653 exe_versions = FFmpegPostProcessor.get_versions(self)
1654 exe_versions['rtmpdump'] = rtmpdump_version()
1655 exe_str = ', '.join(
1657 for exe, v in sorted(exe_versions.items())
1662 self._write_string('[debug] exe versions: %s\n' % exe_str)
1665 for handler in self._opener.handlers:
1666 if hasattr(handler, 'proxies'):
1667 proxy_map.update(handler.proxies)
1668 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
# --call-home: contact yt-dl.org for the public IP and latest version.
1670 if self.params.get('call_home', False):
1671 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1672 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1673 latest_version = self.urlopen(
1674 'https://yt-dl.org/latest/version').read().decode('utf-8')
1675 if version_tuple(latest_version) > version_tuple(__version__):
1676 self.report_warning(
1677 'You are using an outdated version (newest version: %s)! '
1678 'See https://yt-dl.org/update if you need help updating.' %
# Build the urllib opener used by self.urlopen(): cookie jar (optionally
# file-backed), proxy handler, HTTPS handler and the custom YoutubeDLHandler.
1681 def _setup_opener(self):
1682 timeout_val = self.params.get('socket_timeout')
1683 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1685 opts_cookiefile = self.params.get('cookiefile')
1686 opts_proxy = self.params.get('proxy')
# Without --cookies, use an in-memory jar; otherwise a Mozilla-format
# jar backed by the given file (loaded if readable).
1688 if opts_cookiefile is None:
1689 self.cookiejar = compat_cookiejar.CookieJar()
1691 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1693 if os.access(opts_cookiefile, os.R_OK):
1694 self.cookiejar.load()
1696 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
# --proxy: explicit proxy for both http and https; empty string means
# "no proxy". Otherwise fall back to environment proxies.
1698 if opts_proxy is not None:
1699 if opts_proxy == '':
1702 proxies = {'http': opts_proxy, 'https': opts_proxy}
1704 proxies = compat_urllib_request.getproxies()
1705 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1706 if 'http' in proxies and 'https' not in proxies:
1707 proxies['https'] = proxies['http']
1708 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1710 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1711 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1712 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1713 opener = compat_urllib_request.build_opener(
1714 https_handler, proxy_handler, cookie_processor, ydlh)
1715 # Delete the default user-agent header, which would otherwise apply in
1716 # cases where our custom HTTP handler doesn't come into play
1717 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1718 opener.addheaders = []
1719 self._opener = opener
# Encode a text string with the configured output encoding; bytes pass
# through unchanged. On failure the UnicodeEncodeError is augmented with a
# hint about --encoding.
1721 def encode(self, s):
1722 if isinstance(s, bytes):
1723 return s # Already encoded
1726 return s.encode(self.get_encoding())
1727 except UnicodeEncodeError as err:
1728 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
def get_encoding(self):
    """Return the output character encoding.

    The user-supplied 'encoding' param wins; otherwise fall back to the
    locale's preferred encoding.
    """
    chosen = self.params.get('encoding')
    if chosen is not None:
        return chosen
    return preferredencoding()
# Download thumbnail image(s) next to the video file. --write-thumbnail
# saves only the last (best) entry; --write-all-thumbnails saves every one.
1737 def _write_thumbnails(self, info_dict, filename):
1738 if self.params.get('writethumbnail', False):
1739 thumbnails = info_dict.get('thumbnails')
# Thumbnails are sorted ascending, so the last one is the best.
1741 thumbnails = [thumbnails[-1]]
1742 elif self.params.get('write_all_thumbnails', False):
1743 thumbnails = info_dict.get('thumbnails')
1748 # No thumbnails present, so return immediately
1751 for t in thumbnails:
1752 thumb_ext = determine_ext(t['url'], 'jpg')
# Only disambiguate file names / log lines when saving several.
1753 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1754 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1755 thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1757 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1758 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1759 (info_dict['extractor'], info_dict['id'], thumb_display_id))
1761 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1762 (info_dict['extractor'], info_dict['id'], thumb_display_id))
1764 uf = self.urlopen(t['url'])
1765 with open(thumb_filename, 'wb') as thumbf:
1766 shutil.copyfileobj(uf, thumbf)
1767 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1768 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
# Thumbnail failures are non-fatal: warn and continue.
1769 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1770 self.report_warning('Unable to download thumbnail "%s": %s' %
1771 (t['url'], compat_str(err)))