2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
35 compat_urllib_request,
64 UnavailableVideoError,
74 from .cache import Cache
75 from .extractor import get_info_extractor, gen_extractors
76 from .downloader import get_suitable_downloader
77 from .downloader.rtmp import rtmpdump_version
78 from .postprocessor import (
80 FFmpegFixupStretchedPP,
85 from .version import __version__
88 class YoutubeDL(object):
    YoutubeDL objects are the ones responsible for downloading the
92 actual video file and writing it to disk if the user has requested
93 it, among some other tasks. In most cases there should be one per
94 program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors do), it
96 has to pass the URL to one of them.
98 For this, YoutubeDL objects have a method that allows
99 InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it over to the first InfoExtractor it
101 finds that reports being able to handle it. The InfoExtractor extracts
102 all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
104 Downloader to download the video.
106 YoutubeDL objects accept a lot of parameters. In order not to saturate
107 the object constructor with arguments, it receives a dictionary of
108 options instead. These options are available through the params
109 attribute for the InfoExtractors to use. The YoutubeDL also
110 registers itself as the downloader in charge for the InfoExtractors
111 that are added to it, so this is a "mutual registration".
115 username: Username for authentication purposes.
116 password: Password for authentication purposes.
    videopassword: Password for accessing a video.
118 usenetrc: Use netrc for authentication instead.
119 verbose: Print additional info to stdout.
120 quiet: Do not print messages to stdout.
121 no_warnings: Do not print out anything for warnings.
122 forceurl: Force printing final URL.
123 forcetitle: Force printing title.
124 forceid: Force printing ID.
125 forcethumbnail: Force printing thumbnail URL.
126 forcedescription: Force printing description.
127 forcefilename: Force printing final filename.
128 forceduration: Force printing duration.
129 forcejson: Force printing info_dict as JSON.
130 dump_single_json: Force printing the info_dict of the whole playlist
131 (or video) as a single JSON line.
132 simulate: Do not download the video files.
133 format: Video format code. See options.py for more information.
134 format_limit: Highest quality format to try.
135 outtmpl: Template for output names.
136 restrictfilenames: Do not allow "&" and spaces in file names
137 ignoreerrors: Do not stop on download errors.
138 nooverwrites: Prevent overwriting files.
139 playliststart: Playlist item to start at.
140 playlistend: Playlist item to end at.
141 playlist_items: Specific indices of playlist to download.
142 playlistreverse: Download playlist items in reverse order.
143 matchtitle: Download only matching titles.
144 rejecttitle: Reject downloads for matching titles.
145 logger: Log messages to a logging.Logger instance.
146 logtostderr: Log messages to stderr instead of stdout.
147 writedescription: Write the video description to a .description file
148 writeinfojson: Write the video description to a .info.json file
149 writeannotations: Write the video annotations to a .annotations.xml file
150 writethumbnail: Write the thumbnail image to a file
151 write_all_thumbnails: Write all thumbnail formats to files
152 writesubtitles: Write the video subtitles to a file
153 writeautomaticsub: Write the automatic subtitles to a file
154 allsubtitles: Downloads all the subtitles of the video
155 (requires writesubtitles or writeautomaticsub)
156 listsubtitles: Lists all available subtitles for the video
157 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
158 subtitleslangs: List of languages of the subtitles to download
159 keepvideo: Keep the video file after post-processing
160 daterange: A DateRange object, download only if the upload_date is in the range.
161 skip_download: Skip the actual download of the video file
162 cachedir: Location of the cache files in the filesystem.
163 False to disable filesystem cache.
164 noplaylist: Download single video instead of a playlist if in doubt.
165 age_limit: An integer representing the user's age in years.
166 Unsuitable videos for the given age are skipped.
167 min_views: An integer representing the minimum view count the video
168 must have in order to not be skipped.
169 Videos without view count information are always
170 downloaded. None for no limit.
171 max_views: An integer representing the maximum view count.
172 Videos that are more popular than that are not
174 Videos without view count information are always
175 downloaded. None for no limit.
176 download_archive: File name of a file where all downloads are recorded.
177 Videos already present in the file are not downloaded
179 cookiefile: File name where cookies should be read from and dumped to.
180 nocheckcertificate:Do not verify SSL certificates
181 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
182 At the moment, this is only supported by YouTube.
183 proxy: URL of the proxy server to use
184 socket_timeout: Time to wait for unresponsive hosts, in seconds
185 bidi_workaround: Work around buggy terminals without bidirectional text
    support, using fribidi
187 debug_printtraffic:Print out sent and received HTTP traffic
188 include_ads: Download ads as well
189 default_search: Prepend this string if an input url is not valid.
190 'auto' for elaborate guessing
191 encoding: Use this encoding instead of the system-specified.
192 extract_flat: Do not resolve URLs, return the immediate result.
193 Pass in 'in_playlist' to only show this behavior for
195 postprocessors: A list of dictionaries, each with an entry
196 * key: The name of the postprocessor. See
197 youtube_dl/postprocessor/__init__.py for a list.
198 as well as any further keyword arguments for the
200 progress_hooks: A list of functions that get called on download
201 progress, with a dictionary with the entries
202 * status: One of "downloading" and "finished".
203 Check this first and ignore unknown values.
205 If status is one of "downloading" or "finished", the
206 following properties may also be present:
207 * filename: The final filename (always present)
208 * downloaded_bytes: Bytes on disk
209 * total_bytes: Size of the whole file, None if unknown
210 * tmpfilename: The filename we're currently writing to
211 * eta: The estimated time in seconds, None if unknown
212 * speed: The download speed in bytes/second, None if
215 Progress hooks are guaranteed to be called at least once
216 (with status "finished") if the download is successful.
217 merge_output_format: Extension to use when merging formats.
218 fixup: Automatically correct known faults of the file.
220 - "never": do nothing
221 - "warn": only emit a warning
222 - "detect_or_warn": check whether we can do anything
223 about it, warn otherwise (default)
224 source_address: (Experimental) Client-side IP address to bind to.
225 call_home: Boolean, true iff we are allowed to contact the
226 youtube-dl servers for debugging.
227 sleep_interval: Number of seconds to sleep before each download.
228 listformats: Print an overview of available video formats and exit.
229 list_thumbnails: Print a table of all thumbnails and exit.
230 match_filter: A function that gets called with the info_dict of
232 If it returns a message, the video is ignored.
233 If it returns None, the video is downloaded.
234 match_filter_func in utils.py is one example for this.
235 no_color: Do not emit color codes in output.
237 The following options determine which downloader is picked:
238 external_downloader: Executable of the external downloader to call.
239 None or unset for standard (built-in) downloader.
240 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
242 The following parameters are not used by YoutubeDL itself, they are used by
244 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
245 noresizebuffer, retries, continuedl, noprogress, consoletitle,
248 The following options are used by the post processors:
249 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
250 otherwise prefer avconv.
251 exec_cmd: Arbitrary command to run after downloading
    # Class-level defaults; both are re-initialized to 0 per instance in __init__.
    _download_retcode = None  # exit code to report (trouble() sets it to 1 on error)
    _num_downloads = None     # running count of downloads, used for %(autonumber)s
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options."""
        # Cache of instantiated InfoExtractors, keyed by their ie_key().
        self._ies_instances = {}
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Screen output goes to stderr when 'logtostderr' is set
        # (the boolean indexes the [stdout, stderr] pair).
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.cache = Cache(self)
        if params.get('bidi_workaround', False):
            # Pipe our output through an external bidi filter (bidiv, falling
            # back to fribidi) attached to a pty so RTL text renders correctly.
            master, slave = pty.openpty()
            width = get_term_width()
            width_args = ['-w', str(width)]
            stdin=subprocess.PIPE,
            stderr=self._err_file)
            self._output_process = subprocess.Popen(
                ['bidiv'] + width_args, **sp_kwargs
            # Fallback filter when 'bidiv' is unavailable.
            self._output_process = subprocess.Popen(
                ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
            self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True
        if '%(stitle)s' in self.params.get('outtmpl', ''):
            self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
        self.print_debug_header()
        self.add_default_info_extractors()
        # Instantiate configured postprocessors: 'key' selects the PP class,
        # the remaining dict entries are passed through as keyword arguments.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)
        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)
    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        # Collect argv positions that look like bare 11-character video IDs
        # beginning with '-', which the option parser would swallow as flags,
        # then suggest a corrected command line with '--' separating options
        # from URLs/IDs.
        i for i, a in enumerate(argv)
        if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        [a for i, a in enumerate(argv) if i not in idxs] +
        ['--'] + [argv[i] for i in idxs]
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(correct_argv))
    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        # Register the instance under its key and hand it a back-reference
        # to this downloader ("mutual registration", see class docstring).
        self._ies_instances[ie.ie_key()] = ie
        ie.set_downloader(self)
    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key, it will try to get one from
        the _ies list, if there's no instance it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
            # Not instantiated yet: build it lazily and register it.
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractors():
            self.add_info_extractor(ie)
    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        # Give the postprocessor a back-reference to this downloader.
        pp.set_downloader(self)
378 def add_progress_hook(self, ph):
379 """Add the progress hook (currently only for the file downloader)"""
380 self._progress_hooks.append(ph)
    def _bidi_workaround(self, message):
        # No-op path: __init__ only creates _output_channel when the
        # bidi_workaround option was enabled and the filter process started.
        if not hasattr(self, '_output_channel'):
        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        # Feed the message through the external bidi filter process and read
        # back exactly as many lines as we wrote.
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        return res[:-len('\n')]  # drop the trailing newline we appended above
395 def to_screen(self, message, skip_eol=False):
396 """Print message to stdout if not in quiet mode."""
397 return self.to_stdout(message, skip_eol, check_quiet=True)
399 def _write_string(self, s, out=None):
400 write_string(s, out=out, encoding=self.params.get('encoding'))
    def to_stdout(self, message, skip_eol=False, check_quiet=False):
        """Print message to stdout if not in quiet mode."""
        if self.params.get('logger'):
            # A configured logger takes precedence over direct screen output.
            self.params['logger'].debug(message)
        elif not check_quiet or not self.params.get('quiet', False):
            message = self._bidi_workaround(message)
            terminator = ['\n', ''][skip_eol]  # bool-indexed: skip_eol drops the newline
            output = message + terminator
            self._write_string(output, self._screen_file)
    def to_stderr(self, message):
        """Print message to stderr."""
        assert isinstance(message, compat_str)
        if self.params.get('logger'):
            # Route through the user's logger when one is configured.
            self.params['logger'].error(message)
            message = self._bidi_workaround(message)
            output = message + '\n'
            self._write_string(output, self._err_file)
    def to_console_title(self, message):
        # Only touch the terminal title when the 'consoletitle' option is set.
        if not self.params.get('consoletitle', False):
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # xterm-style OSC escape sequence to set the window title.
            self._write_string('\033]0;%s\007' % message, self._screen_file)
    def save_console_title(self):
        # Skipped entirely unless the user enabled console-title handling.
        if not self.params.get('consoletitle', False):
        if 'TERM' in os.environ:
            # Save the title on stack
            self._write_string('\033[22;0t', self._screen_file)
    def restore_console_title(self):
        # Counterpart of save_console_title(); both are no-ops unless enabled.
        if not self.params.get('consoletitle', False):
        if 'TERM' in os.environ:
            # Restore the title from stack
            self._write_string('\033[23;0t', self._screen_file)
448 self.save_console_title()
    def __exit__(self, *args):
        self.restore_console_title()
        if self.params.get('cookiefile') is not None:
            # Persist session cookies back to the configured cookie file.
            self.cookiejar.save()
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                # Prefer the nested exc_info that ExtractorError-style
                # exceptions carry, then append the current traceback.
                if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                    tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += compat_str(traceback.format_exc())
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = ''.join(tb_data)
        if not self.params.get('ignoreerrors', False):
            # Not ignoring errors: re-raise as DownloadError, preferring the
            # original wrapped exc_info when available.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
    def report_warning(self, message):
        """
        Print the message to stderr, it will be prefixed with 'WARNING:'
        If stderr is a tty file the 'WARNING:' will be colored
        """
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
            if self.params.get('no_warnings'):
            # ANSI yellow prefix only on color-capable, non-Windows ttys.
            if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
                _msg_header = '\033[0;33mWARNING:\033[0m'
                _msg_header = 'WARNING:'
            warning_message = '%s %s' % (_msg_header, message)
            self.to_stderr(warning_message)
    def report_error(self, message, tb=None):
        """
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        """
        # ANSI red prefix only on color-capable, non-Windows ttys.
        if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
            _msg_header = '\033[0;31mERROR:\033[0m'
            _msg_header = 'ERROR:'
        error_message = '%s %s' % (_msg_header, message)
        self.trouble(error_message, tb)
    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            # Fall back to a filename-free message for unencodable names.
            self.to_screen('[download] The file has already been downloaded')
    def prepare_filename(self, info_dict):
        """Generate the output filename."""
        template_dict = dict(info_dict)
        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
        autonumber_templ = '%0' + str(autonumber_size) + 'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        if template_dict.get('playlist_index') is not None:
            # Zero-pad the index to the width of the total entry count.
            template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
        if template_dict.get('resolution') is None:
            # Synthesize a resolution string from whatever dimensions we have.
            if template_dict.get('width') and template_dict.get('height'):
                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
            elif template_dict.get('height'):
                template_dict['resolution'] = '%sp' % template_dict['height']
            elif template_dict.get('width'):
                # NOTE(review): '?x%d' renders the known *width* in the height
                # slot; '%dx?' looks intended — confirm against upstream.
                template_dict['resolution'] = '?x%d' % template_dict['width']
        sanitize = lambda k, v: sanitize_filename(
            restricted=self.params.get('restrictfilenames'),
        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items()
        # Missing template fields render as the literal string 'NA'.
        template_dict = collections.defaultdict(lambda: 'NA', template_dict)
        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
        tmpl = compat_expanduser(outtmpl)
        filename = tmpl % template_dict
        # Temporary fix for #4787
        # 'Treat' all problem characters by passing filename through preferredencoding
        # to workaround encoding issues with subprocess on python2 @ Windows
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            filename = encodeFilename(filename, True).decode(preferredencoding())
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
    def _match_entry(self, info_dict, incomplete):
        """ Returns None iff the file should be downloaded """
        # Any non-None return value is a human-readable skip reason.
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date', None)
        # Default DateRange() accepts any date.
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count', None)
        if view_count is not None:
            # Videos without view-count info skip both view-count checks.
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title
        # User-supplied callable gets the final say.
        match_filter = self.params.get('match_filter')
        if match_filter is not None:
            ret = match_filter(info_dict)
609 def add_extra_info(info_dict, extra_info):
610 '''Set the keys from extra_info in info dict if they are missing'''
611 for key, value in extra_info.items():
612 info_dict.setdefault(key, value)
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
        """
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        """
        # With an explicit ie_key only that extractor is tried.
        ies = [self.get_info_extractor(ie_key)]
        if not ie.suitable(url):
        self.report_warning('The program functionality for this site has been marked as broken, '
                            'and will probably not work.')
        ie_result = ie.extract(url)
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            '_type': 'compat_list',
            'entries': ie_result,
        self.add_default_extra_info(ie_result, ie, url)
        return self.process_ie_result(ie_result, download, extra_info)
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
        except MaxDownloadsReached:
        except Exception as e:
            # Unexpected errors are only swallowed when ignoreerrors is set.
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
        self.report_error('no suitable InfoExtractor for URL %s' % url)
    def add_default_extra_info(self, ie_result, ie, url):
        # Stamp the result with provenance fields: which extractor produced
        # it and from which URL (existing keys are preserved).
        self.add_extra_info(ie_result, {
            'extractor': ie.IE_NAME,
            'webpage_url_basename': url_basename(url),
            'extractor_key': ie.ie_key(),
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        # Dispatch on the result's '_type' discriminator; plain videos are
        # the default.
        result_type = ie_result.get('_type', 'video')
        if result_type in ('url', 'url_transparent'):
            extract_flat = self.params.get('extract_flat', False)
            # extract_flat: leave URL references unresolved, optionally only
            # when inside a playlist ('in_playlist').
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)
            # Non-None fields from the embedding result override the
            # extracted info, except the type/url markers themselves.
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)
            assert new_result.get('_type') != 'url_transparent'
            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)
            playlist_results = []
            # playliststart is 1-based in params; convert to 0-based slice start.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
            playlistitems_str = self.params.get('playlist_items', None)
            if playlistitems_str is not None:
                # Expand a spec like '1,3,5-7' into individual 1-based indices.
                def iter_playlistitems(format):
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                            yield int(string_segment)
                playlistitems = iter_playlistitems(playlistitems_str)
            ie_entries = ie_result['entries']
            # Entries may be a concrete list, a PagedList, or a lazy iterable;
            # each case selects the requested slice/indices differently.
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                    entries = [ie_entries[i - 1] for i in playlistitems]
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))
                entry_list = list(ie_entries)
                entries = [entry_list[i - 1] for i in playlistitems]
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))
            if self.params.get('playlistreverse', False):
                entries = entries[::-1]
            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                    # Per-entry extra info propagated into each recursive call.
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                entry_result = self.process_ie_result(entry,
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
        elif result_type == 'compat_list':
            # Deprecated legacy result shape: warn and normalize each entry.
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            raise Exception('Invalid result type: %s' % result_type)
    def _apply_format_filter(self, format_spec, available_formats):
        " Returns a tuple of the remaining format_spec and filtered formats "
        # Numeric filter: [key OP value], e.g. [height<=480]. OPERATORS maps
        # the operator token to a comparison callable.
        operator_rex = re.compile(r'''(?x)\s*\[
            (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.search(format_spec)
            comparison_value = int(m.group('value'))
            # Not a plain integer: try parsing as a size ('50k', '10MiB'),
            # then again with an assumed 'B' suffix.
            comparison_value = parse_filesize(m.group('value'))
            if comparison_value is None:
                comparison_value = parse_filesize(m.group('value') + 'B')
            if comparison_value is None:
                'Invalid value %r in format specification %r' % (
                    m.group('value'), format_spec))
            op = OPERATORS[m.group('op')]
            # String filter fallback: [key=value] on ext/acodec/vcodec/....
            str_operator_rex = re.compile(r'''(?x)\s*\[
                \s*(?P<key>ext|acodec|vcodec|container|protocol)
                \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
                \s*(?P<value>[a-zA-Z0-9_-]+)
                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
            m = str_operator_rex.search(format_spec)
                comparison_value = m.group('value')
                op = STR_OPERATORS[m.group('op')]
                raise ValueError('Invalid format specification %r' % format_spec)
            # A trailing '?' ('none_inclusive') keeps formats missing the key.
            actual_value = f.get(m.group('key'))
            if actual_value is None:
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
        new_formats = [f for f in available_formats if _filter(f)]
        # Strip the consumed trailing [filter] from the spec; an empty
        # remainder means plain 'best'.
        new_format_spec = format_spec[:-len(m.group(0))]
        if not new_format_spec:
            new_format_spec = 'best'
        return (new_format_spec, new_formats)
    def select_format(self, format_spec, available_formats):
        # Peel off trailing [filter] clauses first; each pass narrows the
        # candidate list. Formats are assumed sorted worst-to-best, so -1 is
        # the best and 0 the worst candidate.
        while format_spec.endswith(']'):
            format_spec, available_formats = self._apply_format_filter(
                format_spec, available_formats)
            if not available_formats:
        if format_spec == 'best' or format_spec is None:
            return available_formats[-1]
        elif format_spec == 'worst':
            return available_formats[0]
        elif format_spec == 'bestaudio':
            # Audio-only formats are those with no video codec.
                f for f in available_formats
                if f.get('vcodec') == 'none']
                return audio_formats[-1]
        elif format_spec == 'worstaudio':
                f for f in available_formats
                if f.get('vcodec') == 'none']
                return audio_formats[0]
        elif format_spec == 'bestvideo':
            # Video-only formats are those with no audio codec.
                f for f in available_formats
                if f.get('acodec') == 'none']
                return video_formats[-1]
        elif format_spec == 'worstvideo':
                f for f in available_formats
                if f.get('acodec') == 'none']
                return video_formats[0]
        # Otherwise the spec is either a file extension or a format_id.
        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
        if format_spec in extensions:
            filter_f = lambda f: f['ext'] == format_spec
            filter_f = lambda f: f['format_id'] == format_spec
        matches = list(filter(filter_f, available_formats))
    def _calc_headers(self, info_dict):
        # Start from the global default headers, overlay any format-specific
        # 'http_headers', and attach cookies computed for this URL.
        res = std_headers.copy()
        add_headers = info_dict.get('http_headers')
        res.update(add_headers)
        cookies = self._calc_cookies(info_dict)
        res['Cookie'] = cookies
956 def _calc_cookies(self, info_dict):
957 pr = compat_urllib_request.Request(info_dict['url'])
958 self.cookiejar.add_cookie_header(pr)
959 return pr.get_header('Cookie')
# Validate and normalize a single extracted video result, pick the format(s)
# to download according to the 'format' param, then hand each chosen format
# to process_info().
# NOTE(review): this chunk appears to be an extract with interior lines
# missing (blank lines and some branches); comments describe apparent intent
# and should be confirmed against the full file.
961 def process_video_result(self, info_dict, download=True):
962 assert info_dict.get('_type', 'video') == 'video'
# 'id' and 'title' are mandatory fields in every extractor result.
964 if 'id' not in info_dict:
965 raise ExtractorError('Missing "id" field in extractor result')
966 if 'title' not in info_dict:
967 raise ExtractorError('Missing "title" field in extractor result')
969 if 'playlist' not in info_dict:
970 # It isn't part of a playlist
971 info_dict['playlist'] = None
972 info_dict['playlist_index'] = None
# Normalize thumbnails: wrap a lone 'thumbnail' URL into the 'thumbnails'
# list, then sort by preference/size so the last entry is the best one.
974 thumbnails = info_dict.get('thumbnails')
975 if thumbnails is None:
976 thumbnail = info_dict.get('thumbnail')
978 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
980 thumbnails.sort(key=lambda t: (
981 t.get('preference'), t.get('width'), t.get('height'),
982 t.get('id'), t.get('url')))
983 for i, t in enumerate(thumbnails):
984 if 'width' in t and 'height' in t:
985 t['resolution'] = '%dx%d' % (t['width'], t['height'])
986 if t.get('id') is None:
989 if thumbnails and 'thumbnail' not in info_dict:
990 info_dict['thumbnail'] = thumbnails[-1]['url']
992 if 'display_id' not in info_dict and 'id' in info_dict:
993 info_dict['display_id'] = info_dict['id']
# Derive upload_date (YYYYMMDD, UTC) from 'timestamp' when absent.
995 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
996 # Working around negative timestamps in Windows
997 # (see http://bugs.python.org/issue1646728)
998 if info_dict['timestamp'] < 0 and os.name == 'nt':
999 info_dict['timestamp'] = 0
1000 upload_date = datetime.datetime.utcfromtimestamp(
1001 info_dict['timestamp'])
1002 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1004 # This extractors handle format selection themselves
1005 if info_dict['extractor'] in ['Youku']:
1007 self.process_info(info_dict)
1010 # We now pick which formats have to be downloaded
1011 if info_dict.get('formats') is None:
1012 # There's only one format available
1013 formats = [info_dict]
1015 formats = info_dict['formats']
1018 raise ExtractorError('No video formats found!')
1020 # We check that all the formats have the format and format_id fields
1021 for i, format in enumerate(formats):
1022 if 'url' not in format:
1023 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1025 if format.get('format_id') is None:
1026 format['format_id'] = compat_str(i)
1027 if format.get('format') is None:
1028 format['format'] = '{id} - {res}{note}'.format(
1029 id=format['format_id'],
1030 res=self.format_resolution(format),
1031 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1033 # Automatically determine file extension if missing
1034 if 'ext' not in format:
1035 format['ext'] = determine_ext(format['url']).lower()
1036 # Add HTTP headers, so that external programs can use them from the
1038 full_format_info = info_dict.copy()
1039 full_format_info.update(format)
1040 format['http_headers'] = self._calc_headers(full_format_info)
# Legacy --format-limit support: drop formats after the limit id.
1042 format_limit = self.params.get('format_limit', None)
1044 formats = list(takewhile_inclusive(
1045 lambda f: f['format_id'] != format_limit, formats
1048 # TODO Central sorting goes here
1050 if formats[0] is not info_dict:
1051 # only set the 'formats' fields if the original info_dict list them
1052 # otherwise we end up with a circular reference, the first (and unique)
1053 # element in the 'formats' field in info_dict is info_dict itself,
1054 # wich can't be exported to json
1055 info_dict['formats'] = formats
1056 if self.params.get('listformats'):
1057 self.list_formats(info_dict)
1059 if self.params.get('list_thumbnails'):
1060 self.list_thumbnails(info_dict)
# Parse the requested format expression: comma-separated groups, '/' for
# first-available alternatives, 'a+b' for a video+audio merge.
1063 req_format = self.params.get('format')
1064 if req_format is None:
1066 formats_to_download = []
1067 # The -1 is for supporting YoutubeIE
1068 if req_format in ('-1', 'all'):
1069 formats_to_download = formats
1071 for rfstr in req_format.split(','):
1072 # We can accept formats requested in the format: 34/5/best, we pick
1073 # the first that is available, starting from left
1074 req_formats = rfstr.split('/')
1075 for rf in req_formats:
1076 if re.match(r'.+?\+.+?', rf) is not None:
1077 # Two formats have been requested like '137+139'
1078 format_1, format_2 = rf.split('+')
1079 formats_info = (self.select_format(format_1, formats),
1080 self.select_format(format_2, formats))
1081 if all(formats_info):
1082 # The first format must contain the video and the
1084 if formats_info[0].get('vcodec') == 'none':
1085 self.report_error('The first format must '
1086 'contain the video, try using '
1087 '"-f %s+%s"' % (format_2, format_1))
# Build a synthetic merged format: video fields from the first
# component, audio fields from the second.
1090 formats_info[0]['ext']
1091 if self.params.get('merge_output_format') is None
1092 else self.params['merge_output_format'])
1094 'requested_formats': formats_info,
1095 'format': '%s+%s' % (formats_info[0].get('format'),
1096 formats_info[1].get('format')),
1097 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1098 formats_info[1].get('format_id')),
1099 'width': formats_info[0].get('width'),
1100 'height': formats_info[0].get('height'),
1101 'resolution': formats_info[0].get('resolution'),
1102 'fps': formats_info[0].get('fps'),
1103 'vcodec': formats_info[0].get('vcodec'),
1104 'vbr': formats_info[0].get('vbr'),
1105 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1106 'acodec': formats_info[1].get('acodec'),
1107 'abr': formats_info[1].get('abr'),
1111 selected_format = None
1113 selected_format = self.select_format(rf, formats)
1114 if selected_format is not None:
1115 formats_to_download.append(selected_format)
1117 if not formats_to_download:
1118 raise ExtractorError('requested format not available',
1122 if len(formats_to_download) > 1:
1123 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1124 for format in formats_to_download:
1125 new_info = dict(info_dict)
1126 new_info.update(format)
1127 self.process_info(new_info)
1128 # We update the info dict with the best quality format (backwards compatibility)
1129 info_dict.update(formats_to_download[-1])
# Download one fully-resolved video: enforce --max-downloads, print the
# force* fields, write description/annotations/subtitles/info-json/thumbnails,
# download the media (merging a+b requests), apply fixups, then post-process
# and record the archive entry.
# NOTE(review): this chunk appears to be an extract with interior lines
# missing (several try:/else:/return lines); comments describe apparent
# intent and should be confirmed against the full file.
1132 def process_info(self, info_dict):
1133 """Process a single resolved IE result."""
1135 assert info_dict.get('_type', 'video') == 'video'
1137 max_downloads = self.params.get('max_downloads')
1138 if max_downloads is not None:
1139 if self._num_downloads >= int(max_downloads):
1140 raise MaxDownloadsReached()
# Titles are truncated to 200 chars; 'fulltitle' preserves the original.
1142 info_dict['fulltitle'] = info_dict['title']
1143 if len(info_dict['title']) > 200:
1144 info_dict['title'] = info_dict['title'][:197] + '...'
1146 # Keep for backwards compatibility
1147 info_dict['stitle'] = info_dict['title']
1149 if 'format' not in info_dict:
1150 info_dict['format'] = info_dict['ext']
# Skip entries filtered out by --match-title/--reject-title/date filters etc.
1152 reason = self._match_entry(info_dict, incomplete=False)
1153 if reason is not None:
1154 self.to_screen('[download] ' + reason)
1157 self._num_downloads += 1
1159 info_dict['_filename'] = filename = self.prepare_filename(info_dict)
# Forced printings (--get-title, --get-id, --get-url, ...).
1162 if self.params.get('forcetitle', False):
1163 self.to_stdout(info_dict['fulltitle'])
1164 if self.params.get('forceid', False):
1165 self.to_stdout(info_dict['id'])
1166 if self.params.get('forceurl', False):
1167 if info_dict.get('requested_formats') is not None:
1168 for f in info_dict['requested_formats']:
1169 self.to_stdout(f['url'] + f.get('play_path', ''))
1171 # For RTMP URLs, also include the playpath
1172 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1173 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1174 self.to_stdout(info_dict['thumbnail'])
1175 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1176 self.to_stdout(info_dict['description'])
1177 if self.params.get('forcefilename', False) and filename is not None:
1178 self.to_stdout(filename)
1179 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1180 self.to_stdout(formatSeconds(info_dict['duration']))
1181 if self.params.get('forceformat', False):
1182 self.to_stdout(info_dict['format'])
1183 if self.params.get('forcejson', False):
1184 self.to_stdout(json.dumps(info_dict))
1186 # Do nothing else if in simulate mode
1187 if self.params.get('simulate', False):
1190 if filename is None:
# Ensure the target directory exists before writing any side files.
1194 dn = os.path.dirname(encodeFilename(filename))
1195 if dn and not os.path.exists(dn):
1197 except (OSError, IOError) as err:
1198 self.report_error('unable to create directory ' + compat_str(err))
# Side file: video description (--write-description).
1201 if self.params.get('writedescription', False):
1202 descfn = filename + '.description'
1203 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1204 self.to_screen('[info] Video description is already present')
1205 elif info_dict.get('description') is None:
1206 self.report_warning('There\'s no description to write.')
1209 self.to_screen('[info] Writing video description to: ' + descfn)
1210 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1211 descfile.write(info_dict['description'])
1212 except (OSError, IOError):
1213 self.report_error('Cannot write description file ' + descfn)
# Side file: annotations XML (--write-annotations).
1216 if self.params.get('writeannotations', False):
1217 annofn = filename + '.annotations.xml'
1218 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1219 self.to_screen('[info] Video annotations are already present')
1222 self.to_screen('[info] Writing video annotations to: ' + annofn)
1223 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1224 annofile.write(info_dict['annotations'])
1225 except (KeyError, TypeError):
1226 self.report_warning('There are no annotations to write.')
1227 except (OSError, IOError):
1228 self.report_error('Cannot write annotations file: ' + annofn)
# Side files: subtitles (--write-sub / --write-auto-sub).
1231 subtitles_are_requested = any([self.params.get('writesubtitles', False),
1232 self.params.get('writeautomaticsub')])
1234 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1235 # subtitles download errors are already managed as troubles in relevant IE
1236 # that way it will silently go on when used with unsupporting IE
1237 subtitles = info_dict['subtitles']
1238 sub_format = self.params.get('subtitlesformat', 'srt')
1239 for sub_lang in subtitles.keys():
1240 sub = subtitles[sub_lang]
1244 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1245 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1246 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1248 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1249 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1251 except (OSError, IOError):
1252 self.report_error('Cannot write subtitles file ' + sub_filename)
# Side file: info JSON (--write-info-json).
1255 if self.params.get('writeinfojson', False):
1256 infofn = os.path.splitext(filename)[0] + '.info.json'
1257 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1258 self.to_screen('[info] Video description metadata is already present')
1260 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1262 write_json_file(info_dict, infofn)
1263 except (OSError, IOError):
1264 self.report_error('Cannot write metadata to JSON file ' + infofn)
1267 self._write_thumbnails(info_dict, filename)
# Actual media download, unless --skip-download was given.
1269 if not self.params.get('skip_download', False):
# dl() helper (apparently defined here): pick a FileDownloader for the
# format, attach progress hooks and run it.
1272 fd = get_suitable_downloader(info, self.params)(self, self.params)
1273 for ph in self._progress_hooks:
1274 fd.add_progress_hook(ph)
1275 if self.params.get('verbose'):
1276 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1277 return fd.download(name, info)
# 'a+b' requests: download each component to its own 'f<id>' file and
# queue an FFmpegMergerPP to join them afterwards.
1279 if info_dict.get('requested_formats') is not None:
1282 merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1283 if not merger.available:
1285 self.report_warning('You have requested multiple '
1286 'formats but ffmpeg or avconv are not installed.'
1287 ' The formats won\'t be merged')
1289 postprocessors = [merger]
1290 for f in info_dict['requested_formats']:
1291 new_info = dict(info_dict)
1293 fname = self.prepare_filename(new_info)
1294 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1295 downloaded.append(fname)
1296 partial_success = dl(fname, new_info)
1297 success = success and partial_success
1298 info_dict['__postprocessors'] = postprocessors
1299 info_dict['__files_to_merge'] = downloaded
1301 # Just a single file
1302 success = dl(filename, info_dict)
1303 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1304 self.report_error('unable to download video data: %s' % str(err))
1306 except (OSError, IOError) as err:
1307 raise UnavailableVideoError(err)
1308 except (ContentTooShortError, ) as err:
1309 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
# Post-download fixups (aspect ratio, DASH m4a container), controlled by
# the 'fixup' param: warn / detect_or_warn / ignore / never.
1314 fixup_policy = self.params.get('fixup')
1315 if fixup_policy is None:
1316 fixup_policy = 'detect_or_warn'
1318 stretched_ratio = info_dict.get('stretched_ratio')
1319 if stretched_ratio is not None and stretched_ratio != 1:
1320 if fixup_policy == 'warn':
1321 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1322 info_dict['id'], stretched_ratio))
1323 elif fixup_policy == 'detect_or_warn':
1324 stretched_pp = FFmpegFixupStretchedPP(self)
1325 if stretched_pp.available:
1326 info_dict.setdefault('__postprocessors', [])
1327 info_dict['__postprocessors'].append(stretched_pp)
1329 self.report_warning(
1330 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1331 info_dict['id'], stretched_ratio))
1333 assert fixup_policy in ('ignore', 'never')
1335 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1336 if fixup_policy == 'warn':
1337 self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1339 elif fixup_policy == 'detect_or_warn':
1340 fixup_pp = FFmpegFixupM4aPP(self)
1341 if fixup_pp.available:
1342 info_dict.setdefault('__postprocessors', [])
1343 info_dict['__postprocessors'].append(fixup_pp)
1345 self.report_warning(
1346 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1349 assert fixup_policy in ('ignore', 'never')
1352 self.post_process(filename, info_dict)
1353 except (PostProcessingError) as err:
1354 self.report_error('postprocessing: %s' % str(err))
1356 self.record_download_archive(info_dict)
def download(self, url_list):
    """Download a given list of URLs.

    Returns self._download_retcode.  Raises SameFileError when several
    URLs would all be written to one fixed output file.
    """
    # NOTE(review): this extract was missing the "'%' not in outtmpl"
    # condition and the try/raise/else scaffolding; restored here.
    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    if (len(url_list) > 1 and
            '%' not in outtmpl
            and self.params.get('max_downloads') != 1):
        # A template without placeholders would make every URL overwrite
        # the same file.
        raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            res = self.extract_info(url)
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise
        else:
            if self.params.get('dump_single_json', False):
                self.to_stdout(json.dumps(res))

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Download using a previously dumped .info.json file.

    Falls back to re-extracting from 'webpage_url' when processing the
    saved info fails with a DownloadError; returns self._download_retcode.
    """
    # NOTE(review): this extract was missing the json.load, try: and
    # else:/raise lines; restored here.
    with io.open(info_filename, 'r', encoding='utf-8') as f:
        info = json.load(f)
    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        webpage_url = info.get('webpage_url')
        if webpage_url is not None:
            self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
            return self.download([webpage_url])
        else:
            raise
    return self._download_retcode
# Run the per-video postprocessor chain ('__postprocessors' queued during
# download, then the globally registered self._pps) and optionally delete
# the original file afterwards (unless -k/--keepvideo).
# NOTE(review): this chunk appears to be an extract with interior lines
# missing (the pps_chain initialization, try: lines and one branch of the
# keep_video decision); confirm against the full file before editing.
1395 def post_process(self, filename, ie_info):
1396 """Run all the postprocessors on the given file."""
1397 info = dict(ie_info)
1398 info['filepath'] = filename
1400 if ie_info.get('__postprocessors') is not None:
1401 pps_chain.extend(ie_info['__postprocessors'])
1402 pps_chain.extend(self._pps)
1403 for pp in pps_chain:
1405 old_filename = info['filepath']
# Each postprocessor returns (keep_video_wish, updated_info).
1407 keep_video_wish, info = pp.run(info)
1408 if keep_video_wish is not None:
1410 keep_video = keep_video_wish
1411 elif keep_video is None:
1412 # No clear decision yet, let IE decide
1413 keep_video = keep_video_wish
1414 except PostProcessingError as e:
1415 self.report_error(e.msg)
1416 if keep_video is False and not self.params.get('keepvideo', False):
1418 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1419 os.remove(encodeFilename(old_filename))
1420 except (IOError, OSError):
1421 self.report_warning('Unable to remove downloaded video file')
1423 def _make_archive_id(self, info_dict):
1424 # Future-proof against any change in case
1425 # and backwards compatibility with prior versions
1426 extractor = info_dict.get('extractor_key')
1427 if extractor is None:
1428 if 'id' in info_dict:
1429 extractor = info_dict.get('ie_key') # key in a playlist
1430 if extractor is None:
1431 return None # Incomplete video information
1432 return extractor.lower() + ' ' + info_dict['id']
def in_download_archive(self, info_dict):
    """Return True if info_dict's archive id is recorded in --download-archive."""
    # NOTE(review): this extract was missing the guards, try: and return
    # lines; restored here.
    fn = self.params.get('download_archive')
    if fn is None:
        return False

    vid_id = self._make_archive_id(info_dict)
    if vid_id is None:
        return False  # Incomplete video information

    try:
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            for line in archive_file:
                if line.strip() == vid_id:
                    return True
    except IOError as ioe:
        # A missing archive file simply means nothing was recorded yet.
        if ioe.errno != errno.ENOENT:
            raise
    return False
def record_download_archive(self, info_dict):
    """Append info_dict's archive id to the --download-archive file."""
    fn = self.params.get('download_archive')
    # NOTE(review): this extract was missing the no-archive guard; without
    # it the method would crash whenever --download-archive is unset.
    if fn is None:
        return
    vid_id = self._make_archive_id(info_dict)
    assert vid_id
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + '\n')
def format_resolution(format, default='unknown'):
    """Return a human-readable resolution string for a format dict.

    Priority: 'audio only' for audio formats, then an explicit
    'resolution' field, then width/height combinations, else *default*.
    """
    # NOTE(review): this extract was missing the 'audio only' return, the
    # else: branches and the final return; restored here.
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']
    if format.get('height') is not None:
        if format.get('width') is not None:
            res = '%sx%s' % (format['width'], format['height'])
        else:
            res = '%sp' % format['height']
    elif format.get('width') is not None:
        res = '?x%d' % format['width']
    else:
        res = default
    return res
# Build the free-form 'note' column shown by --list-formats: bitrates,
# container, codecs, sample rate and (approximate) filesize.
# NOTE(review): this chunk appears to be an extract with interior lines
# missing (the initial res = '' and several else/separator lines); confirm
# against the full file before editing.
1479 def _format_note(self, fdict):
1481 if fdict.get('ext') in ['f4f', 'f4m']:
1482 res += '(unsupported) '
1483 if fdict.get('format_note') is not None:
1484 res += fdict['format_note'] + ' '
1485 if fdict.get('tbr') is not None:
1486 res += '%4dk ' % fdict['tbr']
1487 if fdict.get('container') is not None:
1490 res += '%s container' % fdict['container']
1491 if (fdict.get('vcodec') is not None and
1492 fdict.get('vcodec') != 'none'):
1495 res += fdict['vcodec']
1496 if fdict.get('vbr') is not None:
1498 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1500 if fdict.get('vbr') is not None:
1501 res += '%4dk' % fdict['vbr']
1502 if fdict.get('fps') is not None:
1503 res += ', %sfps' % fdict['fps']
1504 if fdict.get('acodec') is not None:
1507 if fdict['acodec'] == 'none':
1510 res += '%-5s' % fdict['acodec']
1511 elif fdict.get('abr') is not None:
1515 if fdict.get('abr') is not None:
1516 res += '@%3dk' % fdict['abr']
1517 if fdict.get('asr') is not None:
1518 res += ' (%5dHz)' % fdict['asr']
1519 if fdict.get('filesize') is not None:
1522 res += format_bytes(fdict['filesize'])
1523 elif fdict.get('filesize_approx') is not None:
1526 res += '~' + format_bytes(fdict['filesize_approx'])
# Print the --list-formats table: one row per format (filtered by
# preference), column width sized to the longest format_id, with '(best)'
# appended to the last row when several formats exist.
# NOTE(review): this chunk appears to be an extract with interior lines
# missing (the formats_s list construction and the final to_screen call
# header); confirm against the full file before editing.
1529 def list_formats(self, info_dict):
# Row formatter: format code, extension, resolution, note.
1530 def line(format, idlen=20):
1531 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1532 format['format_id'],
1534 self.format_resolution(format),
1535 self._format_note(format),
1538 formats = info_dict.get('formats', [info_dict])
1539 idlen = max(len('format code'),
1540 max(len(f['format_id']) for f in formats))
1542 line(f, idlen) for f in formats
1543 if f.get('preference') is None or f['preference'] >= -1000]
1544 if len(formats) > 1:
1545 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1547 header_line = line({
1548 'format_id': 'format code', 'ext': 'extension',
1549 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1551 '[info] Available formats for %s:\n%s\n%s' %
1552 (info_dict['id'], header_line, '\n'.join(formats_s)))
def list_thumbnails(self, info_dict):
    """Print the --list-thumbnails table for a single video result."""
    # NOTE(review): this extract was missing the fallback/early-return
    # scaffolding around the single-'thumbnail' case; restored here.
    thumbnails = info_dict.get('thumbnails')
    if not thumbnails:
        # Fall back to a lone 'thumbnail' URL when no list is present.
        tn_url = info_dict.get('thumbnail')
        if tn_url:
            thumbnails = [{'id': '0', 'url': tn_url}]
        else:
            self.to_screen(
                '[info] No thumbnails present for %s' % info_dict['id'])
            return

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])
    self.to_screen(render_table(
        ['ID', 'width', 'height', 'URL'],
        [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
def urlopen(self, req):
    """ Start an HTTP download """
    # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
    # always respected by websites, some tend to give out URLs with non percent-encoded
    # non-ASCII characters (see telemb.py, ard.py [#3412])
    # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
    # To work around aforementioned issue we will replace request's original URL with
    # percent-encoded one
    # NOTE(review): this extract was missing the req_is_string branch of
    # the substitution; restored here.
    req_is_string = isinstance(req, compat_basestring)
    url = req if req_is_string else req.get_full_url()
    url_escaped = escape_url(url)

    # Substitute URL if any change after escaping
    if url != url_escaped:
        if req_is_string:
            req = url_escaped
        else:
            # Rebuild the Request object around the escaped URL, keeping
            # its payload and headers.
            req = compat_urllib_request.Request(
                url_escaped, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

    return self._opener.open(req, timeout=self._socket_timeout)
# Emit the verbose-mode debug banner: encodings, version, git HEAD,
# Python/platform info, external program versions, proxy map, and (with
# --call-home) the public IP plus an update check.
# NOTE(review): this chunk appears to be an extract with interior lines
# missing (early return, try: lines around the git call, the join template);
# confirm against the full file before editing.
1595 def print_debug_header(self):
1596 if not self.params.get('verbose'):
1599 if type('') is not compat_str:
1600 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1601 self.report_warning(
1602 'Your Python is broken! Update to a newer and supported version')
1604 stdout_encoding = getattr(
1605 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1607 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1608 locale.getpreferredencoding(),
1609 sys.getfilesystemencoding(),
1611 self.get_encoding()))
1612 write_string(encoding_str, encoding=None)
1614 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
# Best-effort: report the git revision when running from a checkout.
1616 sp = subprocess.Popen(
1617 ['git', 'rev-parse', '--short', 'HEAD'],
1618 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1619 cwd=os.path.dirname(os.path.abspath(__file__)))
1620 out, err = sp.communicate()
1621 out = out.decode().strip()
1622 if re.match('[0-9a-f]+', out):
1623 self._write_string('[debug] Git HEAD: ' + out + '\n')
1629 self._write_string('[debug] Python version %s - %s\n' % (
1630 platform.python_version(), platform_name()))
# External helper versions (ffmpeg/avconv family plus rtmpdump).
1632 exe_versions = FFmpegPostProcessor.get_versions(self)
1633 exe_versions['rtmpdump'] = rtmpdump_version()
1634 exe_str = ', '.join(
1636 for exe, v in sorted(exe_versions.items())
1641 self._write_string('[debug] exe versions: %s\n' % exe_str)
1644 for handler in self._opener.handlers:
1645 if hasattr(handler, 'proxies'):
1646 proxy_map.update(handler.proxies)
1647 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
# --call-home: report public IP and check for a newer release.
1649 if self.params.get('call_home', False):
1650 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1651 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1652 latest_version = self.urlopen(
1653 'https://yt-dl.org/latest/version').read().decode('utf-8')
1654 if version_tuple(latest_version) > version_tuple(__version__):
1655 self.report_warning(
1656 'You are using an outdated version (newest version: %s)! '
1657 'See https://yt-dl.org/update if you need help updating.' %
# Build self._opener (cookie jar, proxy handling, custom HTTPS/YoutubeDL
# handlers) and self._socket_timeout from the params dict.
# NOTE(review): this chunk appears to be an extract with interior lines
# missing (else: lines, the cookiefile argument, the empty-proxy branch);
# confirm against the full file before editing.
1660 def _setup_opener(self):
1661 timeout_val = self.params.get('socket_timeout')
1662 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1664 opts_cookiefile = self.params.get('cookiefile')
1665 opts_proxy = self.params.get('proxy')
# Cookie jar: in-memory by default, Mozilla-format file with --cookies.
1667 if opts_cookiefile is None:
1668 self.cookiejar = compat_cookiejar.CookieJar()
1670 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1672 if os.access(opts_cookiefile, os.R_OK):
1673 self.cookiejar.load()
1675 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
# Proxy selection: explicit --proxy wins; otherwise environment proxies.
1677 if opts_proxy is not None:
1678 if opts_proxy == '':
1681 proxies = {'http': opts_proxy, 'https': opts_proxy}
1683 proxies = compat_urllib_request.getproxies()
1684 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1685 if 'http' in proxies and 'https' not in proxies:
1686 proxies['https'] = proxies['http']
1687 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1689 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1690 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1691 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1692 opener = compat_urllib_request.build_opener(
1693 https_handler, proxy_handler, cookie_processor, ydlh)
1694 # Delete the default user-agent header, which would otherwise apply in
1695 # cases where our custom HTTP handler doesn't come into play
1696 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1697 opener.addheaders = []
1698 self._opener = opener
def encode(self, s):
    """Encode text to the configured output encoding; bytes pass through.

    Re-raises UnicodeEncodeError with an augmented .reason hinting at the
    --encoding option.
    """
    if isinstance(s, bytes):
        return s  # Already encoded
    # NOTE(review): the try:/raise lines were missing from this extract;
    # restored so the augmented error actually propagates.
    try:
        return s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
        raise
def get_encoding(self):
    """Return the output encoding: the 'encoding' param if set, else the
    locale's preferred encoding."""
    encoding = self.params.get('encoding')
    if encoding is None:
        encoding = preferredencoding()
    # NOTE(review): the return line was missing from this extract.
    return encoding
# Download the video thumbnail(s) next to the output file: the best one for
# --write-thumbnail, the whole list for --write-all-thumbnails.
# NOTE(review): this chunk appears to be an extract with interior lines
# missing (guards, else:, try: lines); confirm against the full file
# before editing.
1716 def _write_thumbnails(self, info_dict, filename):
1717 if self.params.get('writethumbnail', False):
1718 thumbnails = info_dict.get('thumbnails')
# Thumbnails are sorted elsewhere so the last entry is the best one.
1720 thumbnails = [thumbnails[-1]]
1721 elif self.params.get('write_all_thumbnails', False):
1722 thumbnails = info_dict.get('thumbnails')
1727 # No thumbnails present, so return immediately
1730 for t in thumbnails:
1731 thumb_ext = determine_ext(t['url'], 'jpg')
# Only disambiguate file names when more than one thumbnail is written.
1732 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1733 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1734 thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1736 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1737 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1738 (info_dict['extractor'], info_dict['id'], thumb_display_id))
1740 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1741 (info_dict['extractor'], info_dict['id'], thumb_display_id))
1743 uf = self.urlopen(t['url'])
1744 with open(thumb_filename, 'wb') as thumbf:
1745 shutil.copyfileobj(uf, thumbf)
1746 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1747 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1748 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1749 self.report_warning('Unable to download thumbnail "%s": %s' %
1750 (t['url'], compat_str(err)))