2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
34 compat_urllib_request,
61 UnavailableVideoError,
71 from .cache import Cache
72 from .extractor import get_info_extractor, gen_extractors
73 from .downloader import get_suitable_downloader
74 from .downloader.rtmp import rtmpdump_version
75 from .postprocessor import (
76 FFmpegFixupStretchedPP,
81 from .version import __version__
84 class YoutubeDL(object):
87 YoutubeDL objects are the ones responsible of downloading the
88 actual video file and writing it to disk if the user has requested
89 it, among some other tasks. In most cases there should be one per
90 program. As, given a video URL, the downloader doesn't know how to
91 extract all the needed information, a task that InfoExtractors do, it
92 has to pass the URL to one of them.
94 For this, YoutubeDL objects have a method that allows
95 InfoExtractors to be registered in a given order. When it is passed
96 a URL, the YoutubeDL object hands it to the first InfoExtractor it
97 finds that reports being able to handle it. The InfoExtractor extracts
98 all the information about the video or videos the URL refers to, and
99 YoutubeDL processes the extracted information, possibly using a File
100 Downloader to download the video.
102 YoutubeDL objects accept a lot of parameters. In order not to saturate
103 the object constructor with arguments, it receives a dictionary of
104 options instead. These options are available through the params
105 attribute for the InfoExtractors to use. The YoutubeDL also
106 registers itself as the downloader in charge for the InfoExtractors
107 that are added to it, so this is a "mutual registration".
111 username: Username for authentication purposes.
112 password: Password for authentication purposes.
113 videopassword: Password for accessing a video.
114 usenetrc: Use netrc for authentication instead.
115 verbose: Print additional info to stdout.
116 quiet: Do not print messages to stdout.
117 no_warnings: Do not print out anything for warnings.
118 forceurl: Force printing final URL.
119 forcetitle: Force printing title.
120 forceid: Force printing ID.
121 forcethumbnail: Force printing thumbnail URL.
122 forcedescription: Force printing description.
123 forcefilename: Force printing final filename.
124 forceduration: Force printing duration.
125 forcejson: Force printing info_dict as JSON.
126 dump_single_json: Force printing the info_dict of the whole playlist
127 (or video) as a single JSON line.
128 simulate: Do not download the video files.
129 format: Video format code. See options.py for more information.
130 format_limit: Highest quality format to try.
131 outtmpl: Template for output names.
132 restrictfilenames: Do not allow "&" and spaces in file names
133 ignoreerrors: Do not stop on download errors.
134 nooverwrites: Prevent overwriting files.
135 playliststart: Playlist item to start at.
136 playlistend: Playlist item to end at.
137 playlistreverse: Download playlist items in reverse order.
138 matchtitle: Download only matching titles.
139 rejecttitle: Reject downloads for matching titles.
140 logger: Log messages to a logging.Logger instance.
141 logtostderr: Log messages to stderr instead of stdout.
142 writedescription: Write the video description to a .description file
143 writeinfojson: Write the video description to a .info.json file
144 writeannotations: Write the video annotations to a .annotations.xml file
145 writethumbnail: Write the thumbnail image to a file
146 writesubtitles: Write the video subtitles to a file
147 writeautomaticsub: Write the automatic subtitles to a file
148 allsubtitles: Downloads all the subtitles of the video
149 (requires writesubtitles or writeautomaticsub)
150 listsubtitles: Lists all available subtitles for the video
151 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
152 subtitleslangs: List of languages of the subtitles to download
153 keepvideo: Keep the video file after post-processing
154 daterange: A DateRange object, download only if the upload_date is in the range.
155 skip_download: Skip the actual download of the video file
156 cachedir: Location of the cache files in the filesystem.
157 False to disable filesystem cache.
158 noplaylist: Download single video instead of a playlist if in doubt.
159 age_limit: An integer representing the user's age in years.
160 Unsuitable videos for the given age are skipped.
161 min_views: An integer representing the minimum view count the video
162 must have in order to not be skipped.
163 Videos without view count information are always
164 downloaded. None for no limit.
165 max_views: An integer representing the maximum view count.
166 Videos that are more popular than that are not
168 Videos without view count information are always
169 downloaded. None for no limit.
170 download_archive: File name of a file where all downloads are recorded.
171 Videos already present in the file are not downloaded
173 cookiefile: File name where cookies should be read from and dumped to.
174 nocheckcertificate:Do not verify SSL certificates
175 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
176 At the moment, this is only supported by YouTube.
177 proxy: URL of the proxy server to use
178 socket_timeout: Time to wait for unresponsive hosts, in seconds
179 bidi_workaround: Work around buggy terminals without bidirectional text
180 support, using fribidi
181 debug_printtraffic:Print out sent and received HTTP traffic
182 include_ads: Download ads as well
183 default_search: Prepend this string if an input url is not valid.
184 'auto' for elaborate guessing
185 encoding: Use this encoding instead of the system-specified.
186 extract_flat: Do not resolve URLs, return the immediate result.
187 Pass in 'in_playlist' to only show this behavior for
189 postprocessors: A list of dictionaries, each with an entry
190 * key: The name of the postprocessor. See
191 youtube_dl/postprocessor/__init__.py for a list.
192 as well as any further keyword arguments for the
194 progress_hooks: A list of functions that get called on download
195 progress, with a dictionary with the entries
196 * filename: The final filename
197 * status: One of "downloading" and "finished"
199 The dict may also have some of the following entries:
201 * downloaded_bytes: Bytes on disk
202 * total_bytes: Size of the whole file, None if unknown
203 * tmpfilename: The filename we're currently writing to
204 * eta: The estimated time in seconds, None if unknown
205 * speed: The download speed in bytes/second, None if
208 Progress hooks are guaranteed to be called at least once
209 (with status "finished") if the download is successful.
210 merge_output_format: Extension to use when merging formats.
211 fixup: Automatically correct known faults of the file.
213 - "never": do nothing
214 - "warn": only emit a warning
215 - "detect_or_warn": check whether we can do anything
216 about it, warn otherwise
217 source_address: (Experimental) Client-side IP address to bind to.
218 call_home: Boolean, true iff we are allowed to contact the
219 youtube-dl servers for debugging.
220 sleep_interval: Number of seconds to sleep before each download.
223 The following parameters are not used by YoutubeDL itself, they are used by
225 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
226 noresizebuffer, retries, continuedl, noprogress, consoletitle
228 The following options are used by the post processors:
229 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
230 otherwise prefer avconv.
231 exec_cmd: Arbitrary command to run after downloading
# Class-level defaults for per-instance bookkeeping; __init__ replaces
# them with real counters (0) on each instance.
237 _download_retcode = None
238 _num_downloads = None
# Construct the downloader: store options, pick output streams, create the
# cache, optionally set up the bidi terminal workaround, apply a
# filesystem-encoding fallback, and (when auto_init) register default
# extractors plus configured post-processors and progress hooks.
# NOTE(review): this excerpt elides several original lines; visible code
# is kept verbatim.
241 def __init__(self, params=None, auto_init=True):
242 """Create a FileDownloader object with the given options."""
# Per-instance registries/counters (shadow the class-level None defaults).
246 self._ies_instances = {}
248 self._progress_hooks = []
249 self._download_retcode = 0
250 self._num_downloads = 0
# Boolean 'logtostderr' indexes the pair: False -> stdout, True -> stderr.
251 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
252 self._err_file = sys.stderr
254 self.cache = Cache(self)
# Bidirectional-text workaround: pipe output through bidiv (or fribidi as
# fallback) via a pty so RTL text renders correctly on buggy terminals.
256 if params.get('bidi_workaround', False):
259 master, slave = pty.openpty()
260 width = get_term_width()
264 width_args = ['-w', str(width)]
266 stdin=subprocess.PIPE,
268 stderr=self._err_file)
270 self._output_process = subprocess.Popen(
271 ['bidiv'] + width_args, **sp_kwargs
274 self._output_process = subprocess.Popen(
275 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
276 self._output_channel = os.fdopen(master, 'rb')
# NOTE(review): the matching 'try:' for this handler is elided from the excerpt.
277 except OSError as ose:
279 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
# Force --restrict-filenames when the filesystem encoding cannot represent
# arbitrary characters (Python 3 Unicode filesystem API errors, issue #1474).
283 if (sys.version_info >= (3,) and sys.platform != 'win32' and
284 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
285 and not params.get('restrictfilenames', False)):
286 # On Python 3, the Unicode filesystem API will throw errors (#1474)
288 'Assuming --restrict-filenames since file system encoding '
289 'cannot encode all characters. '
290 'Set the LC_ALL environment variable to fix this.')
291 self.params['restrictfilenames'] = True
# Warn about the deprecated %(stitle)s output-template field.
293 if '%(stitle)s' in self.params.get('outtmpl', ''):
294 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
299 self.print_debug_header()
300 self.add_default_info_extractors()
# Instantiate each configured post-processor from its 'key' entry; the
# remaining dict entries become keyword arguments for its constructor.
302 for pp_def_raw in self.params.get('postprocessors', []):
303 pp_class = get_postprocessor(pp_def_raw['key'])
304 pp_def = dict(pp_def_raw)
306 pp = pp_class(self, **compat_kwargs(pp_def))
307 self.add_post_processor(pp)
309 for ph in self.params.get('progress_hooks', []):
310 self.add_progress_hook(ph)
# Detect command-line arguments that look like 11-character YouTube IDs
# starting with '-' (which argument parsers treat as options) and suggest
# the '--'-separated invocation instead.
312 def warn_if_short_id(self, argv):
313 # short YouTube ID starting with dash?
# Collect indices of args matching a dash followed by 10 ID characters.
315 i for i, a in enumerate(argv)
316 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
# Build the corrected argv: options first, then '--', then the ID-like args.
320 [a for i, a in enumerate(argv) if i not in idxs] +
321 ['--'] + [argv[i] for i in idxs]
324 'Long argument string detected. '
325 'Use -- to separate parameters and URLs, like this:\n%s\n' %
326 args_to_str(correct_argv))
# Register an extractor instance: index it by its key and point it back at
# this downloader ("mutual registration" described in the class docstring).
328 def add_info_extractor(self, ie):
329 """Add an InfoExtractor object to the end of the list."""
331 self._ies_instances[ie.ie_key()] = ie
332 ie.set_downloader(self)
# Lazily fetch (or create and register) the extractor instance for ie_key.
334 def get_info_extractor(self, ie_key):
336 Get an instance of an IE with name ie_key, it will try to get one from
337 the _ies list, if there's no instance it will create a new one and add
338 it to the extractor list.
# Look up a cached instance first; only instantiate on a miss.
340 ie = self._ies_instances.get(ie_key)
# get_info_extractor (module-level) returns the class; call it to instantiate.
342 ie = get_info_extractor(ie_key)()
343 self.add_info_extractor(ie)
# Register every extractor produced by gen_extractors().
346 def add_default_info_extractors(self):
348 Add the InfoExtractors returned by gen_extractors to the end of the list
350 for ie in gen_extractors():
351 self.add_info_extractor(ie)
# Register a post-processor and point it back at this downloader.
353 def add_post_processor(self, pp):
354 """Add a PostProcessor object to the end of the chain."""
356 pp.set_downloader(self)
358 def add_progress_hook(self, ph):
359 """Add the progress hook (currently only for the file downloader)"""
360 self._progress_hooks.append(ph)
# Pass `message` through the external bidi process (set up in __init__)
# and return the reordered text; a no-op when the workaround is inactive.
362 def _bidi_workaround(self, message):
# Workaround not configured: _output_channel only exists when
# bidi_workaround was enabled in __init__.
363 if not hasattr(self, '_output_channel'):
366 assert hasattr(self, '_output_process')
367 assert isinstance(message, compat_str)
368 line_count = message.count('\n') + 1
369 self._output_process.stdin.write((message + '\n').encode('utf-8'))
370 self._output_process.stdin.flush()
# Read back exactly as many lines as were written.
371 res = ''.join(self._output_channel.readline().decode('utf-8')
372 for _ in range(line_count))
# Strip the trailing newline added above.
373 return res[:-len('\n')]
375 def to_screen(self, message, skip_eol=False):
376 """Print message to stdout if not in quiet mode."""
377 return self.to_stdout(message, skip_eol, check_quiet=True)
def _write_string(self, s, out=None):
    """Write *s* to *out* using the user-selected output encoding."""
    encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=encoding)
382 def to_stdout(self, message, skip_eol=False, check_quiet=False):
383 """Print message to stdout if not in quiet mode."""
384 if self.params.get('logger'):
385 self.params['logger'].debug(message)
386 elif not check_quiet or not self.params.get('quiet', False):
387 message = self._bidi_workaround(message)
388 terminator = ['\n', ''][skip_eol]
389 output = message + terminator
391 self._write_string(output, self._screen_file)
# Send an error-stream message either to the user logger or to stderr
# (through the bidi workaround when active).
393 def to_stderr(self, message):
394 """Print message to stderr."""
395 assert isinstance(message, compat_str)
396 if self.params.get('logger'):
397 self.params['logger'].error(message)
# NOTE(review): the 'else:' introducing this branch is elided from the excerpt.
399 message = self._bidi_workaround(message)
400 output = message + '\n'
401 self._write_string(output, self._err_file)
# Set the terminal/console window title, if the user enabled it.
403 def to_console_title(self, message):
404 if not self.params.get('consoletitle', False):
# Windows console: use the Win32 API when a console window exists.
406 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
407 # c_wchar_p() might not be necessary if `message` is
408 # already of type unicode()
409 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
# Otherwise use the xterm title escape sequence on capable terminals.
410 elif 'TERM' in os.environ:
411 self._write_string('\033]0;%s\007' % message, self._screen_file)
# Push the current terminal title onto the terminal's title stack
# (xterm escape sequence), so it can be restored later.
413 def save_console_title(self):
414 if not self.params.get('consoletitle', False):
415 if 'TERM' in os.environ:
416 # Save the title on stack
417 self._write_string('\033[22;0t', self._screen_file)
# Pop the previously saved terminal title from the terminal's title stack.
420 def restore_console_title(self):
421 if not self.params.get('consoletitle', False):
422 if 'TERM' in os.environ:
423 # Restore the title from stack
424 self._write_string('\033[23;0t', self._screen_file)
428 self.save_console_title()
431 def __exit__(self, *args):
432 self.restore_console_title()
434 if self.params.get('cookiefile') is not None:
435 self.cookiejar.save()
# Central error handler: print the message (and traceback when verbose),
# then either raise DownloadError or record a non-zero return code,
# depending on the 'ignoreerrors' option.
437 def trouble(self, message=None, tb=None):
438 """Determine action to take when a download problem appears.
440 Depending on if the downloader has been configured to ignore
441 download errors or not, this method may throw an exception or
442 not when errors are found, after printing the message.
444 tb, if given, is additional traceback information.
446 if message is not None:
447 self.to_stderr(message)
448 if self.params.get('verbose'):
# Called from an except block: build the traceback from the live exception.
450 if sys.exc_info()[0]: # if .trouble has been called from an except block
# Some wrapped exceptions carry their original exc_info; include it first.
452 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
453 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
454 tb += compat_str(traceback.format_exc())
# No active exception: fall back to the current stack.
456 tb_data = traceback.format_list(traceback.extract_stack())
457 tb = ''.join(tb_data)
459 if not self.params.get('ignoreerrors', False):
# Prefer the wrapped exception's original exc_info when available.
460 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
461 exc_info = sys.exc_info()[1].exc_info
463 exc_info = sys.exc_info()
464 raise DownloadError(message, exc_info)
# ignoreerrors: remember the failure via the process return code.
465 self._download_retcode = 1
# Emit a warning to the user logger or stderr, colorizing the prefix on
# ANSI-capable terminals; suppressed entirely by 'no_warnings'.
467 def report_warning(self, message):
469 Print the message to stderr, it will be prefixed with 'WARNING:'
470 If stderr is a tty file the 'WARNING:' will be colored
472 if self.params.get('logger') is not None:
473 self.params['logger'].warning(message)
475 if self.params.get('no_warnings'):
# Yellow ANSI prefix on ttys; Windows consoles don't handle these codes here.
477 if self._err_file.isatty() and os.name != 'nt':
478 _msg_header = '\033[0;33mWARNING:\033[0m'
480 _msg_header = 'WARNING:'
481 warning_message = '%s %s' % (_msg_header, message)
482 self.to_stderr(warning_message)
# Prefix the message with (optionally red) 'ERROR:' and delegate the
# raise-or-record decision to trouble().
484 def report_error(self, message, tb=None):
486 Do the same as trouble, but prefixes the message with 'ERROR:', colored
487 in red if stderr is a tty file.
# Red ANSI prefix on ttys; plain text elsewhere (and on Windows).
489 if self._err_file.isatty() and os.name != 'nt':
490 _msg_header = '\033[0;31mERROR:\033[0m'
492 _msg_header = 'ERROR:'
493 error_message = '%s %s' % (_msg_header, message)
494 self.trouble(error_message, tb)
# Tell the user the file already exists; fall back to a filename-free
# message when the name cannot be encoded for the output stream.
496 def report_file_already_downloaded(self, file_name):
497 """Report file has already been fully downloaded."""
499 self.to_screen('[download] %s has already been downloaded' % file_name)
500 except UnicodeEncodeError:
501 self.to_screen('[download] The file has already been downloaded')
# Render the output filename from the outtmpl template: augment a copy of
# info_dict with derived fields (epoch, autonumber, padded playlist_index,
# resolution), sanitize values, and apply the template with 'NA' defaults.
503 def prepare_filename(self, info_dict):
504 """Generate the output filename."""
# Work on a copy so the caller's info_dict is not mutated.
506 template_dict = dict(info_dict)
508 template_dict['epoch'] = int(time.time())
509 autonumber_size = self.params.get('autonumber_size')
510 if autonumber_size is None:
# Zero-padded download counter, e.g. '%05d'.
512 autonumber_templ = '%0' + str(autonumber_size) + 'd'
513 template_dict['autonumber'] = autonumber_templ % self._num_downloads
# Pad playlist_index to the width of the total entry count.
514 if template_dict.get('playlist_index') is not None:
515 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
# Derive a human-readable resolution string when none was provided.
516 if template_dict.get('resolution') is None:
517 if template_dict.get('width') and template_dict.get('height'):
518 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
519 elif template_dict.get('height'):
520 template_dict['resolution'] = '%sp' % template_dict['height']
521 elif template_dict.get('width'):
# NOTE(review): width-only renders as '?x<width>', putting the width in the
# height slot; this looks like it should be '%dx?' — confirm against upstream.
522 template_dict['resolution'] = '?x%d' % template_dict['width']
# Sanitize every string value for use in a filename.
524 sanitize = lambda k, v: sanitize_filename(
526 restricted=self.params.get('restrictfilenames'),
528 template_dict = dict((k, sanitize(k, v))
529 for k, v in template_dict.items()
# Missing template fields render as 'NA' instead of raising KeyError.
531 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
533 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
534 tmpl = compat_expanduser(outtmpl)
535 filename = tmpl % template_dict
# A malformed template surfaces as ValueError from the % operator.
537 except ValueError as err:
538 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
# Decide whether a video should be skipped. Returns None to download, or a
# human-readable skip reason string otherwise.
541 def _match_entry(self, info_dict):
542 """ Returns None iff the file should be downloaded """
# Fall back to the id (or 'video') when no title is available.
544 video_title = info_dict.get('title', info_dict.get('id', 'video'))
545 if 'title' in info_dict:
546 # This can happen when we're just evaluating the playlist
547 title = info_dict['title']
# Title whitelist regex (case-insensitive).
548 matchtitle = self.params.get('matchtitle', False)
550 if not re.search(matchtitle, title, re.IGNORECASE):
551 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
# Title blacklist regex (case-insensitive).
552 rejecttitle = self.params.get('rejecttitle', False)
554 if re.search(rejecttitle, title, re.IGNORECASE):
555 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
# Upload-date window check; the default DateRange accepts everything.
556 date = info_dict.get('upload_date', None)
558 dateRange = self.params.get('daterange', DateRange())
559 if date not in dateRange:
560 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
# View-count limits apply only when the extractor reported a count.
561 view_count = info_dict.get('view_count', None)
562 if view_count is not None:
563 min_views = self.params.get('min_views')
564 if min_views is not None and view_count < min_views:
565 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
566 max_views = self.params.get('max_views')
567 if max_views is not None and view_count > max_views:
568 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
# Age gate: compare the video's age_limit against the configured user age.
569 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
570 return 'Skipping "%s" because it is age restricted' % title
# Skip anything already recorded in the download archive.
571 if self.in_download_archive(info_dict):
572 return '%s has already been recorded in archive' % video_title
def add_extra_info(info_dict, extra_info):
    '''Set the keys from extra_info in info dict if they are missing'''
    for extra_key, extra_value in extra_info.items():
        # setdefault never overwrites a key the extractor already provided
        info_dict.setdefault(extra_key, extra_value)
# Resolve a URL into info dictionaries by trying suitable extractors in
# order, then hand the result to process_ie_result.
# NOTE(review): extra_info={} is a shared mutable default; appears to be
# read-only here but verify no callee mutates it — consider a None sentinel.
581 def extract_info(self, url, download=True, ie_key=None, extra_info={},
584 Returns a list with a dictionary for each video we find.
585 If 'download', also downloads the videos.
586 extra_info is a dict containing the extra values to add to each result
# An explicit ie_key restricts the search to that single extractor.
590 ies = [self.get_info_extractor(ie_key)]
595 if not ie.suitable(url):
# Extractor flagged broken upstream: warn but still try it.
599 self.report_warning('The program functionality for this site has been marked as broken, '
600 'and will probably not work.')
603 ie_result = ie.extract(url)
604 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
# Old-style extractors returned a bare list; wrap it in the compat format.
606 if isinstance(ie_result, list):
607 # Backwards compatibility: old IE result format
609 '_type': 'compat_list',
610 'entries': ie_result,
612 self.add_default_extra_info(ie_result, ie, url)
614 return self.process_ie_result(ie_result, download, extra_info)
617 except ExtractorError as de: # An error we somewhat expected
618 self.report_error(compat_str(de), de.format_traceback())
620 except MaxDownloadsReached:
# Unexpected errors are only swallowed (and reported) under ignoreerrors.
622 except Exception as e:
623 if self.params.get('ignoreerrors', False):
624 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
# No extractor accepted the URL.
629 self.report_error('no suitable InfoExtractor for URL %s' % url)
# Attach standard provenance fields (extractor name/key, URL basename) to
# an extractor result without overwriting existing keys.
631 def add_default_extra_info(self, ie_result, ie, url):
632 self.add_extra_info(ie_result, {
633 'extractor': ie.IE_NAME,
635 'webpage_url_basename': url_basename(url),
636 'extractor_key': ie.ie_key(),
# Dispatch on the result '_type': videos go to process_video_result, URL
# references are re-extracted, playlists are sliced/iterated entry by
# entry, and legacy compat_list results are upgraded in place.
# NOTE(review): extra_info={} is a shared mutable default — verify it is
# never mutated downstream.
639 def process_ie_result(self, ie_result, download=True, extra_info={}):
641 Take the result of the ie(may be modified) and resolve all unresolved
642 references (URLs, playlist items).
644 It will also download the videos if 'download'.
645 Returns the resolved ie_result.
648 result_type = ie_result.get('_type', 'video')
# extract_flat: return the unresolved reference instead of following it.
650 if result_type in ('url', 'url_transparent'):
651 extract_flat = self.params.get('extract_flat', False)
652 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
653 extract_flat is True):
654 if self.params.get('forcejson', False):
655 self.to_stdout(json.dumps(ie_result))
658 if result_type == 'video':
659 self.add_extra_info(ie_result, extra_info)
660 return self.process_video_result(ie_result, download=download)
661 elif result_type == 'url':
662 # We have to add extra_info to the results because it may be
663 # contained in a playlist
664 return self.extract_info(ie_result['url'],
666 ie_key=ie_result.get('ie_key'),
667 extra_info=extra_info)
668 elif result_type == 'url_transparent':
669 # Use the information from the embedding page
670 info = self.extract_info(
671 ie_result['url'], ie_key=ie_result.get('ie_key'),
672 extra_info=extra_info, download=False, process=False)
# Non-None fields from the embedding page override the target's, except
# the structural '_type' and 'url' keys.
674 force_properties = dict(
675 (k, v) for k, v in ie_result.items() if v is not None)
676 for f in ('_type', 'url'):
677 if f in force_properties:
678 del force_properties[f]
679 new_result = info.copy()
680 new_result.update(force_properties)
# A url_transparent result must not resolve to another url_transparent.
682 assert new_result.get('_type') != 'url_transparent'
684 return self.process_ie_result(
685 new_result, download=download, extra_info=extra_info)
686 elif result_type == 'playlist' or result_type == 'multi_video':
687 # We process each entry in the playlist
688 playlist = ie_result.get('title', None) or ie_result.get('id', None)
689 self.to_screen('[download] Downloading playlist: %s' % playlist)
691 playlist_results = []
# Convert 1-based user options into a Python slice.
693 playliststart = self.params.get('playliststart', 1) - 1
694 playlistend = self.params.get('playlistend', None)
695 # For backwards compatibility, interpret -1 as whole list
696 if playlistend == -1:
699 ie_entries = ie_result['entries']
# Entries may be a plain list, a lazy PagedList, or any iterator.
700 if isinstance(ie_entries, list):
701 n_all_entries = len(ie_entries)
702 entries = ie_entries[playliststart:playlistend]
703 n_entries = len(entries)
705 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
706 (ie_result['extractor'], playlist, n_all_entries, n_entries))
707 elif isinstance(ie_entries, PagedList):
708 entries = ie_entries.getslice(
709 playliststart, playlistend)
710 n_entries = len(entries)
712 "[%s] playlist %s: Downloading %d videos" %
713 (ie_result['extractor'], playlist, n_entries))
# Generic iterator: realize only the requested slice.
715 entries = list(itertools.islice(
716 ie_entries, playliststart, playlistend))
717 n_entries = len(entries)
719 "[%s] playlist %s: Downloading %d videos" %
720 (ie_result['extractor'], playlist, n_entries))
722 if self.params.get('playlistreverse', False):
723 entries = entries[::-1]
725 for i, entry in enumerate(entries, 1):
726 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
# Per-entry extra info propagated into each resolved entry.
728 'n_entries': n_entries,
729 'playlist': playlist,
730 'playlist_id': ie_result.get('id'),
731 'playlist_title': ie_result.get('title'),
732 'playlist_index': i + playliststart,
733 'extractor': ie_result['extractor'],
734 'webpage_url': ie_result['webpage_url'],
735 'webpage_url_basename': url_basename(ie_result['webpage_url']),
736 'extractor_key': ie_result['extractor_key'],
# Apply skip filters (title/date/views/archive) before recursing.
739 reason = self._match_entry(entry)
740 if reason is not None:
741 self.to_screen('[download] ' + reason)
744 entry_result = self.process_ie_result(entry,
747 playlist_results.append(entry_result)
748 ie_result['entries'] = playlist_results
750 elif result_type == 'compat_list':
752 'Extractor %s returned a compat_list result. '
753 'It needs to be updated.' % ie_result.get('extractor'))
# _fixup (elided here) upgrades each legacy entry with provenance fields.
759 'extractor': ie_result['extractor'],
760 'webpage_url': ie_result['webpage_url'],
761 'webpage_url_basename': url_basename(ie_result['webpage_url']),
762 'extractor_key': ie_result['extractor_key'],
766 ie_result['entries'] = [
767 self.process_ie_result(_fixup(r), download, extra_info)
768 for r in ie_result['entries']
# Unknown '_type' values are a programming error.
772 raise Exception('Invalid result type: %s' % result_type)
# Consume one trailing "[key OP value]" filter from format_spec and return
# (remaining_spec, formats_that_pass); the OPERATORS table mapping the OP
# tokens to comparison functions is elided from this excerpt.
774 def _apply_format_filter(self, format_spec, available_formats):
775 " Returns a tuple of the remaining format_spec and filtered formats "
# Verbose regex: bracketed key, comparison operator, optional '?' (treat
# missing values as a match), and a numeric value with optional SI suffix.
785 operator_rex = re.compile(r'''(?x)\s*\[
786 (?P<key>width|height|tbr|abr|vbr|filesize)
787 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
788 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
790 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
791 m = operator_rex.search(format_spec)
793 raise ValueError('Invalid format specification %r' % format_spec)
# Plain integers first; fall back to filesize parsing ('10M', then '10MB').
796 comparison_value = int(m.group('value'))
798 comparison_value = parse_filesize(m.group('value'))
799 if comparison_value is None:
800 comparison_value = parse_filesize(m.group('value') + 'B')
801 if comparison_value is None:
803 'Invalid value %r in format specification %r' % (
804 m.group('value'), format_spec))
805 op = OPERATORS[m.group('op')]
# Predicate for one format dict: missing key matches only with '?'.
808 actual_value = f.get(m.group('key'))
809 if actual_value is None:
810 return m.group('none_inclusive')
811 return op(actual_value, comparison_value)
812 new_formats = [f for f in available_formats if _filter(f)]
# Strip the consumed filter from the end of the spec; empty means 'best'.
814 new_format_spec = format_spec[:-len(m.group(0))]
815 if not new_format_spec:
816 new_format_spec = 'best'
818 return (new_format_spec, new_formats)
# Pick one format from available_formats according to format_spec:
# first peel off trailing [..] filters, then interpret the base selector
# (best/worst, audio/video-only variants, an extension, or a format_id).
# Formats are assumed sorted worst-to-best (index -1 is best).
820 def select_format(self, format_spec, available_formats):
# Apply every trailing bracketed filter, narrowing available_formats.
821 while format_spec.endswith(']'):
822 format_spec, available_formats = self._apply_format_filter(
823 format_spec, available_formats)
824 if not available_formats:
827 if format_spec == 'best' or format_spec is None:
828 return available_formats[-1]
829 elif format_spec == 'worst':
830 return available_formats[0]
# Audio-only formats are those with no video codec.
831 elif format_spec == 'bestaudio':
833 f for f in available_formats
834 if f.get('vcodec') == 'none']
836 return audio_formats[-1]
837 elif format_spec == 'worstaudio':
839 f for f in available_formats
840 if f.get('vcodec') == 'none']
842 return audio_formats[0]
# Video-only formats are those with no audio codec.
843 elif format_spec == 'bestvideo':
845 f for f in available_formats
846 if f.get('acodec') == 'none']
848 return video_formats[-1]
849 elif format_spec == 'worstvideo':
851 f for f in available_formats
852 if f.get('acodec') == 'none']
854 return video_formats[0]
# A bare extension selects by container; anything else is a format_id.
856 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
857 if format_spec in extensions:
858 filter_f = lambda f: f['ext'] == format_spec
860 filter_f = lambda f: f['format_id'] == format_spec
861 matches = list(filter(filter_f, available_formats))
# Validate and normalize a single video result, then select the requested
# format(s) and hand each off to process_info for downloading.
866 def process_video_result(self, info_dict, download=True):
867 assert info_dict.get('_type', 'video') == 'video'
# 'id' and 'title' are mandatory extractor outputs.
869 if 'id' not in info_dict:
870 raise ExtractorError('Missing "id" field in extractor result')
871 if 'title' not in info_dict:
872 raise ExtractorError('Missing "title" field in extractor result')
874 if 'playlist' not in info_dict:
875 # It isn't part of a playlist
876 info_dict['playlist'] = None
877 info_dict['playlist_index'] = None
# Sort thumbnails worst-to-best and derive a resolution label.
879 thumbnails = info_dict.get('thumbnails')
881 thumbnails.sort(key=lambda t: (
882 t.get('width'), t.get('height'), t.get('url')))
884 if 'width' in t and 'height' in t:
885 t['resolution'] = '%dx%d' % (t['width'], t['height'])
# Best thumbnail (last after sort) becomes the default 'thumbnail'.
887 if thumbnails and 'thumbnail' not in info_dict:
888 info_dict['thumbnail'] = thumbnails[-1]['url']
890 if 'display_id' not in info_dict and 'id' in info_dict:
891 info_dict['display_id'] = info_dict['id']
893 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
894 # Working around negative timestamps in Windows
895 # (see http://bugs.python.org/issue1646728)
896 if info_dict['timestamp'] < 0 and os.name == 'nt':
897 info_dict['timestamp'] = 0
898 upload_date = datetime.datetime.utcfromtimestamp(
899 info_dict['timestamp'])
900 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
902 # This extractors handle format selection themselves
903 if info_dict['extractor'] in ['Youku']:
905 self.process_info(info_dict)
908 # We now pick which formats have to be downloaded
909 if info_dict.get('formats') is None:
910 # There's only one format available
911 formats = [info_dict]
913 formats = info_dict['formats']
916 raise ExtractorError('No video formats found!')
918 # We check that all the formats have the format and format_id fields
919 for i, format in enumerate(formats):
920 if 'url' not in format:
921 raise ExtractorError('Missing "url" key in result (index %d)' % i)
# Fill in format_id / format label / ext defaults for each entry.
923 if format.get('format_id') is None:
924 format['format_id'] = compat_str(i)
925 if format.get('format') is None:
926 format['format'] = '{id} - {res}{note}'.format(
927 id=format['format_id'],
928 res=self.format_resolution(format),
929 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
931 # Automatically determine file extension if missing
932 if 'ext' not in format:
933 format['ext'] = determine_ext(format['url']).lower()
# format_limit caps quality: keep formats up to and including the limit id.
935 format_limit = self.params.get('format_limit', None)
937 formats = list(takewhile_inclusive(
938 lambda f: f['format_id'] != format_limit, formats
941 # TODO Central sorting goes here
943 if formats[0] is not info_dict:
944 # only set the 'formats' fields if the original info_dict list them
945 # otherwise we end up with a circular reference, the first (and unique)
946 # element in the 'formats' field in info_dict is info_dict itself,
947 # wich can't be exported to json
948 info_dict['formats'] = formats
949 if self.params.get('listformats', None):
950 self.list_formats(info_dict)
953 req_format = self.params.get('format')
954 if req_format is None:
956 formats_to_download = []
957 # The -1 is for supporting YoutubeIE
958 if req_format in ('-1', 'all'):
959 formats_to_download = formats
# Comma-separated groups; within each, '/'-separated fallbacks.
961 for rfstr in req_format.split(','):
962 # We can accept formats requested in the format: 34/5/best, we pick
963 # the first that is available, starting from left
964 req_formats = rfstr.split('/')
965 for rf in req_formats:
# 'A+B' merges a video format with an audio format.
966 if re.match(r'.+?\+.+?', rf) is not None:
967 # Two formats have been requested like '137+139'
968 format_1, format_2 = rf.split('+')
969 formats_info = (self.select_format(format_1, formats),
970 self.select_format(format_2, formats))
971 if all(formats_info):
972 # The first format must contain the video and the
974 if formats_info[0].get('vcodec') == 'none':
975 self.report_error('The first format must '
976 'contain the video, try using '
977 '"-f %s+%s"' % (format_2, format_1))
# Merged container extension: user override or the video part's ext.
980 formats_info[0]['ext']
981 if self.params.get('merge_output_format') is None
982 else self.params['merge_output_format'])
# Synthesized merged-format dict: video fields from part 0, audio from part 1.
984 'requested_formats': formats_info,
986 'ext': formats_info[0]['ext'],
987 'width': formats_info[0].get('width'),
988 'height': formats_info[0].get('height'),
989 'resolution': formats_info[0].get('resolution'),
990 'fps': formats_info[0].get('fps'),
991 'vcodec': formats_info[0].get('vcodec'),
992 'vbr': formats_info[0].get('vbr'),
993 'stretched_ratio': formats_info[0].get('stretched_ratio'),
994 'acodec': formats_info[1].get('acodec'),
995 'abr': formats_info[1].get('abr'),
999 selected_format = None
1001 selected_format = self.select_format(rf, formats)
# First successful selector in the '/' fallback chain wins.
1002 if selected_format is not None:
1003 formats_to_download.append(selected_format)
1005 if not formats_to_download:
1006 raise ExtractorError('requested format not available',
1010 if len(formats_to_download) > 1:
1011 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
# Download each selected format with the format's fields merged in.
1012 for format in formats_to_download:
1013 new_info = dict(info_dict)
1014 new_info.update(format)
1015 self.process_info(new_info)
1016 # We update the info dict with the best quality format (backwards compatibility)
1017 info_dict.update(formats_to_download[-1])
    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Handles forced printing (--get-title & co.), writes the side
        files the user requested (description, annotations, subtitles,
        info JSON, thumbnail), downloads the media unless simulating,
        applies fixups and postprocessors, and records the download in
        the archive.

        NOTE(review): several control-flow lines (``try:``, ``else:``,
        ``return`` and a ``def dl(...)`` helper) appear to have been
        lost from this copy of the function -- the empty if-bodies and
        bare ``except`` clauses below mark the gaps. Restore from
        upstream before relying on this code.
        """

        assert info_dict.get('_type', 'video') == 'video'

        # Enforce --max-downloads before doing any work.
        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()

        # Titles longer than 200 characters are truncated to 197 + '...'.
        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            info_dict['title'] = info_dict['title'][:197] + '...'

        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        # Skip entries rejected by the --match-title family of filters.
        reason = self._match_entry(info_dict)
        if reason is not None:
            self.to_screen('[download] ' + reason)
            # NOTE(review): a 'return' seems to be missing here -- verify.

        self._num_downloads += 1

        filename = self.prepare_filename(info_dict)

        # Forced printings (--get-title, --get-id, --get-url, ...).
        if self.params.get('forcetitle', False):
            self.to_stdout(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            if info_dict.get('requested_formats') is not None:
                for f in info_dict['requested_formats']:
                    self.to_stdout(f['url'] + f.get('play_path', ''))
            # NOTE(review): an 'else:' appears to be missing here -- verify.
            # For RTMP URLs, also include the playpath
            self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
            info_dict['_filename'] = filename
            self.to_stdout(json.dumps(info_dict))
        if self.params.get('dump_single_json', False):
            info_dict['_filename'] = filename

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):

        if filename is None:

            dn = os.path.dirname(encodeFilename(filename))
            if dn and not os.path.exists(dn):
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + compat_str(err))

        if self.params.get('writedescription', False):
            descfn = filename + '.description'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
            elif info_dict.get('description') is None:
                self.report_warning('There\'s no description to write.')
                    self.to_screen('[info] Writing video description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(info_dict['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write description file ' + descfn)

        if self.params.get('writeannotations', False):
            annofn = filename + '.annotations.xml'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    # Absent or non-string annotations: nothing to write.
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['subtitles']
            sub_format = self.params.get('subtitlesformat', 'srt')
            for sub_lang in subtitles.keys():
                sub = subtitles[sub_lang]
                    sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                    if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                        self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                        self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                except (OSError, IOError):
                    self.report_error('Cannot write subtitles file ' + sub_filename)

        if self.params.get('writeinfojson', False):
            infofn = os.path.splitext(filename)[0] + '.info.json'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video description metadata is already present')
                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                    write_json_file(info_dict, infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write metadata to JSON file ' + infofn)

        if self.params.get('writethumbnail', False):
            if info_dict.get('thumbnail') is not None:
                thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
                thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                    self.to_screen('[%s] %s: Thumbnail is already present' %
                                   (info_dict['extractor'], info_dict['id']))
                    self.to_screen('[%s] %s: Downloading thumbnail ...' %
                                   (info_dict['extractor'], info_dict['id']))
                        uf = self.urlopen(info_dict['thumbnail'])
                        with open(thumb_filename, 'wb') as thumbf:
                            shutil.copyfileobj(uf, thumbf)
                        self.to_screen('[%s] %s: Writing thumbnail to: %s' %
                                       (info_dict['extractor'], info_dict['id'], thumb_filename))
                    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                        # Thumbnail failures are non-fatal.
                        self.report_warning('Unable to download thumbnail "%s": %s' %
                                            (info_dict['thumbnail'], compat_str(err)))

        if not self.params.get('skip_download', False):
                    # Download helper: picks a downloader suited to 'info'
                    # and attaches the registered progress hooks.
                    fd = get_suitable_downloader(info)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)
                if info_dict.get('requested_formats') is not None:
                    # Two formats requested (video+audio): download both
                    # and schedule an ffmpeg/avconv merge.
                    merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
                    if not merger._executable:
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged')
                        postprocessors = [merger]
                    for f in info_dict['requested_formats']:
                        new_info = dict(info_dict)
                        fname = self.prepare_filename(new_info)
                        # Disambiguate part files by format id (f<id> prefix
                        # on the extension).
                        fname = prepend_extension(fname, 'f%s' % f['format_id'])
                        downloaded.append(fname)
                        partial_success = dl(fname, new_info)
                        success = success and partial_success
                    info_dict['__postprocessors'] = postprocessors
                    info_dict['__files_to_merge'] = downloaded
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % str(err))
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))

            # Fixup: detect a non-uniform pixel aspect ratio and, policy
            # permitting, schedule an ffmpeg-based correction.
            stretched_ratio = info_dict.get('stretched_ratio')
            if stretched_ratio is not None and stretched_ratio != 1:
                fixup_policy = self.params.get('fixup')
                if fixup_policy is None:
                    fixup_policy = 'detect_or_warn'
                if fixup_policy == 'warn':
                    self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                        info_dict['id'], stretched_ratio))
                elif fixup_policy == 'detect_or_warn':
                    stretched_pp = FFmpegFixupStretchedPP(self)
                    if stretched_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(stretched_pp)
                        self.report_warning(
                            '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
                                info_dict['id'], stretched_ratio))
                    assert fixup_policy == 'ignore'

                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))

        self.record_download_archive(info_dict)
    def download(self, url_list):
        """Download a given list of URLs."""
        # NOTE(review): part of the SameFileError condition and the
        # 'try:' / 'raise' / 'else:' lines of the loop appear to be
        # missing from this copy; restore before relying on it.
        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
        if (len(url_list) > 1 and
                and self.params.get('max_downloads') != 1):
            # Refuse to write several videos to a single fixed filename.
            raise SameFileError(outtmpl)

        for url in url_list:
                # It also downloads the videos
                res = self.extract_info(url)
            except UnavailableVideoError:
                self.report_error('unable to download video')
            except MaxDownloadsReached:
                self.to_screen('[info] Maximum number of downloaded files reached.')
                if self.params.get('dump_single_json', False):
                    self.to_stdout(json.dumps(res))

        return self._download_retcode
    def download_with_info_file(self, info_filename):
        """Process a previously dumped info JSON file (--load-info).

        NOTE(review): the ``info = json.load(f)`` / ``try:`` lines
        appear to be missing from this copy; restore before relying
        on it.
        """
        with io.open(info_filename, 'r', encoding='utf-8') as f:
            self.process_ie_result(info, download=True)
        except DownloadError:
            # Processing a stale info dict may fail (e.g. expired media
            # URLs); fall back to a fresh extraction from the page URL.
            webpage_url = info.get('webpage_url')
            if webpage_url is not None:
                self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
                return self.download([webpage_url])
        return self._download_retcode
    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file."""
        # NOTE(review): the initialisations (pps_chain = [],
        # keep_video = None) and two 'try:' lines appear to be missing
        # from this copy; restore before relying on it.
        info = dict(ie_info)
        info['filepath'] = filename
        if ie_info.get('__postprocessors') is not None:
            # Per-download postprocessors (e.g. the format merger) run
            # before the globally registered ones.
            pps_chain.extend(ie_info['__postprocessors'])
        pps_chain.extend(self._pps)
        for pp in pps_chain:
            old_filename = info['filepath']
                keep_video_wish, info = pp.run(info)
                if keep_video_wish is not None:
                        keep_video = keep_video_wish
                    elif keep_video is None:
                        # No clear decision yet, let IE decide
                        keep_video = keep_video_wish
            except PostProcessingError as e:
                self.report_error(e.msg)
        if keep_video is False and not self.params.get('keepvideo', False):
                self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
                os.remove(encodeFilename(old_filename))
            except (IOError, OSError):
                # Deletion failure is non-fatal: only warn.
                self.report_warning('Unable to remove downloaded video file')
1314 def _make_archive_id(self, info_dict):
1315 # Future-proof against any change in case
1316 # and backwards compatibility with prior versions
1317 extractor = info_dict.get('extractor_key')
1318 if extractor is None:
1319 if 'id' in info_dict:
1320 extractor = info_dict.get('ie_key') # key in a playlist
1321 if extractor is None:
1322 return None # Incomplete video information
1323 return extractor.lower() + ' ' + info_dict['id']
    def in_download_archive(self, info_dict):
        """Return whether this video is already recorded in the archive.

        NOTE(review): the early-return guards, 'try:', 'return True' and
        'raise' lines appear to be missing from this copy; restore
        before relying on it.
        """
        fn = self.params.get('download_archive')
            vid_id = self._make_archive_id(info_dict)
            return False  # Incomplete video information
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    if line.strip() == vid_id:
        except IOError as ioe:
            # A missing archive file (ENOENT) simply means nothing has
            # been recorded yet; any other IO error is unexpected.
            if ioe.errno != errno.ENOENT:
1344 def record_download_archive(self, info_dict):
1345 fn = self.params.get('download_archive')
1348 vid_id = self._make_archive_id(info_dict)
1350 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1351 archive_file.write(vid_id + '\n')
1354 def format_resolution(format, default='unknown'):
1355 if format.get('vcodec') == 'none':
1357 if format.get('resolution') is not None:
1358 return format['resolution']
1359 if format.get('height') is not None:
1360 if format.get('width') is not None:
1361 res = '%sx%s' % (format['width'], format['height'])
1363 res = '%sp' % format['height']
1364 elif format.get('width') is not None:
1365 res = '?x%d' % format['width']
    def _format_note(self, fdict):
        """Build the short human-readable note shown by --list-formats
        (bitrates, codecs, container, fps, filesize).

        NOTE(review): several lines (the initial ``res = ''``, some
        ``else:`` branches and the final ``return res``) appear to be
        missing from this copy; restore before relying on it.
        """
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            # Total bitrate, right-aligned in 4 columns.
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            res += ', %sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if fdict['acodec'] == 'none':
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            # Audio sampling rate in Hz.
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            # '~' marks an approximate size.
            res += '~' + format_bytes(fdict['filesize_approx'])
    def list_formats(self, info_dict):
        """Print the table of available formats for --list-formats.

        NOTE(review): the closing of the ``line`` tuple and the
        ``formats_s = [`` opener appear to be missing from this copy;
        restore before relying on it.
        """
        def line(format, idlen=20):
            # One table row: format id, extension, resolution, note.
            return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
                format['format_id'],
                self.format_resolution(format),
                self._format_note(format),

        formats = info_dict.get('formats', [info_dict])
        # Width of the id column: at least as wide as its header text.
        idlen = max(len('format code'),
                    max(len(f['format_id']) for f in formats))
            line(f, idlen) for f in formats
            if f.get('preference') is None or f['preference'] >= -1000]
        # First and last rows get (worst)/(best) markers -- the list is
        # presumably ordered worst-to-best; verify against the caller.
        if len(formats) > 1:
            formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
            formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'

        header_line = line({
            'format_id': 'format code', 'ext': 'extension',
            'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
        self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                       (info_dict['id'], header_line, '\n'.join(formats_s)))
1445 def urlopen(self, req):
1446 """ Start an HTTP download """
1448 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1449 # always respected by websites, some tend to give out URLs with non percent-encoded
1450 # non-ASCII characters (see telemb.py, ard.py [#3412])
1451 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1452 # To work around aforementioned issue we will replace request's original URL with
1453 # percent-encoded one
1454 req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1455 url = req if req_is_string else req.get_full_url()
1456 url_escaped = escape_url(url)
1458 # Substitute URL if any change after escaping
1459 if url != url_escaped:
1463 req = compat_urllib_request.Request(
1464 url_escaped, data=req.data, headers=req.headers,
1465 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1467 return self._opener.open(req, timeout=self._socket_timeout)
    def print_debug_header(self):
        """Write the '[debug] ...' header lines (verbose mode only).

        NOTE(review): several lines ('return', 'try:'/'except', the
        'encoding_str =' opener, the join expression and a few closing
        lines) appear to be missing from this copy; restore before
        relying on it.
        """
        if not self.params.get('verbose'):

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
            # Best-effort: report the git revision when running from a
            # checkout.
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        exe_versions = FFmpegPostProcessor.get_versions()
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_str = ', '.join(
            for exe, v in sorted(exe_versions.items())
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # Opt-in only: report the public IP and check for a newer
            # release on yt-dl.org.
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
    def _setup_opener(self):
        """Build self._opener: cookie handling, proxies, HTTPS handler.

        NOTE(review): a few lines ('else:' branches and the
        continuation lines of two calls) appear to be missing from
        this copy; restore before relying on it.
        """
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout is 600 seconds (10 minutes).
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # No --cookies file: keep cookies in memory only.
            self.cookiejar = compat_cookiejar.CookieJar()
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        if opts_proxy is not None:
            if opts_proxy == '':
                proxies = {'http': opts_proxy, 'https': opts_proxy}
            # Fall back to the environment-configured proxies.
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = compat_urllib_request.ProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        opener = compat_urllib_request.build_opener(
            https_handler, proxy_handler, cookie_processor, ydlh)
        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
    def encode(self, s):
        """Encode text ``s`` to bytes using the configured output
        encoding; bytes pass through unchanged.

        NOTE(review): the 'try:' and trailing 'raise' lines appear to
        be missing from this copy; restore before relying on it.
        """
        if isinstance(s, bytes):
            return s  # Already encoded

            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            # Make the failure actionable before it propagates.
            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1584 def get_encoding(self):
1585 encoding = self.params.get('encoding')
1586 if encoding is None:
1587 encoding = preferredencoding()