2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
31 compat_urllib_request,
55 UnavailableVideoError,
62 from .cache import Cache
63 from .extractor import get_info_extractor, gen_extractors
64 from .downloader import get_suitable_downloader
65 from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor
66 from .version import __version__
69 class YoutubeDL(object):
72 YoutubeDL objects are the ones responsible of downloading the
73 actual video file and writing it to disk if the user has requested
74 it, among some other tasks. In most cases there should be one per
75 program. As, given a video URL, the downloader doesn't know how to
76 extract all the needed information, a task that InfoExtractors do, it
77 has to pass the URL to one of them.
79 For this, YoutubeDL objects have a method that allows
80 InfoExtractors to be registered in a given order. When it is passed
81 a URL, the YoutubeDL object hands it to the first InfoExtractor it
82 finds that reports being able to handle it. The InfoExtractor extracts
83 all the information about the video or videos the URL refers to, and
84 YoutubeDL processes the extracted information, possibly using a File
85 Downloader to download the video.
87 YoutubeDL objects accept a lot of parameters. In order not to saturate
88 the object constructor with arguments, it receives a dictionary of
89 options instead. These options are available through the params
90 attribute for the InfoExtractors to use. The YoutubeDL also
91 registers itself as the downloader in charge for the InfoExtractors
92 that are added to it, so this is a "mutual registration".
96 username: Username for authentication purposes.
97 password: Password for authentication purposes.
98 videopassword: Password for accessing a video.
99 usenetrc: Use netrc for authentication instead.
100 verbose: Print additional info to stdout.
101 quiet: Do not print messages to stdout.
102 no_warnings: Do not print out anything for warnings.
103 forceurl: Force printing final URL.
104 forcetitle: Force printing title.
105 forceid: Force printing ID.
106 forcethumbnail: Force printing thumbnail URL.
107 forcedescription: Force printing description.
108 forcefilename: Force printing final filename.
109 forceduration: Force printing duration.
110 forcejson: Force printing info_dict as JSON.
111 dump_single_json: Force printing the info_dict of the whole playlist
112 (or video) as a single JSON line.
113 simulate: Do not download the video files.
114 format: Video format code.
115 format_limit: Highest quality format to try.
116 outtmpl: Template for output names.
117 restrictfilenames: Do not allow "&" and spaces in file names
118 ignoreerrors: Do not stop on download errors.
119 nooverwrites: Prevent overwriting files.
120 playliststart: Playlist item to start at.
121 playlistend: Playlist item to end at.
122 matchtitle: Download only matching titles.
123 rejecttitle: Reject downloads for matching titles.
124 logger: Log messages to a logging.Logger instance.
125 logtostderr: Log messages to stderr instead of stdout.
126 writedescription: Write the video description to a .description file
127 writeinfojson: Write the video description to a .info.json file
128 writeannotations: Write the video annotations to a .annotations.xml file
129 writethumbnail: Write the thumbnail image to a file
130 writesubtitles: Write the video subtitles to a file
131 writeautomaticsub: Write the automatic subtitles to a file
132 allsubtitles: Downloads all the subtitles of the video
133 (requires writesubtitles or writeautomaticsub)
134 listsubtitles: Lists all available subtitles for the video
135 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
136 subtitleslangs: List of languages of the subtitles to download
137 keepvideo: Keep the video file after post-processing
138 daterange: A DateRange object, download only if the upload_date is in the range.
139 skip_download: Skip the actual download of the video file
140 cachedir: Location of the cache files in the filesystem.
141 False to disable filesystem cache.
142 noplaylist: Download single video instead of a playlist if in doubt.
143 age_limit: An integer representing the user's age in years.
144 Unsuitable videos for the given age are skipped.
145 min_views: An integer representing the minimum view count the video
146 must have in order to not be skipped.
147 Videos without view count information are always
148 downloaded. None for no limit.
149 max_views: An integer representing the maximum view count.
150 Videos that are more popular than that are not
152 Videos without view count information are always
153 downloaded. None for no limit.
154 download_archive: File name of a file where all downloads are recorded.
155 Videos already present in the file are not downloaded
157 cookiefile: File name where cookies should be read from and dumped to.
158 nocheckcertificate:Do not verify SSL certificates
159 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
160 At the moment, this is only supported by YouTube.
161 proxy: URL of the proxy server to use
162 socket_timeout: Time to wait for unresponsive hosts, in seconds
163 bidi_workaround: Work around buggy terminals without bidirectional text
164 support, using fribidi
165 debug_printtraffic:Print out sent and received HTTP traffic
166 include_ads: Download ads as well
167 default_search: Prepend this string if an input url is not valid.
168 'auto' for elaborate guessing
169 encoding: Use this encoding instead of the system-specified.
170 extract_flat: Do not resolve URLs, return the immediate result.
171 Pass in 'in_playlist' to only show this behavior for
174 The following parameters are not used by YoutubeDL itself, they are used by
176 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
177 noresizebuffer, retries, continuedl, noprogress, consoletitle
179 The following options are used by the post processors:
180 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
181 otherwise prefer avconv.
182 exec_cmd: Arbitrary command to run after downloading
# Class-level defaults; both are re-initialised per instance in __init__.
# _download_retcode: process exit code accumulated across downloads.
# _num_downloads: counter used e.g. by the %(autonumber)s template field.
188 _download_retcode = None
189 _num_downloads = None
# Constructor: wires up output streams, the cache, the optional
# bidi-text workaround subprocess, and filename-restriction fallbacks.
# NOTE(review): gaps in the embedded numbering (194-196, 198, 204, 206,
# 208-209, 212-214, 216, 218, 220, 223-224, 229, 231-233, 238, 243)
# show original lines are missing from this listing.
192 def __init__(self, params=None):
193 """Create a FileDownloader object with the given options."""
197 self._ies_instances = {}
199 self._progress_hooks = []
200 self._download_retcode = 0
201 self._num_downloads = 0
# Screen output goes to stderr instead of stdout when 'logtostderr' is set.
202 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
203 self._err_file = sys.stderr
205 self.cache = Cache(self)
207 if params.get('bidi_workaround', False):
# A pty pair feeds output through an external bidi filter so RTL text
# renders correctly on terminals lacking bidirectional support.
210 master, slave = pty.openpty()
211 width = get_term_width()
215 width_args = ['-w', str(width)]
217 stdin=subprocess.PIPE,
219 stderr=self._err_file)
221 self._output_process = subprocess.Popen(
222 ['bidiv'] + width_args, **sp_kwargs
# Fallback: when 'bidiv' is unavailable, try 'fribidi' instead.
225 self._output_process = subprocess.Popen(
226 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
227 self._output_channel = os.fdopen(master, 'rb')
228 except OSError as ose:
230 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
# On ASCII-locale Python 3 the filesystem API cannot encode arbitrary
# titles, so force --restrict-filenames (see issue referenced below).
234 if (sys.version_info >= (3,) and sys.platform != 'win32' and
235 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
236 and not params.get('restrictfilenames', False)):
237 # On Python 3, the Unicode filesystem API will throw errors (#1474)
239 'Assuming --restrict-filenames since file system encoding '
240 'cannot encode all characters. '
241 'Set the LC_ALL environment variable to fix this.')
242 self.params['restrictfilenames'] = True
# Warn about the long-deprecated %(stitle)s output-template field.
244 if '%(stitle)s' in self.params.get('outtmpl', ''):
245 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
# Register an InfoExtractor instance and give it a back-reference to us.
# NOTE(review): the original line between 250 and 252 (presumably the one
# appending `ie` to the ordered extractor list) is missing from this listing.
249 def add_info_extractor(self, ie):
250 """Add an InfoExtractor object to the end of the list."""
# Cache by key for O(1) lookup in get_info_extractor().
252 self._ies_instances[ie.ie_key()] = ie
253 ie.set_downloader(self)
# Lazily instantiate and cache an InfoExtractor identified by *ie_key*.
255 def get_info_extractor(self, ie_key):
257 Get an instance of an IE with name ie_key, it will try to get one from
258 the _ies list, if there's no instance it will create a new one and add
259 it to the extractor list.
261 ie = self._ies_instances.get(ie_key)
# NOTE(review): the guard (likely `if ie is None:`) and the trailing
# `return ie` are missing from this gappy listing.
263 ie = get_info_extractor(ie_key)()
264 self.add_info_extractor(ie)
def add_default_info_extractors(self):
    """Register every extractor produced by gen_extractors(), in order."""
    for extractor in gen_extractors():
        self.add_info_extractor(extractor)
# Append a post-processor to the chain and point it back at this downloader.
# NOTE(review): line 276 (presumably `self._pps.append(pp)`) is absent
# from this listing.
274 def add_post_processor(self, pp):
275 """Add a PostProcessor object to the end of the chain."""
277 pp.set_downloader(self)
def add_progress_hook(self, ph):
    """Register *ph*; the file downloader calls it with progress updates."""
    self._progress_hooks.append(ph)
# Route *message* through the external bidi process (set up in __init__)
# and return the reordered text for terminals without bidi support.
283 def _bidi_workaround(self, message):
284 if not hasattr(self, '_output_channel'):
# NOTE(review): the early-exit branch body (likely `return message`)
# is missing from this listing.
287 assert hasattr(self, '_output_process')
288 assert isinstance(message, compat_str)
# One readline() per input line keeps the pipe in lockstep.
289 line_count = message.count('\n') + 1
290 self._output_process.stdin.write((message + '\n').encode('utf-8'))
291 self._output_process.stdin.flush()
292 res = ''.join(self._output_channel.readline().decode('utf-8')
293 for _ in range(line_count))
# Strip the extra newline that was appended before writing.
294 return res[:-len('\n')]
def to_screen(self, message, skip_eol=False):
    """Show *message* on the screen, suppressed when quiet mode is on."""
    return self.to_stdout(message, skip_eol=skip_eol, check_quiet=True)
def _write_string(self, s, out=None):
    """Write *s* to *out*, honouring the user-selected output encoding."""
    enc = self.params.get('encoding')
    write_string(s, out=out, encoding=enc)
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print *message* to the screen file unless quiet mode suppresses it."""
    logger = self.params.get('logger')
    if logger:
        # A configured logger takes over all screen output.
        logger.debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return
    message = self._bidi_workaround(message)
    terminator = '' if skip_eol else '\n'
    self._write_string(message + terminator, self._screen_file)
# Emit *message* on stderr, or via the configured logger when present.
314 def to_stderr(self, message):
315 """Print message to stderr."""
316 assert isinstance(message, compat_str)
317 if self.params.get('logger'):
318 self.params['logger'].error(message)
# NOTE(review): the `else:` introducing this branch is missing from the
# listing; the lines below run only when no logger is configured.
320 message = self._bidi_workaround(message)
321 output = message + '\n'
322 self._write_string(output, self._err_file)
# Set the terminal/console window title to *message* when enabled.
324 def to_console_title(self, message):
325 if not self.params.get('consoletitle', False):
# NOTE(review): the guard's early `return` is missing from this listing.
327 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
328 # c_wchar_p() might not be necessary if `message` is
329 # already of type unicode()
330 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
331 elif 'TERM' in os.environ:
# xterm escape sequence OSC 0: set window and icon title.
332 self._write_string('\033]0;%s\007' % message, self._screen_file)
# Push the current console title onto the terminal's title stack.
334 def save_console_title(self):
335 if not self.params.get('consoletitle', False):
# NOTE(review): the guard's early `return` is missing from this listing.
337 if 'TERM' in os.environ:
338 # Save the title on stack
339 self._write_string('\033[22;0t', self._screen_file)
# Pop the previously saved console title from the terminal's title stack.
341 def restore_console_title(self):
342 if not self.params.get('consoletitle', False):
# NOTE(review): the guard's early `return` is missing from this listing.
344 if 'TERM' in os.environ:
345 # Restore the title from stack
346 self._write_string('\033[23;0t', self._screen_file)
# Context-manager support. The first fragment belongs to __enter__ —
# its `def` line (348) and `return self` are missing from this listing.
349 self.save_console_title()
352 def __exit__(self, *args):
353 self.restore_console_title()
# Persist cookies on exit when a cookie jar file is configured.
355 if self.params.get('cookiefile') is not None:
356 self.cookiejar.save()
# Central error handler: print the message, optionally a traceback, and
# either raise DownloadError or just record a non-zero return code
# depending on the 'ignoreerrors' option.
358 def trouble(self, message=None, tb=None):
359 """Determine action to take when a download problem appears.
361 Depending on if the downloader has been configured to ignore
362 download errors or not, this method may throw an exception or
363 not when errors are found, after printing the message.
365 tb, if given, is additional traceback information.
367 if message is not None:
368 self.to_stderr(message)
369 if self.params.get('verbose'):
# Build a traceback string when verbose and no tb was supplied.
371 if sys.exc_info()[0]: # if .trouble has been called from an except block
373 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
# ExtractorError wraps the original exc_info; include it first.
374 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
375 tb += compat_str(traceback.format_exc())
377 tb_data = traceback.format_list(traceback.extract_stack())
378 tb = ''.join(tb_data)
380 if not self.params.get('ignoreerrors', False):
381 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
382 exc_info = sys.exc_info()[1].exc_info
384 exc_info = sys.exc_info()
385 raise DownloadError(message, exc_info)
386 self._download_retcode = 1
# Print a warning, coloured on ttys, honouring 'logger' and 'no_warnings'.
388 def report_warning(self, message):
390 Print the message to stderr, it will be prefixed with 'WARNING:'
391 If stderr is a tty file the 'WARNING:' will be colored
393 if self.params.get('logger') is not None:
394 self.params['logger'].warning(message)
396 if self.params.get('no_warnings'):
# NOTE(review): the early `return`s following the two guards above are
# missing from this gappy listing.
398 if self._err_file.isatty() and os.name != 'nt':
# ANSI yellow for the WARNING prefix on capable terminals.
399 _msg_header = '\033[0;33mWARNING:\033[0m'
401 _msg_header = 'WARNING:'
402 warning_message = '%s %s' % (_msg_header, message)
403 self.to_stderr(warning_message)
# Like trouble(), but prefixes the message with a (possibly red) 'ERROR:'.
405 def report_error(self, message, tb=None):
407 Do the same as trouble, but prefixes the message with 'ERROR:', colored
408 in red if stderr is a tty file.
410 if self._err_file.isatty() and os.name != 'nt':
# ANSI red for the ERROR prefix on capable terminals.
411 _msg_header = '\033[0;31mERROR:\033[0m'
413 _msg_header = 'ERROR:'
414 error_message = '%s %s' % (_msg_header, message)
415 self.trouble(error_message, tb)
# Tell the user the target file already exists on disk; fall back to a
# generic message when the filename cannot be encoded for the console.
417 def report_file_already_downloaded(self, file_name):
418 """Report file has already been fully downloaded."""
# NOTE(review): the `try:` opening this block is missing from the listing.
420 self.to_screen('[download] %s has already been downloaded' % file_name)
421 except UnicodeEncodeError:
422 self.to_screen('[download] The file has already been downloaded')
# Expand the output template with fields from *info_dict*, sanitising
# each value so the result is a usable file name.
424 def prepare_filename(self, info_dict):
425 """Generate the output filename."""
427 template_dict = dict(info_dict)
429 template_dict['epoch'] = int(time.time())
430 autonumber_size = self.params.get('autonumber_size')
431 if autonumber_size is None:
# NOTE(review): the default assignment for autonumber_size is missing
# from this gappy listing.
433 autonumber_templ = '%0' + str(autonumber_size) + 'd'
434 template_dict['autonumber'] = autonumber_templ % self._num_downloads
# Zero-pad playlist_index to the width of the playlist size.
435 if template_dict.get('playlist_index') is not None:
436 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
# Derive a human-readable 'resolution' field when absent.
437 if template_dict.get('resolution') is None:
438 if template_dict.get('width') and template_dict.get('height'):
439 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
440 elif template_dict.get('height'):
441 template_dict['resolution'] = '%sp' % template_dict['height']
442 elif template_dict.get('width'):
443 template_dict['resolution'] = '?x%d' % template_dict['width']
445 sanitize = lambda k, v: sanitize_filename(
447 restricted=self.params.get('restrictfilenames'),
449 template_dict = dict((k, sanitize(k, v))
450 for k, v in template_dict.items()
# Any field missing from the template dict renders as 'NA'.
452 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
454 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
455 tmpl = compat_expanduser(outtmpl)
456 filename = tmpl % template_dict
458 except ValueError as err:
459 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
# Apply the user's filters (title regexes, date range, view counts, age
# limit, download archive). Returns a human-readable skip reason, or
# None when the entry should be downloaded.
462 def _match_entry(self, info_dict):
463 """ Returns None iff the file should be downloaded """
465 video_title = info_dict.get('title', info_dict.get('id', 'video'))
466 if 'title' in info_dict:
467 # This can happen when we're just evaluating the playlist
468 title = info_dict['title']
469 matchtitle = self.params.get('matchtitle', False)
471 if not re.search(matchtitle, title, re.IGNORECASE):
472 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
473 rejecttitle = self.params.get('rejecttitle', False)
475 if re.search(rejecttitle, title, re.IGNORECASE):
476 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
477 date = info_dict.get('upload_date', None)
479 dateRange = self.params.get('daterange', DateRange())
480 if date not in dateRange:
481 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
482 view_count = info_dict.get('view_count', None)
483 if view_count is not None:
484 min_views = self.params.get('min_views')
485 if min_views is not None and view_count < min_views:
486 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
487 max_views = self.params.get('max_views')
488 if max_views is not None and view_count > max_views:
489 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
490 age_limit = self.params.get('age_limit')
491 if age_limit is not None:
492 actual_age_limit = info_dict.get('age_limit')
493 if actual_age_limit is None:
# NOTE(review): the fallback assignment for a missing age_limit is
# absent from this gappy listing.
495 if age_limit < actual_age_limit:
496 return 'Skipping "' + title + '" because it is age restricted'
497 if self.in_download_archive(info_dict):
498 return '%s has already been recorded in archive' % video_title
# NOTE(review): the final `return None` is not visible in this listing.
def add_extra_info(info_dict, extra_info):
    """Copy entries of *extra_info* into *info_dict*, keeping existing keys."""
    # NOTE(review): upstream decorates this with @staticmethod; the
    # decorator line appears to be missing from this listing.
    for key in extra_info:
        if key not in info_dict:
            info_dict[key] = extra_info[key]
# Resolve *url* with a suitable InfoExtractor and hand the result to
# process_ie_result(); extraction errors are reported and, depending on
# 'ignoreerrors', possibly re-raised as DownloadError.
# NOTE(review): numbering gaps (508-509, 513-515, 517-524, 527-528, 531,
# 534, 537, 539, 541-542, 545, 547, 551-554) show missing lines,
# including the extractor loop header and the try/raise plumbing.
507 def extract_info(self, url, download=True, ie_key=None, extra_info={},
510 Returns a list with a dictionary for each video we find.
511 If 'download', also downloads the videos.
512 extra_info is a dict containing the extra values to add to each result
# When an explicit ie_key is given, only that extractor is tried.
516 ies = [self.get_info_extractor(ie_key)]
521 if not ie.suitable(url):
525 self.report_warning('The program functionality for this site has been marked as broken, '
526 'and will probably not work.')
529 ie_result = ie.extract(url)
530 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
532 if isinstance(ie_result, list):
533 # Backwards compatibility: old IE result format
535 '_type': 'compat_list',
536 'entries': ie_result,
538 self.add_default_extra_info(ie_result, ie, url)
540 return self.process_ie_result(ie_result, download, extra_info)
543 except ExtractorError as de: # An error we somewhat expected
544 self.report_error(compat_str(de), de.format_traceback())
546 except MaxDownloadsReached:
548 except Exception as e:
549 if self.params.get('ignoreerrors', False):
550 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
# Reached when no registered extractor accepted the URL.
555 self.report_error('no suitable InfoExtractor for URL %s' % url)
# Attach extractor identity and URL-derived fields to an IE result
# without overwriting values the extractor already set.
# NOTE(review): line 560 (presumably 'webpage_url': url,) and the
# closing of the dict literal are missing from this listing.
557 def add_default_extra_info(self, ie_result, ie, url):
558 self.add_extra_info(ie_result, {
559 'extractor': ie.IE_NAME,
561 'webpage_url_basename': url_basename(url),
562 'extractor_key': ie.ie_key(),
# Dispatch on the IE result's '_type': videos go to
# process_video_result(), 'url'/'url_transparent' results are re-fed to
# extract_info(), playlists are sliced and each entry recursively
# processed, and legacy 'compat_list' results are normalised.
# NOTE(review): many original lines are missing from this gappy listing
# (e.g. 566, 569, 572-573, 575, 582-583, 591, 599, 606-607, 610, 612,
# 617, 624, 626, 631-632, 637, 640, 645, 648, 651, 659-660, 664-665,
# 667-668, 671, 673, 675, 680-681, 685-687).
565 def process_ie_result(self, ie_result, download=True, extra_info={}):
567 Take the result of the ie(may be modified) and resolve all unresolved
568 references (URLs, playlist items).
570 It will also download the videos if 'download'.
571 Returns the resolved ie_result.
574 result_type = ie_result.get('_type', 'video')
# extract_flat short-circuits URL resolution (optionally only inside playlists).
576 if result_type in ('url', 'url_transparent'):
577 extract_flat = self.params.get('extract_flat', False)
578 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
579 extract_flat is True):
580 if self.params.get('forcejson', False):
581 self.to_stdout(json.dumps(ie_result))
584 if result_type == 'video':
585 self.add_extra_info(ie_result, extra_info)
586 return self.process_video_result(ie_result, download=download)
587 elif result_type == 'url':
588 # We have to add extra_info to the results because it may be
589 # contained in a playlist
590 return self.extract_info(ie_result['url'],
592 ie_key=ie_result.get('ie_key'),
593 extra_info=extra_info)
594 elif result_type == 'url_transparent':
595 # Use the information from the embedding page
596 info = self.extract_info(
597 ie_result['url'], ie_key=ie_result.get('ie_key'),
598 extra_info=extra_info, download=False, process=False)
# Merge selected fields of the embedded info over a copy of the
# original result.
600 def make_result(embedded_info):
601 new_result = ie_result.copy()
602 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
603 'entries', 'ie_key', 'duration',
604 'subtitles', 'annotations', 'format',
605 'thumbnail', 'thumbnails'):
608 if f in embedded_info:
609 new_result[f] = embedded_info[f]
611 new_result = make_result(info)
# A url_transparent result must not resolve to another one.
613 assert new_result.get('_type') != 'url_transparent'
614 if new_result.get('_type') == 'compat_list':
615 new_result['entries'] = [
616 make_result(e) for e in new_result['entries']]
618 return self.process_ie_result(
619 new_result, download=download, extra_info=extra_info)
620 elif result_type == 'playlist':
621 # We process each entry in the playlist
622 playlist = ie_result.get('title', None) or ie_result.get('id', None)
623 self.to_screen('[download] Downloading playlist: %s' % playlist)
625 playlist_results = []
# Convert the user's 1-based --playlist-start to a 0-based slice index.
627 playliststart = self.params.get('playliststart', 1) - 1
628 playlistend = self.params.get('playlistend', None)
629 # For backwards compatibility, interpret -1 as whole list
630 if playlistend == -1:
633 if isinstance(ie_result['entries'], list):
634 n_all_entries = len(ie_result['entries'])
635 entries = ie_result['entries'][playliststart:playlistend]
636 n_entries = len(entries)
638 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
639 (ie_result['extractor'], playlist, n_all_entries, n_entries))
# Lazily-paged playlists are sliced through PagedList.getslice().
641 assert isinstance(ie_result['entries'], PagedList)
642 entries = ie_result['entries'].getslice(
643 playliststart, playlistend)
644 n_entries = len(entries)
646 "[%s] playlist %s: Downloading %d videos" %
647 (ie_result['extractor'], playlist, n_entries))
649 for i, entry in enumerate(entries, 1):
650 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
652 'n_entries': n_entries,
653 'playlist': playlist,
654 'playlist_index': i + playliststart,
655 'extractor': ie_result['extractor'],
656 'webpage_url': ie_result['webpage_url'],
657 'webpage_url_basename': url_basename(ie_result['webpage_url']),
658 'extractor_key': ie_result['extractor_key'],
# Honour the skip filters for each playlist entry.
661 reason = self._match_entry(entry)
662 if reason is not None:
663 self.to_screen('[download] ' + reason)
666 entry_result = self.process_ie_result(entry,
669 playlist_results.append(entry_result)
670 ie_result['entries'] = playlist_results
672 elif result_type == 'compat_list':
674 self.add_extra_info(r,
676 'extractor': ie_result['extractor'],
677 'webpage_url': ie_result['webpage_url'],
678 'webpage_url_basename': url_basename(ie_result['webpage_url']),
679 'extractor_key': ie_result['extractor_key'],
682 ie_result['entries'] = [
683 self.process_ie_result(_fixup(r), download, extra_info)
684 for r in ie_result['entries']
# Unknown '_type' values are a programming error.
688 raise Exception('Invalid result type: %s' % result_type)
# Pick one format out of *available_formats* according to *format_spec*
# ('best', 'worst', 'bestaudio', ..., an extension, or a format_id).
# The list is assumed ordered worst-first, so [-1] is the best format.
# NOTE(review): several lines are missing from this gappy listing
# (e.g. the list-comprehension openers at 696/702/708/714, the
# `if audio_formats:`/`if video_formats:` guards, the `else:` branches,
# and the trailing `if matches: return matches[-1]` / `return None`).
690 def select_format(self, format_spec, available_formats):
691 if format_spec == 'best' or format_spec is None:
692 return available_formats[-1]
693 elif format_spec == 'worst':
694 return available_formats[0]
695 elif format_spec == 'bestaudio':
697 f for f in available_formats
698 if f.get('vcodec') == 'none']
700 return audio_formats[-1]
701 elif format_spec == 'worstaudio':
703 f for f in available_formats
704 if f.get('vcodec') == 'none']
706 return audio_formats[0]
707 elif format_spec == 'bestvideo':
709 f for f in available_formats
710 if f.get('acodec') == 'none']
712 return video_formats[-1]
713 elif format_spec == 'worstvideo':
715 f for f in available_formats
716 if f.get('acodec') == 'none']
718 return video_formats[0]
# A bare extension selects the best format with that container.
720 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
721 if format_spec in extensions:
722 filter_f = lambda f: f['ext'] == format_spec
724 filter_f = lambda f: f['format_id'] == format_spec
725 matches = list(filter(filter_f, available_formats))
# Validate and normalise a resolved video result (required fields,
# thumbnails, upload_date, display_id), build the candidate format list,
# apply the user's format selection, and hand each chosen format to
# process_info().
# NOTE(review): numbering gaps (732, 737, 742, 744, 747, 750, 753, 756,
# 761, 764, 766-767, 772, 774-775, 777, 782, 790, 794, 796, 799-800,
# 802, 811-812, 815, 820, 832, 834, 836-837, 839, 843, 846-848, 857-858)
# show missing lines throughout this listing.
730 def process_video_result(self, info_dict, download=True):
731 assert info_dict.get('_type', 'video') == 'video'
733 if 'id' not in info_dict:
734 raise ExtractorError('Missing "id" field in extractor result')
735 if 'title' not in info_dict:
736 raise ExtractorError('Missing "title" field in extractor result')
738 if 'playlist' not in info_dict:
739 # It isn't part of a playlist
740 info_dict['playlist'] = None
741 info_dict['playlist_index'] = None
# Sort thumbnails worst-first so [-1] is the best candidate.
743 thumbnails = info_dict.get('thumbnails')
745 thumbnails.sort(key=lambda t: (
746 t.get('width'), t.get('height'), t.get('url')))
748 if 'width' in t and 'height' in t:
749 t['resolution'] = '%dx%d' % (t['width'], t['height'])
751 if thumbnails and 'thumbnail' not in info_dict:
752 info_dict['thumbnail'] = thumbnails[-1]['url']
754 if 'display_id' not in info_dict and 'id' in info_dict:
755 info_dict['display_id'] = info_dict['id']
# Derive upload_date (YYYYMMDD) from a raw timestamp when absent.
757 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
758 upload_date = datetime.datetime.utcfromtimestamp(
759 info_dict['timestamp'])
760 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
762 # These extractors handle format selection themselves
763 if info_dict['extractor'] in ['Youku']:
765 self.process_info(info_dict)
768 # We now pick which formats have to be downloaded
769 if info_dict.get('formats') is None:
770 # There's only one format available
771 formats = [info_dict]
773 formats = info_dict['formats']
776 raise ExtractorError('No video formats found!')
778 # We check that all the formats have the format and format_id fields
779 for i, format in enumerate(formats):
780 if 'url' not in format:
781 raise ExtractorError('Missing "url" key in result (index %d)' % i)
783 if format.get('format_id') is None:
784 format['format_id'] = compat_str(i)
785 if format.get('format') is None:
786 format['format'] = '{id} - {res}{note}'.format(
787 id=format['format_id'],
788 res=self.format_resolution(format),
789 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
791 # Automatically determine file extension if missing
792 if 'ext' not in format:
793 format['ext'] = determine_ext(format['url']).lower()
# --format-limit truncates the list after the named format.
795 format_limit = self.params.get('format_limit', None)
797 formats = list(takewhile_inclusive(
798 lambda f: f['format_id'] != format_limit, formats
801 # TODO Central sorting goes here
803 if formats[0] is not info_dict:
804 # only set the 'formats' fields if the original info_dict list them
805 # otherwise we end up with a circular reference, the first (and unique)
806 # element in the 'formats' field in info_dict is info_dict itself,
807 # which can't be exported to json
808 info_dict['formats'] = formats
809 if self.params.get('listformats', None):
810 self.list_formats(info_dict)
813 req_format = self.params.get('format')
814 if req_format is None:
816 formats_to_download = []
817 # The -1 is for supporting YoutubeIE
818 if req_format in ('-1', 'all'):
819 formats_to_download = formats
821 for rfstr in req_format.split(','):
822 # We can accept formats requested in the format: 34/5/best, we pick
823 # the first that is available, starting from left
824 req_formats = rfstr.split('/')
825 for rf in req_formats:
826 if re.match(r'.+?\+.+?', rf) is not None:
827 # Two formats have been requested like '137+139'
828 format_1, format_2 = rf.split('+')
829 formats_info = (self.select_format(format_1, formats),
830 self.select_format(format_2, formats))
831 if all(formats_info):
833 'requested_formats': formats_info,
835 'ext': formats_info[0]['ext'],
838 selected_format = None
840 selected_format = self.select_format(rf, formats)
841 if selected_format is not None:
842 formats_to_download.append(selected_format)
844 if not formats_to_download:
845 raise ExtractorError('requested format not available',
849 if len(formats_to_download) > 1:
850 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
851 for format in formats_to_download:
852 new_info = dict(info_dict)
853 new_info.update(format)
854 self.process_info(new_info)
855 # We update the info dict with the best quality format (backwards compatibility)
856 info_dict.update(formats_to_download[-1])
859 def process_info(self, info_dict):
860 """Process a single resolved IE result."""
862 assert info_dict.get('_type', 'video') == 'video'
864 max_downloads = self.params.get('max_downloads')
865 if max_downloads is not None:
866 if self._num_downloads >= int(max_downloads):
867 raise MaxDownloadsReached()
869 info_dict['fulltitle'] = info_dict['title']
870 if len(info_dict['title']) > 200:
871 info_dict['title'] = info_dict['title'][:197] + '...'
873 # Keep for backwards compatibility
874 info_dict['stitle'] = info_dict['title']
876 if 'format' not in info_dict:
877 info_dict['format'] = info_dict['ext']
879 reason = self._match_entry(info_dict)
880 if reason is not None:
881 self.to_screen('[download] ' + reason)
884 self._num_downloads += 1
886 filename = self.prepare_filename(info_dict)
889 if self.params.get('forcetitle', False):
890 self.to_stdout(info_dict['fulltitle'])
891 if self.params.get('forceid', False):
892 self.to_stdout(info_dict['id'])
893 if self.params.get('forceurl', False):
894 # For RTMP URLs, also include the playpath
895 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
896 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
897 self.to_stdout(info_dict['thumbnail'])
898 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
899 self.to_stdout(info_dict['description'])
900 if self.params.get('forcefilename', False) and filename is not None:
901 self.to_stdout(filename)
902 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
903 self.to_stdout(formatSeconds(info_dict['duration']))
904 if self.params.get('forceformat', False):
905 self.to_stdout(info_dict['format'])
906 if self.params.get('forcejson', False):
907 info_dict['_filename'] = filename
908 self.to_stdout(json.dumps(info_dict))
909 if self.params.get('dump_single_json', False):
910 info_dict['_filename'] = filename
912 # Do nothing else if in simulate mode
913 if self.params.get('simulate', False):
920 dn = os.path.dirname(encodeFilename(filename))
921 if dn and not os.path.exists(dn):
923 except (OSError, IOError) as err:
924 self.report_error('unable to create directory ' + compat_str(err))
927 if self.params.get('writedescription', False):
928 descfn = filename + '.description'
929 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
930 self.to_screen('[info] Video description is already present')
933 self.to_screen('[info] Writing video description to: ' + descfn)
934 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
935 descfile.write(info_dict['description'])
936 except (KeyError, TypeError):
937 self.report_warning('There\'s no description to write.')
938 except (OSError, IOError):
939 self.report_error('Cannot write description file ' + descfn)
942 if self.params.get('writeannotations', False):
943 annofn = filename + '.annotations.xml'
944 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
945 self.to_screen('[info] Video annotations are already present')
948 self.to_screen('[info] Writing video annotations to: ' + annofn)
949 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
950 annofile.write(info_dict['annotations'])
951 except (KeyError, TypeError):
952 self.report_warning('There are no annotations to write.')
953 except (OSError, IOError):
954 self.report_error('Cannot write annotations file: ' + annofn)
957 subtitles_are_requested = any([self.params.get('writesubtitles', False),
958 self.params.get('writeautomaticsub')])
960 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
961 # subtitles download errors are already managed as troubles in relevant IE
962 # that way it will silently go on when used with unsupporting IE
963 subtitles = info_dict['subtitles']
964 sub_format = self.params.get('subtitlesformat', 'srt')
965 for sub_lang in subtitles.keys():
966 sub = subtitles[sub_lang]
970 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
971 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
972 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
974 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
975 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
977 except (OSError, IOError):
978 self.report_error('Cannot write subtitles file ' + sub_filename)
981 if self.params.get('writeinfojson', False):
982 infofn = os.path.splitext(filename)[0] + '.info.json'
983 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
984 self.to_screen('[info] Video description metadata is already present')
986 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
988 write_json_file(info_dict, encodeFilename(infofn))
989 except (OSError, IOError):
990 self.report_error('Cannot write metadata to JSON file ' + infofn)
993 if self.params.get('writethumbnail', False):
994 if info_dict.get('thumbnail') is not None:
995 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
996 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
997 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
998 self.to_screen('[%s] %s: Thumbnail is already present' %
999 (info_dict['extractor'], info_dict['id']))
1001 self.to_screen('[%s] %s: Downloading thumbnail ...' %
1002 (info_dict['extractor'], info_dict['id']))
1004 uf = self.urlopen(info_dict['thumbnail'])
1005 with open(thumb_filename, 'wb') as thumbf:
1006 shutil.copyfileobj(uf, thumbf)
1007 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1008 (info_dict['extractor'], info_dict['id'], thumb_filename))
1009 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1010 self.report_warning('Unable to download thumbnail "%s": %s' %
1011 (info_dict['thumbnail'], compat_str(err)))
1013 if not self.params.get('skip_download', False):
1014 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1019 fd = get_suitable_downloader(info)(self, self.params)
1020 for ph in self._progress_hooks:
1021 fd.add_progress_hook(ph)
1022 if self.params.get('verbose'):
1023 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1024 return fd.download(name, info)
1025 if info_dict.get('requested_formats') is not None:
1028 merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1029 if not merger._get_executable():
1031 self.report_warning('You have requested multiple '
1032 'formats but ffmpeg or avconv are not installed.'
1033 ' The formats won\'t be merged')
1035 postprocessors = [merger]
1036 for f in info_dict['requested_formats']:
1037 new_info = dict(info_dict)
1039 fname = self.prepare_filename(new_info)
1040 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1041 downloaded.append(fname)
1042 partial_success = dl(fname, new_info)
1043 success = success and partial_success
1044 info_dict['__postprocessors'] = postprocessors
1045 info_dict['__files_to_merge'] = downloaded
1047 # Just a single file
1048 success = dl(filename, info_dict)
1049 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1050 self.report_error('unable to download video data: %s' % str(err))
1052 except (OSError, IOError) as err:
1053 raise UnavailableVideoError(err)
1054 except (ContentTooShortError, ) as err:
1055 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1060 self.post_process(filename, info_dict)
1061 except (PostProcessingError) as err:
1062 self.report_error('postprocessing: %s' % str(err))
1065 self.record_download_archive(info_dict)
1067 def download(self, url_list):
1068 """Download a given list of URLs."""
# Refuse to download several videos into one fixed output file; the
# elided condition presumably checks outtmpl for template placeholders.
1069 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1070 if (len(url_list) > 1 and
1072 and self.params.get('max_downloads') != 1):
1073 raise SameFileError(outtmpl)
1075 for url in url_list:
# extract_info performs the actual download too, which is why the
# download-related except clauses below guard this call.
1077 # It also downloads the videos
1078 res = self.extract_info(url)
1079 except UnavailableVideoError:
1080 self.report_error('unable to download video')
1081 except MaxDownloadsReached:
# Reaching --max-downloads is informational, not an error.
1082 self.to_screen('[info] Maximum number of downloaded files reached.')
1085 if self.params.get('dump_single_json', False):
1086 self.to_stdout(json.dumps(res))
# Aggregated process exit code, accumulated by report_error() calls.
1088 return self._download_retcode
1090 def download_with_info_file(self, info_filename):
# Re-run a download from a previously written .info.json file.
# (Elided lines presumably parse the JSON into `info` and open a try block.)
1091 with io.open(info_filename, 'r', encoding='utf-8') as f:
1094 self.process_ie_result(info, download=True)
1095 except DownloadError:
# Stored info may be stale (e.g. expired media URLs); fall back to a
# fresh extraction from the original webpage URL when one is recorded.
1096 webpage_url = info.get('webpage_url')
1097 if webpage_url is not None:
1098 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1099 return self.download([webpage_url])
1102 return self._download_retcode
1104 def post_process(self, filename, ie_info):
1105 """Run all the postprocessors on the given file."""
# Work on a copy so the caller's info dict is not mutated.
1106 info = dict(ie_info)
1107 info['filepath'] = filename
# Per-download postprocessors (e.g. the format merger) run before the
# globally registered ones in self._pps.
1110 if ie_info.get('__postprocessors') is not None:
1111 pps_chain.extend(ie_info['__postprocessors'])
1112 pps_chain.extend(self._pps)
1113 for pp in pps_chain:
# Each postprocessor returns a keep-the-original wish:
# True/False is an explicit vote, None means "no opinion".
1115 keep_video_wish, new_info = pp.run(info)
1116 if keep_video_wish is not None:
1118 keep_video = keep_video_wish
1119 elif keep_video is None:
1120 # No clear decision yet, let IE decide
1121 keep_video = keep_video_wish
1122 except PostProcessingError as e:
1123 self.report_error(e.msg)
# Delete the original only when a postprocessor voted to drop it and
# the user did not request --keep-video.
1124 if keep_video is False and not self.params.get('keepvideo', False):
1126 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1127 os.remove(encodeFilename(filename))
1128 except (IOError, OSError):
# Best effort: a failed delete is only a warning.
1129 self.report_warning('Unable to remove downloaded video file')
1131 def _make_archive_id(self, info_dict):
1132 # Future-proof against any change in case
1133 # and backwards compatibility with prior versions
1134 extractor = info_dict.get('extractor_key')
1135 if extractor is None:
1136 if 'id' in info_dict:
1137 extractor = info_dict.get('ie_key') # key in a playlist
1138 if extractor is None:
1139 return None # Incomplete video information
1140 return extractor.lower() + ' ' + info_dict['id']
1142 def in_download_archive(self, info_dict):
# Return whether this video is already recorded in the --download-archive
# file; each archive line is an id produced by _make_archive_id.
1143 fn = self.params.get('download_archive')
1147 vid_id = self._make_archive_id(info_dict)
1149 return False  # Incomplete video information
# locked_file guards against concurrent youtube-dl processes.
1152 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1153 for line in archive_file:
1154 if line.strip() == vid_id:
1156 except IOError as ioe:
# A missing archive file simply means "not recorded yet"; any other
# I/O error is re-raised (on an elided line).
1157 if ioe.errno != errno.ENOENT:
1161 def record_download_archive(self, info_dict):
# Append this video's archive id to the --download-archive file so
# future runs can skip it (see in_download_archive).
1162 fn = self.params.get('download_archive')
1165 vid_id = self._make_archive_id(info_dict)
# locked_file serializes appends from concurrent processes.
1167 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1168 archive_file.write(vid_id + '\n')
1171 def format_resolution(format, default='unknown'):
# Produce a human-readable resolution string for a format dict:
# 'WxH' when both dimensions are known, '<H>p' with only a height,
# '?x<W>' with only a width.  A decorator line appears to be elided
# above -- presumably @staticmethod, since no `self` is taken.
1172 if format.get('vcodec') == 'none':
# An explicit 'resolution' value from the extractor wins.
1174 if format.get('resolution') is not None:
1175 return format['resolution']
1176 if format.get('height') is not None:
1177 if format.get('width') is not None:
1178 res = '%sx%s' % (format['width'], format['height'])
1180 res = '%sp' % format['height']
1181 elif format.get('width') is not None:
1182 res = '?x%d' % format['width']
1187 def _format_note(self, fdict):
# Build the free-form "note" column shown by --list-formats:
# format note, bitrates, container, codecs, sample rate and size.
1189 if fdict.get('ext') in ['f4f', 'f4m']:
# f4f/f4m (Adobe HDS) fragments are flagged as unsupported.
1190 res += '(unsupported) '
1191 if fdict.get('format_note') is not None:
1192 res += fdict['format_note'] + ' '
1193 if fdict.get('tbr') is not None:
# tbr = total (audio+video) bitrate, in kbit/s.
1194 res += '%4dk ' % fdict['tbr']
1195 if fdict.get('container') is not None:
1198 res += '%s container' % fdict['container']
1199 if (fdict.get('vcodec') is not None and
1200 fdict.get('vcodec') != 'none'):
1203 res += fdict['vcodec']
1204 if fdict.get('vbr') is not None:
1206 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1208 if fdict.get('vbr') is not None:
1209 res += '%4dk' % fdict['vbr']
1210 if fdict.get('acodec') is not None:
1213 if fdict['acodec'] == 'none':
1216 res += '%-5s' % fdict['acodec']
1217 elif fdict.get('abr') is not None:
1221 if fdict.get('abr') is not None:
1222 res += '@%3dk' % fdict['abr']
# asr = audio sampling rate, in Hz.
1223 if fdict.get('asr') is not None:
1224 res += ' (%5dHz)' % fdict['asr']
1225 if fdict.get('filesize') is not None:
1228 res += format_bytes(fdict['filesize'])
1229 elif fdict.get('filesize_approx') is not None:
# '~' marks an approximate (estimated) file size.
1232 res += '~' + format_bytes(fdict['filesize_approx'])
1235 def list_formats(self, info_dict):
# Print a table of every available format for the given video.
1236 def line(format, idlen=20):
# Render one table row: format id, extension, resolution, note.
1237 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1238 format['format_id'],
1240 self.format_resolution(format),
1241 self._format_note(format),
# Without a 'formats' list, treat info_dict itself as the only format.
1244 formats = info_dict.get('formats', [info_dict])
# Id-column width: the widest format_id, but at least the header text.
1245 idlen = max(len('format code'),
1246 max(len(f['format_id']) for f in formats))
1247 formats_s = [line(f, idlen) for f in formats]
1248 if len(formats) > 1:
# Formats are ordered worst-to-best, so annotate the two extremes.
1249 formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1250 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1252 header_line = line({
1253 'format_id': 'format code', 'ext': 'extension',
1254 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1255 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1256 (info_dict['id'], header_line, '\n'.join(formats_s)))
1258 def urlopen(self, req):
1259 """ Start an HTTP download """
1261 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1262 # always respected by websites, some tend to give out URLs with non percent-encoded
1263 # non-ASCII characters (see telemb.py, ard.py [#3412])
1264 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1265 # To work around aforementioned issue we will replace request's original URL with
1266 # percent-encoded one
# `req` may be either a plain URL string or a urllib Request object.
1267 req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1268 url = req if req_is_string else req.get_full_url()
1269 url_escaped = escape_url(url)
1271 # Substitute URL if any change after escaping
1272 if url != url_escaped:
# Rebuild the Request around the escaped URL, preserving its data,
# headers and origin information.  (The string case is handled on
# elided lines above this branch.)
1276 req = compat_urllib_request.Request(
1277 url_escaped, data=req.data, headers=req.headers,
1278 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1280 return self._opener.open(req, timeout=self._socket_timeout)
1282 def print_debug_header(self):
# Emit assorted debug information (versions, encodings, proxy map)
# when --verbose is active; otherwise do nothing.
1283 if not self.params.get('verbose'):
1286 if type('') is not compat_str:
1287 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1288 self.report_warning(
1289 'Your Python is broken! Update to a newer and supported version')
1292 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1293 locale.getpreferredencoding(),
1294 sys.getfilesystemencoding(),
1295 sys.stdout.encoding,
1296 self.get_encoding()))
1297 write_string(encoding_str, encoding=None)
1299 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
# Best effort: report the git commit when running from a checkout
# (a try/except around this appears to be on elided lines).
1301 sp = subprocess.Popen(
1302 ['git', 'rev-parse', '--short', 'HEAD'],
1303 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1304 cwd=os.path.dirname(os.path.abspath(__file__)))
1305 out, err = sp.communicate()
1306 out = out.decode().strip()
1307 if re.match('[0-9a-f]+', out):
1308 self._write_string('[debug] Git HEAD: ' + out + '\n')
1314 self._write_string('[debug] Python version %s - %s\n' % (
1315 platform.python_version(), platform_name()))
# External helper (ffmpeg/avconv etc.) versions, comma separated.
1317 exe_versions = FFmpegPostProcessor.get_versions()
1318 exe_str = ', '.join(
1320 for exe, v in sorted(exe_versions.items())
1325 self._write_string('[debug] exe versions: %s\n' % exe_str)
# Collect the proxies actually installed on the opener's handlers.
1328 for handler in self._opener.handlers:
1329 if hasattr(handler, 'proxies'):
1330 proxy_map.update(handler.proxies)
1331 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1333 def _setup_opener(self):
# Build the urllib opener used by self.urlopen(): cookie handling,
# proxy configuration, HTTPS certificate checking, traffic debugging.
1334 timeout_val = self.params.get('socket_timeout')
# Default socket timeout is 10 minutes.
1335 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1337 opts_cookiefile = self.params.get('cookiefile')
1338 opts_proxy = self.params.get('proxy')
# Without --cookies, use an in-memory jar; otherwise a Mozilla-format
# cookie file, loaded only when it already exists and is readable.
1340 if opts_cookiefile is None:
1341 self.cookiejar = compat_cookiejar.CookieJar()
1343 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1345 if os.access(opts_cookiefile, os.R_OK):
1346 self.cookiejar.load()
1348 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1350 if opts_proxy is not None:
# An explicit empty --proxy disables proxying entirely (presumably
# via an empty proxies dict on an elided line).
1351 if opts_proxy == '':
1354 proxies = {'http': opts_proxy, 'https': opts_proxy}
# No --proxy given: fall back to the environment's proxy settings.
1356 proxies = compat_urllib_request.getproxies()
1357 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1358 if 'http' in proxies and 'https' not in proxies:
1359 proxies['https'] = proxies['http']
1360 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1362 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1363 https_handler = make_HTTPS_handler(
1364 self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1365 ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1366 opener = compat_urllib_request.build_opener(
1367 https_handler, proxy_handler, cookie_processor, ydlh)
1368 # Delete the default user-agent header, which would otherwise apply in
1369 # cases where our custom HTTP handler doesn't come into play
1370 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1371 opener.addheaders = []
1372 self._opener = opener
1374 def encode(self, s):
# Encode text to bytes using the configured output encoding;
# byte strings pass through unchanged.
1375 if isinstance(s, bytes):
1376 return s  # Already encoded
1379 return s.encode(self.get_encoding())
1380 except UnicodeEncodeError as err:
# Enrich the exception with a user-facing hint before it is
# re-raised (the re-raise itself is on an elided line).
1381 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1384 def get_encoding(self):
1385 encoding = self.params.get('encoding')
1386 if encoding is None:
1387 encoding = preferredencoding()