2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
30 compat_urllib_request,
54 UnavailableVideoError,
61 from .cache import Cache
62 from .extractor import get_info_extractor, gen_extractors
63 from .downloader import get_suitable_downloader
64 from .postprocessor import FFmpegMergerPP
65 from .version import __version__
68 class YoutubeDL(object):
71 YoutubeDL objects are the ones responsible of downloading the
72 actual video file and writing it to disk if the user has requested
73 it, among some other tasks. In most cases there should be one per
74 program. As, given a video URL, the downloader doesn't know how to
75 extract all the needed information, task that InfoExtractors do, it
76 has to pass the URL to one of them.
78 For this, YoutubeDL objects have a method that allows
79 InfoExtractors to be registered in a given order. When it is passed
80 a URL, the YoutubeDL object handles it to the first InfoExtractor it
81 finds that reports being able to handle it. The InfoExtractor extracts
82 all the information about the video or videos the URL refers to, and
83 YoutubeDL process the extracted information, possibly using a File
84 Downloader to download the video.
86 YoutubeDL objects accept a lot of parameters. In order not to saturate
87 the object constructor with arguments, it receives a dictionary of
88 options instead. These options are available through the params
89 attribute for the InfoExtractors to use. The YoutubeDL also
90 registers itself as the downloader in charge for the InfoExtractors
91 that are added to it, so this is a "mutual registration".
95 username: Username for authentication purposes.
96 password: Password for authentication purposes.
97 videopassword: Password for accessing a video.
98 usenetrc: Use netrc for authentication instead.
99 verbose: Print additional info to stdout.
100 quiet: Do not print messages to stdout.
101 no_warnings: Do not print out anything for warnings.
102 forceurl: Force printing final URL.
103 forcetitle: Force printing title.
104 forceid: Force printing ID.
105 forcethumbnail: Force printing thumbnail URL.
106 forcedescription: Force printing description.
107 forcefilename: Force printing final filename.
108 forceduration: Force printing duration.
109 forcejson: Force printing info_dict as JSON.
110 simulate: Do not download the video files.
111 format: Video format code.
112 format_limit: Highest quality format to try.
113 outtmpl: Template for output names.
114 restrictfilenames: Do not allow "&" and spaces in file names
115 ignoreerrors: Do not stop on download errors.
116 nooverwrites: Prevent overwriting files.
117 playliststart: Playlist item to start at.
118 playlistend: Playlist item to end at.
119 matchtitle: Download only matching titles.
120 rejecttitle: Reject downloads for matching titles.
121 logger: Log messages to a logging.Logger instance.
122 logtostderr: Log messages to stderr instead of stdout.
123 writedescription: Write the video description to a .description file
124 writeinfojson: Write the video description to a .info.json file
125 writeannotations: Write the video annotations to a .annotations.xml file
126 writethumbnail: Write the thumbnail image to a file
127 writesubtitles: Write the video subtitles to a file
128 writeautomaticsub: Write the automatic subtitles to a file
129 allsubtitles: Downloads all the subtitles of the video
130 (requires writesubtitles or writeautomaticsub)
131 listsubtitles: Lists all available subtitles for the video
132 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
133 subtitleslangs: List of languages of the subtitles to download
134 keepvideo: Keep the video file after post-processing
135 daterange: A DateRange object, download only if the upload_date is in the range.
136 skip_download: Skip the actual download of the video file
137 cachedir: Location of the cache files in the filesystem.
138 False to disable filesystem cache.
139 noplaylist: Download single video instead of a playlist if in doubt.
140 age_limit: An integer representing the user's age in years.
141 Unsuitable videos for the given age are skipped.
142 min_views: An integer representing the minimum view count the video
143 must have in order to not be skipped.
144 Videos without view count information are always
145 downloaded. None for no limit.
146 max_views: An integer representing the maximum view count.
147 Videos that are more popular than that are not
149 Videos without view count information are always
150 downloaded. None for no limit.
151 download_archive: File name of a file where all downloads are recorded.
152 Videos already present in the file are not downloaded
154 cookiefile: File name where cookies should be read from and dumped to.
155 nocheckcertificate:Do not verify SSL certificates
156 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
157 At the moment, this is only supported by YouTube.
158 proxy: URL of the proxy server to use
159 socket_timeout: Time to wait for unresponsive hosts, in seconds
160 bidi_workaround: Work around buggy terminals without bidirectional text
161 support, using fribidi
162 debug_printtraffic:Print out sent and received HTTP traffic
163 include_ads: Download ads as well
164 default_search: Prepend this string if an input url is not valid.
165 'auto' for elaborate guessing
166 encoding: Use this encoding instead of the system-specified.
167 extract_flat: Do not resolve URLs, return the immediate result.
169 The following parameters are not used by YoutubeDL itself, they are used by
171 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
172 noresizebuffer, retries, continuedl, noprogress, consoletitle
174 The following options are used by the post processors:
175 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
176 otherwise prefer avconv.
177 exec_cmd: Arbitrary command to run after downloading
# Class-level defaults; both are re-initialised per instance in __init__.
# _download_retcode: process exit code accumulated across downloads.
# _num_downloads: counter used e.g. for the %(autonumber)s template field.
183 _download_retcode = None
184 _num_downloads = None
# Constructor: wires up output streams, the cache, the optional bidi
# (bidirectional text) workaround subprocess, and filename-restriction
# fallbacks.
# NOTE(review): this excerpt is line-sampled -- several original lines
# (e.g. the `params` default handling and `self.params = params`) are
# missing between the visible ones; confirm against the full file.
187 def __init__(self, params=None):
188 """Create a FileDownloader object with the given options."""
192 self._ies_instances = {}
194 self._progress_hooks = []
195 self._download_retcode = 0
196 self._num_downloads = 0
# 'logtostderr' picks stderr for all screen output (bool indexes the pair).
197 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
198 self._err_file = sys.stderr
200 self.cache = Cache(self)
# Work around terminals without bidirectional-text support by piping
# output through bidiv/fribidi attached to a pty.
202 if params.get('bidi_workaround', False):
205 master, slave = pty.openpty()
206 width = get_term_width()
210 width_args = ['-w', str(width)]
212 stdin=subprocess.PIPE,
214 stderr=self._err_file)
216 self._output_process = subprocess.Popen(
217 ['bidiv'] + width_args, **sp_kwargs
# Fallback when bidiv is unavailable (the surrounding try/except
# lines are outside this excerpt).
220 self._output_process = subprocess.Popen(
221 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
222 self._output_channel = os.fdopen(master, 'rb')
223 except OSError as ose:
225 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
# On Python 3 with an ASCII filesystem encoding, the Unicode filesystem
# API raises on non-ASCII names, so force restricted filenames (#1474).
229 if (sys.version_info >= (3,) and sys.platform != 'win32' and
230 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
231 and not params['restrictfilenames']):
232 # On Python 3, the Unicode filesystem API will throw errors (#1474)
234 'Assuming --restrict-filenames since file system encoding '
235 'cannot encode all charactes. '
236 'Set the LC_ALL environment variable to fix this.')
237 self.params['restrictfilenames'] = True
# %(stitle)s in output templates is deprecated in favour of %(title)s.
239 if '%(stitle)s' in self.params.get('outtmpl', ''):
240 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
# Register one InfoExtractor: index it by key and hand it a back-reference
# to this downloader ("mutual registration", see class docstring).
# NOTE(review): the excerpt skips original line 246 (presumably the append
# to the ordered extractor list) -- confirm against the full file.
244 def add_info_extractor(self, ie):
245 """Add an InfoExtractor object to the end of the list."""
247 self._ies_instances[ie.ie_key()] = ie
248 ie.set_downloader(self)
# Lazy lookup: reuse a cached IE instance by key, otherwise instantiate one
# via the module-level get_info_extractor(ie_key) factory and register it.
250 def get_info_extractor(self, ie_key):
252 Get an instance of an IE with name ie_key, it will try to get one from
253 the _ies list, if there's no instance it will create a new one and add
254 it to the extractor list.
256 ie = self._ies_instances.get(ie_key)
# NOTE(review): the `if ie is None:` guard and the final `return ie` fall
# outside this sampled excerpt.
258 ie = get_info_extractor(ie_key)()
259 self.add_info_extractor(ie)
def add_default_info_extractors(self):
    """Register every extractor produced by gen_extractors(), in order."""
    for extractor in gen_extractors():
        self.add_info_extractor(extractor)
# Attach a PostProcessor and point it back at this YoutubeDL instance.
# NOTE(review): the line appending pp to the processor chain (original
# line 271) is not in this excerpt.
269 def add_post_processor(self, pp):
270 """Add a PostProcessor object to the end of the chain."""
272 pp.set_downloader(self)
274 def add_progress_hook(self, ph):
275 """Add the progress hook (currently only for the file downloader)"""
276 self._progress_hooks.append(ph)
278 def _bidi_workaround(self, message):
279 if not hasattr(self, '_output_channel'):
282 assert hasattr(self, '_output_process')
283 assert isinstance(message, compat_str)
284 line_count = message.count('\n') + 1
285 self._output_process.stdin.write((message + '\n').encode('utf-8'))
286 self._output_process.stdin.flush()
287 res = ''.join(self._output_channel.readline().decode('utf-8')
288 for _ in range(line_count))
289 return res[:-len('\n')]
291 def to_screen(self, message, skip_eol=False):
292 """Print message to stdout if not in quiet mode."""
293 return self.to_stdout(message, skip_eol, check_quiet=True)
def _write_string(self, s, out=None):
    """Write s to out, honouring the user-selected 'encoding' option."""
    write_string(s, out=out, encoding=self.params.get('encoding'))
298 def to_stdout(self, message, skip_eol=False, check_quiet=False):
299 """Print message to stdout if not in quiet mode."""
300 if self.params.get('logger'):
301 self.params['logger'].debug(message)
302 elif not check_quiet or not self.params.get('quiet', False):
303 message = self._bidi_workaround(message)
304 terminator = ['\n', ''][skip_eol]
305 output = message + terminator
307 self._write_string(output, self._screen_file)
def to_stderr(self, message):
    """Print message to stderr (or hand it to the configured logger)."""
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    self._write_string(self._bidi_workaround(message) + '\n', self._err_file)
319 def to_console_title(self, message):
320 if not self.params.get('consoletitle', False):
322 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
323 # c_wchar_p() might not be necessary if `message` is
324 # already of type unicode()
325 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
326 elif 'TERM' in os.environ:
327 self._write_string('\033]0;%s\007' % message, self._screen_file)
329 def save_console_title(self):
330 if not self.params.get('consoletitle', False):
332 if 'TERM' in os.environ:
333 # Save the title on stack
334 self._write_string('\033[22;0t', self._screen_file)
336 def restore_console_title(self):
337 if not self.params.get('consoletitle', False):
339 if 'TERM' in os.environ:
340 # Restore the title from stack
341 self._write_string('\033[23;0t', self._screen_file)
344 self.save_console_title()
347 def __exit__(self, *args):
348 self.restore_console_title()
350 if self.params.get('cookiefile') is not None:
351 self.cookiejar.save()
353 def trouble(self, message=None, tb=None):
354 """Determine action to take when a download problem appears.
356 Depending on if the downloader has been configured to ignore
357 download errors or not, this method may throw an exception or
358 not when errors are found, after printing the message.
360 tb, if given, is additional traceback information.
# NOTE(review): line-sampled excerpt -- the docstring closer, the `tb`
# initialisation and some intermediate else: lines are not visible here.
362 if message is not None:
363 self.to_stderr(message)
364 if self.params.get('verbose'):
# Wrapped exceptions (e.g. from extractors) may carry their own exc_info
# from the original failure; prefer that traceback when present.
366 if sys.exc_info()[0]: # if .trouble has been called from an except block
368 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
369 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
370 tb += compat_str(traceback.format_exc())
372 tb_data = traceback.format_list(traceback.extract_stack())
373 tb = ''.join(tb_data)
# Unless errors are ignored, re-raise as DownloadError with the most
# specific exc_info available; otherwise just record a failure exit code.
375 if not self.params.get('ignoreerrors', False):
376 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
377 exc_info = sys.exc_info()[1].exc_info
379 exc_info = sys.exc_info()
380 raise DownloadError(message, exc_info)
381 self._download_retcode = 1
383 def report_warning(self, message):
385 Print the message to stderr, it will be prefixed with 'WARNING:'
386 If stderr is a tty file the 'WARNING:' will be colored
388 if self.params.get('logger') is not None:
389 self.params['logger'].warning(message)
391 if self.params.get('no_warnings'):
393 if self._err_file.isatty() and os.name != 'nt':
394 _msg_header = '\033[0;33mWARNING:\033[0m'
396 _msg_header = 'WARNING:'
397 warning_message = '%s %s' % (_msg_header, message)
398 self.to_stderr(warning_message)
400 def report_error(self, message, tb=None):
402 Do the same as trouble, but prefixes the message with 'ERROR:', colored
403 in red if stderr is a tty file.
405 if self._err_file.isatty() and os.name != 'nt':
406 _msg_header = '\033[0;31mERROR:\033[0m'
408 _msg_header = 'ERROR:'
409 error_message = '%s %s' % (_msg_header, message)
410 self.trouble(error_message, tb)
412 def report_file_already_downloaded(self, file_name):
413 """Report file has already been fully downloaded."""
415 self.to_screen('[download] %s has already been downloaded' % file_name)
416 except UnicodeEncodeError:
417 self.to_screen('[download] The file has already been downloaded')
# Expand the output template (outtmpl) against the info_dict, after adding
# derived fields (epoch, autonumber, padded playlist_index, resolution) and
# sanitising every value for filesystem use.
# NOTE(review): line-sampled excerpt -- the surrounding try:, the
# autonumber default and the final `return filename` are not visible here.
419 def prepare_filename(self, info_dict):
420 """Generate the output filename."""
422 template_dict = dict(info_dict)
424 template_dict['epoch'] = int(time.time())
425 autonumber_size = self.params.get('autonumber_size')
426 if autonumber_size is None:
428 autonumber_templ = '%0' + str(autonumber_size) + 'd'
429 template_dict['autonumber'] = autonumber_templ % self._num_downloads
# Zero-pad playlist_index to the width of the total entry count.
430 if template_dict.get('playlist_index') is not None:
431 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
432 if template_dict.get('resolution') is None:
433 if template_dict.get('width') and template_dict.get('height'):
434 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
435 elif template_dict.get('height'):
436 template_dict['resolution'] = '%sp' % template_dict['height']
437 elif template_dict.get('width'):
# NOTE(review): '?x%d' filled with *width* reads swapped -- a width-only
# resolution would conventionally be '%dx?'; confirm intent upstream.
438 template_dict['resolution'] = '?x%d' % template_dict['width']
440 sanitize = lambda k, v: sanitize_filename(
442 restricted=self.params.get('restrictfilenames'),
444 template_dict = dict((k, sanitize(k, v))
445 for k, v in template_dict.items()
# Missing template fields render as 'NA' instead of raising KeyError.
447 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
449 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
450 tmpl = os.path.expanduser(outtmpl)
451 filename = tmpl % template_dict
453 except ValueError as err:
454 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
# Filter chain applied to each candidate video. Each check returns a
# human-readable skip reason (str); falling through means "download it"
# (the trailing `return None` is outside this sampled excerpt).
457 def _match_entry(self, info_dict):
458 """ Returns None iff the file should be downloaded """
460 video_title = info_dict.get('title', info_dict.get('id', 'video'))
# Title-based filters only apply when a title is present (a bare playlist
# evaluation may lack one).
461 if 'title' in info_dict:
462 # This can happen when we're just evaluating the playlist
463 title = info_dict['title']
464 matchtitle = self.params.get('matchtitle', False)
466 if not re.search(matchtitle, title, re.IGNORECASE):
467 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
468 rejecttitle = self.params.get('rejecttitle', False)
470 if re.search(rejecttitle, title, re.IGNORECASE):
471 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
# Date-range filter; the default DateRange() accepts everything.
472 date = info_dict.get('upload_date', None)
474 dateRange = self.params.get('daterange', DateRange())
475 if date not in dateRange:
476 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
# View-count bounds: videos without a view count always pass.
477 view_count = info_dict.get('view_count', None)
478 if view_count is not None:
479 min_views = self.params.get('min_views')
480 if min_views is not None and view_count < min_views:
481 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
482 max_views = self.params.get('max_views')
483 if max_views is not None and view_count > max_views:
484 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
485 age_limit = self.params.get('age_limit')
486 if age_limit is not None:
487 actual_age_limit = info_dict.get('age_limit')
488 if actual_age_limit is None:
490 if age_limit < actual_age_limit:
491 return 'Skipping "' + title + '" because it is age restricted'
492 if self.in_download_archive(info_dict):
493 return '%s has already been recorded in archive' % video_title
def add_extra_info(info_dict, extra_info):
    '''Set the keys from extra_info in info dict if they are missing'''
    # Existing keys win; only absent ones are copied over.
    for key in extra_info:
        info_dict.setdefault(key, extra_info[key])
# Drive extraction for one URL: pick a suitable InfoExtractor, run it,
# normalise legacy list results, then hand off to process_ie_result().
# NOTE(review): mutable default `extra_info={}` is shared across calls --
# safe only while callers never mutate it; flagged for a future fix.
# NOTE(review): line-sampled excerpt -- the suitable-IE loop header,
# `break`/`return` statements and several except bodies are not visible.
502 def extract_info(self, url, download=True, ie_key=None, extra_info={},
505 Returns a list with a dictionary for each video we find.
506 If 'download', also downloads the videos.
507 extra_info is a dict containing the extra values to add to each result
# With an explicit ie_key, only that one extractor is considered.
511 ies = [self.get_info_extractor(ie_key)]
516 if not ie.suitable(url):
520 self.report_warning('The program functionality for this site has been marked as broken, '
521 'and will probably not work.')
524 ie_result = ie.extract(url)
525 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
527 if isinstance(ie_result, list):
528 # Backwards compatibility: old IE result format
530 '_type': 'compat_list',
531 'entries': ie_result,
533 self.add_default_extra_info(ie_result, ie, url)
535 return self.process_ie_result(ie_result, download, extra_info)
538 except ExtractorError as de: # An error we somewhat expected
539 self.report_error(compat_str(de), de.format_traceback())
541 except MaxDownloadsReached:
# Unexpected errors are only swallowed when ignoreerrors is set.
543 except Exception as e:
544 if self.params.get('ignoreerrors', False):
545 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
550 self.report_error('no suitable InfoExtractor for URL %s' % url)
# Stamp extractor provenance fields onto a result (only where missing,
# via add_extra_info's setdefault semantics).
# NOTE(review): the excerpt skips original line 555 (presumably the
# 'webpage_url' entry) and the closing `})` -- confirm against the full file.
552 def add_default_extra_info(self, ie_result, ie, url):
553 self.add_extra_info(ie_result, {
554 'extractor': ie.IE_NAME,
556 'webpage_url_basename': url_basename(url),
557 'extractor_key': ie.ie_key(),
# Central dispatcher over the '_type' of an IE result: resolves nested
# URLs, expands playlists (lists and PagedList), upgrades legacy
# 'compat_list' results, and routes plain videos to process_video_result.
# NOTE(review): mutable default `extra_info={}` -- same caveat as in
# extract_info. Line-sampled excerpt: docstring delimiters, several
# else:/return lines and dict closers are not visible here.
560 def process_ie_result(self, ie_result, download=True, extra_info={}):
562 Take the result of the ie(may be modified) and resolve all unresolved
563 references (URLs, playlist items).
565 It will also download the videos if 'download'.
566 Returns the resolved ie_result.
569 result_type = ie_result.get('_type', 'video')
# extract_flat short-circuits URL resolution and returns the raw result.
571 if self.params.get('extract_flat', False):
572 if result_type in ('url', 'url_transparent'):
575 if result_type == 'video':
576 self.add_extra_info(ie_result, extra_info)
577 return self.process_video_result(ie_result, download=download)
578 elif result_type == 'url':
579 # We have to add extra_info to the results because it may be
580 # contained in a playlist
581 return self.extract_info(ie_result['url'],
583 ie_key=ie_result.get('ie_key'),
584 extra_info=extra_info)
585 elif result_type == 'url_transparent':
586 # Use the information from the embedding page
587 info = self.extract_info(
588 ie_result['url'], ie_key=ie_result.get('ie_key'),
589 extra_info=extra_info, download=False, process=False)
# Merge selected fields from the embedded result over a copy of the
# embedding result, then recurse on the merged dict.
591 def make_result(embedded_info):
592 new_result = ie_result.copy()
593 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
594 'entries', 'ie_key', 'duration',
595 'subtitles', 'annotations', 'format',
596 'thumbnail', 'thumbnails'):
599 if f in embedded_info:
600 new_result[f] = embedded_info[f]
602 new_result = make_result(info)
# A url_transparent result must not resolve to another one.
604 assert new_result.get('_type') != 'url_transparent'
605 if new_result.get('_type') == 'compat_list':
606 new_result['entries'] = [
607 make_result(e) for e in new_result['entries']]
609 return self.process_ie_result(
610 new_result, download=download, extra_info=extra_info)
611 elif result_type == 'playlist':
612 # We process each entry in the playlist
613 playlist = ie_result.get('title', None) or ie_result.get('id', None)
614 self.to_screen('[download] Downloading playlist: %s' % playlist)
616 playlist_results = []
# playliststart is 1-based in params, converted to a 0-based slice index.
618 playliststart = self.params.get('playliststart', 1) - 1
619 playlistend = self.params.get('playlistend', None)
620 # For backwards compatibility, interpret -1 as whole list
621 if playlistend == -1:
# Plain lists are sliced eagerly; PagedList entries are fetched lazily
# via getslice() (else-branch, partially outside this excerpt).
624 if isinstance(ie_result['entries'], list):
625 n_all_entries = len(ie_result['entries'])
626 entries = ie_result['entries'][playliststart:playlistend]
627 n_entries = len(entries)
629 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
630 (ie_result['extractor'], playlist, n_all_entries, n_entries))
632 assert isinstance(ie_result['entries'], PagedList)
633 entries = ie_result['entries'].getslice(
634 playliststart, playlistend)
635 n_entries = len(entries)
637 "[%s] playlist %s: Downloading %d videos" %
638 (ie_result['extractor'], playlist, n_entries))
640 for i, entry in enumerate(entries, 1):
641 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
# Per-entry extra info: provenance plus playlist position fields.
643 'n_entries': n_entries,
644 'playlist': playlist,
645 'playlist_index': i + playliststart,
646 'extractor': ie_result['extractor'],
647 'webpage_url': ie_result['webpage_url'],
648 'webpage_url_basename': url_basename(ie_result['webpage_url']),
649 'extractor_key': ie_result['extractor_key'],
# Apply the user's filters (title/date/view-count/archive) per entry.
652 reason = self._match_entry(entry)
653 if reason is not None:
654 self.to_screen('[download] ' + reason)
657 entry_result = self.process_ie_result(entry,
660 playlist_results.append(entry_result)
661 ie_result['entries'] = playlist_results
663 elif result_type == 'compat_list':
# Legacy list results: stamp provenance onto each entry, then process.
665 self.add_extra_info(r,
667 'extractor': ie_result['extractor'],
668 'webpage_url': ie_result['webpage_url'],
669 'webpage_url_basename': url_basename(ie_result['webpage_url']),
670 'extractor_key': ie_result['extractor_key'],
673 ie_result['entries'] = [
674 self.process_ie_result(_fixup(r), download, extra_info)
675 for r in ie_result['entries']
# Unknown '_type' values are a programming error.
679 raise Exception('Invalid result type: %s' % result_type)
def select_format(self, format_spec, available_formats):
    """Pick one format dict out of available_formats per format_spec.

    available_formats is assumed sorted worst-to-best. Returns None when
    nothing satisfies the spec.
    """
    if format_spec is None or format_spec == 'best':
        return available_formats[-1]
    if format_spec == 'worst':
        return available_formats[0]
    if format_spec in ('bestaudio', 'worstaudio'):
        audio_only = [f for f in available_formats
                      if f.get('vcodec') == 'none']
        if not audio_only:
            return None
        return audio_only[-1] if format_spec == 'bestaudio' else audio_only[0]
    if format_spec in ('bestvideo', 'worstvideo'):
        video_only = [f for f in available_formats
                      if f.get('acodec') == 'none']
        if not video_only:
            return None
        return video_only[-1] if format_spec == 'bestvideo' else video_only[0]
    # Otherwise the spec is either an extension or an explicit format_id;
    # among matches, prefer the last (= best) one.
    if format_spec in ('mp4', 'flv', 'webm', '3gp', 'm4a'):
        matches = [f for f in available_formats if f['ext'] == format_spec]
    else:
        matches = [f for f in available_formats
                   if f['format_id'] == format_spec]
    return matches[-1] if matches else None
# Normalise a single resolved video result (required keys, thumbnails,
# display_id, upload_date), then apply format selection ('format' param,
# comma/slash lists, '137+139' merge pairs) and hand each chosen format to
# process_info().
# NOTE(review): line-sampled excerpt -- several guards, else: lines and
# dict/call closers are not visible between the lines below.
721 def process_video_result(self, info_dict, download=True):
722 assert info_dict.get('_type', 'video') == 'video'
724 if 'id' not in info_dict:
725 raise ExtractorError('Missing "id" field in extractor result')
726 if 'title' not in info_dict:
727 raise ExtractorError('Missing "title" field in extractor result')
729 if 'playlist' not in info_dict:
730 # It isn't part of a playlist
731 info_dict['playlist'] = None
732 info_dict['playlist_index'] = None
# Sort thumbnails worst-to-best so [-1] is the largest/best candidate.
734 thumbnails = info_dict.get('thumbnails')
736 thumbnails.sort(key=lambda t: (
737 t.get('width'), t.get('height'), t.get('url')))
739 if 'width' in t and 'height' in t:
740 t['resolution'] = '%dx%d' % (t['width'], t['height'])
742 if thumbnails and 'thumbnail' not in info_dict:
743 info_dict['thumbnail'] = thumbnails[-1]['url']
745 if 'display_id' not in info_dict and 'id' in info_dict:
746 info_dict['display_id'] = info_dict['id']
# Derive upload_date (YYYYMMDD, UTC) from a raw timestamp when absent.
748 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
749 upload_date = datetime.datetime.utcfromtimestamp(
750 info_dict['timestamp'])
751 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
# These extractors handle format selection themselves.
753 # This extractors handle format selection themselves
754 if info_dict['extractor'] in ['Youku']:
756 self.process_info(info_dict)
759 # We now pick which formats have to be downloaded
760 if info_dict.get('formats') is None:
761 # There's only one format available
762 formats = [info_dict]
764 formats = info_dict['formats']
767 raise ExtractorError('No video formats found!')
769 # We check that all the formats have the format and format_id fields
770 for i, format in enumerate(formats):
771 if 'url' not in format:
772 raise ExtractorError('Missing "url" key in result (index %d)' % i)
774 if format.get('format_id') is None:
775 format['format_id'] = compat_str(i)
776 if format.get('format') is None:
777 format['format'] = '{id} - {res}{note}'.format(
778 id=format['format_id'],
779 res=self.format_resolution(format),
780 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
782 # Automatically determine file extension if missing
783 if 'ext' not in format:
784 format['ext'] = determine_ext(format['url']).lower()
# format_limit caps quality: keep formats up to and including the limit.
786 format_limit = self.params.get('format_limit', None)
788 formats = list(takewhile_inclusive(
789 lambda f: f['format_id'] != format_limit, formats
792 # TODO Central sorting goes here
794 if formats[0] is not info_dict:
795 # only set the 'formats' field if the original info_dict lists them;
796 # otherwise we end up with a circular reference, the first (and unique)
797 # element in the 'formats' field in info_dict is info_dict itself,
798 # which can't be exported to json
799 info_dict['formats'] = formats
800 if self.params.get('listformats', None):
801 self.list_formats(info_dict)
804 req_format = self.params.get('format')
805 if req_format is None:
807 formats_to_download = []
808 # The -1 is for supporting YoutubeIE
809 if req_format in ('-1', 'all'):
810 formats_to_download = formats
# Comma-separated specs are all downloaded; within each, slash-separated
# alternatives are tried left to right until one is available.
812 for rfstr in req_format.split(','):
813 # We can accept formats requested in the format: 34/5/best, we pick
814 # the first that is available, starting from left
815 req_formats = rfstr.split('/')
816 for rf in req_formats:
817 if re.match(r'.+?\+.+?', rf) is not None:
818 # Two formats have been requested like '137+139'
819 format_1, format_2 = rf.split('+')
820 formats_info = (self.select_format(format_1, formats),
821 self.select_format(format_2, formats))
822 if all(formats_info):
824 'requested_formats': formats_info,
826 'ext': formats_info[0]['ext'],
829 selected_format = None
831 selected_format = self.select_format(rf, formats)
832 if selected_format is not None:
833 formats_to_download.append(selected_format)
835 if not formats_to_download:
836 raise ExtractorError('requested format not available',
840 if len(formats_to_download) > 1:
841 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
842 for format in formats_to_download:
843 new_info = dict(info_dict)
844 new_info.update(format)
845 self.process_info(new_info)
846 # We update the info dict with the best quality format (backwards compatibility)
847 info_dict.update(formats_to_download[-1])
850 def process_info(self, info_dict):
851 """Process a single resolved IE result."""
853 assert info_dict.get('_type', 'video') == 'video'
855 max_downloads = self.params.get('max_downloads')
856 if max_downloads is not None:
857 if self._num_downloads >= int(max_downloads):
858 raise MaxDownloadsReached()
860 info_dict['fulltitle'] = info_dict['title']
861 if len(info_dict['title']) > 200:
862 info_dict['title'] = info_dict['title'][:197] + '...'
864 # Keep for backwards compatibility
865 info_dict['stitle'] = info_dict['title']
867 if 'format' not in info_dict:
868 info_dict['format'] = info_dict['ext']
870 reason = self._match_entry(info_dict)
871 if reason is not None:
872 self.to_screen('[download] ' + reason)
875 self._num_downloads += 1
877 filename = self.prepare_filename(info_dict)
880 if self.params.get('forcetitle', False):
881 self.to_stdout(info_dict['fulltitle'])
882 if self.params.get('forceid', False):
883 self.to_stdout(info_dict['id'])
884 if self.params.get('forceurl', False):
885 # For RTMP URLs, also include the playpath
886 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
887 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
888 self.to_stdout(info_dict['thumbnail'])
889 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
890 self.to_stdout(info_dict['description'])
891 if self.params.get('forcefilename', False) and filename is not None:
892 self.to_stdout(filename)
893 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
894 self.to_stdout(formatSeconds(info_dict['duration']))
895 if self.params.get('forceformat', False):
896 self.to_stdout(info_dict['format'])
897 if self.params.get('forcejson', False):
898 info_dict['_filename'] = filename
899 self.to_stdout(json.dumps(info_dict))
901 # Do nothing else if in simulate mode
902 if self.params.get('simulate', False):
909 dn = os.path.dirname(encodeFilename(filename))
910 if dn and not os.path.exists(dn):
912 except (OSError, IOError) as err:
913 self.report_error('unable to create directory ' + compat_str(err))
916 if self.params.get('writedescription', False):
917 descfn = filename + '.description'
918 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
919 self.to_screen('[info] Video description is already present')
922 self.to_screen('[info] Writing video description to: ' + descfn)
923 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
924 descfile.write(info_dict['description'])
925 except (KeyError, TypeError):
926 self.report_warning('There\'s no description to write.')
927 except (OSError, IOError):
928 self.report_error('Cannot write description file ' + descfn)
931 if self.params.get('writeannotations', False):
932 annofn = filename + '.annotations.xml'
933 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
934 self.to_screen('[info] Video annotations are already present')
937 self.to_screen('[info] Writing video annotations to: ' + annofn)
938 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
939 annofile.write(info_dict['annotations'])
940 except (KeyError, TypeError):
941 self.report_warning('There are no annotations to write.')
942 except (OSError, IOError):
943 self.report_error('Cannot write annotations file: ' + annofn)
946 subtitles_are_requested = any([self.params.get('writesubtitles', False),
947 self.params.get('writeautomaticsub')])
949 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
950 # subtitles download errors are already managed as troubles in relevant IE
951 # that way it will silently go on when used with unsupporting IE
952 subtitles = info_dict['subtitles']
953 sub_format = self.params.get('subtitlesformat', 'srt')
954 for sub_lang in subtitles.keys():
955 sub = subtitles[sub_lang]
959 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
960 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
961 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
963 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
964 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
966 except (OSError, IOError):
967 self.report_error('Cannot write subtitles file ' + sub_filename)
970 if self.params.get('writeinfojson', False):
971 infofn = os.path.splitext(filename)[0] + '.info.json'
972 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
973 self.to_screen('[info] Video description metadata is already present')
975 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
977 write_json_file(info_dict, encodeFilename(infofn))
978 except (OSError, IOError):
979 self.report_error('Cannot write metadata to JSON file ' + infofn)
982 if self.params.get('writethumbnail', False):
983 if info_dict.get('thumbnail') is not None:
984 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
985 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
986 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
987 self.to_screen('[%s] %s: Thumbnail is already present' %
988 (info_dict['extractor'], info_dict['id']))
990 self.to_screen('[%s] %s: Downloading thumbnail ...' %
991 (info_dict['extractor'], info_dict['id']))
993 uf = self.urlopen(info_dict['thumbnail'])
994 with open(thumb_filename, 'wb') as thumbf:
995 shutil.copyfileobj(uf, thumbf)
996 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
997 (info_dict['extractor'], info_dict['id'], thumb_filename))
998 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
999 self.report_warning('Unable to download thumbnail "%s": %s' %
1000 (info_dict['thumbnail'], compat_str(err)))
1002 if not self.params.get('skip_download', False):
1003 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1008 fd = get_suitable_downloader(info)(self, self.params)
1009 for ph in self._progress_hooks:
1010 fd.add_progress_hook(ph)
1011 if self.params.get('verbose'):
1012 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1013 return fd.download(name, info)
1014 if info_dict.get('requested_formats') is not None:
1017 merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1018 if not merger._get_executable():
1020 self.report_warning('You have requested multiple '
1021 'formats but ffmpeg or avconv are not installed.'
1022 ' The formats won\'t be merged')
1024 postprocessors = [merger]
1025 for f in info_dict['requested_formats']:
1026 new_info = dict(info_dict)
1028 fname = self.prepare_filename(new_info)
1029 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1030 downloaded.append(fname)
1031 partial_success = dl(fname, new_info)
1032 success = success and partial_success
1033 info_dict['__postprocessors'] = postprocessors
1034 info_dict['__files_to_merge'] = downloaded
1036 # Just a single file
1037 success = dl(filename, info_dict)
1038 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1039 self.report_error('unable to download video data: %s' % str(err))
1041 except (OSError, IOError) as err:
1042 raise UnavailableVideoError(err)
1043 except (ContentTooShortError, ) as err:
1044 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1049 self.post_process(filename, info_dict)
1050 except (PostProcessingError) as err:
1051 self.report_error('postprocessing: %s' % str(err))
1054 self.record_download_archive(info_dict)
1056 def download(self, url_list):
1057 """Download a given list of URLs."""
# Output filename template, falling back to the library-wide default.
1058 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
# Several URLs rendered into one fixed output name would overwrite each
# other, so refuse early. (Part of the condition is on a line missing
# from this view -- presumably checking the template for variable fields.)
1059 if (len(url_list) > 1 and
1061 and self.params.get('max_downloads') != 1):
1062 raise SameFileError(outtmpl)
1064 for url in url_list:
1066 # extract_info also performs the actual download as a side effect
1067 self.extract_info(url)
# A failed video is reported and the loop moves on to the next URL.
1068 except UnavailableVideoError:
1069 self.report_error('unable to download video')
# --max-downloads limit reached: stop processing the remaining URLs.
1070 except MaxDownloadsReached:
1071 self.to_screen('[info] Maximum number of downloaded files reached.')
# Accumulated return code for the whole batch.
1074 return self._download_retcode
1076 def download_with_info_file(self, info_filename):
# Re-run a download from a previously dumped .info.json file instead of
# re-extracting from the site.
1077 with io.open(info_filename, 'r', encoding='utf-8') as f:
# (The json parse of `f` into `info` is on a line missing from this view.)
1080 self.process_ie_result(info, download=True)
1081 except DownloadError:
# The stored info may be stale (e.g. expired media URLs); retry with a
# fresh extraction from the original webpage URL when it is known.
1082 webpage_url = info.get('webpage_url')
1083 if webpage_url is not None:
1084 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1085 return self.download([webpage_url])
1088 return self._download_retcode
1090 def post_process(self, filename, ie_info):
1091 """Run all the postprocessors on the given file."""
# Work on a copy so postprocessors cannot mutate the caller's dict.
1092 info = dict(ie_info)
1093 info['filepath'] = filename
# Per-video postprocessors (attached under '__postprocessors', e.g. the
# format merger) run before the globally registered self._pps.
# (Initialisation of pps_chain/keep_video is on lines missing from
# this view.)
1096 if ie_info.get('__postprocessors') is not None:
1097 pps_chain.extend(ie_info['__postprocessors'])
1098 pps_chain.extend(self._pps)
1099 for pp in pps_chain:
# Each postprocessor votes on whether the original file should be kept.
1101 keep_video_wish, new_info = pp.run(info)
1102 if keep_video_wish is not None:
1104 keep_video = keep_video_wish
1105 elif keep_video is None:
1106 # No clear decision yet, let IE decide
1107 keep_video = keep_video_wish
1108 except PostProcessingError as e:
1109 self.report_error(e.msg)
# Delete the original only when a PP asked for it and the user did not
# pass -k/--keepvideo.
1110 if keep_video is False and not self.params.get('keepvideo', False):
1112 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1113 os.remove(encodeFilename(filename))
# Best-effort removal: failure to delete is only a warning.
1114 except (IOError, OSError):
1115 self.report_warning('Unable to remove downloaded video file')
1117 def _make_archive_id(self, info_dict):
1118 # Future-proof against any change in case
1119 # and backwards compatibility with prior versions
1120 extractor = info_dict.get('extractor_key')
1121 if extractor is None:
1122 if 'id' in info_dict:
1123 extractor = info_dict.get('ie_key') # key in a playlist
1124 if extractor is None:
1125 return None # Incomplete video information
1126 return extractor.lower() + ' ' + info_dict['id']
1128 def in_download_archive(self, info_dict):
# Return True when this video's archive id is already recorded in the
# --download-archive file. (The early return when no archive file is
# configured and the `return True` on a match are on lines missing
# from this view.)
1129 fn = self.params.get('download_archive')
# Archive ids look like '<extractor> <id>' -- see _make_archive_id.
1133 vid_id = self._make_archive_id(info_dict)
1135 return False # Incomplete video information
# locked_file guards against concurrent youtube-dl processes touching
# the archive at the same time.
1138 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1139 for line in archive_file:
1140 if line.strip() == vid_id:
# A missing archive file (ENOENT) just means nothing was recorded yet;
# any other I/O error is not swallowed here.
1142 except IOError as ioe:
1143 if ioe.errno != errno.ENOENT:
1147 def record_download_archive(self, info_dict):
# Append this video's archive id to the --download-archive file so the
# video is skipped on subsequent runs (see in_download_archive).
1148 fn = self.params.get('download_archive')
1151 vid_id = self._make_archive_id(info_dict)
# Append mode plus locked_file keeps concurrent writers from clobbering
# each other's entries.
1153 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1154 archive_file.write(vid_id + '\n')
1157 def format_resolution(format, default='unknown'):
# Render a human-readable resolution string for a format dict:
# an explicit 'resolution' value wins, then 'WIDTHxHEIGHT', then
# 'HEIGHTp', then '?xWIDTH'.
# NOTE(review): no `self` parameter -- presumably decorated as a
# @staticmethod on a line not shown in this view; confirm upstream.
1158 if format.get('vcodec') == 'none':
1160 if format.get('resolution') is not None:
1161 return format['resolution']
1162 if format.get('height') is not None:
1163 if format.get('width') is not None:
1164 res = '%sx%s' % (format['width'], format['height'])
1166 res = '%sp' % format['height']
1167 elif format.get('width') is not None:
1168 res = '?x%d' % format['width']
1173 def _format_note(self, fdict):
# Build the free-text "note" column shown by --list-formats from a
# format dict: container, codecs, bitrates, sample rate and filesize.
1175 if fdict.get('ext') in ['f4f', 'f4m']:
1176 res += '(unsupported) '
1177 if fdict.get('format_note') is not None:
1178 res += fdict['format_note'] + ' '
# Total (audio+video) bitrate in kbit/s.
1179 if fdict.get('tbr') is not None:
1180 res += '%4dk ' % fdict['tbr']
1181 if fdict.get('container') is not None:
1184 res += '%s container' % fdict['container']
# Video codec and bitrate; vcodec == 'none' marks audio-only formats.
1185 if (fdict.get('vcodec') is not None and
1186 fdict.get('vcodec') != 'none'):
1189 res += fdict['vcodec']
1190 if fdict.get('vbr') is not None:
1192 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1194 if fdict.get('vbr') is not None:
1195 res += '%4dk' % fdict['vbr']
# Audio codec, bitrate and sample rate.
1196 if fdict.get('acodec') is not None:
1199 if fdict['acodec'] == 'none':
1202 res += '%-5s' % fdict['acodec']
1203 elif fdict.get('abr') is not None:
1207 if fdict.get('abr') is not None:
1208 res += '@%3dk' % fdict['abr']
1209 if fdict.get('asr') is not None:
1210 res += ' (%5dHz)' % fdict['asr']
# Exact filesize preferred; '~' marks the extractor's estimate.
1211 if fdict.get('filesize') is not None:
1214 res += format_bytes(fdict['filesize'])
1215 elif fdict.get('filesize_approx') is not None:
1218 res += '~' + format_bytes(fdict['filesize_approx'])
1221 def list_formats(self, info_dict):
# Print the table of available formats for --list-formats.
1222 def line(format, idlen=20):
# One fixed-width table row: format id, extension, resolution, note.
1223 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1224 format['format_id'],
1226 self.format_resolution(format),
1227 self._format_note(format),
# Fall back to treating the whole info_dict as the single format when
# no 'formats' list exists.
1230 formats = info_dict.get('formats', [info_dict])
# Width of the id column: the widest format id, but never narrower
# than the header text.
1231 idlen = max(len('format code'),
1232 max(len(f['format_id']) for f in formats))
1233 formats_s = [line(f, idlen) for f in formats]
# Tag the two extremes; assumes formats are ordered worst-to-best --
# TODO(review): confirm the sort order is established by the caller.
1234 if len(formats) > 1:
1235 formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1236 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1238 header_line = line({
1239 'format_id': 'format code', 'ext': 'extension',
1240 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1241 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1242 (info_dict['id'], header_line, '\n'.join(formats_s)))
1244 def urlopen(self, req):
1245 """ Start an HTTP download """
1247 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1248 # always respected by websites, some tend to give out URLs with non percent-encoded
1249 # non-ASCII characters (see telemb.py, ard.py [#3412])
1250 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1251 # To work around aforementioned issue we will replace request's original URL with
1252 # percent-encoded one
# `req` may be either a plain URL string or a urllib Request object.
1253 req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1254 url = req if req_is_string else req.get_full_url()
1255 url_escaped = escape_url(url)
1257 # Substitute URL if any change after escaping
1258 if url != url_escaped:
# Rebuild the Request around the escaped URL, carrying over data,
# headers and origin information. (The branch handling the plain
# string case is on lines missing from this view.)
1262 req = compat_urllib_request.Request(
1263 url_escaped, data=req.data, headers=req.headers,
1264 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
# All requests honour the configured socket timeout.
1266 return self._opener.open(req, timeout=self._socket_timeout)
1268 def print_debug_header(self):
# Emit the '[debug] ...' banner for --verbose runs: encodings, version,
# git revision, Python/platform information and the proxy map.
1269 if not self.params.get('verbose'):
# Sanity check: a broken build where str is not the expected text type.
1272 if type('') is not compat_str:
1273 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1274 self.report_warning(
1275 'Your Python is broken! Update to a newer and supported version')
# (The assignment to `encoding_str` is on a line missing from this view.)
1278 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1279 locale.getpreferredencoding(),
1280 sys.getfilesystemencoding(),
1281 sys.stdout.encoding,
1282 self.get_encoding()))
1283 write_string(encoding_str, encoding=None)
1285 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
# Best effort: report the git revision when running from a checkout.
1287 sp = subprocess.Popen(
1288 ['git', 'rev-parse', '--short', 'HEAD'],
1289 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1290 cwd=os.path.dirname(os.path.abspath(__file__)))
1291 out, err = sp.communicate()
1292 out = out.decode().strip()
# Only print when the output actually looks like a commit hash.
1293 if re.match('[0-9a-f]+', out):
1294 self._write_string('[debug] Git HEAD: ' + out + '\n')
1300 self._write_string('[debug] Python version %s - %s' %
1301 (platform.python_version(), platform_name()) + '\n')
# Collect the proxies actually configured on the opener's handlers.
1304 for handler in self._opener.handlers:
1305 if hasattr(handler, 'proxies'):
1306 proxy_map.update(handler.proxies)
1307 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1309 def _setup_opener(self):
# Build the urllib opener used for all HTTP(S) traffic: cookie jar,
# proxy handling, HTTPS certificate options and the custom YDL handler.
1310 timeout_val = self.params.get('socket_timeout')
# Default socket timeout is 600 seconds when none was configured.
1311 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1313 opts_cookiefile = self.params.get('cookiefile')
1314 opts_proxy = self.params.get('proxy')
# In-memory jar unless --cookies points at a Mozilla-format cookie file.
1316 if opts_cookiefile is None:
1317 self.cookiejar = compat_cookiejar.CookieJar()
1319 self.cookiejar = compat_cookiejar.MozillaCookieJar(
# Only load the file when it is actually readable.
1321 if os.access(opts_cookiefile, os.R_OK):
1322 self.cookiejar.load()
1324 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
# --proxy '' forces a direct connection; a non-empty value is used for
# both http and https; no --proxy falls back to environment settings.
1326 if opts_proxy is not None:
1327 if opts_proxy == '':
1330 proxies = {'http': opts_proxy, 'https': opts_proxy}
1332 proxies = compat_urllib_request.getproxies()
1333 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1334 if 'http' in proxies and 'https' not in proxies:
1335 proxies['https'] = proxies['http']
1336 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
# --print-traffic turns on urllib's low-level debug output.
1338 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1339 https_handler = make_HTTPS_handler(
1340 self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1341 ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1342 opener = compat_urllib_request.build_opener(
1343 https_handler, proxy_handler, cookie_processor, ydlh)
1344 # Delete the default user-agent header, which would otherwise apply in
1345 # cases where our custom HTTP handler doesn't come into play
1346 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1347 opener.addheaders = []
1348 self._opener = opener
1350 def encode(self, s):
# Encode text to bytes using the configured output encoding; byte
# strings pass through untouched.
1351 if isinstance(s, bytes):
1352 return s # Already encoded
1355 return s.encode(self.get_encoding())
# Enrich the error with a user-facing hint; the re-raise is on a line
# missing from this view.
1356 except UnicodeEncodeError as err:
1357 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1360 def get_encoding(self):
1361 encoding = self.params.get('encoding')
1362 if encoding is None:
1363 encoding = preferredencoding()