# -*- coding: utf-8 -*-

from __future__ import absolute_import, unicode_literals

    compat_urllib_request,
    UnavailableVideoError,
from .extractor import get_info_extractor, gen_extractors
from .downloader import get_suitable_downloader
from .postprocessor import FFmpegMergerPP
from .version import __version__


class YoutubeDL(object):
    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Given a video URL, the downloader doesn't know how to
    extract all the needed information (that is the InfoExtractors'
    job), so it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge of the InfoExtractors
    that are added to it, so this is a "mutual registration".
    username:           Username for authentication purposes.
    password:           Password for authentication purposes.
    videopassword:      Password for accessing a video.
    usenetrc:           Use netrc for authentication instead.
    verbose:            Print additional info to stdout.
    quiet:              Do not print messages to stdout.
    forceurl:           Force printing final URL.
    forcetitle:         Force printing title.
    forceid:            Force printing ID.
    forcethumbnail:     Force printing thumbnail URL.
    forcedescription:   Force printing description.
    forcefilename:      Force printing final filename.
    forceduration:      Force printing duration.
    forcejson:          Force printing info_dict as JSON.
    simulate:           Do not download the video files.
    format:             Video format code.
    format_limit:       Highest quality format to try.
    outtmpl:            Template for output names.
    restrictfilenames:  Do not allow "&" and spaces in file names.
    ignoreerrors:       Do not stop on download errors.
    nooverwrites:       Prevent overwriting files.
    playliststart:      Playlist item to start at.
    playlistend:        Playlist item to end at.
    matchtitle:         Download only matching titles.
    rejecttitle:        Reject downloads for matching titles.
    logger:             Log messages to a logging.Logger instance.
    logtostderr:        Log messages to stderr instead of stdout.
    writedescription:   Write the video description to a .description file
    writeinfojson:      Write the video metadata to a .info.json file
    writeannotations:   Write the video annotations to a .annotations.xml file
    writethumbnail:     Write the thumbnail image to a file
    writesubtitles:     Write the video subtitles to a file
    writeautomaticsub:  Write the automatic subtitles to a file
    allsubtitles:       Downloads all the subtitles of the video
                        (requires writesubtitles or writeautomaticsub)
    listsubtitles:      Lists all available subtitles for the video
    subtitlesformat:    Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslangs:     List of languages of the subtitles to download
    keepvideo:          Keep the video file after post-processing
    daterange:          A DateRange object, download only if the upload_date is in the range.
    skip_download:      Skip the actual download of the video file
    cachedir:           Location of the cache files in the filesystem.
                        None to disable filesystem cache.
    noplaylist:         Download single video instead of a playlist if in doubt.
    age_limit:          An integer representing the user's age in years.
                        Unsuitable videos for the given age are skipped.
    min_views:          An integer representing the minimum view count the video
                        must have in order to not be skipped.
                        Videos without view count information are always
                        downloaded. None for no limit.
    max_views:          An integer representing the maximum view count.
                        Videos that are more popular than that are not
                        downloaded.
                        Videos without view count information are always
                        downloaded. None for no limit.
    download_archive:   File name of a file where all downloads are recorded.
                        Videos already present in the file are not downloaded.
    cookiefile:         File name where cookies should be read from and dumped to.
    nocheckcertificate: Do not verify SSL certificates
    proxy:              URL of the proxy server to use
    socket_timeout:     Time to wait for unresponsive hosts, in seconds
    bidi_workaround:    Work around buggy terminals without bidirectional text
                        support, using fribidi
    debug_printtraffic: Print out sent and received HTTP traffic
    include_ads:        Download ads as well
    default_search:     Prepend this string if an input URL is not valid.
                        'auto' for elaborate guessing

    The following parameters are not used by YoutubeDL itself; they are
    passed on to the downloader:
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle

    The following options are used by the post processors:
    prefer_ffmpeg:      If True, use ffmpeg instead of avconv if both are available,
                        otherwise prefer avconv.
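
    A rough usage sketch (the URL and option values below are purely
    illustrative): build a YoutubeDL around an options dict, register the
    bundled InfoExtractors and hand it the URLs to download:

        ydl = YoutubeDL({'outtmpl': '%(title)s-%(id)s.%(ext)s'})
        ydl.add_default_info_extractors()
        ydl.download(['https://www.youtube.com/watch?v=EXAMPLE'])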

    _download_retcode = None
    _num_downloads = None

    def __init__(self, params=None):
        """Create a FileDownloader object with the given options."""
        self._ies_instances = {}
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr

        if params.get('bidi_workaround', False):
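            # Route all screen output through an external bidi filter:
            # spawn bidiv (or fribidi as a fallback, see below) with its
            # stdout attached to a pty, and read the reordered text back
            # from the pty master.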
                master, slave = pty.openpty()
                width = get_term_width()
                    width_args = ['-w', str(width)]
                    stdin=subprocess.PIPE,
                    stderr=self._err_file)
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround. Make sure that fribidi is an executable file in one of the directories in your $PATH.')

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if '%(stitle)s' in self.params.get('outtmpl', ''):
            self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag (which also secures %(uploader)s et al) instead.')

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        self._ies_instances[ie.ie_key()] = ie
        ie.set_downloader(self)

    def get_info_extractor(self, ie_key):
        Get an instance of an IE with name ie_key. It will try to get one from
        the _ies list; if there is no instance, it will create a new one and
        add it to the extractor list.
        ie = self._ies_instances.get(ie_key)
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)

    def add_default_info_extractors(self):
        Add the InfoExtractors returned by gen_extractors to the end of the list
        for ie in gen_extractors():
            self.add_info_extractor(ie)

    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        pp.set_downloader(self)

    def add_progress_hook(self, ph):
        """Add the progress hook (currently only for the file downloader)"""
        self._progress_hooks.append(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
        assert hasattr(self, '_output_process')
        assert type(message) == type('')
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        return res[:-len('\n')]

    def to_screen(self, message, skip_eol=False):
        """Print message to stdout if not in quiet mode."""
        return self.to_stdout(message, skip_eol, check_quiet=True)

    def to_stdout(self, message, skip_eol=False, check_quiet=False):
        """Print message to stdout, honoring quiet mode when check_quiet is set."""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        elif not check_quiet or not self.params.get('quiet', False):
            message = self._bidi_workaround(message)
            terminator = ['\n', ''][skip_eol]
            output = message + terminator
            write_string(output, self._screen_file)

    def to_stderr(self, message):
        """Print message to stderr."""
        assert type(message) == type('')
        if self.params.get('logger'):
            self.params['logger'].error(message)
            message = self._bidi_workaround(message)
            output = message + '\n'
            write_string(output, self._err_file)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            write_string('\033]0;%s\007' % message, self._screen_file)

    def save_console_title(self):
        if not self.params.get('consoletitle', False):
        if 'TERM' in os.environ:
            # Save the title on stack
            write_string('\033[22;0t', self._screen_file)

    def restore_console_title(self):
        if not self.params.get('consoletitle', False):
        if 'TERM' in os.environ:
            # Restore the title from stack
            write_string('\033[23;0t', self._screen_file)

        self.save_console_title()

    def __exit__(self, *args):
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save()

    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.
        Depending on whether the downloader has been configured to ignore
        download errors or not, this method may raise an exception when
        errors are found, after printing the message.
        tb, if given, is additional traceback information.
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
        if not self.params.get('ignoreerrors', False):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    def report_warning(self, message):
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty, the 'WARNING:' prefix will be colored.
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
            if self._err_file.isatty() and os.name != 'nt':
                _msg_header = '\033[0;33mWARNING:\033[0m'
                _msg_header = 'WARNING:'
            warning_message = '%s %s' % (_msg_header, message)
            self.to_stderr(warning_message)

    def report_error(self, message, tb=None):
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        if self._err_file.isatty() and os.name != 'nt':
            _msg_header = '\033[0;31mERROR:\033[0m'
            _msg_header = 'ERROR:'
        error_message = '%s %s' % (_msg_header, message)
        self.trouble(error_message, tb)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def prepare_filename(self, info_dict):
        """Generate the output filename."""
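        # The output name comes from ordinary %-substitution of the template
        # against the (sanitized) info dict, e.g. with purely illustrative
        # values: '%(title)s-%(id)s.%(ext)s' -> 'Some video-abc123.mp4'.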
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
            autonumber_templ = '%0' + str(autonumber_size) + 'd'
            template_dict['autonumber'] = autonumber_templ % self._num_downloads
            if template_dict.get('playlist_index') is not None:
                template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
            if template_dict.get('resolution') is None:
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    template_dict['resolution'] = '?x%d' % template_dict['width']

            sanitize = lambda k, v: sanitize_filename(
                restricted=self.params.get('restrictfilenames'),
            template_dict = dict((k, sanitize(k, v))
                                 for k, v in template_dict.items()
            template_dict = collections.defaultdict(lambda: 'NA', template_dict)

            tmpl = os.path.expanduser(self.params['outtmpl'])
            filename = tmpl % template_dict
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')

    def _match_entry(self, info_dict):
        """ Returns None iff the file should be downloaded """

        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date', None)
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count', None)
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        age_limit = self.params.get('age_limit')
        if age_limit is not None:
            if age_limit < info_dict.get('age_limit', 0):
                return 'Skipping "' + video_title + '" because it is age restricted'
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title

    def add_extra_info(info_dict, extra_info):
        '''Set the keys from extra_info in info dict if they are missing'''
        for key, value in extra_info.items():
            info_dict.setdefault(key, value)

    def extract_info(self, url, download=True, ie_key=None, extra_info={},
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
            ies = [self.get_info_extractor(ie_key)]
            if not ie.suitable(url):
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')
                ie_result = ie.extract(url)
                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                        '_type': 'compat_list',
                        'entries': ie_result,
                self.add_extra_info(ie_result,
                        'extractor': ie.IE_NAME,
                        'webpage_url_basename': url_basename(url),
                        'extractor_key': ie.ie_key(),
                return self.process_ie_result(ie_result, download, extra_info)
            except ExtractorError as de:  # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
            except MaxDownloadsReached:
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
        self.report_error('no suitable InfoExtractor for URL %s' % url)

    def process_ie_result(self, ie_result, download=True, extra_info={}):
        Take the result of the ie (may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
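
        The '_type' key of ie_result selects how the result is resolved:
        'video' is processed directly, 'url' and 'url_transparent' are
        extracted again, 'playlist' has each of its entries processed
        recursively, and 'compat_list' is the legacy flat list of results.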
        result_type = ie_result.get('_type', 'video')  # If not given, we suppose it's a video (supports the old default system)
        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            def make_result(embedded_info):
                new_result = ie_result.copy()
                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                          'entries', 'ie_key', 'duration',
                          'subtitles', 'annotations', 'format',
                          'thumbnail', 'thumbnails'):
                    if f in embedded_info:
                        new_result[f] = embedded_info[f]
            new_result = make_result(info)

            assert new_result.get('_type') != 'url_transparent'
            if new_result.get('_type') == 'compat_list':
                new_result['entries'] = [
                    make_result(e) for e in new_result['entries']]

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:

            if isinstance(ie_result['entries'], list):
                n_all_entries = len(ie_result['entries'])
                entries = ie_result['entries'][playliststart:playlistend]
                n_entries = len(entries)
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
                assert isinstance(ie_result['entries'], PagedList)
                entries = ie_result['entries'].getslice(
                    playliststart, playlistend)
                n_entries = len(entries)
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
                    'playlist': playlist,
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],

                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen('[download] ' + reason)

                entry_result = self.process_ie_result(entry,
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
        elif result_type == 'compat_list':
                self.add_extra_info(r,
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            raise Exception('Invalid result type: %s' % result_type)

    def select_format(self, format_spec, available_formats):
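        """Pick a single format out of available_formats (assumed to be
        sorted from worst to best) for a simple format_spec: 'best'/'worst',
        'bestaudio'/'worstaudio', 'bestvideo'/'worstvideo', a known file
        extension such as 'mp4', or an exact format_id.
        """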
        if format_spec == 'best' or format_spec is None:
            return available_formats[-1]
        elif format_spec == 'worst':
            return available_formats[0]
        elif format_spec == 'bestaudio':
                f for f in available_formats
                if f.get('vcodec') == 'none']
                return audio_formats[-1]
        elif format_spec == 'worstaudio':
                f for f in available_formats
                if f.get('vcodec') == 'none']
                return audio_formats[0]
        elif format_spec == 'bestvideo':
                f for f in available_formats
                if f.get('acodec') == 'none']
                return video_formats[-1]
        elif format_spec == 'worstvideo':
                f for f in available_formats
                if f.get('acodec') == 'none']
                return video_formats[0]
            extensions = ['mp4', 'flv', 'webm', '3gp']
            if format_spec in extensions:
                filter_f = lambda f: f['ext'] == format_spec
                filter_f = lambda f: f['format_id'] == format_spec
            matches = list(filter(filter_f, available_formats))

    def process_video_result(self, info_dict, download=True):
        assert info_dict.get('_type', 'video') == 'video'

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            upload_date = datetime.datetime.utcfromtimestamp(
                info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
        # These extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
                self.process_info(info_dict)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
            formats = info_dict['formats']

            raise ExtractorError('No video formats found!')

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url'])

        format_limit = self.params.get('format_limit', None)
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # Only set the 'formats' field if the original info_dict lists them;
            # otherwise we end up with a circular reference: the first (and only)
            # element of the 'formats' field in info_dict would be info_dict
            # itself, which can't be exported to JSON.
            info_dict['formats'] = formats
        if self.params.get('listformats', None):
            self.list_formats(info_dict)

        req_format = self.params.get('format')
        if req_format is None:
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
            # We can accept formats requested in the form '34/5/best'; we pick
            # the first one that is available, starting from the left
            req_formats = req_format.split('/')
            for rf in req_formats:
                if re.match(r'.+?\+.+?', rf) is not None:
                    # Two formats have been requested like '137+139'
                    format_1, format_2 = rf.split('+')
                    formats_info = (self.select_format(format_1, formats),
                                    self.select_format(format_2, formats))
                    if all(formats_info):
                            'requested_formats': formats_info,
                            'ext': formats_info[0]['ext'],
                        selected_format = None
                    selected_format = self.select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download = [selected_format]
        if not formats_to_download:
            raise ExtractorError('requested format not available',

        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for format in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(format)
            self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])

    def process_info(self, info_dict):
        """Process a single resolved IE result."""

        assert info_dict.get('_type', 'video') == 'video'

        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()

        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            info_dict['title'] = info_dict['title'][:197] + '...'

        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        reason = self._match_entry(info_dict)
        if reason is not None:
            self.to_screen('[download] ' + reason)

        self._num_downloads += 1

        filename = self.prepare_filename(info_dict)

        if self.params.get('forcetitle', False):
            self.to_stdout(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            # For RTMP URLs, also include the playpath
            self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
            info_dict['_filename'] = filename
            self.to_stdout(json.dumps(info_dict))

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):

            dn = os.path.dirname(encodeFilename(filename))
            if dn != '' and not os.path.exists(dn):
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + compat_str(err))

        if self.params.get('writedescription', False):
            descfn = filename + '.description'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
                    self.to_screen('[info] Writing video description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(info_dict['description'])
                except (KeyError, TypeError):
                    self.report_warning('There\'s no description to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write description file ' + descfn)

        if self.params.get('writeannotations', False):
            annofn = filename + '.annotations.xml'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
            # Subtitle download errors are already managed as troubles in the
            # relevant IE; that way it will silently go on when used with an
            # IE that doesn't support subtitles
            subtitles = info_dict['subtitles']
            sub_format = self.params.get('subtitlesformat', 'srt')
            for sub_lang in subtitles.keys():
                sub = subtitles[sub_lang]
                    sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                    if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                        self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                        self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                except (OSError, IOError):
                    self.report_error('Cannot write subtitles file ' + sub_filename)

        if self.params.get('writeinfojson', False):
            infofn = os.path.splitext(filename)[0] + '.info.json'
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video description metadata is already present')
                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                    write_json_file(info_dict, encodeFilename(infofn))
                except (OSError, IOError):
                    self.report_error('Cannot write metadata to JSON file ' + infofn)

        if self.params.get('writethumbnail', False):
            if info_dict.get('thumbnail') is not None:
                thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
                thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                    self.to_screen('[%s] %s: Thumbnail is already present' %
                                   (info_dict['extractor'], info_dict['id']))
                    self.to_screen('[%s] %s: Downloading thumbnail ...' %
                                   (info_dict['extractor'], info_dict['id']))
                        uf = self.urlopen(info_dict['thumbnail'])
                        with open(thumb_filename, 'wb') as thumbf:
                            shutil.copyfileobj(uf, thumbf)
                        self.to_screen('[%s] %s: Writing thumbnail to: %s' %
                                       (info_dict['extractor'], info_dict['id'], thumb_filename))
                    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                        self.report_warning('Unable to download thumbnail "%s": %s' %
                                            (info_dict['thumbnail'], compat_str(err)))

        if not self.params.get('skip_download', False):
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
                        fd = get_suitable_downloader(info)(self, self.params)
                        for ph in self._progress_hooks:
                            fd.add_progress_hook(ph)
                        return fd.download(name, info)
                    if info_dict.get('requested_formats') is not None:
                        merger = FFmpegMergerPP(self)
                        if not merger._get_executable():
                            self.report_warning('You have requested multiple '
                                'formats but ffmpeg or avconv are not installed.'
                                ' The formats won\'t be merged')
                            postprocessors = [merger]
                        for f in info_dict['requested_formats']:
                            new_info = dict(info_dict)
                            fname = self.prepare_filename(new_info)
                            fname = prepend_extension(fname, 'f%s' % f['format_id'])
                            downloaded.append(fname)
                            partial_success = dl(fname, new_info)
                            success = success and partial_success
                        info_dict['__postprocessors'] = postprocessors
                        info_dict['__files_to_merge'] = downloaded
                        success = dl(filename, info_dict)
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.report_error('unable to download video data: %s' % str(err))
                except (OSError, IOError) as err:
                    raise UnavailableVideoError(err)
                except (ContentTooShortError, ) as err:
                    self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))

                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error('postprocessing: %s' % str(err))

        self.record_download_archive(info_dict)

    def download(self, url_list):
        """Download a given list of URLs."""
        if (len(url_list) > 1 and
                '%' not in self.params['outtmpl']
                and self.params.get('max_downloads') != 1):
            raise SameFileError(self.params['outtmpl'])

        for url in url_list:
                # It also downloads the videos
                self.extract_info(url)
            except UnavailableVideoError:
                self.report_error('unable to download video')
            except MaxDownloadsReached:
                self.to_screen('[info] Maximum number of downloaded files reached.')

        return self._download_retcode

    def download_with_info_file(self, info_filename):
        with io.open(info_filename, 'r', encoding='utf-8') as f:
            self.process_ie_result(info, download=True)
        except DownloadError:
            webpage_url = info.get('webpage_url')
            if webpage_url is not None:
                self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
                return self.download([webpage_url])

        return self._download_retcode

    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file."""
        info = dict(ie_info)
        info['filepath'] = filename
        if ie_info.get('__postprocessors') is not None:
            pps_chain.extend(ie_info['__postprocessors'])
        pps_chain.extend(self._pps)
        for pp in pps_chain:
                keep_video_wish, new_info = pp.run(info)
                if keep_video_wish is not None:
                        keep_video = keep_video_wish
                    elif keep_video is None:
                        # No clear decision yet, let IE decide
                        keep_video = keep_video_wish
            except PostProcessingError as e:
                self.report_error(e.msg)
        if keep_video is False and not self.params.get('keepvideo', False):
                self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
                os.remove(encodeFilename(filename))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded video file')

    def _make_archive_id(self, info_dict):
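        # Archive entries are single lines of the form
        # '<extractor key, lowercased> <video id>', e.g. 'youtube abc123'
        # (the id here is purely illustrative).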
        # Future-proof against any change in case
        # and backwards compatibility with prior versions
        extractor = info_dict.get('extractor_key')
        if extractor is None:
            if 'id' in info_dict:
                extractor = info_dict.get('ie_key')  # key in a playlist
        if extractor is None:
            return None  # Incomplete video information
        return extractor.lower() + ' ' + info_dict['id']

    def in_download_archive(self, info_dict):
        fn = self.params.get('download_archive')

        vid_id = self._make_archive_id(info_dict)
            return False  # Incomplete video information

            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    if line.strip() == vid_id:
        except IOError as ioe:
            if ioe.errno != errno.ENOENT:

    def record_download_archive(self, info_dict):
        fn = self.params.get('download_archive')
        vid_id = self._make_archive_id(info_dict)
        with locked_file(fn, 'a', encoding='utf-8') as archive_file:
            archive_file.write(vid_id + '\n')

    def format_resolution(format, default='unknown'):
        if format.get('vcodec') == 'none':
        if format.get('resolution') is not None:
            return format['resolution']
        if format.get('height') is not None:
            if format.get('width') is not None:
                res = '%sx%s' % (format['width'], format['height'])
                res = '%sp' % format['height']
        elif format.get('width') is not None:
            res = '?x%d' % format['width']

    def list_formats(self, info_dict):
        def format_note(fdict):
            if fdict.get('ext') in ['f4f', 'f4m']:
                res += '(unsupported) '
            if fdict.get('format_note') is not None:
                res += fdict['format_note'] + ' '
            if fdict.get('tbr') is not None:
                res += '%4dk ' % fdict['tbr']
            if fdict.get('container') is not None:
                res += '%s container' % fdict['container']
            if (fdict.get('vcodec') is not None and
                    fdict.get('vcodec') != 'none'):
                res += fdict['vcodec']
                if fdict.get('vbr') is not None:
            elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            if fdict.get('vbr') is not None:
                res += '%4dk' % fdict['vbr']
            if fdict.get('acodec') is not None:
                if fdict['acodec'] == 'none':
                    res += '%-5s' % fdict['acodec']
            elif fdict.get('abr') is not None:
            if fdict.get('abr') is not None:
                res += '@%3dk' % fdict['abr']
            if fdict.get('asr') is not None:
                res += ' (%5dHz)' % fdict['asr']
            if fdict.get('filesize') is not None:
                res += format_bytes(fdict['filesize'])

        def line(format, idlen=20):
            return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
                format['format_id'],
                self.format_resolution(format),
                format_note(format),

        formats = info_dict.get('formats', [info_dict])
        idlen = max(len('format code'),
                    max(len(f['format_id']) for f in formats))
        formats_s = [line(f, idlen) for f in formats]
        if len(formats) > 1:
            formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
            formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

        header_line = line({
            'format_id': 'format code', 'ext': 'extension',
            'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
        self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                       (info_dict['id'], header_line, '\n'.join(formats_s)))

    def urlopen(self, req):
        """ Start an HTTP download """
        return self._opener.open(req, timeout=self._socket_timeout)

    def print_debug_header(self):
        if not self.params.get('verbose'):
        write_string('[debug] youtube-dl version ' + __version__ + '\n')
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                write_string('[debug] Git HEAD: ' + out + '\n')
        write_string('[debug] Python version %s - %s' %
                     (platform.python_version(), platform_name()) + '\n')

        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    def _setup_opener(self):
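        # Build the urllib opener used for all HTTP(S) requests: a cookie jar
        # (optionally persisted to 'cookiefile'), proxy handling, our HTTPS
        # handler (certificate checks, traffic debugging) and YoutubeDLHandler.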
        timeout_val = self.params.get('socket_timeout')
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            self.cookiejar = compat_cookiejar.CookieJar()
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        if opts_proxy is not None:
            if opts_proxy == '':
                proxies = {'http': opts_proxy, 'https': opts_proxy}
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = compat_urllib_request.ProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(
            self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(debuglevel=debuglevel)
        opener = compat_urllib_request.build_opener(
            https_handler, proxy_handler, cookie_processor, ydlh)
        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener