2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
28 compat_urllib_request,
50 UnavailableVideoError,
57 from .extractor import get_info_extractor, gen_extractors
58 from .downloader import get_suitable_downloader
59 from .postprocessor import FFmpegMergerPP
60 from .version import __version__
63 class YoutubeDL(object):
66 YoutubeDL objects are the ones responsible for downloading the
67 actual video file and writing it to disk if the user has requested
68 it, among some other tasks. In most cases there should be one per
69 program. As, given a video URL, the downloader doesn't know how to
70 extract all the needed information, task that InfoExtractors do, it
71 has to pass the URL to one of them.
73 For this, YoutubeDL objects have a method that allows
74 InfoExtractors to be registered in a given order. When it is passed
75 a URL, the YoutubeDL object hands it to the first InfoExtractor it
76 finds that reports being able to handle it. The InfoExtractor extracts
77 all the information about the video or videos the URL refers to, and
78 YoutubeDL processes the extracted information, possibly using a File
79 Downloader to download the video.
81 YoutubeDL objects accept a lot of parameters. In order not to saturate
82 the object constructor with arguments, it receives a dictionary of
83 options instead. These options are available through the params
84 attribute for the InfoExtractors to use. The YoutubeDL also
85 registers itself as the downloader in charge for the InfoExtractors
86 that are added to it, so this is a "mutual registration".
90 username: Username for authentication purposes.
91 password: Password for authentication purposes.
92 videopassword: Password for accessing a video.
93 usenetrc: Use netrc for authentication instead.
94 verbose: Print additional info to stdout.
95 quiet: Do not print messages to stdout.
96 forceurl: Force printing final URL.
97 forcetitle: Force printing title.
98 forceid: Force printing ID.
99 forcethumbnail: Force printing thumbnail URL.
100 forcedescription: Force printing description.
101 forcefilename: Force printing final filename.
102 forceduration: Force printing duration.
103 forcejson: Force printing info_dict as JSON.
104 simulate: Do not download the video files.
105 format: Video format code.
106 format_limit: Highest quality format to try.
107 outtmpl: Template for output names.
108 restrictfilenames: Do not allow "&" and spaces in file names
109 ignoreerrors: Do not stop on download errors.
110 nooverwrites: Prevent overwriting files.
111 playliststart: Playlist item to start at.
112 playlistend: Playlist item to end at.
113 matchtitle: Download only matching titles.
114 rejecttitle: Reject downloads for matching titles.
115 logger: Log messages to a logging.Logger instance.
116 logtostderr: Log messages to stderr instead of stdout.
117 writedescription: Write the video description to a .description file
118 writeinfojson: Write the video metadata to a .info.json file
119 writeannotations: Write the video annotations to a .annotations.xml file
120 writethumbnail: Write the thumbnail image to a file
121 writesubtitles: Write the video subtitles to a file
122 writeautomaticsub: Write the automatic subtitles to a file
123 allsubtitles: Downloads all the subtitles of the video
124 (requires writesubtitles or writeautomaticsub)
125 listsubtitles: Lists all available subtitles for the video
126 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
127 subtitleslangs: List of languages of the subtitles to download
128 keepvideo: Keep the video file after post-processing
129 daterange: A DateRange object, download only if the upload_date is in the range.
130 skip_download: Skip the actual download of the video file
131 cachedir: Location of the cache files in the filesystem.
132 None to disable filesystem cache.
133 noplaylist: Download single video instead of a playlist if in doubt.
134 age_limit: An integer representing the user's age in years.
135 Unsuitable videos for the given age are skipped.
136 min_views: An integer representing the minimum view count the video
137 must have in order to not be skipped.
138 Videos without view count information are always
139 downloaded. None for no limit.
140 max_views: An integer representing the maximum view count.
141 Videos that are more popular than that are not
143 Videos without view count information are always
144 downloaded. None for no limit.
145 download_archive: File name of a file where all downloads are recorded.
146 Videos already present in the file are not downloaded
148 cookiefile: File name where cookies should be read from and dumped to.
149 nocheckcertificate:Do not verify SSL certificates
150 proxy: URL of the proxy server to use
151 socket_timeout: Time to wait for unresponsive hosts, in seconds
152 bidi_workaround: Work around buggy terminals without bidirectional text
153 support, using fribidi
154 debug_printtraffic:Print out sent and received HTTP traffic
155 include_ads: Download ads as well
156 default_search: Prepend this string if an input url is not valid.
157 'auto' for elaborate guessing
159 The following parameters are not used by YoutubeDL itself, they are used by
161 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
162 noresizebuffer, retries, continuedl, noprogress, consoletitle
164 The following options are used by the post processors:
165 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
166 otherwise prefer avconv.
172 _download_retcode = None
173 _num_downloads = None
176 def __init__(self, params=None):
177 """Create a FileDownloader object with the given options."""
181 self._ies_instances = {}
183 self._progress_hooks = []
184 self._download_retcode = 0
185 self._num_downloads = 0
186 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
187 self._err_file = sys.stderr
190 if params.get('bidi_workaround', False):
193 master, slave = pty.openpty()
194 width = get_term_width()
198 width_args = ['-w', str(width)]
200 stdin=subprocess.PIPE,
202 stderr=self._err_file)
204 self._output_process = subprocess.Popen(
205 ['bidiv'] + width_args, **sp_kwargs
208 self._output_process = subprocess.Popen(
209 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
210 self._output_channel = os.fdopen(master, 'rb')
211 except OSError as ose:
213 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
217 if (sys.version_info >= (3,) and sys.platform != 'win32' and
218 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
219 and not params['restrictfilenames']):
220 # On Python 3, the Unicode filesystem API will throw errors (#1474)
222 'Assuming --restrict-filenames since file system encoding '
223 'cannot encode all charactes. '
224 'Set the LC_ALL environment variable to fix this.')
225 self.params['restrictfilenames'] = True
227 if '%(stitle)s' in self.params.get('outtmpl', ''):
228 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
232 def add_info_extractor(self, ie):
233 """Add an InfoExtractor object to the end of the list."""
235 self._ies_instances[ie.ie_key()] = ie
236 ie.set_downloader(self)
238 def get_info_extractor(self, ie_key):
240 Get an instance of an IE with name ie_key, it will try to get one from
241 the _ies list, if there's no instance it will create a new one and add
242 it to the extractor list.
244 ie = self._ies_instances.get(ie_key)
246 ie = get_info_extractor(ie_key)()
247 self.add_info_extractor(ie)
def add_default_info_extractors(self):
    """
    Register every InfoExtractor yielded by gen_extractors(), in the order
    they are produced, by handing each one to add_info_extractor().
    """
    for extractor in gen_extractors():
        self.add_info_extractor(extractor)
257 def add_post_processor(self, pp):
258 """Add a PostProcessor object to the end of the chain."""
260 pp.set_downloader(self)
def add_progress_hook(self, ph):
    """Register ph as a progress hook (currently only for the file downloader).

    Hooks are stored in self._progress_hooks in registration order.
    """
    registered = self._progress_hooks
    registered.append(ph)
266 def _bidi_workaround(self, message):
267 if not hasattr(self, '_output_channel'):
270 assert hasattr(self, '_output_process')
271 assert type(message) == type('')
272 line_count = message.count('\n') + 1
273 self._output_process.stdin.write((message + '\n').encode('utf-8'))
274 self._output_process.stdin.flush()
275 res = ''.join(self._output_channel.readline().decode('utf-8')
276 for _ in range(line_count))
277 return res[:-len('\n')]
def to_screen(self, message, skip_eol=False):
    """Output message through to_stdout() with the quiet check enabled.

    skip_eol is forwarded unchanged; whatever to_stdout() returns is
    handed back to the caller.
    """
    outcome = self.to_stdout(message, skip_eol, check_quiet=True)
    return outcome
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Emit message on the screen stream, or on the configured logger.

    If the 'logger' param is set, the message is passed to its debug()
    method and nothing is written to the screen stream. Otherwise the
    message goes through the bidi workaround and is written to
    self._screen_file.

    message     -- text to output
    skip_eol    -- if true, no trailing newline is appended
    check_quiet -- if true, nothing is written when the 'quiet' param is
                   set; if false, 'quiet' is ignored
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return

    message = self._bidi_workaround(message)
    # A conditional expression accepts any truthy/falsy flag; indexing a
    # two-item list with it raised for None or for integers other than 0/1.
    terminator = '' if skip_eol else '\n'
    write_string(message + terminator, self._screen_file)
294 def to_stderr(self, message):
295 """Print message to stderr."""
296 assert type(message) == type('')
297 if self.params.get('logger'):
298 self.params['logger'].error(message)
300 message = self._bidi_workaround(message)
301 output = message + '\n'
302 write_string(output, self._err_file)
304 def to_console_title(self, message):
305 if not self.params.get('consoletitle', False):
307 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
308 # c_wchar_p() might not be necessary if `message` is
309 # already of type unicode()
310 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
311 elif 'TERM' in os.environ:
312 write_string('\033]0;%s\007' % message, self._screen_file)
314 def save_console_title(self):
315 if not self.params.get('consoletitle', False):
317 if 'TERM' in os.environ:
318 # Save the title on stack
319 write_string('\033[22;0t', self._screen_file)
321 def restore_console_title(self):
322 if not self.params.get('consoletitle', False):
324 if 'TERM' in os.environ:
325 # Restore the title from stack
326 write_string('\033[23;0t', self._screen_file)
329 self.save_console_title()
def __exit__(self, *args):
    """Leave the context: restore the console title and persist cookies.

    The cookie jar is saved only when the 'cookiefile' param is not None.
    The exception details in args are ignored and None is returned, so an
    exception raised inside the with-block is not suppressed.
    """
    self.restore_console_title()

    if self.params.get('cookiefile') is None:
        return
    self.cookiejar.save()
338 def trouble(self, message=None, tb=None):
339 """Determine action to take when a download problem appears.
341 Depending on if the downloader has been configured to ignore
342 download errors or not, this method may throw an exception or
343 not when errors are found, after printing the message.
345 tb, if given, is additional traceback information.
347 if message is not None:
348 self.to_stderr(message)
349 if self.params.get('verbose'):
351 if sys.exc_info()[0]: # if .trouble has been called from an except block
353 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
354 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
355 tb += compat_str(traceback.format_exc())
357 tb_data = traceback.format_list(traceback.extract_stack())
358 tb = ''.join(tb_data)
360 if not self.params.get('ignoreerrors', False):
361 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
362 exc_info = sys.exc_info()[1].exc_info
364 exc_info = sys.exc_info()
365 raise DownloadError(message, exc_info)
366 self._download_retcode = 1
368 def report_warning(self, message):
370 Print the message to stderr, it will be prefixed with 'WARNING:'
371 If stderr is a tty file the 'WARNING:' will be colored
373 if self._err_file.isatty() and os.name != 'nt':
374 _msg_header = '\033[0;33mWARNING:\033[0m'
376 _msg_header = 'WARNING:'
377 warning_message = '%s %s' % (_msg_header, message)
378 self.to_stderr(warning_message)
380 def report_error(self, message, tb=None):
382 Do the same as trouble, but prefixes the message with 'ERROR:', colored
383 in red if stderr is a tty file.
385 if self._err_file.isatty() and os.name != 'nt':
386 _msg_header = '\033[0;31mERROR:\033[0m'
388 _msg_header = 'ERROR:'
389 error_message = '%s %s' % (_msg_header, message)
390 self.trouble(error_message, tb)
392 def report_file_already_downloaded(self, file_name):
393 """Report file has already been fully downloaded."""
395 self.to_screen('[download] %s has already been downloaded' % file_name)
396 except UnicodeEncodeError:
397 self.to_screen('[download] The file has already been downloaded')
399 def prepare_filename(self, info_dict):
400 """Generate the output filename."""
402 template_dict = dict(info_dict)
404 template_dict['epoch'] = int(time.time())
405 autonumber_size = self.params.get('autonumber_size')
406 if autonumber_size is None:
408 autonumber_templ = '%0' + str(autonumber_size) + 'd'
409 template_dict['autonumber'] = autonumber_templ % self._num_downloads
410 if template_dict.get('playlist_index') is not None:
411 template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
412 if template_dict.get('resolution') is None:
413 if template_dict.get('width') and template_dict.get('height'):
414 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
415 elif template_dict.get('height'):
416 res = '%sp' % template_dict['height']
417 elif template_dict.get('width'):
418 res = '?x%d' % template_dict['width']
420 sanitize = lambda k, v: sanitize_filename(
422 restricted=self.params.get('restrictfilenames'),
424 template_dict = dict((k, sanitize(k, v))
425 for k, v in template_dict.items()
427 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
429 tmpl = os.path.expanduser(self.params['outtmpl'])
430 filename = tmpl % template_dict
432 except ValueError as err:
433 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
436 def _match_entry(self, info_dict):
437 """ Returns None iff the file should be downloaded """
439 video_title = info_dict.get('title', info_dict.get('id', 'video'))
440 if 'title' in info_dict:
441 # This can happen when we're just evaluating the playlist
442 title = info_dict['title']
443 matchtitle = self.params.get('matchtitle', False)
445 if not re.search(matchtitle, title, re.IGNORECASE):
446 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
447 rejecttitle = self.params.get('rejecttitle', False)
449 if re.search(rejecttitle, title, re.IGNORECASE):
450 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
451 date = info_dict.get('upload_date', None)
453 dateRange = self.params.get('daterange', DateRange())
454 if date not in dateRange:
455 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
456 view_count = info_dict.get('view_count', None)
457 if view_count is not None:
458 min_views = self.params.get('min_views')
459 if min_views is not None and view_count < min_views:
460 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
461 max_views = self.params.get('max_views')
462 if max_views is not None and view_count > max_views:
463 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
464 age_limit = self.params.get('age_limit')
465 if age_limit is not None:
466 if age_limit < info_dict.get('age_limit', 0):
467 return 'Skipping "' + title + '" because it is age restricted'
468 if self.in_download_archive(info_dict):
469 return '%s has already been recorded in archive' % video_title
def add_extra_info(info_dict, extra_info):
    '''Copy into info_dict every entry of extra_info whose key it lacks.

    Keys already present in info_dict keep their current value, even when
    that value is None. info_dict is modified in place; nothing is returned.
    '''
    for name in extra_info:
        if name not in info_dict:
            info_dict[name] = extra_info[name]
478 def extract_info(self, url, download=True, ie_key=None, extra_info={},
481 Returns a list with a dictionary for each video we find.
482 If 'download', also downloads the videos.
483 extra_info is a dict containing the extra values to add to each result
487 ies = [self.get_info_extractor(ie_key)]
492 if not ie.suitable(url):
496 self.report_warning('The program functionality for this site has been marked as broken, '
497 'and will probably not work.')
500 ie_result = ie.extract(url)
501 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
503 if isinstance(ie_result, list):
504 # Backwards compatibility: old IE result format
506 '_type': 'compat_list',
507 'entries': ie_result,
509 self.add_extra_info(ie_result,
511 'extractor': ie.IE_NAME,
513 'webpage_url_basename': url_basename(url),
514 'extractor_key': ie.ie_key(),
517 return self.process_ie_result(ie_result, download, extra_info)
520 except ExtractorError as de: # An error we somewhat expected
521 self.report_error(compat_str(de), de.format_traceback())
523 except MaxDownloadsReached:
525 except Exception as e:
526 if self.params.get('ignoreerrors', False):
527 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
532 self.report_error('no suitable InfoExtractor: %s' % url)
534 def process_ie_result(self, ie_result, download=True, extra_info={}):
536 Take the result of the ie(may be modified) and resolve all unresolved
537 references (URLs, playlist items).
539 It will also download the videos if 'download'.
540 Returns the resolved ie_result.
543 result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
544 if result_type == 'video':
545 self.add_extra_info(ie_result, extra_info)
546 return self.process_video_result(ie_result, download=download)
547 elif result_type == 'url':
548 # We have to add extra_info to the results because it may be
549 # contained in a playlist
550 return self.extract_info(ie_result['url'],
552 ie_key=ie_result.get('ie_key'),
553 extra_info=extra_info)
554 elif result_type == 'url_transparent':
555 # Use the information from the embedding page
556 info = self.extract_info(
557 ie_result['url'], ie_key=ie_result.get('ie_key'),
558 extra_info=extra_info, download=False, process=False)
560 def make_result(embedded_info):
561 new_result = ie_result.copy()
562 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
563 'entries', 'ie_key', 'duration',
564 'subtitles', 'annotations', 'format',
565 'thumbnail', 'thumbnails'):
568 if f in embedded_info:
569 new_result[f] = embedded_info[f]
571 new_result = make_result(info)
573 assert new_result.get('_type') != 'url_transparent'
574 if new_result.get('_type') == 'compat_list':
575 new_result['entries'] = [
576 make_result(e) for e in new_result['entries']]
578 return self.process_ie_result(
579 new_result, download=download, extra_info=extra_info)
580 elif result_type == 'playlist':
581 # We process each entry in the playlist
582 playlist = ie_result.get('title', None) or ie_result.get('id', None)
583 self.to_screen('[download] Downloading playlist: %s' % playlist)
585 playlist_results = []
587 playliststart = self.params.get('playliststart', 1) - 1
588 playlistend = self.params.get('playlistend', None)
589 # For backwards compatibility, interpret -1 as whole list
590 if playlistend == -1:
593 if isinstance(ie_result['entries'], list):
594 n_all_entries = len(ie_result['entries'])
595 entries = ie_result['entries'][playliststart:playlistend]
596 n_entries = len(entries)
598 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
599 (ie_result['extractor'], playlist, n_all_entries, n_entries))
601 assert isinstance(ie_result['entries'], PagedList)
602 entries = ie_result['entries'].getslice(
603 playliststart, playlistend)
604 n_entries = len(entries)
606 "[%s] playlist %s: Downloading %d videos" %
607 (ie_result['extractor'], playlist, n_entries))
609 for i, entry in enumerate(entries, 1):
610 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
612 'playlist': playlist,
613 'playlist_index': i + playliststart,
614 'extractor': ie_result['extractor'],
615 'webpage_url': ie_result['webpage_url'],
616 'webpage_url_basename': url_basename(ie_result['webpage_url']),
617 'extractor_key': ie_result['extractor_key'],
620 reason = self._match_entry(entry)
621 if reason is not None:
622 self.to_screen('[download] ' + reason)
625 entry_result = self.process_ie_result(entry,
628 playlist_results.append(entry_result)
629 ie_result['entries'] = playlist_results
631 elif result_type == 'compat_list':
633 self.add_extra_info(r,
635 'extractor': ie_result['extractor'],
636 'webpage_url': ie_result['webpage_url'],
637 'webpage_url_basename': url_basename(ie_result['webpage_url']),
638 'extractor_key': ie_result['extractor_key'],
641 ie_result['entries'] = [
642 self.process_ie_result(_fixup(r), download, extra_info)
643 for r in ie_result['entries']
647 raise Exception('Invalid result type: %s' % result_type)
649 def select_format(self, format_spec, available_formats):
650 if format_spec == 'best' or format_spec is None:
651 return available_formats[-1]
652 elif format_spec == 'worst':
653 return available_formats[0]
654 elif format_spec == 'bestaudio':
656 f for f in available_formats
657 if f.get('vcodec') == 'none']
659 return audio_formats[-1]
660 elif format_spec == 'worstaudio':
662 f for f in available_formats
663 if f.get('vcodec') == 'none']
665 return audio_formats[0]
667 extensions = ['mp4', 'flv', 'webm', '3gp']
668 if format_spec in extensions:
669 filter_f = lambda f: f['ext'] == format_spec
671 filter_f = lambda f: f['format_id'] == format_spec
672 matches = list(filter(filter_f, available_formats))
677 def process_video_result(self, info_dict, download=True):
678 assert info_dict.get('_type', 'video') == 'video'
680 if 'playlist' not in info_dict:
681 # It isn't part of a playlist
682 info_dict['playlist'] = None
683 info_dict['playlist_index'] = None
685 if 'display_id' not in info_dict and 'id' in info_dict:
686 info_dict['display_id'] = info_dict['id']
688 # This extractors handle format selection themselves
689 if info_dict['extractor'] in ['Youku']:
691 self.process_info(info_dict)
694 # We now pick which formats have to be downloaded
695 if info_dict.get('formats') is None:
696 # There's only one format available
697 formats = [info_dict]
699 formats = info_dict['formats']
701 # We check that all the formats have the format and format_id fields
702 for (i, format) in enumerate(formats):
703 if format.get('format_id') is None:
704 format['format_id'] = compat_str(i)
705 if format.get('format') is None:
706 format['format'] = '{id} - {res}{note}'.format(
707 id=format['format_id'],
708 res=self.format_resolution(format),
709 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
711 # Automatically determine file extension if missing
712 if 'ext' not in format:
713 format['ext'] = determine_ext(format['url'])
715 format_limit = self.params.get('format_limit', None)
717 formats = list(takewhile_inclusive(
718 lambda f: f['format_id'] != format_limit, formats
721 # TODO Central sorting goes here
723 if formats[0] is not info_dict:
724 # only set the 'formats' fields if the original info_dict list them
725 # otherwise we end up with a circular reference, the first (and unique)
726 # element in the 'formats' field in info_dict is info_dict itself,
727 # wich can't be exported to json
728 info_dict['formats'] = formats
729 if self.params.get('listformats', None):
730 self.list_formats(info_dict)
733 req_format = self.params.get('format')
734 if req_format is None:
736 formats_to_download = []
737 # The -1 is for supporting YoutubeIE
738 if req_format in ('-1', 'all'):
739 formats_to_download = formats
741 # We can accept formats requested in the format: 34/5/best, we pick
742 # the first that is available, starting from left
743 req_formats = req_format.split('/')
744 for rf in req_formats:
745 if re.match(r'.+?\+.+?', rf) is not None:
746 # Two formats have been requested like '137+139'
747 format_1, format_2 = rf.split('+')
748 formats_info = (self.select_format(format_1, formats),
749 self.select_format(format_2, formats))
750 if all(formats_info):
752 'requested_formats': formats_info,
754 'ext': formats_info[0]['ext'],
757 selected_format = None
759 selected_format = self.select_format(rf, formats)
760 if selected_format is not None:
761 formats_to_download = [selected_format]
763 if not formats_to_download:
764 raise ExtractorError('requested format not available',
768 if len(formats_to_download) > 1:
769 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
770 for format in formats_to_download:
771 new_info = dict(info_dict)
772 new_info.update(format)
773 self.process_info(new_info)
774 # We update the info dict with the best quality format (backwards compatibility)
775 info_dict.update(formats_to_download[-1])
778 def process_info(self, info_dict):
779 """Process a single resolved IE result."""
781 assert info_dict.get('_type', 'video') == 'video'
783 max_downloads = self.params.get('max_downloads')
784 if max_downloads is not None:
785 if self._num_downloads >= int(max_downloads):
786 raise MaxDownloadsReached()
788 info_dict['fulltitle'] = info_dict['title']
789 if len(info_dict['title']) > 200:
790 info_dict['title'] = info_dict['title'][:197] + '...'
792 # Keep for backwards compatibility
793 info_dict['stitle'] = info_dict['title']
795 if not 'format' in info_dict:
796 info_dict['format'] = info_dict['ext']
798 reason = self._match_entry(info_dict)
799 if reason is not None:
800 self.to_screen('[download] ' + reason)
803 self._num_downloads += 1
805 filename = self.prepare_filename(info_dict)
808 if self.params.get('forcetitle', False):
809 self.to_stdout(info_dict['fulltitle'])
810 if self.params.get('forceid', False):
811 self.to_stdout(info_dict['id'])
812 if self.params.get('forceurl', False):
813 # For RTMP URLs, also include the playpath
814 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
815 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
816 self.to_stdout(info_dict['thumbnail'])
817 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
818 self.to_stdout(info_dict['description'])
819 if self.params.get('forcefilename', False) and filename is not None:
820 self.to_stdout(filename)
821 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
822 self.to_stdout(formatSeconds(info_dict['duration']))
823 if self.params.get('forceformat', False):
824 self.to_stdout(info_dict['format'])
825 if self.params.get('forcejson', False):
826 info_dict['_filename'] = filename
827 self.to_stdout(json.dumps(info_dict))
829 # Do nothing else if in simulate mode
830 if self.params.get('simulate', False):
837 dn = os.path.dirname(encodeFilename(filename))
838 if dn != '' and not os.path.exists(dn):
840 except (OSError, IOError) as err:
841 self.report_error('unable to create directory ' + compat_str(err))
844 if self.params.get('writedescription', False):
845 descfn = filename + '.description'
846 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
847 self.to_screen('[info] Video description is already present')
850 self.to_screen('[info] Writing video description to: ' + descfn)
851 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
852 descfile.write(info_dict['description'])
853 except (KeyError, TypeError):
854 self.report_warning('There\'s no description to write.')
855 except (OSError, IOError):
856 self.report_error('Cannot write description file ' + descfn)
859 if self.params.get('writeannotations', False):
860 annofn = filename + '.annotations.xml'
861 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
862 self.to_screen('[info] Video annotations are already present')
865 self.to_screen('[info] Writing video annotations to: ' + annofn)
866 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
867 annofile.write(info_dict['annotations'])
868 except (KeyError, TypeError):
869 self.report_warning('There are no annotations to write.')
870 except (OSError, IOError):
871 self.report_error('Cannot write annotations file: ' + annofn)
874 subtitles_are_requested = any([self.params.get('writesubtitles', False),
875 self.params.get('writeautomaticsub')])
877 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
878 # subtitles download errors are already managed as troubles in relevant IE
879 # that way it will silently go on when used with unsupporting IE
880 subtitles = info_dict['subtitles']
881 sub_format = self.params.get('subtitlesformat', 'srt')
882 for sub_lang in subtitles.keys():
883 sub = subtitles[sub_lang]
887 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
888 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
889 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
891 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
892 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
894 except (OSError, IOError):
895 self.report_error('Cannot write subtitles file ' + descfn)
898 if self.params.get('writeinfojson', False):
899 infofn = os.path.splitext(filename)[0] + '.info.json'
900 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
901 self.to_screen('[info] Video description metadata is already present')
903 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
905 write_json_file(info_dict, encodeFilename(infofn))
906 except (OSError, IOError):
907 self.report_error('Cannot write metadata to JSON file ' + infofn)
910 if self.params.get('writethumbnail', False):
911 if info_dict.get('thumbnail') is not None:
912 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
913 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
914 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
915 self.to_screen('[%s] %s: Thumbnail is already present' %
916 (info_dict['extractor'], info_dict['id']))
918 self.to_screen('[%s] %s: Downloading thumbnail ...' %
919 (info_dict['extractor'], info_dict['id']))
921 uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
922 with open(thumb_filename, 'wb') as thumbf:
923 shutil.copyfileobj(uf, thumbf)
924 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
925 (info_dict['extractor'], info_dict['id'], thumb_filename))
926 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
927 self.report_warning('Unable to download thumbnail "%s": %s' %
928 (info_dict['thumbnail'], compat_str(err)))
930 if not self.params.get('skip_download', False):
931 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
936 fd = get_suitable_downloader(info)(self, self.params)
937 for ph in self._progress_hooks:
938 fd.add_progress_hook(ph)
939 return fd.download(name, info)
940 if info_dict.get('requested_formats') is not None:
943 merger = FFmpegMergerPP(self)
944 if not merger._get_executable():
946 self.report_warning('You have requested multiple '
947 'formats but ffmpeg or avconv are not installed.'
948 ' The formats won\'t be merged')
950 postprocessors = [merger]
951 for f in info_dict['requested_formats']:
952 new_info = dict(info_dict)
954 fname = self.prepare_filename(new_info)
955 fname = prepend_extension(fname, 'f%s' % f['format_id'])
956 downloaded.append(fname)
957 partial_success = dl(fname, new_info)
958 success = success and partial_success
959 info_dict['__postprocessors'] = postprocessors
960 info_dict['__files_to_merge'] = downloaded
963 success = dl(filename, info_dict)
964 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
965 self.report_error('unable to download video data: %s' % str(err))
967 except (OSError, IOError) as err:
968 raise UnavailableVideoError(err)
969 except (ContentTooShortError, ) as err:
970 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
975 self.post_process(filename, info_dict)
976 except (PostProcessingError) as err:
977 self.report_error('postprocessing: %s' % str(err))
980 self.record_download_archive(info_dict)
def download(self, url_list):
    """Download every URL in url_list and return the download return code.

    Raises SameFileError when more than one URL is given, the output
    template contains no '%' placeholder and 'max_downloads' is not 1.
    MaxDownloadsReached is reported on screen and then re-raised.
    """
    if len(url_list) > 1:
        template = self.params['outtmpl']
        if '%' not in template and self.params.get('max_downloads') != 1:
            raise SameFileError(template)

    for video_url in url_list:
        try:
            # Extraction also performs the actual download
            self.extract_info(video_url)
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Download using an info dict previously stored as JSON.

    The file is read as UTF-8 JSON and handed to process_ie_result with
    download=True. If that raises DownloadError and the info contains a
    'webpage_url', a warning is issued and the URL is downloaded instead;
    without a 'webpage_url' the DownloadError is re-raised.
    """
    with io.open(info_filename, 'r', encoding='utf-8') as info_file:
        info = json.load(info_file)

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        fallback_url = info.get('webpage_url')
        if fallback_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % fallback_url)
        return self.download([fallback_url])

    return self._download_retcode
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    Postprocessors listed under ie_info['__postprocessors'] run first,
    followed by the ones in self._pps. Each receives a copy of ie_info
    with 'filepath' set to filename. The original file is removed only
    when the postprocessors' verdict is exactly False and the 'keepvideo'
    parameter is not set.
    """
    info = dict(ie_info)
    info['filepath'] = filename

    extra_pps = ie_info.get('__postprocessors')
    chain = ([] if extra_pps is None else list(extra_pps)) + list(self._pps)

    keep_video = None
    for postprocessor in chain:
        try:
            wish, _ = postprocessor.run(info)
            # A truthy wish always wins; a falsy (but not None) wish only
            # counts while nobody has expressed an opinion yet.
            if wish or (wish is not None and keep_video is None):
                keep_video = wish
        except PostProcessingError as e:
            self.report_error(e.msg)

    if keep_video is not False or self.params.get('keepvideo', False):
        return

    try:
        self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
        os.remove(encodeFilename(filename))
    except (IOError, OSError):
        self.report_warning('Unable to remove downloaded video file')
def _make_archive_id(self, info_dict):
    """Return the download-archive identifier for info_dict.

    The identifier is the lower-cased extractor key, a space, and the
    video id. None is returned when the information is incomplete, i.e.
    when the id or both 'extractor_key' and 'ie_key' are missing.
    """
    video_id = info_dict.get('id')
    if video_id is None:
        # Without an id no archive entry can be built; report "incomplete"
        # instead of failing with a KeyError further down.
        return None

    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    extractor = info_dict.get('extractor_key')
    if extractor is None:
        extractor = info_dict.get('ie_key')  # key in a playlist
    if extractor is None:
        return None  # Incomplete video information
    return extractor.lower() + ' ' + video_id
def in_download_archive(self, info_dict):
    """Tell whether info_dict is already listed in the download archive.

    Returns False when no 'download_archive' parameter is set, when no
    archive id can be built for info_dict, or when the archive file does
    not exist. Any other IOError is re-raised.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    wanted = self._make_archive_id(info_dict)
    if wanted is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            return any(entry.strip() == wanted for entry in archive_file)
    except IOError as ioe:
        # A missing archive file simply means nothing was recorded yet
        if ioe.errno != errno.ENOENT:
            raise
    return False
def record_download_archive(self, info_dict):
    """Append the archive id of info_dict to the download archive file.

    Does nothing when no 'download_archive' parameter is set. The archive
    id must be available; this is enforced with an assertion.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        entry = self._make_archive_id(info_dict)
        assert entry
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(entry + '\n')
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a human-readable resolution string for a format dict.

    In order of precedence:
      * 'audio only' when the 'vcodec' is 'none',
      * the explicit 'resolution' entry,
      * '<width>x<height>' when both dimensions are known,
      * '<height>p' when only the height is known,
      * '<width>x?' when only the width is known,
      * default otherwise.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']

    width = format.get('width')
    height = format.get('height')
    if height is not None:
        if width is not None:
            return '%sx%s' % (width, height)
        return '%sp' % height
    if width is not None:
        # The width belongs on the left of the 'x'; the unknown height
        # is the part replaced by a question mark.
        return '%sx?' % width
    return default
def list_formats(self, info_dict):
    """Print a table of the formats available for info_dict.

    Uses info_dict['formats'] when present, otherwise info_dict itself is
    treated as the only format. When there is more than one format, the
    first row is tagged '(worst)' and the last one '(best)'.
    """
    def format_note(fdict):
        # Builds the free-text "note" column for one format.
        def with_sep(text):
            # Separate from what is already there, but never lead with ', '
            return text + ', ' if text else text

        container = fdict.get('container')
        vcodec = fdict.get('vcodec')
        acodec = fdict.get('acodec')
        tbr = fdict.get('tbr')
        vbr = fdict.get('vbr')
        abr = fdict.get('abr')
        asr = fdict.get('asr')
        filesize = fdict.get('filesize')

        note = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            note += '(unsupported) '
        if fdict.get('format_note') is not None:
            note += fdict['format_note'] + ' '
        if tbr is not None:
            note += '%4dk ' % tbr
        if container is not None:
            note = with_sep(note) + '%s container' % container

        if vcodec is not None and vcodec != 'none':
            note = with_sep(note) + vcodec
            if vbr is not None:
                note += '@'
        elif vbr is not None and abr is not None:
            note += 'video@'
        if vbr is not None:
            note += '%4dk' % vbr

        if acodec is not None:
            note = with_sep(note)
            note += 'video only' if acodec == 'none' else '%-5s' % acodec
        elif abr is not None:
            note = with_sep(note) + 'audio'
        if abr is not None:
            note += '@%3dk' % abr
        if asr is not None:
            note += ' (%5dHz)' % asr

        if filesize is not None:
            note = with_sep(note) + format_bytes(filesize)
        return note

    def line(format, idlen=20):
        # One table row; the id column is padded to idlen + 1 characters.
        template = '%-' + compat_str(idlen + 1) + 's%-10s%-12s%s'
        return template % (
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            format_note(format),
        )

    formats = info_dict.get('formats', [info_dict])
    longest_id = max(len(f['format_id']) for f in formats)
    idlen = max(len('format code'), longest_id)

    rows = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        rows[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
        rows[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

    # The header goes through the same row formatter as the data rows
    header_line = line({
        'format_id': 'format code', 'ext': 'extension',
        'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
    self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, '\n'.join(rows)))
def urlopen(self, req):
    """ Open req with this object's opener and return the result """
    opener = self._opener
    return opener.open(req)
def print_debug_header(self):
    """Write version and environment details when 'verbose' is set.

    Prints the youtube-dl version, the short git HEAD hash (best effort,
    only when git reports one for the directory containing this file),
    the Python version and platform, and the proxy map collected from the
    opener's handlers. Does nothing unless the 'verbose' parameter is set.
    """
    if not self.params.get('verbose'):
        return

    write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, _ = sp.communicate()
        out = out.decode().strip()
        # Anchor the pattern so that only output consisting entirely of
        # hex digits is reported as a commit hash.
        if re.match(r'[0-9a-f]+$', out):
            write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        # Git information is optional: ignore ordinary failures (git not
        # installed, not a checkout, ...) but let KeyboardInterrupt and
        # SystemExit propagate.
        try:
            sys.exc_clear()  # Only exists on Python 2
        except Exception:
            pass
    write_string('[debug] Python version %s - %s' %
                 (platform.python_version(), platform_name()) + '\n')

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
def _setup_opener(self):
    """Build the URL opener and install it, together with the timeout.

    Reads 'socket_timeout' (600 when unset), 'cookiefile', 'proxy',
    'debug_printtraffic' and 'nocheckcertificate' from the parameters,
    stores the cookie jar in self.cookiejar and the opener in
    self._opener, installs the opener globally and sets the default
    socket timeout.
    """
    params = self.params

    raw_timeout = params.get('socket_timeout')
    if raw_timeout is None:
        timeout = 600
    else:
        timeout = float(raw_timeout)

    cookie_path = params.get('cookiefile')
    proxy_url = params.get('proxy')

    if cookie_path is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(cookie_path)
        # Only load the jar when the file is actually readable
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load()
    cookie_processor = compat_urllib_request.HTTPCookieProcessor(self.cookiejar)

    if proxy_url is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_url == '':
        # An explicitly empty proxy option yields an empty proxy map
        proxies = {}
    else:
        proxies = {'http': proxy_url, 'https': proxy_url}
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    debuglevel = 1 if params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(
        params.get('nocheckcertificate', False), debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(debuglevel=debuglevel)
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, ydlh)
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

    # TODO remove this global modification
    compat_urllib_request.install_opener(opener)
    socket.setdefaulttimeout(timeout)