2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import
28 compat_urllib_request,
49 UnavailableVideoError,
56 from .extractor import get_info_extractor, gen_extractors
57 from .downloader import get_suitable_downloader
58 from .PostProcessor import FFmpegMergerPP
59 from .version import __version__
62 class YoutubeDL(object):
65 YoutubeDL objects are the ones responsible of downloading the
66 actual video file and writing it to disk if the user has requested
67 it, among some other tasks. In most cases there should be one per
68 program. As, given a video URL, the downloader doesn't know how to
69 extract all the needed information, task that InfoExtractors do, it
70 has to pass the URL to one of them.
72 For this, YoutubeDL objects have a method that allows
73 InfoExtractors to be registered in a given order. When it is passed
74 a URL, the YoutubeDL object handles it to the first InfoExtractor it
75 finds that reports being able to handle it. The InfoExtractor extracts
76 all the information about the video or videos the URL refers to, and
77 YoutubeDL process the extracted information, possibly using a File
78 Downloader to download the video.
80 YoutubeDL objects accept a lot of parameters. In order not to saturate
81 the object constructor with arguments, it receives a dictionary of
82 options instead. These options are available through the params
83 attribute for the InfoExtractors to use. The YoutubeDL also
84 registers itself as the downloader in charge for the InfoExtractors
85 that are added to it, so this is a "mutual registration".
89 username: Username for authentication purposes.
90 password: Password for authentication purposes.
91 videopassword:   Password for accessing a video.
92 usenetrc: Use netrc for authentication instead.
93 verbose: Print additional info to stdout.
94 quiet: Do not print messages to stdout.
95 forceurl: Force printing final URL.
96 forcetitle: Force printing title.
97 forceid: Force printing ID.
98 forcethumbnail: Force printing thumbnail URL.
99 forcedescription: Force printing description.
100 forcefilename: Force printing final filename.
101 forceduration: Force printing duration.
102 forcejson: Force printing info_dict as JSON.
103 simulate: Do not download the video files.
104 format: Video format code.
105 format_limit: Highest quality format to try.
106 outtmpl: Template for output names.
107 restrictfilenames: Do not allow "&" and spaces in file names
108 ignoreerrors: Do not stop on download errors.
109 nooverwrites: Prevent overwriting files.
110 playliststart: Playlist item to start at.
111 playlistend: Playlist item to end at.
112 matchtitle: Download only matching titles.
113 rejecttitle: Reject downloads for matching titles.
114 logger: Log messages to a logging.Logger instance.
115 logtostderr: Log messages to stderr instead of stdout.
116 writedescription: Write the video description to a .description file
117 writeinfojson: Write the video description to a .info.json file
118 writeannotations: Write the video annotations to a .annotations.xml file
119 writethumbnail: Write the thumbnail image to a file
120 writesubtitles: Write the video subtitles to a file
121 writeautomaticsub: Write the automatic subtitles to a file
122 allsubtitles: Downloads all the subtitles of the video
123 (requires writesubtitles or writeautomaticsub)
124 listsubtitles: Lists all available subtitles for the video
125 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
126 subtitleslangs: List of languages of the subtitles to download
127 keepvideo: Keep the video file after post-processing
128 daterange: A DateRange object, download only if the upload_date is in the range.
129 skip_download: Skip the actual download of the video file
130 cachedir: Location of the cache files in the filesystem.
131 None to disable filesystem cache.
132 noplaylist: Download single video instead of a playlist if in doubt.
133 age_limit: An integer representing the user's age in years.
134 Unsuitable videos for the given age are skipped.
135 min_views: An integer representing the minimum view count the video
136 must have in order to not be skipped.
137 Videos without view count information are always
138 downloaded. None for no limit.
139 max_views: An integer representing the maximum view count.
140 Videos that are more popular than that are not
142 Videos without view count information are always
143 downloaded. None for no limit.
144 download_archive: File name of a file where all downloads are recorded.
145 Videos already present in the file are not downloaded
147 cookiefile: File name where cookies should be read from and dumped to.
148 nocheckcertificate:Do not verify SSL certificates
149 proxy: URL of the proxy server to use
150 socket_timeout: Time to wait for unresponsive hosts, in seconds
151 bidi_workaround: Work around buggy terminals without bidirectional text
152 support, using fribidi
153 debug_printtraffic:Print out sent and received HTTP traffic
155 The following parameters are not used by YoutubeDL itself, they are used by
157 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
158 noresizebuffer, retries, continuedl, noprogress, consoletitle
164 _download_retcode = None
165 _num_downloads = None
168 def __init__(self, params=None):
169 """Create a FileDownloader object with the given options."""
173 self._ies_instances = {}
175 self._progress_hooks = []
176 self._download_retcode = 0
177 self._num_downloads = 0
178 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
179 self._err_file = sys.stderr
182 if params.get('bidi_workaround', False):
185 master, slave = pty.openpty()
186 width = get_term_width()
190 width_args = ['-w', str(width)]
192 stdin=subprocess.PIPE,
194 stderr=self._err_file)
196 self._output_process = subprocess.Popen(
197 ['bidiv'] + width_args, **sp_kwargs
200 self._output_process = subprocess.Popen(
201 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
202 self._output_channel = os.fdopen(master, 'rb')
203 except OSError as ose:
205 self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
209 if (sys.version_info >= (3,) and sys.platform != 'win32' and
210 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
211 and not params['restrictfilenames']):
212 # On Python 3, the Unicode filesystem API will throw errors (#1474)
214 u'Assuming --restrict-filenames since file system encoding '
215 u'cannot encode all charactes. '
216 u'Set the LC_ALL environment variable to fix this.')
217 self.params['restrictfilenames'] = True
219 if '%(stitle)s' in self.params.get('outtmpl', ''):
220 self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
224 def add_info_extractor(self, ie):
225 """Add an InfoExtractor object to the end of the list."""
227 self._ies_instances[ie.ie_key()] = ie
228 ie.set_downloader(self)
230 def get_info_extractor(self, ie_key):
232 Get an instance of an IE with name ie_key, it will try to get one from
233 the _ies list, if there's no instance it will create a new one and add
234 it to the extractor list.
236 ie = self._ies_instances.get(ie_key)
238 ie = get_info_extractor(ie_key)()
239 self.add_info_extractor(ie)
242 def add_default_info_extractors(self):
244 Add the InfoExtractors returned by gen_extractors to the end of the list
246 for ie in gen_extractors():
247 self.add_info_extractor(ie)
249 def add_post_processor(self, pp):
250 """Add a PostProcessor object to the end of the chain."""
252 pp.set_downloader(self)
254 def add_progress_hook(self, ph):
255 """Add the progress hook (currently only for the file downloader)"""
256 self._progress_hooks.append(ph)
258 def _bidi_workaround(self, message):
259 if not hasattr(self, '_output_channel'):
262 assert hasattr(self, '_output_process')
263 assert type(message) == type(u'')
264 line_count = message.count(u'\n') + 1
265 self._output_process.stdin.write((message + u'\n').encode('utf-8'))
266 self._output_process.stdin.flush()
267 res = u''.join(self._output_channel.readline().decode('utf-8')
268 for _ in range(line_count))
269 return res[:-len(u'\n')]
271 def to_screen(self, message, skip_eol=False):
272 """Print message to stdout if not in quiet mode."""
273 return self.to_stdout(message, skip_eol, check_quiet=True)
275 def to_stdout(self, message, skip_eol=False, check_quiet=False):
276 """Print message to stdout if not in quiet mode."""
277 if self.params.get('logger'):
278 self.params['logger'].debug(message)
279 elif not check_quiet or not self.params.get('quiet', False):
280 message = self._bidi_workaround(message)
281 terminator = [u'\n', u''][skip_eol]
282 output = message + terminator
284 write_string(output, self._screen_file)
286 def to_stderr(self, message):
287 """Print message to stderr."""
288 assert type(message) == type(u'')
289 if self.params.get('logger'):
290 self.params['logger'].error(message)
292 message = self._bidi_workaround(message)
293 output = message + u'\n'
294 write_string(output, self._err_file)
296 def to_console_title(self, message):
297 if not self.params.get('consoletitle', False):
299 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
300 # c_wchar_p() might not be necessary if `message` is
301 # already of type unicode()
302 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
303 elif 'TERM' in os.environ:
304 write_string(u'\033]0;%s\007' % message, self._screen_file)
306 def save_console_title(self):
307 if not self.params.get('consoletitle', False):
309 if 'TERM' in os.environ:
310 # Save the title on stack
311 write_string(u'\033[22;0t', self._screen_file)
313 def restore_console_title(self):
314 if not self.params.get('consoletitle', False):
316 if 'TERM' in os.environ:
317 # Restore the title from stack
318 write_string(u'\033[23;0t', self._screen_file)
321 self.save_console_title()
324 def __exit__(self, *args):
325 self.restore_console_title()
327 if self.params.get('cookiefile') is not None:
328 self.cookiejar.save()
330 def trouble(self, message=None, tb=None):
331 """Determine action to take when a download problem appears.
333 Depending on if the downloader has been configured to ignore
334 download errors or not, this method may throw an exception or
335 not when errors are found, after printing the message.
337 tb, if given, is additional traceback information.
339 if message is not None:
340 self.to_stderr(message)
341 if self.params.get('verbose'):
343 if sys.exc_info()[0]: # if .trouble has been called from an except block
345 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
346 tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
347 tb += compat_str(traceback.format_exc())
349 tb_data = traceback.format_list(traceback.extract_stack())
350 tb = u''.join(tb_data)
352 if not self.params.get('ignoreerrors', False):
353 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
354 exc_info = sys.exc_info()[1].exc_info
356 exc_info = sys.exc_info()
357 raise DownloadError(message, exc_info)
358 self._download_retcode = 1
360 def report_warning(self, message):
362 Print the message to stderr, it will be prefixed with 'WARNING:'
363 If stderr is a tty file the 'WARNING:' will be colored
365 if self._err_file.isatty() and os.name != 'nt':
366 _msg_header = u'\033[0;33mWARNING:\033[0m'
368 _msg_header = u'WARNING:'
369 warning_message = u'%s %s' % (_msg_header, message)
370 self.to_stderr(warning_message)
372 def report_error(self, message, tb=None):
374 Do the same as trouble, but prefixes the message with 'ERROR:', colored
375 in red if stderr is a tty file.
377 if self._err_file.isatty() and os.name != 'nt':
378 _msg_header = u'\033[0;31mERROR:\033[0m'
380 _msg_header = u'ERROR:'
381 error_message = u'%s %s' % (_msg_header, message)
382 self.trouble(error_message, tb)
384 def report_file_already_downloaded(self, file_name):
385 """Report file has already been fully downloaded."""
387 self.to_screen(u'[download] %s has already been downloaded' % file_name)
388 except UnicodeEncodeError:
389 self.to_screen(u'[download] The file has already been downloaded')
391 def increment_downloads(self):
392 """Increment the ordinal that assigns a number to each file."""
393 self._num_downloads += 1
395 def prepare_filename(self, info_dict):
396 """Generate the output filename."""
398 template_dict = dict(info_dict)
400 template_dict['epoch'] = int(time.time())
401 autonumber_size = self.params.get('autonumber_size')
402 if autonumber_size is None:
404 autonumber_templ = u'%0' + str(autonumber_size) + u'd'
405 template_dict['autonumber'] = autonumber_templ % self._num_downloads
406 if template_dict.get('playlist_index') is not None:
407 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
409 sanitize = lambda k, v: sanitize_filename(
411 restricted=self.params.get('restrictfilenames'),
413 template_dict = dict((k, sanitize(k, v))
414 for k, v in template_dict.items()
416 template_dict = collections.defaultdict(lambda: u'NA', template_dict)
418 tmpl = os.path.expanduser(self.params['outtmpl'])
419 filename = tmpl % template_dict
421 except ValueError as err:
422 self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
425 def _match_entry(self, info_dict):
426 """ Returns None iff the file should be downloaded """
428 video_title = info_dict.get('title', info_dict.get('id', u'video'))
429 if 'title' in info_dict:
430 # This can happen when we're just evaluating the playlist
431 title = info_dict['title']
432 matchtitle = self.params.get('matchtitle', False)
434 if not re.search(matchtitle, title, re.IGNORECASE):
435 return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
436 rejecttitle = self.params.get('rejecttitle', False)
438 if re.search(rejecttitle, title, re.IGNORECASE):
439 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
440 date = info_dict.get('upload_date', None)
442 dateRange = self.params.get('daterange', DateRange())
443 if date not in dateRange:
444 return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
445 view_count = info_dict.get('view_count', None)
446 if view_count is not None:
447 min_views = self.params.get('min_views')
448 if min_views is not None and view_count < min_views:
449 return u'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
450 max_views = self.params.get('max_views')
451 if max_views is not None and view_count > max_views:
452 return u'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
453 age_limit = self.params.get('age_limit')
454 if age_limit is not None:
455 if age_limit < info_dict.get('age_limit', 0):
456 return u'Skipping "' + title + '" because it is age restricted'
457 if self.in_download_archive(info_dict):
458 return u'%s has already been recorded in archive' % video_title
462 def add_extra_info(info_dict, extra_info):
463 '''Set the keys from extra_info in info dict if they are missing'''
464 for key, value in extra_info.items():
465 info_dict.setdefault(key, value)
467 def extract_info(self, url, download=True, ie_key=None, extra_info={},
470 Returns a list with a dictionary for each video we find.
471 If 'download', also downloads the videos.
472 extra_info is a dict containing the extra values to add to each result
476 ies = [self.get_info_extractor(ie_key)]
481 if not ie.suitable(url):
485 self.report_warning(u'The program functionality for this site has been marked as broken, '
486 u'and will probably not work.')
489 ie_result = ie.extract(url)
490 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
492 if isinstance(ie_result, list):
493 # Backwards compatibility: old IE result format
495 '_type': 'compat_list',
496 'entries': ie_result,
498 self.add_extra_info(ie_result,
500 'extractor': ie.IE_NAME,
502 'webpage_url_basename': url_basename(url),
503 'extractor_key': ie.ie_key(),
506 return self.process_ie_result(ie_result, download, extra_info)
509 except ExtractorError as de: # An error we somewhat expected
510 self.report_error(compat_str(de), de.format_traceback())
512 except Exception as e:
513 if self.params.get('ignoreerrors', False):
514 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
519 self.report_error(u'no suitable InfoExtractor: %s' % url)
521 def process_ie_result(self, ie_result, download=True, extra_info={}):
523 Take the result of the ie(may be modified) and resolve all unresolved
524 references (URLs, playlist items).
526 It will also download the videos if 'download'.
527 Returns the resolved ie_result.
530 result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
531 if result_type == 'video':
532 self.add_extra_info(ie_result, extra_info)
533 return self.process_video_result(ie_result, download=download)
534 elif result_type == 'url':
535 # We have to add extra_info to the results because it may be
536 # contained in a playlist
537 return self.extract_info(ie_result['url'],
539 ie_key=ie_result.get('ie_key'),
540 extra_info=extra_info)
541 elif result_type == 'url_transparent':
542 # Use the information from the embedding page
543 info = self.extract_info(
544 ie_result['url'], ie_key=ie_result.get('ie_key'),
545 extra_info=extra_info, download=False, process=False)
547 def make_result(embedded_info):
548 new_result = ie_result.copy()
549 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
550 'entries', 'ie_key', 'duration',
551 'subtitles', 'annotations', 'format',
552 'thumbnail', 'thumbnails'):
555 if f in embedded_info:
556 new_result[f] = embedded_info[f]
558 new_result = make_result(info)
560 assert new_result.get('_type') != 'url_transparent'
561 if new_result.get('_type') == 'compat_list':
562 new_result['entries'] = [
563 make_result(e) for e in new_result['entries']]
565 return self.process_ie_result(
566 new_result, download=download, extra_info=extra_info)
567 elif result_type == 'playlist':
568 # We process each entry in the playlist
569 playlist = ie_result.get('title', None) or ie_result.get('id', None)
570 self.to_screen(u'[download] Downloading playlist: %s' % playlist)
572 playlist_results = []
574 n_all_entries = len(ie_result['entries'])
575 playliststart = self.params.get('playliststart', 1) - 1
576 playlistend = self.params.get('playlistend', None)
577 # For backwards compatibility, interpret -1 as whole list
578 if playlistend == -1:
581 entries = ie_result['entries'][playliststart:playlistend]
582 n_entries = len(entries)
585 u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
586 (ie_result['extractor'], playlist, n_all_entries, n_entries))
588 for i, entry in enumerate(entries, 1):
589 self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
591 'playlist': playlist,
592 'playlist_index': i + playliststart,
593 'extractor': ie_result['extractor'],
594 'webpage_url': ie_result['webpage_url'],
595 'webpage_url_basename': url_basename(ie_result['webpage_url']),
596 'extractor_key': ie_result['extractor_key'],
599 reason = self._match_entry(entry)
600 if reason is not None:
601 self.to_screen(u'[download] ' + reason)
604 entry_result = self.process_ie_result(entry,
607 playlist_results.append(entry_result)
608 ie_result['entries'] = playlist_results
610 elif result_type == 'compat_list':
612 self.add_extra_info(r,
614 'extractor': ie_result['extractor'],
615 'webpage_url': ie_result['webpage_url'],
616 'webpage_url_basename': url_basename(ie_result['webpage_url']),
617 'extractor_key': ie_result['extractor_key'],
620 ie_result['entries'] = [
621 self.process_ie_result(_fixup(r), download, extra_info)
622 for r in ie_result['entries']
626 raise Exception('Invalid result type: %s' % result_type)
628 def select_format(self, format_spec, available_formats):
629 if format_spec == 'best' or format_spec is None:
630 return available_formats[-1]
631 elif format_spec == 'worst':
632 return available_formats[0]
634 extensions = [u'mp4', u'flv', u'webm', u'3gp']
635 if format_spec in extensions:
636 filter_f = lambda f: f['ext'] == format_spec
638 filter_f = lambda f: f['format_id'] == format_spec
639 matches = list(filter(filter_f, available_formats))
644 def process_video_result(self, info_dict, download=True):
645 assert info_dict.get('_type', 'video') == 'video'
647 if 'playlist' not in info_dict:
648 # It isn't part of a playlist
649 info_dict['playlist'] = None
650 info_dict['playlist_index'] = None
652 # This extractors handle format selection themselves
653 if info_dict['extractor'] in [u'Youku']:
655 self.process_info(info_dict)
658 # We now pick which formats have to be downloaded
659 if info_dict.get('formats') is None:
660 # There's only one format available
661 formats = [info_dict]
663 formats = info_dict['formats']
665 # We check that all the formats have the format and format_id fields
666 for (i, format) in enumerate(formats):
667 if format.get('format_id') is None:
668 format['format_id'] = compat_str(i)
669 if format.get('format') is None:
670 format['format'] = u'{id} - {res}{note}'.format(
671 id=format['format_id'],
672 res=self.format_resolution(format),
673 note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
675 # Automatically determine file extension if missing
676 if 'ext' not in format:
677 format['ext'] = determine_ext(format['url'])
679 format_limit = self.params.get('format_limit', None)
681 formats = list(takewhile_inclusive(
682 lambda f: f['format_id'] != format_limit, formats
685 # TODO Central sorting goes here
687 if formats[0] is not info_dict:
688 # only set the 'formats' fields if the original info_dict list them
689 # otherwise we end up with a circular reference, the first (and unique)
690 # element in the 'formats' field in info_dict is info_dict itself,
691 # wich can't be exported to json
692 info_dict['formats'] = formats
693 if self.params.get('listformats', None):
694 self.list_formats(info_dict)
697 req_format = self.params.get('format', 'best')
698 if req_format is None:
700 formats_to_download = []
701 # The -1 is for supporting YoutubeIE
702 if req_format in ('-1', 'all'):
703 formats_to_download = formats
705 # We can accept formats requestd in the format: 34/5/best, we pick
706 # the first that is available, starting from left
707 req_formats = req_format.split('/')
708 for rf in req_formats:
709 if re.match(r'.+?\+.+?', rf) is not None:
710 # Two formats have been requested like '137+139'
711 format_1, format_2 = rf.split('+')
712 formats_info = (self.select_format(format_1, formats),
713 self.select_format(format_2, formats))
714 if all(formats_info):
715 selected_format = {'requested_formats': formats_info}
717 selected_format = None
719 selected_format = self.select_format(rf, formats)
720 if selected_format is not None:
721 formats_to_download = [selected_format]
723 if not formats_to_download:
724 raise ExtractorError(u'requested format not available',
728 if len(formats_to_download) > 1:
729 self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
730 for format in formats_to_download:
731 new_info = dict(info_dict)
732 new_info.update(format)
733 self.process_info(new_info)
734 # We update the info dict with the best quality format (backwards compatibility)
735 info_dict.update(formats_to_download[-1])
738 def process_info(self, info_dict):
739 """Process a single resolved IE result."""
741 assert info_dict.get('_type', 'video') == 'video'
742 #We increment the download the download count here to match the previous behaviour.
743 self.increment_downloads()
745 info_dict['fulltitle'] = info_dict['title']
746 if len(info_dict['title']) > 200:
747 info_dict['title'] = info_dict['title'][:197] + u'...'
749 # Keep for backwards compatibility
750 info_dict['stitle'] = info_dict['title']
752 if not 'format' in info_dict:
753 info_dict['format'] = info_dict['ext']
755 reason = self._match_entry(info_dict)
756 if reason is not None:
757 self.to_screen(u'[download] ' + reason)
760 max_downloads = self.params.get('max_downloads')
761 if max_downloads is not None:
762 if self._num_downloads > int(max_downloads):
763 raise MaxDownloadsReached()
765 filename = self.prepare_filename(info_dict)
768 if self.params.get('forcetitle', False):
769 self.to_stdout(info_dict['fulltitle'])
770 if self.params.get('forceid', False):
771 self.to_stdout(info_dict['id'])
772 if self.params.get('forceurl', False):
773 # For RTMP URLs, also include the playpath
774 self.to_stdout(info_dict['url'] + info_dict.get('play_path', u''))
775 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
776 self.to_stdout(info_dict['thumbnail'])
777 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
778 self.to_stdout(info_dict['description'])
779 if self.params.get('forcefilename', False) and filename is not None:
780 self.to_stdout(filename)
781 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
782 self.to_stdout(formatSeconds(info_dict['duration']))
783 if self.params.get('forceformat', False):
784 self.to_stdout(info_dict['format'])
785 if self.params.get('forcejson', False):
786 info_dict['_filename'] = filename
787 self.to_stdout(json.dumps(info_dict))
789 # Do nothing else if in simulate mode
790 if self.params.get('simulate', False):
797 dn = os.path.dirname(encodeFilename(filename))
798 if dn != '' and not os.path.exists(dn):
800 except (OSError, IOError) as err:
801 self.report_error(u'unable to create directory ' + compat_str(err))
804 if self.params.get('writedescription', False):
805 descfn = filename + u'.description'
806 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
807 self.to_screen(u'[info] Video description is already present')
810 self.to_screen(u'[info] Writing video description to: ' + descfn)
811 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
812 descfile.write(info_dict['description'])
813 except (KeyError, TypeError):
814 self.report_warning(u'There\'s no description to write.')
815 except (OSError, IOError):
816 self.report_error(u'Cannot write description file ' + descfn)
819 if self.params.get('writeannotations', False):
820 annofn = filename + u'.annotations.xml'
821 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
822 self.to_screen(u'[info] Video annotations are already present')
825 self.to_screen(u'[info] Writing video annotations to: ' + annofn)
826 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
827 annofile.write(info_dict['annotations'])
828 except (KeyError, TypeError):
829 self.report_warning(u'There are no annotations to write.')
830 except (OSError, IOError):
831 self.report_error(u'Cannot write annotations file: ' + annofn)
834 subtitles_are_requested = any([self.params.get('writesubtitles', False),
835 self.params.get('writeautomaticsub')])
837 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
838 # subtitles download errors are already managed as troubles in relevant IE
839 # that way it will silently go on when used with unsupporting IE
840 subtitles = info_dict['subtitles']
841 sub_format = self.params.get('subtitlesformat', 'srt')
842 for sub_lang in subtitles.keys():
843 sub = subtitles[sub_lang]
847 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
848 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
849 self.to_screen(u'[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
851 self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
852 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
854 except (OSError, IOError):
855 self.report_error(u'Cannot write subtitles file ' + descfn)
858 if self.params.get('writeinfojson', False):
859 infofn = os.path.splitext(filename)[0] + u'.info.json'
860 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
861 self.to_screen(u'[info] Video description metadata is already present')
863 self.to_screen(u'[info] Writing video description metadata as JSON to: ' + infofn)
865 write_json_file(info_dict, encodeFilename(infofn))
866 except (OSError, IOError):
867 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
870 if self.params.get('writethumbnail', False):
871 if info_dict.get('thumbnail') is not None:
872 thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
873 thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
874 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
875 self.to_screen(u'[%s] %s: Thumbnail is already present' %
876 (info_dict['extractor'], info_dict['id']))
878 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
879 (info_dict['extractor'], info_dict['id']))
881 uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
882 with open(thumb_filename, 'wb') as thumbf:
883 shutil.copyfileobj(uf, thumbf)
884 self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
885 (info_dict['extractor'], info_dict['id'], thumb_filename))
886 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
887 self.report_warning(u'Unable to download thumbnail "%s": %s' %
888 (info_dict['thumbnail'], compat_str(err)))
890 if not self.params.get('skip_download', False):
891 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
896 fd = get_suitable_downloader(info)(self, self.params)
897 for ph in self._progress_hooks:
898 fd.add_progress_hook(ph)
899 return fd.download(name, info)
900 if info_dict.get('requested_formats') is not None:
903 for f in info_dict['requested_formats']:
904 new_info = dict(info_dict)
906 fname = self.prepare_filename(new_info)
907 fname = prepend_extension(fname, 'f%s' % f['format_id'])
908 downloaded.append(fname)
909 partial_success = dl(fname, new_info)
910 success = success and partial_success
911 info_dict['__postprocessors'] = [FFmpegMergerPP(self)]
912 info_dict['__files_to_merge'] = downloaded
915 success = dl(filename, info_dict)
916 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
917 self.report_error(u'unable to download video data: %s' % str(err))
919 except (OSError, IOError) as err:
920 raise UnavailableVideoError(err)
921 except (ContentTooShortError, ) as err:
922 self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
927 self.post_process(filename, info_dict)
928 except (PostProcessingError) as err:
929 self.report_error(u'postprocessing: %s' % str(err))
932 self.record_download_archive(info_dict)
934 def download(self, url_list):
935 """Download a given list of URLs."""
936 if (len(url_list) > 1 and
937 '%' not in self.params['outtmpl']
938 and self.params.get('max_downloads') != 1):
939 raise SameFileError(self.params['outtmpl'])
943 #It also downloads the videos
944 self.extract_info(url)
945 except UnavailableVideoError:
946 self.report_error(u'unable to download video')
947 except MaxDownloadsReached:
948 self.to_screen(u'[info] Maximum number of downloaded files reached.')
951 return self._download_retcode
# Re-run processing/download from a previously dumped .info.json file.
953 def download_with_info_file(self, info_filename):
954 with io.open(info_filename, 'r', encoding='utf-8') as f:
# NOTE(review): numbering jumps 954 -> 957 — the line loading `info` from
# the JSON file and the try: line are elided from this excerpt.
957 self.process_ie_result(info, download=True)
958 except DownloadError:
# If processing the saved info fails, fall back to re-extracting from the
# original webpage URL when it was recorded in the info dict.
959 webpage_url = info.get('webpage_url')
960 if webpage_url is not None:
961 self.report_warning(u'The info failed to download, trying with "%s"' % webpage_url)
962 return self.download([webpage_url])
# NOTE(review): lines 963-964 (presumably an else: re-raise) are elided.
965 return self._download_retcode
967 def post_process(self, filename, ie_info):
968 """Run all the postprocessors on the given file."""
# NOTE(review): numbering jumps (968 -> 970, 975 -> 978, ...) — the copy of
# ie_info into `info`, the pps_chain initialiser, the per-pp loop header
# and its try: line are elided from this excerpt.
970 info['filepath'] = filename
# The chain is the per-download postprocessors (e.g. the merger stored under
# '__postprocessors' in process_info) followed by the globally registered ones.
973 if ie_info.get('__postprocessors') is not None:
974 pps_chain.extend(ie_info['__postprocessors'])
975 pps_chain.extend(self._pps)
# Each postprocessor returns (keep_video_wish, new_info).
978 keep_video_wish, new_info = pp.run(info)
979 if keep_video_wish is not None:
981 keep_video = keep_video_wish
982 elif keep_video is None:
983 # No clear decision yet, let IE decide
984 keep_video = keep_video_wish
985 except PostProcessingError as e:
986 self.report_error(e.msg)
# Delete the intermediate file unless the user asked to keep it (-k) or no
# postprocessor explicitly wanted it deleted.
987 if keep_video is False and not self.params.get('keepvideo', False):
989 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
990 os.remove(encodeFilename(filename))
# Best-effort removal: failure is only a warning.
991 except (IOError, OSError):
992 self.report_warning(u'Unable to remove downloaded video file')
994 def _make_archive_id(self, info_dict):
995 # Future-proof against any change in case
996 # and backwards compatibility with prior versions
997 extractor = info_dict.get('extractor_key')
998 if extractor is None:
999 if 'id' in info_dict:
1000 extractor = info_dict.get('ie_key') # key in a playlist
1001 if extractor is None:
1002 return None # Incomplete video information
1003 return extractor.lower() + u' ' + info_dict['id']
# Return whether this video is already recorded in the --download-archive
# file (used to skip previously downloaded videos).
1005 def in_download_archive(self, info_dict):
1006 fn = self.params.get('download_archive')
# NOTE(review): numbering jumps (1006 -> 1010, 1010 -> 1012, 1017 -> 1019,
# and the method ends at 1020 while the next def is 1024) — the no-archive
# guard, the vid_id None check's if-line, the try: line, the `return True`
# on match and the final raise/return are elided from this excerpt.
1010 vid_id = self._make_archive_id(info_dict)
1012 return False # Incomplete video information
# The archive is a plain text file with one "<extractor> <id>" entry per
# line; read it through locked_file and compare stripped lines.
1015 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1016 for line in archive_file:
1017 if line.strip() == vid_id:
# A missing archive file (ENOENT) just means nothing was downloaded yet;
# other I/O errors are presumably re-raised — confirm (the raise is elided).
1019 except IOError as ioe:
1020 if ioe.errno != errno.ENOENT:
# Append this video's archive id to the --download-archive file so that
# in_download_archive() finds it on the next run.
1024 def record_download_archive(self, info_dict):
1025 fn = self.params.get('download_archive')
# NOTE(review): numbering jumps 1025 -> 1028 and 1028 -> 1030 — the
# no-archive guard and the vid_id assertion are elided from this excerpt.
1028 vid_id = self._make_archive_id(info_dict)
# locked_file presumably serializes concurrent writers — confirm; one
# "<extractor> <id>" entry is appended per line.
1030 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1031 archive_file.write(vid_id + u'\n')
# Build a human-readable resolution string for a format dict
# ("WxH", "<height>p", "?x<width>", or the extractor-provided 'resolution').
# NOTE(review): several lines are elided here (numbering jumps 1035 -> 1037,
# 1041 -> 1043, 1045 -> 1050), including the vcodec=='none' return, the
# else: lines and the default/final return; the decorator line (1033, this
# is presumably a @staticmethod — `format` has no self) is elided too.
# `format` shadows the builtin of the same name — kept as-is here.
1034 def format_resolution(format, default='unknown'):
1035 if format.get('vcodec') == 'none':
# Prefer an explicit 'resolution' field when the extractor provided one.
1037 if format.get('resolution') is not None:
1038 return format['resolution']
1039 if format.get('height') is not None:
1040 if format.get('width') is not None:
1041 res = u'%sx%s' % (format['width'], format['height'])
# Height only: conventional "<height>p" notation.
1043 res = u'%sp' % format['height']
# Width only: unknown height.
1044 elif format.get('width') is not None:
1045 res = u'?x%d' % format['width']
# Print the table of available formats for --list-formats.
# NOTE(review): this excerpt elides a number of lines (numbering jumps
# 1051 -> 1053, 1062 -> 1064, 1066 -> 1069, ...), e.g. the `res = u''`
# initialisers, some else: branches and the closing parentheses of line().
1050 def list_formats(self, info_dict):
# Nested helper: human-readable "note" column (codec names, bitrates,
# filesize, extractor-provided format_note).
1051 def format_note(fdict):
1053 if fdict.get('ext') in ['f4f', 'f4m']:
1054 res += u'(unsupported) '
1055 if fdict.get('format_note') is not None:
1056 res += fdict['format_note'] + u' '
1057 if fdict.get('tbr') is not None:
1058 res += u'%4dk ' % fdict['tbr']
# Video part: codec name when known, otherwise fall back to bitrates.
1059 if (fdict.get('vcodec') is not None and
1060 fdict.get('vcodec') != 'none'):
1061 res += u'%-5s@' % fdict['vcodec']
1062 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1064 if fdict.get('vbr') is not None:
1065 res += u'%4dk' % fdict['vbr']
# Audio part: codec and/or audio bitrate.
1066 if fdict.get('acodec') is not None:
1069 res += u'%-5s' % fdict['acodec']
1070 elif fdict.get('abr') is not None:
1074 if fdict.get('abr') is not None:
1075 res += u'@%3dk' % fdict['abr']
1076 if fdict.get('filesize') is not None:
1079 res += format_bytes(fdict['filesize'])
# Nested helper: one table row; idlen pads the format-id column so all
# rows line up with the widest id.
1082 def line(format, idlen=20):
1083 return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
1084 format['format_id'],
1086 self.format_resolution(format),
1087 format_note(format),
# With no 'formats' list, treat the info_dict itself as the single format.
1090 formats = info_dict.get('formats', [info_dict])
1091 idlen = max(len(u'format code'),
1092 max(len(f['format_id']) for f in formats))
1093 formats_s = [line(f, idlen) for f in formats]
# Rows are ordered worst-first: tag the first row "(worst)" and the last
# "(best)", separated by a space only when the note column is non-empty.
1094 if len(formats) > 1:
1095 formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
1096 formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
1098 header_line = line({
1099 'format_id': u'format code', 'ext': u'extension',
1100 'resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
1101 self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
1102 (info_dict['id'], header_line, u"\n".join(formats_s)))
def urlopen(self, req):
    """Open *req* through this instance's configured URL opener.

    All HTTP(S) traffic goes through self._opener (set up in
    _setup_opener: cookies, proxies, HTTPS options).
    """
    opener = self._opener
    return opener.open(req)
# Emit verbose debugging info (version, git commit, Python/platform, proxy
# map) for bug reports. No-op unless --verbose was given.
1108 def print_debug_header(self):
1109 if not self.params.get('verbose'):
# NOTE(review): numbering jumps 1109 -> 1111 and 1111 -> 1113 — the early
# return and the try: guarding the git call are elided from this excerpt;
# lines 1121-1125 (presumably the matching except handlers) are elided too.
1111 write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
# Best-effort: ask git for the current short commit hash when running from
# a source checkout (cwd pinned to this package's directory).
1113 sp = subprocess.Popen(
1114 ['git', 'rev-parse', '--short', 'HEAD'],
1115 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1116 cwd=os.path.dirname(os.path.abspath(__file__)))
1117 out, err = sp.communicate()
1118 out = out.decode().strip()
# Only print when the output looks like a hex commit id.
1119 if re.match('[0-9a-f]+', out):
1120 write_string(u'[debug] Git HEAD: ' + out + u'\n')
1126 write_string(u'[debug] Python version %s - %s' %
1127 (platform.python_version(), platform_name()) + u'\n')
# Collect the effective proxy configuration from the opener's handlers
# (the proxy_map initialiser, lines 1128-1129, is elided from this excerpt).
1130 for handler in self._opener.handlers:
1131 if hasattr(handler, 'proxies'):
1132 proxy_map.update(handler.proxies)
1133 write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
# Build the urllib opener used for all HTTP(S) requests: cookie handling,
# proxy selection, HTTPS certificate checking and debug traffic printing.
# NOTE(review): several lines are elided in this excerpt (numbering jumps
# 1137 -> 1139, 1143 -> 1145, 1148 -> 1150, 1153 -> 1156, ...), e.g. the
# else: branches and the arguments of MozillaCookieJar/HTTPCookieProcessor.
1135 def _setup_opener(self):
1136 timeout_val = self.params.get('socket_timeout')
# Default socket timeout is 600 seconds when --socket-timeout is not given.
1137 timeout = 600 if timeout_val is None else float(timeout_val)
1139 opts_cookiefile = self.params.get('cookiefile')
1140 opts_proxy = self.params.get('proxy')
# Without --cookies use an in-memory jar; otherwise a Mozilla-format
# cookie file, loaded only if it is readable.
1142 if opts_cookiefile is None:
1143 self.cookiejar = compat_cookiejar.CookieJar()
1145 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1147 if os.access(opts_cookiefile, os.R_OK):
1148 self.cookiejar.load()
1150 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
# --proxy overrides the environment; an empty string presumably disables
# proxying entirely (the branch body at 1154-1155 is elided — confirm).
1152 if opts_proxy is not None:
1153 if opts_proxy == '':
1156 proxies = {'http': opts_proxy, 'https': opts_proxy}
# No --proxy: fall back to the environment (http_proxy etc.).
1158 proxies = compat_urllib_request.getproxies()
1159 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1160 if 'http' in proxies and 'https' not in proxies:
1161 proxies['https'] = proxies['http']
1162 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
# --print-traffic turns on handler-level debug output.
1164 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1165 https_handler = make_HTTPS_handler(
1166 self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1167 ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1168 opener = compat_urllib_request.build_opener(
1169 https_handler, proxy_handler, cookie_processor, ydlh)
1170 # Delete the default user-agent header, which would otherwise apply in
1171 # cases where our custom HTTP handler doesn't come into play
1172 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1173 opener.addheaders = []
1174 self._opener = opener
1176 # TODO remove this global modification
1177 compat_urllib_request.install_opener(opener)
1178 socket.setdefaulttimeout(timeout)