2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import
28 compat_urllib_request,
48 UnavailableVideoError,
53 from .extractor import get_info_extractor, gen_extractors
54 from .FileDownloader import FileDownloader
55 from .version import __version__
58 class YoutubeDL(object):
61 YoutubeDL objects are the ones responsible for downloading the
62 actual video file and writing it to disk if the user has requested
63 it, among some other tasks. In most cases there should be one per
64 program. As, given a video URL, the downloader doesn't know how to
65 extract all the needed information, task that InfoExtractors do, it
66 has to pass the URL to one of them.
68 For this, YoutubeDL objects have a method that allows
69 InfoExtractors to be registered in a given order. When it is passed
70 a URL, the YoutubeDL object handles it to the first InfoExtractor it
71 finds that reports being able to handle it. The InfoExtractor extracts
72 all the information about the video or videos the URL refers to, and
73 YoutubeDL process the extracted information, possibly using a File
74 Downloader to download the video.
76 YoutubeDL objects accept a lot of parameters. In order not to saturate
77 the object constructor with arguments, it receives a dictionary of
78 options instead. These options are available through the params
79 attribute for the InfoExtractors to use. The YoutubeDL also
80 registers itself as the downloader in charge for the InfoExtractors
81 that are added to it, so this is a "mutual registration".
85 username: Username for authentication purposes.
86 password: Password for authentication purposes.
87 videopassword: Password for accessing a video.
88 usenetrc: Use netrc for authentication instead.
89 verbose: Print additional info to stdout.
90 quiet: Do not print messages to stdout.
91 forceurl: Force printing final URL.
92 forcetitle: Force printing title.
93 forceid: Force printing ID.
94 forcethumbnail: Force printing thumbnail URL.
95 forcedescription: Force printing description.
96 forcefilename: Force printing final filename.
97 forcejson: Force printing info_dict as JSON.
98 simulate: Do not download the video files.
99 format: Video format code.
100 format_limit: Highest quality format to try.
101 outtmpl: Template for output names.
102 restrictfilenames: Do not allow "&" and spaces in file names
103 ignoreerrors: Do not stop on download errors.
104 nooverwrites: Prevent overwriting files.
105 playliststart: Playlist item to start at.
106 playlistend: Playlist item to end at.
107 matchtitle: Download only matching titles.
108 rejecttitle: Reject downloads for matching titles.
109 logger: Log messages to a logging.Logger instance.
110 logtostderr: Log messages to stderr instead of stdout.
111 writedescription: Write the video description to a .description file
112 writeinfojson: Write the video description to a .info.json file
113 writeannotations: Write the video annotations to a .annotations.xml file
114 writethumbnail: Write the thumbnail image to a file
115 writesubtitles: Write the video subtitles to a file
116 writeautomaticsub: Write the automatic subtitles to a file
117 allsubtitles: Downloads all the subtitles of the video
118 (requires writesubtitles or writeautomaticsub)
119 listsubtitles: Lists all available subtitles for the video
120 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
121 subtitleslangs: List of languages of the subtitles to download
122 keepvideo: Keep the video file after post-processing
123 daterange: A DateRange object, download only if the upload_date is in the range.
124 skip_download: Skip the actual download of the video file
125 cachedir: Location of the cache files in the filesystem.
126 None to disable filesystem cache.
127 noplaylist: Download single video instead of a playlist if in doubt.
128 age_limit: An integer representing the user's age in years.
129 Unsuitable videos for the given age are skipped.
130 download_archive: File name of a file where all downloads are recorded.
131 Videos already present in the file are not downloaded
133 cookiefile: File name where cookies should be read from and dumped to.
134 nocheckcertificate:Do not verify SSL certificates
135 proxy: URL of the proxy server to use
136 socket_timeout: Time to wait for unresponsive hosts, in seconds
137 bidi_workaround: Work around buggy terminals without bidirectional text
138 support, using fribidi
140 The following parameters are not used by YoutubeDL itself, they are used by
142 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
143 noresizebuffer, retries, continuedl, noprogress, consoletitle
# Class-level defaults so the attributes always exist; __init__ replaces
# them with real counters (int) per instance.
149 _download_retcode = None
150 _num_downloads = None
153 def __init__(self, params=None):
154 """Create a FileDownloader object with the given options."""
# NOTE(review): gaps in the original numbering mark lines elided from this
# view (155, 157, 164, 166-167, 170-172, 177, 181, 183-185, 190, 195, 198-200).
156 self._ies_instances = {}
158 self._fd_progress_hooks = []
159 self._download_retcode = 0
160 self._num_downloads = 0
# NOTE(review): params is dereferenced here before the None-default on
# line 163 -- calling YoutubeDL() with params=None would raise
# AttributeError; confirm all callers pass a dict.
161 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
162 self._err_file = sys.stderr
163 self.params = {} if params is None else params
# Work around terminals without RTL support: pipe output through a
# fribidi subprocess whose result is read back via a pty.
165 if params.get('bidi_workaround', False):
168 master, slave = pty.openpty()
169 width = get_term_width()
173 width_args = ['-w', str(width)]
174 self._fribidi = subprocess.Popen(
175 ['fribidi', '-c', 'UTF-8'] + width_args,
176 stdin=subprocess.PIPE,
178 stderr=self._err_file)
179 self._fribidi_channel = os.fdopen(master, 'rb')
180 except OSError as ose:
182 self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
# Force --restrict-filenames when the filesystem encoding cannot
# represent arbitrary characters (Python 3 only, see issue #1474).
186 if (sys.version_info >= (3,) and sys.platform != 'win32' and
187 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
188 and not params['restrictfilenames']):
189 # On Python 3, the Unicode filesystem API will throw errors (#1474)
191 u'Assuming --restrict-filenames since file system encoding '
192 u'cannot encode all charactes. '
193 u'Set the LC_ALL environment variable to fix this.')
194 self.params['restrictfilenames'] = True
# The %(stitle)s output-template key is deprecated in favour of %(title)s.
196 if '%(stitle)s' in self.params.get('outtmpl', ''):
197 self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
201 def add_info_extractor(self, ie):
202 """Add an InfoExtractor object to the end of the list."""
# Index the extractor by its key and register ourselves as its downloader.
# NOTE(review): line 203 (presumably appending ie to an ordered list) is
# elided from this view.
204 self._ies_instances[ie.ie_key()] = ie
205 ie.set_downloader(self)
207 def get_info_extractor(self, ie_key):
# (docstring quote lines 208/212 are elided from this view)
209 Get an instance of an IE with name ie_key, it will try to get one from
210 the _ies list, if there's no instance it will create a new one and add
211 it to the extractor list.
# Cache lookup first; instantiate and register on a miss.
213 ie = self._ies_instances.get(ie_key)
# NOTE(review): the `if ie is None:` guard (line 214) and the final
# `return ie` (line 217) are elided from this view.
215 ie = get_info_extractor(ie_key)()
216 self.add_info_extractor(ie)
219 def add_default_info_extractors(self):
221 Add the InfoExtractors returned by gen_extractors to the end of the list
223 for ie in gen_extractors():
224 self.add_info_extractor(ie)
226 def add_post_processor(self, pp):
227 """Add a PostProcessor object to the end of the chain."""
# NOTE(review): line 228 (presumably appending pp to the post-processor
# chain) is elided from this view.
229 pp.set_downloader(self)
231 def add_downloader_progress_hook(self, ph):
232 """Add the progress hook to the file downloader"""
233 self._fd_progress_hooks.append(ph)
235 def _bidi_workaround(self, message):
# Pass-through unless __init__ successfully started the fribidi helper.
236 if not hasattr(self, '_fribidi_channel'):
# NOTE(review): the early `return message` (line 237) is elided here.
239 assert type(message) == type(u'')
240 line_count = message.count(u'\n') + 1
# Feed the text to fribidi and read back the same number of lines.
241 self._fribidi.stdin.write((message + u'\n').encode('utf-8'))
242 self._fribidi.stdin.flush()
243 res = u''.join(self._fribidi_channel.readline().decode('utf-8')
244 for _ in range(line_count))
# Strip the trailing newline appended above.
245 return res[:-len(u'\n')]
247 def to_screen(self, message, skip_eol=False):
248 """Print message to stdout if not in quiet mode."""
249 return self.to_stdout(message, skip_eol, check_quiet=True)
251 def to_stdout(self, message, skip_eol=False, check_quiet=False):
252 """Print message to stdout if not in quiet mode."""
253 if self.params.get('logger'):
254 self.params['logger'].debug(message)
255 elif not check_quiet or not self.params.get('quiet', False):
256 message = self._bidi_workaround(message)
257 terminator = [u'\n', u''][skip_eol]
258 output = message + terminator
260 write_string(output, self._screen_file)
262 def to_stderr(self, message):
263 """Print message to stderr."""
264 assert type(message) == type(u'')
265 if self.params.get('logger'):
266 self.params['logger'].error(message)
268 message = self._bidi_workaround(message)
269 output = message + u'\n'
270 write_string(output, self._err_file)
272 def to_console_title(self, message):
273 if not self.params.get('consoletitle', False):
275 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
276 # c_wchar_p() might not be necessary if `message` is
277 # already of type unicode()
278 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
279 elif 'TERM' in os.environ:
280 write_string(u'\033]0;%s\007' % message, self._screen_file)
282 def save_console_title(self):
283 if not self.params.get('consoletitle', False):
285 if 'TERM' in os.environ:
286 # Save the title on stack
287 write_string(u'\033[22;0t', self._screen_file)
289 def restore_console_title(self):
290 if not self.params.get('consoletitle', False):
292 if 'TERM' in os.environ:
293 # Restore the title from stack
294 write_string(u'\033[23;0t', self._screen_file)
# NOTE(review): body line of __enter__ (its `def` on elided line 296);
# saves the terminal title when entering the context manager.
297 self.save_console_title()
300 def __exit__(self, *args):
301 self.restore_console_title()
303 if self.params.get('cookiefile') is not None:
304 self.cookiejar.save()
306 def trouble(self, message=None, tb=None):
307 """Determine action to take when a download problem appears.
309 Depending on if the downloader has been configured to ignore
310 download errors or not, this method may throw an exception or
311 not when errors are found, after printing the message.
313 tb, if given, is additional traceback information.
# (closing docstring quotes are elided from this view)
315 if message is not None:
316 self.to_stderr(message)
# In verbose mode, synthesize a traceback when none was supplied.
317 if self.params.get('verbose'):
319 if sys.exc_info()[0]: # if .trouble has been called from an except block
321 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
322 tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
323 tb += compat_str(traceback.format_exc())
# Not inside an except block: show the current call stack instead.
325 tb_data = traceback.format_list(traceback.extract_stack())
326 tb = u''.join(tb_data)
# Re-raise as DownloadError unless errors are being ignored; otherwise
# just record a non-zero process return code.
328 if not self.params.get('ignoreerrors', False):
329 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
330 exc_info = sys.exc_info()[1].exc_info
332 exc_info = sys.exc_info()
333 raise DownloadError(message, exc_info)
334 self._download_retcode = 1
336 def report_warning(self, message):
338 Print the message to stderr, it will be prefixed with 'WARNING:'
339 If stderr is a tty file the 'WARNING:' will be colored
341 if self._err_file.isatty() and os.name != 'nt':
342 _msg_header = u'\033[0;33mWARNING:\033[0m'
344 _msg_header = u'WARNING:'
345 warning_message = u'%s %s' % (_msg_header, message)
346 self.to_stderr(warning_message)
348 def report_error(self, message, tb=None):
350 Do the same as trouble, but prefixes the message with 'ERROR:', colored
351 in red if stderr is a tty file.
353 if self._err_file.isatty() and os.name != 'nt':
354 _msg_header = u'\033[0;31mERROR:\033[0m'
356 _msg_header = u'ERROR:'
357 error_message = u'%s %s' % (_msg_header, message)
358 self.trouble(error_message, tb)
360 def report_writedescription(self, descfn):
361 """ Report that the description file is being written """
362 self.to_screen(u'[info] Writing video description to: ' + descfn)
364 def report_writesubtitles(self, sub_filename):
365 """ Report that the subtitles file is being written """
366 self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
368 def report_writeinfojson(self, infofn):
369 """ Report that the metadata file has been written """
370 self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
372 def report_writeannotations(self, annofn):
373 """ Report that the annotations file has been written. """
374 self.to_screen(u'[info] Writing video annotations to: ' + annofn)
376 def report_file_already_downloaded(self, file_name):
377 """Report file has already been fully downloaded."""
379 self.to_screen(u'[download] %s has already been downloaded' % file_name)
380 except UnicodeEncodeError:
381 self.to_screen(u'[download] The file has already been downloaded')
383 def increment_downloads(self):
384 """Increment the ordinal that assigns a number to each file."""
385 self._num_downloads += 1
387 def prepare_filename(self, info_dict):
388 """Generate the output filename."""
# (the surrounding try: on line 389 is elided from this view)
390 template_dict = dict(info_dict)
392 template_dict['epoch'] = int(time.time())
393 autonumber_size = self.params.get('autonumber_size')
394 if autonumber_size is None:
# NOTE(review): the default autonumber_size assignment (line 395) is
# elided from this view -- confirm its value against the full file.
396 autonumber_templ = u'%0' + str(autonumber_size) + u'd'
397 template_dict['autonumber'] = autonumber_templ % self._num_downloads
398 if template_dict.get('playlist_index') is not None:
399 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
# Sanitize every template value for filesystem use; keys missing from the
# info dict render as 'NA' via the defaultdict below.
401 sanitize = lambda k, v: sanitize_filename(
403 restricted=self.params.get('restrictfilenames'),
405 template_dict = dict((k, sanitize(k, v))
406 for k, v in template_dict.items()
408 template_dict = collections.defaultdict(lambda: u'NA', template_dict)
410 tmpl = os.path.expanduser(self.params['outtmpl'])
411 filename = tmpl % template_dict
# (the `return filename` line is elided from this view)
413 except ValueError as err:
414 self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
417 def _match_entry(self, info_dict):
418 """ Returns None iff the file should be downloaded """
420 if 'title' in info_dict:
421 # This can happen when we're just evaluating the playlist
422 title = info_dict['title']
423 matchtitle = self.params.get('matchtitle', False)
# (the `if matchtitle:` guard on line 424 is elided from this view)
425 if not re.search(matchtitle, title, re.IGNORECASE):
426 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
427 rejecttitle = self.params.get('rejecttitle', False)
# (the `if rejecttitle:` guard on line 428 is elided from this view)
429 if re.search(rejecttitle, title, re.IGNORECASE):
430 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
# Skip when the upload date falls outside the configured --daterange.
431 date = info_dict.get('upload_date', None)
433 dateRange = self.params.get('daterange', DateRange())
434 if date not in dateRange:
435 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
436 age_limit = self.params.get('age_limit')
437 if age_limit is not None:
438 if age_limit < info_dict.get('age_limit', 0):
439 return u'Skipping "' + title + '" because it is age restricted'
440 if self.in_download_archive(info_dict):
441 return (u'%s has already been recorded in archive'
442 % info_dict.get('title', info_dict.get('id', u'video')))
# (the final `return None` is elided from this view)
446 def add_extra_info(info_dict, extra_info):
447 '''Set the keys from extra_info in info dict if they are missing'''
448 for key, value in extra_info.items():
449 info_dict.setdefault(key, value)
451 def extract_info(self, url, download=True, ie_key=None, extra_info={},
# NOTE(review): the rest of the signature and the docstring quotes are
# elided from this view. `extra_info={}` is a mutable default; in the
# visible code it is only read, never mutated.
454 Returns a list with a dictionary for each video we find.
455 If 'download', also downloads the videos.
456 extra_info is a dict containing the extra values to add to each result
# When an explicit ie_key is given, only that extractor is tried.
460 ies = [self.get_info_extractor(ie_key)]
465 if not ie.suitable(url):
469 self.report_warning(u'The program functionality for this site has been marked as broken, '
470 u'and will probably not work.')
473 ie_result = ie.extract(url)
474 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
476 if isinstance(ie_result, list):
477 # Backwards compatibility: old IE result format
479 '_type': 'compat_list',
480 'entries': ie_result,
# Record which extractor produced this result.
482 self.add_extra_info(ie_result,
484 'extractor': ie.IE_NAME,
486 'extractor_key': ie.ie_key(),
489 return self.process_ie_result(ie_result, download, extra_info)
492 except ExtractorError as de: # An error we somewhat expected
493 self.report_error(compat_str(de), de.format_traceback())
# Unexpected errors are only swallowed when ignoreerrors is set;
# the re-raise branch is elided from this view.
495 except Exception as e:
496 if self.params.get('ignoreerrors', False):
497 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
# Reached when no registered extractor accepts the URL.
502 self.report_error(u'no suitable InfoExtractor: %s' % url)
504 def process_ie_result(self, ie_result, download=True, extra_info={}):
506 Take the result of the ie(may be modified) and resolve all unresolved
507 references (URLs, playlist items).
509 It will also download the videos if 'download'.
510 Returns the resolved ie_result.
# (docstring quote lines are elided from this view)
513 result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
514 if result_type == 'video':
515 self.add_extra_info(ie_result, extra_info)
516 return self.process_video_result(ie_result, download=download)
517 elif result_type == 'url':
518 # We have to add extra_info to the results because it may be
519 # contained in a playlist
520 return self.extract_info(ie_result['url'],
522 ie_key=ie_result.get('ie_key'),
523 extra_info=extra_info)
524 elif result_type == 'url_transparent':
525 # Use the information from the embedding page
526 info = self.extract_info(
527 ie_result['url'], ie_key=ie_result.get('ie_key'),
528 extra_info=extra_info, download=False, process=False)
# Merge selected fields of the embedded info into a copy of ie_result.
530 def make_result(embedded_info):
531 new_result = ie_result.copy()
532 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
533 'entries', 'urlhandle', 'ie_key', 'duration',
534 'subtitles', 'annotations', 'format',
535 'thumbnail', 'thumbnails'):
538 if f in embedded_info:
539 new_result[f] = embedded_info[f]
# (the `return new_result` line is elided from this view)
541 new_result = make_result(info)
# Must not recurse into another url_transparent indirection.
543 assert new_result.get('_type') != 'url_transparent'
544 if new_result.get('_type') == 'compat_list':
545 new_result['entries'] = [
546 make_result(e) for e in new_result['entries']]
548 return self.process_ie_result(
549 new_result, download=download, extra_info=extra_info)
550 elif result_type == 'playlist':
551 # We process each entry in the playlist
552 playlist = ie_result.get('title', None) or ie_result.get('id', None)
553 self.to_screen(u'[download] Downloading playlist: %s' % playlist)
555 playlist_results = []
# Apply --playlist-start/--playlist-end slicing (the options are 1-based).
557 n_all_entries = len(ie_result['entries'])
558 playliststart = self.params.get('playliststart', 1) - 1
559 playlistend = self.params.get('playlistend', -1)
561 if playlistend == -1:
562 entries = ie_result['entries'][playliststart:]
564 entries = ie_result['entries'][playliststart:playlistend]
566 n_entries = len(entries)
568 self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
569 (ie_result['extractor'], playlist, n_all_entries, n_entries))
571 for i, entry in enumerate(entries, 1):
572 self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
# (the opening of the per-entry extra-info dict literal is elided)
574 'playlist': playlist,
575 'playlist_index': i + playliststart,
576 'extractor': ie_result['extractor'],
577 'webpage_url': ie_result['webpage_url'],
578 'extractor_key': ie_result['extractor_key'],
581 reason = self._match_entry(entry)
582 if reason is not None:
583 self.to_screen(u'[download] ' + reason)
# (a `continue` statement is elided from this view)
586 entry_result = self.process_ie_result(entry,
589 playlist_results.append(entry_result)
590 ie_result['entries'] = playlist_results
# (a `return ie_result` line is elided from this view)
592 elif result_type == 'compat_list':
# (the `def _fixup(r):` header is elided from this view)
594 self.add_extra_info(r,
596 'extractor': ie_result['extractor'],
597 'webpage_url': ie_result['webpage_url'],
598 'extractor_key': ie_result['extractor_key'],
601 ie_result['entries'] = [
602 self.process_ie_result(_fixup(r), download, extra_info)
603 for r in ie_result['entries']
# (list close and `return ie_result` are elided from this view)
607 raise Exception('Invalid result type: %s' % result_type)
609 def select_format(self, format_spec, available_formats):
610 if format_spec == 'best' or format_spec is None:
611 return available_formats[-1]
612 elif format_spec == 'worst':
613 return available_formats[0]
615 extensions = [u'mp4', u'flv', u'webm', u'3gp']
616 if format_spec in extensions:
617 filter_f = lambda f: f['ext'] == format_spec
619 filter_f = lambda f: f['format_id'] == format_spec
620 matches = list(filter(filter_f, available_formats))
625 def process_video_result(self, info_dict, download=True):
626 assert info_dict.get('_type', 'video') == 'video'
# Normalize playlist bookkeeping for standalone videos.
628 if 'playlist' not in info_dict:
629 # It isn't part of a playlist
630 info_dict['playlist'] = None
631 info_dict['playlist_index'] = None
633 # This extractors handle format selection themselves
634 if info_dict['extractor'] in [u'youtube', u'Youku']:
636 self.process_info(info_dict)
# (an early return after delegating is elided from this view)
639 # We now pick which formats have to be downloaded
640 if info_dict.get('formats') is None:
641 # There's only one format available
642 formats = [info_dict]
# (the `else:` line is elided from this view)
644 formats = info_dict['formats']
646 # We check that all the formats have the format and format_id fields
647 for (i, format) in enumerate(formats):
648 if format.get('format_id') is None:
649 format['format_id'] = compat_str(i)
650 if format.get('format') is None:
651 format['format'] = u'{id} - {res}{note}'.format(
652 id=format['format_id'],
653 res=self.format_resolution(format),
654 note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
656 # Automatically determine file extension if missing
657 if 'ext' not in format:
658 format['ext'] = determine_ext(format['url'])
660 if self.params.get('listformats', None):
661 self.list_formats(info_dict)
# (an early return after listing formats is elided from this view)
664 format_limit = self.params.get('format_limit', None)
# (the `if format_limit:` guard is elided from this view)
666 formats = list(takewhile_inclusive(
667 lambda f: f['format_id'] != format_limit, formats
669 if self.params.get('prefer_free_formats'):
670 def _free_formats_key(f):
# (a try/except around the index lookup is elided from this view)
672 ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
675 # We only compare the extension if they have the same height and width
676 return (f.get('height'), f.get('width'), ext_ord)
677 formats = sorted(formats, key=_free_formats_key)
679 req_format = self.params.get('format', 'best')
680 if req_format is None:
# (the fallback assignment, presumably req_format = 'best', is elided)
682 formats_to_download = []
683 # The -1 is for supporting YoutubeIE
684 if req_format in ('-1', 'all'):
685 formats_to_download = formats
687 # We can accept formats requestd in the format: 34/5/best, we pick
688 # the first that is available, starting from left
689 req_formats = req_format.split('/')
690 for rf in req_formats:
691 selected_format = self.select_format(rf, formats)
692 if selected_format is not None:
693 formats_to_download = [selected_format]
# (a `break` statement is elided from this view)
695 if not formats_to_download:
696 raise ExtractorError(u'requested format not available',
# (the keyword arguments of the ExtractorError call are elided)
700 if len(formats_to_download) > 1:
701 self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
702 for format in formats_to_download:
703 new_info = dict(info_dict)
704 new_info.update(format)
705 self.process_info(new_info)
706 # We update the info dict with the best quality format (backwards compatibility)
707 info_dict.update(formats_to_download[-1])
# (a final `return info_dict` is elided from this view)
710 def process_info(self, info_dict):
711 """Process a single resolved IE result."""
713 assert info_dict.get('_type', 'video') == 'video'
714 #We increment the download the download count here to match the previous behaviour.
715 self.increment_downloads()
# Cap overly long titles; keep the full original in 'fulltitle'.
717 info_dict['fulltitle'] = info_dict['title']
718 if len(info_dict['title']) > 200:
719 info_dict['title'] = info_dict['title'][:197] + u'...'
721 # Keep for backwards compatibility
722 info_dict['stitle'] = info_dict['title']
724 if not 'format' in info_dict:
725 info_dict['format'] = info_dict['ext']
727 reason = self._match_entry(info_dict)
728 if reason is not None:
729 self.to_screen(u'[download] ' + reason)
# (an early `return` after the skip message is elided from this view)
732 max_downloads = self.params.get('max_downloads')
733 if max_downloads is not None:
734 if self._num_downloads > int(max_downloads):
735 raise MaxDownloadsReached()
737 filename = self.prepare_filename(info_dict)
# --force* options: print the requested fields to stdout.
740 if self.params.get('forcetitle', False):
741 self.to_stdout(info_dict['fulltitle'])
742 if self.params.get('forceid', False):
743 self.to_stdout(info_dict['id'])
744 if self.params.get('forceurl', False):
745 # For RTMP URLs, also include the playpath
746 self.to_stdout(info_dict['url'] + info_dict.get('play_path', u''))
747 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
748 self.to_stdout(info_dict['thumbnail'])
749 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
750 self.to_stdout(info_dict['description'])
751 if self.params.get('forcefilename', False) and filename is not None:
752 self.to_stdout(filename)
753 if self.params.get('forceformat', False):
754 self.to_stdout(info_dict['format'])
755 if self.params.get('forcejson', False):
756 info_dict['_filename'] = filename
757 self.to_stdout(json.dumps(info_dict))
759 # Do nothing else if in simulate mode
760 if self.params.get('simulate', False):
# (the `return` for simulate mode is elided from this view)
767 dn = os.path.dirname(encodeFilename(filename))
768 if dn != '' and not os.path.exists(dn):
# (the os.makedirs call inside a try: is elided from this view)
770 except (OSError, IOError) as err:
771 self.report_error(u'unable to create directory ' + compat_str(err))
# (a `return` after the directory error is elided from this view)
774 if self.params.get('writedescription', False):
# (a try: line is elided from this view)
776 descfn = filename + u'.description'
777 self.report_writedescription(descfn)
778 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
779 descfile.write(info_dict['description'])
780 except (KeyError, TypeError):
781 self.report_warning(u'There\'s no description to write.')
782 except (OSError, IOError):
783 self.report_error(u'Cannot write description file ' + descfn)
# (a `return` after the write error is elided from this view)
786 if self.params.get('writeannotations', False):
# (a try: line is elided from this view)
788 annofn = filename + u'.annotations.xml'
789 self.report_writeannotations(annofn)
790 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
791 annofile.write(info_dict['annotations'])
792 except (KeyError, TypeError):
793 self.report_warning(u'There are no annotations to write.')
794 except (OSError, IOError):
795 self.report_error(u'Cannot write annotations file: ' + annofn)
798 subtitles_are_requested = any([self.params.get('writesubtitles', False),
799 self.params.get('writeautomaticsub')])
801 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
802 # subtitles download errors are already managed as troubles in relevant IE
803 # that way it will silently go on when used with unsupporting IE
804 subtitles = info_dict['subtitles']
805 sub_format = self.params.get('subtitlesformat', 'srt')
806 for sub_lang in subtitles.keys():
807 sub = subtitles[sub_lang]
# (a None-guard for sub and a try: are elided from this view)
811 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
812 self.report_writesubtitles(sub_filename)
813 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
# (the subfile.write(sub) call is elided from this view)
815 except (OSError, IOError):
# NOTE(review): this error message interpolates `descfn`, which is only
# bound when writedescription was requested -- a latent NameError and the
# wrong filename; it should presumably reference sub_filename. Confirm
# against the full file before fixing.
816 self.report_error(u'Cannot write subtitles file ' + descfn)
# (a `return` is elided from this view)
819 if self.params.get('writeinfojson', False):
820 infofn = os.path.splitext(filename)[0] + u'.info.json'
821 self.report_writeinfojson(infofn)
# (a try: line is elided from this view)
823 json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
824 write_json_file(json_info_dict, encodeFilename(infofn))
825 except (OSError, IOError):
826 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
# (a `return` is elided from this view)
829 if self.params.get('writethumbnail', False):
830 if info_dict.get('thumbnail') is not None:
831 thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
832 thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
833 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
834 (info_dict['extractor'], info_dict['id']))
# (a try: line is elided from this view)
836 uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
837 with open(thumb_filename, 'wb') as thumbf:
838 shutil.copyfileobj(uf, thumbf)
839 self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
840 (info_dict['extractor'], info_dict['id'], thumb_filename))
841 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
842 self.report_warning(u'Unable to download thumbnail "%s": %s' %
843 (info_dict['thumbnail'], compat_str(err)))
845 if not self.params.get('skip_download', False):
846 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
# (the already-downloaded branch and a try: are elided from this view)
850 fd = FileDownloader(self, self.params)
851 for ph in self._fd_progress_hooks:
852 fd.add_progress_hook(ph)
853 success = fd._do_download(filename, info_dict)
854 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
855 self.report_error(u'unable to download video data: %s' % str(err))
# (a `return` is elided from this view)
857 except (OSError, IOError) as err:
858 raise UnavailableVideoError(err)
859 except (ContentTooShortError, ) as err:
860 self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
# (a `return`, the `if success:` guard, and a try: around post-processing
# are elided from this view)
865 self.post_process(filename, info_dict)
866 except (PostProcessingError) as err:
867 self.report_error(u'postprocessing: %s' % str(err))
# (a `return` is elided from this view)
870 self.record_download_archive(info_dict)
872 def download(self, url_list):
873 """Download a given list of URLs."""
# Refuse to write several videos to one fixed (template-free) filename.
874 if (len(url_list) > 1 and
875 '%' not in self.params['outtmpl']
876 and self.params.get('max_downloads') != 1):
877 raise SameFileError(self.params['outtmpl'])
# (the `for url in url_list:` loop header and a try: are elided)
881 #It also downloads the videos
882 self.extract_info(url)
883 except UnavailableVideoError:
884 self.report_error(u'unable to download video')
885 except MaxDownloadsReached:
886 self.to_screen(u'[info] Maximum number of downloaded files reached.')
# NOTE(review): lines 887-888 are elided; presumably the MaxDownloadsReached
# handler re-raises to abort the loop -- confirm against the full file.
889 return self._download_retcode
891 def download_with_info_file(self, info_filename):
892 with io.open(info_filename, 'r', encoding='utf-8') as f:
# (the json.load of the file contents into `info` is elided from this view)
895 self.process_ie_result(info, download=True)
896 except DownloadError:
# Fall back to re-extracting from the original webpage URL when the
# saved info turns out to be stale.
897 webpage_url = info.get('webpage_url')
898 if webpage_url is not None:
899 self.report_warning(u'The info failed to download, trying with "%s"' % webpage_url)
900 return self.download([webpage_url])
# (an else-branch re-raise is elided from this view)
903 return self._download_retcode
905 def post_process(self, filename, ie_info):
906 """Run all the postprocessors on the given file."""
# (copying ie_info into a working dict `info` is elided from this view)
908 info['filepath'] = filename
# Each post-processor may veto or endorse keeping the original file;
# the loop header over the PP chain and a try: are elided from this view.
912 keep_video_wish, new_info = pp.run(info)
913 if keep_video_wish is not None:
# (the branch condition on line 914 is elided from this view)
915 keep_video = keep_video_wish
916 elif keep_video is None:
917 # No clear decision yet, let IE decide
918 keep_video = keep_video_wish
919 except PostProcessingError as e:
920 self.report_error(e.msg)
921 if keep_video is False and not self.params.get('keepvideo', False):
# (a try: line is elided from this view)
923 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
924 os.remove(encodeFilename(filename))
925 except (IOError, OSError):
926 self.report_warning(u'Unable to remove downloaded video file')
928 def _make_archive_id(self, info_dict):
929 # Future-proof against any change in case
930 # and backwards compatibility with prior versions
931 extractor = info_dict.get('extractor_key')
932 if extractor is None:
933 if 'id' in info_dict:
934 extractor = info_dict.get('ie_key') # key in a playlist
935 if extractor is None:
936 return None # Incomplete video information
937 return extractor.lower() + u' ' + info_dict['id']
939 def in_download_archive(self, info_dict):
940 fn = self.params.get('download_archive')
# (the `if fn is None: return False` guard, lines 941-943, is elided)
944 vid_id = self._make_archive_id(info_dict)
# (the `if vid_id is None:` guard on line 945 is elided from this view)
946 return False # Incomplete video information
# Scan the archive file line-by-line under a shared lock; a try: around
# the file access is elided from this view.
949 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
950 for line in archive_file:
951 if line.strip() == vid_id:
# (the `return True` line is elided from this view)
953 except IOError as ioe:
# A missing archive file simply means nothing has been recorded yet.
954 if ioe.errno != errno.ENOENT:
# (the re-raise and the final `return False` are elided from this view)
958 def record_download_archive(self, info_dict):
959 fn = self.params.get('download_archive')
# (the `if fn is None: return` guard, lines 960-961, is elided; an assert
# that vid_id is not None on line 963 is also elided from this view)
962 vid_id = self._make_archive_id(info_dict)
# Append one '<extractor> <id>' line under an exclusive file lock.
964 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
965 archive_file.write(vid_id + u'\n')
# (a @staticmethod decorator on line 967 is elided from this view --
# callers invoke this as self.format_resolution(format) with one argument)
968 def format_resolution(format, default='unknown'):
969 if format.get('vcodec') == 'none':
# (the audio-only return on line 970 is elided from this view)
971 if format.get('_resolution') is not None:
972 return format['_resolution']
# Prefer WxH when both dimensions are known, fall back to '<height>p'.
973 if format.get('height') is not None:
974 if format.get('width') is not None:
975 res = u'%sx%s' % (format['width'], format['height'])
# (the `else:` line 976 is elided from this view)
977 res = u'%sp' % format['height']
# (the no-height branch assigning `default` and the final `return res`
# are elided from this view)
982 def list_formats(self, info_dict):
# Render a short human-readable note (codecs, bitrates, size) for a format.
983 def format_note(fdict):
# (the `res = u''` initializer on line 984 is elided from this view)
985 if fdict.get('format_note') is not None:
986 res += fdict['format_note'] + u' '
987 if (fdict.get('vcodec') is not None and
988 fdict.get('vcodec') != 'none'):
989 res += u'%-5s' % fdict['vcodec']
990 elif fdict.get('vbr') is not None:
# (the video-bitrate-only branch body, line 991, is elided)
992 if fdict.get('vbr') is not None:
993 res += u'@%4dk' % fdict['vbr']
994 if fdict.get('acodec') is not None:
# (lines 995-996, spacing before the audio codec, are elided)
997 res += u'%-5s' % fdict['acodec']
998 elif fdict.get('abr') is not None:
# (lines 999-1001, the audio-bitrate-only branch, are elided)
1002 if fdict.get('abr') is not None:
1003 res += u'@%3dk' % fdict['abr']
1004 if fdict.get('filesize') is not None:
# (lines 1005-1006, spacing before the size, are elided)
1007 res += format_bytes(fdict['filesize'])
# (the `return res` line is elided from this view)
# Format one table row; idlen sizes the format-code column.
1010 def line(format, idlen=20):
1011 return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
1012 format['format_id'],
# (line 1013, the extension column, is elided from this view)
1014 self.format_resolution(format),
1015 format_note(format),
# (closing parentheses, lines 1016-1017, are elided from this view)
1018 formats = info_dict.get('formats', [info_dict])
1019 idlen = max(len(u'format code'),
1020 max(len(f['format_id']) for f in formats))
1021 formats_s = [line(f, idlen) for f in formats]
# Formats are assumed sorted worst-to-best; annotate the two extremes.
1022 if len(formats) > 1:
1023 formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
1024 formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
1026 header_line = line({
1027 'format_id': u'format code', 'ext': u'extension',
1028 '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
1029 self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
1030 (info_dict['id'], header_line, u"\n".join(formats_s)))
1032 def urlopen(self, req):
1033 """ Start an HTTP download """
1034 return self._opener.open(req)
1036 def print_debug_header(self):
1037 if not self.params.get('verbose'):
# (the early `return` on line 1038 is elided from this view)
1039 write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
# Best-effort: report the git commit when running from a source checkout;
# a try: around the subprocess call is elided from this view.
1041 sp = subprocess.Popen(
1042 ['git', 'rev-parse', '--short', 'HEAD'],
1043 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1044 cwd=os.path.dirname(os.path.abspath(__file__)))
1045 out, err = sp.communicate()
1046 out = out.decode().strip()
1047 if re.match('[0-9a-f]+', out):
1048 write_string(u'[debug] Git HEAD: ' + out + u'\n')
# (the exception handler for the git call, lines 1049-1053, is elided)
1054 write_string(u'[debug] Python version %s - %s' %
1055 (platform.python_version(), platform_name()) + u'\n')
# Collect the proxies actually configured on the opener; the proxy_map
# initializer (line 1057) is elided from this view.
1058 for handler in self._opener.handlers:
1059 if hasattr(handler, 'proxies'):
1060 proxy_map.update(handler.proxies)
1061 write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
1063 def _setup_opener(self):
# Build the urllib opener (cookies, proxies, HTTPS handler) used by
# urlopen(), and set the global socket timeout.
1064 timeout_val = self.params.get('socket_timeout')
1065 timeout = 600 if timeout_val is None else float(timeout_val)
1067 opts_cookiefile = self.params.get('cookiefile')
1068 opts_proxy = self.params.get('proxy')
# In-memory cookie jar by default; Mozilla-format file jar when
# --cookies was given (loaded only if the file is readable).
1070 if opts_cookiefile is None:
1071 self.cookiejar = compat_cookiejar.CookieJar()
# (the `else:` line 1072 is elided from this view)
1073 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1075 if os.access(opts_cookiefile, os.R_OK):
1076 self.cookiejar.load()
1078 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
# (the argument/closing of the HTTPCookieProcessor call is elided)
1080 if opts_proxy is not None:
1081 if opts_proxy == '':
# (the empty-proxy branch, presumably `proxies = {}`, is elided)
1084 proxies = {'http': opts_proxy, 'https': opts_proxy}
# (the `else:` taking the environment proxies is elided from this view)
1086 proxies = compat_urllib_request.getproxies()
1087 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1088 if 'http' in proxies and 'https' not in proxies:
1089 proxies['https'] = proxies['http']
1090 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1091 https_handler = make_HTTPS_handler(
1092 self.params.get('nocheckcertificate', False))
1093 opener = compat_urllib_request.build_opener(
1094 https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
1095 # Delete the default user-agent header, which would otherwise apply in
1096 # cases where our custom HTTP handler doesn't come into play
1097 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1098 opener.addheaders = []
1099 self._opener = opener
1101 # TODO remove this global modification
1102 compat_urllib_request.install_opener(opener)
1103 socket.setdefaulttimeout(timeout)