2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import
28 compat_urllib_request,
49 UnavailableVideoError,
55 from .extractor import get_info_extractor, gen_extractors
56 from .downloader import get_suitable_downloader
57 from .version import __version__
60 class YoutubeDL(object):
63 YoutubeDL objects are the ones responsible of downloading the
64 actual video file and writing it to disk if the user has requested
65 it, among some other tasks. In most cases there should be one per
66 program. As, given a video URL, the downloader doesn't know how to
67 extract all the needed information, task that InfoExtractors do, it
68 has to pass the URL to one of them.
70 For this, YoutubeDL objects have a method that allows
71 InfoExtractors to be registered in a given order. When it is passed
72 a URL, the YoutubeDL object hands it to the first InfoExtractor it
73 finds that reports being able to handle it. The InfoExtractor extracts
74 all the information about the video or videos the URL refers to, and
75 YoutubeDL process the extracted information, possibly using a File
76 Downloader to download the video.
78 YoutubeDL objects accept a lot of parameters. In order not to saturate
79 the object constructor with arguments, it receives a dictionary of
80 options instead. These options are available through the params
81 attribute for the InfoExtractors to use. The YoutubeDL also
82 registers itself as the downloader in charge for the InfoExtractors
83 that are added to it, so this is a "mutual registration".
87 username: Username for authentication purposes.
88 password: Password for authentication purposes.
89 videopassword: Password for accessing a video.
90 usenetrc: Use netrc for authentication instead.
91 verbose: Print additional info to stdout.
92 quiet: Do not print messages to stdout.
93 forceurl: Force printing final URL.
94 forcetitle: Force printing title.
95 forceid: Force printing ID.
96 forcethumbnail: Force printing thumbnail URL.
97 forcedescription: Force printing description.
98 forcefilename: Force printing final filename.
99 forceduration: Force printing duration.
100 forcejson: Force printing info_dict as JSON.
101 simulate: Do not download the video files.
102 format: Video format code.
103 format_limit: Highest quality format to try.
104 outtmpl: Template for output names.
105 restrictfilenames: Do not allow "&" and spaces in file names
106 ignoreerrors: Do not stop on download errors.
107 nooverwrites: Prevent overwriting files.
108 playliststart: Playlist item to start at.
109 playlistend: Playlist item to end at.
110 matchtitle: Download only matching titles.
111 rejecttitle: Reject downloads for matching titles.
112 logger: Log messages to a logging.Logger instance.
113 logtostderr: Log messages to stderr instead of stdout.
114 writedescription: Write the video description to a .description file
115 writeinfojson: Write the video description to a .info.json file
116 writeannotations: Write the video annotations to a .annotations.xml file
117 writethumbnail: Write the thumbnail image to a file
118 writesubtitles: Write the video subtitles to a file
119 writeautomaticsub: Write the automatic subtitles to a file
120 allsubtitles: Downloads all the subtitles of the video
121 (requires writesubtitles or writeautomaticsub)
122 listsubtitles: Lists all available subtitles for the video
123 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
124 subtitleslangs: List of languages of the subtitles to download
125 keepvideo: Keep the video file after post-processing
126 daterange: A DateRange object, download only if the upload_date is in the range.
127 skip_download: Skip the actual download of the video file
128 cachedir: Location of the cache files in the filesystem.
129 None to disable filesystem cache.
130 noplaylist: Download single video instead of a playlist if in doubt.
131 age_limit: An integer representing the user's age in years.
132 Unsuitable videos for the given age are skipped.
133 min_views: An integer representing the minimum view count the video
134 must have in order to not be skipped.
135 Videos without view count information are always
136 downloaded. None for no limit.
137 max_views: An integer representing the maximum view count.
138 Videos that are more popular than that are not
140 Videos without view count information are always
141 downloaded. None for no limit.
142 download_archive: File name of a file where all downloads are recorded.
143 Videos already present in the file are not downloaded
145 cookiefile: File name where cookies should be read from and dumped to.
146 nocheckcertificate:Do not verify SSL certificates
147 proxy: URL of the proxy server to use
148 socket_timeout: Time to wait for unresponsive hosts, in seconds
149 bidi_workaround: Work around buggy terminals without bidirectional text
150 support, using fribidi
151 debug_printtraffic:Print out sent and received HTTP traffic
153 The following parameters are not used by YoutubeDL itself, they are used by
155 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
156 noresizebuffer, retries, continuedl, noprogress, consoletitle
# Class-level placeholders; the real values are set per instance in __init__.
_download_retcode = None  # process exit code accumulated across downloads
_num_downloads = None     # ordinal used to number downloaded files
def __init__(self, params=None):
    """Create a FileDownloader object with the given options."""
    # NOTE(review): this view of the file is missing several lines here
    # (e.g. the assignment of self.params); comments describe only what
    # is visible.
    self._ies_instances = {}       # InfoExtractor instances, keyed by ie_key
    self._progress_hooks = []      # hooks forwarded to the file downloader
    self._download_retcode = 0
    self._num_downloads = 0
    # 'logtostderr' selects stderr (index True) over stdout for screen output.
    self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
    self._err_file = sys.stderr

    if params.get('bidi_workaround', False):
        # Route screen output through an external bidi filter process so
        # right-to-left text renders correctly on terminals without
        # bidirectional support.
        master, slave = pty.openpty()
        width = get_term_width()
        width_args = ['-w', str(width)]
        stdin=subprocess.PIPE,
        stderr=self._err_file)
        # First try 'bidiv'; the second Popen below is the 'fribidi'
        # fallback (the surrounding try/except lines are missing here).
        self._output_process = subprocess.Popen(
            ['bidiv'] + width_args, **sp_kwargs
        self._output_process = subprocess.Popen(
            ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
        self._output_channel = os.fdopen(master, 'rb')
    except OSError as ose:
        self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

    # Filesystems with an ASCII-only encoding cannot store arbitrary
    # Unicode filenames on Python 3, so force restricted filenames.
    if (sys.version_info >= (3,) and sys.platform != 'win32' and
        sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
        and not params['restrictfilenames']):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        u'Assuming --restrict-filenames since file system encoding '
        u'cannot encode all charactes. '
        u'Set the LC_ALL environment variable to fix this.')
    self.params['restrictfilenames'] = True

    # %(stitle)s support is retained only as a deprecated alias.
    if '%(stitle)s' in self.params.get('outtmpl', ''):
        self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
def add_info_extractor(self, ie):
    """Add an InfoExtractor object to the end of the list."""
    # Index by key for get_info_extractor() lookups, and register this
    # YoutubeDL with the extractor ("mutual registration", see class doc).
    # NOTE(review): the append to the ordered extractor list is missing
    # from this view.
    self._ies_instances[ie.ie_key()] = ie
    ie.set_downloader(self)
def get_info_extractor(self, ie_key):
    # NOTE(review): the docstring delimiters are missing from this view;
    # the two bare lines below are the original docstring text.
    Get an instance of an IE with name ie_key, it will try to get one from
    the _ies list, if there's no instance it will create a new one and add
    it to the extractor list.
    ie = self._ies_instances.get(ie_key)
    # Cache miss: lazily instantiate the extractor class and register it.
    ie = get_info_extractor(ie_key)()
    self.add_info_extractor(ie)
def add_default_info_extractors(self):
    # NOTE(review): docstring delimiters missing from this view.
    Add the InfoExtractors returned by gen_extractors to the end of the list
    for ie in gen_extractors():
        self.add_info_extractor(ie)
def add_post_processor(self, pp):
    """Add a PostProcessor object to the end of the chain."""
    # NOTE(review): the append to the post-processor list is missing from
    # this view; only the back-reference registration is visible.
    pp.set_downloader(self)
252 def add_progress_hook(self, ph):
253 """Add the progress hook (currently only for the file downloader)"""
254 self._progress_hooks.append(ph)
def _bidi_workaround(self, message):
    # Feed the message through the external bidi filter process started in
    # __init__ and read back the reordered text.
    if not hasattr(self, '_output_channel'):
        # NOTE(review): the early return for the "workaround disabled"
        # case is missing from this view.

    assert hasattr(self, '_output_process')
    assert type(message) == type(u'')  # must be a text string
    # The filter reorders line by line, so count how many lines to read back.
    line_count = message.count(u'\n') + 1
    self._output_process.stdin.write((message + u'\n').encode('utf-8'))
    self._output_process.stdin.flush()
    res = u''.join(self._output_channel.readline().decode('utf-8')
                   for _ in range(line_count))
    return res[:-len(u'\n')]  # drop the trailing newline we appended
269 def to_screen(self, message, skip_eol=False):
270 """Print message to stdout if not in quiet mode."""
271 return self.to_stdout(message, skip_eol, check_quiet=True)
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to stdout if not in quiet mode."""
    if self.params.get('logger'):
        # A logging.Logger was supplied; route output through it instead
        # of writing to the screen file.
        self.params['logger'].debug(message)
    elif not check_quiet or not self.params.get('quiet', False):
        # NOTE(review): upstream gates the bidi call on the
        # 'bidi_workaround' param; that guard is missing from this view.
        message = self._bidi_workaround(message)
        terminator = [u'\n', u''][skip_eol]
        output = message + terminator

        write_string(output, self._screen_file)
def to_stderr(self, message):
    """Print message to stderr."""
    assert type(message) == type(u'')  # must be a text string
    if self.params.get('logger'):
        self.params['logger'].error(message)
        # NOTE(review): the 'else:' introducing the direct-write branch
        # below is missing from this view.
        message = self._bidi_workaround(message)
        output = message + u'\n'
        write_string(output, self._err_file)
def to_console_title(self, message):
    # Set the terminal/console window title when the user enabled it.
    if not self.params.get('consoletitle', False):
        # NOTE(review): an early 'return' is missing from this view.
    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    elif 'TERM' in os.environ:
        # xterm-compatible escape sequence: set the window title.
        write_string(u'\033]0;%s\007' % message, self._screen_file)
def save_console_title(self):
    # Push the current terminal title so restore_console_title() can pop it.
    if not self.params.get('consoletitle', False):
        # NOTE(review): an early 'return' is missing from this view.
    if 'TERM' in os.environ:
        # Save the title on stack
        write_string(u'\033[22;0t', self._screen_file)
def restore_console_title(self):
    # Pop the terminal title previously saved by save_console_title().
    if not self.params.get('consoletitle', False):
        # NOTE(review): an early 'return' is missing from this view.
    if 'TERM' in os.environ:
        # Restore the title from stack
        write_string(u'\033[23;0t', self._screen_file)
319 self.save_console_title()
def __exit__(self, *args):
    # Context-manager exit: undo the title change from __enter__ and
    # persist cookies if a cookie jar file is in use.
    self.restore_console_title()

    if self.params.get('cookiefile') is not None:
        self.cookiejar.save()
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.
    Depending on if the downloader has been configured to ignore
    download errors or not, this method may throw an exception or
    not when errors are found, after printing the message.
    tb, if given, is additional traceback information.
    if message is not None:
        self.to_stderr(message)
    if self.params.get('verbose'):
        if sys.exc_info()[0]:  # if .trouble has been called from an except block
            # Prefer the wrapped original exception (an .exc_info
            # attribute, e.g. on ExtractorError) over the outer one.
            if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
            tb += compat_str(traceback.format_exc())
            # Called outside an except block: dump the current call stack.
            tb_data = traceback.format_list(traceback.extract_stack())
            tb = u''.join(tb_data)
    if not self.params.get('ignoreerrors', False):
        if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
            exc_info = sys.exc_info()[1].exc_info
            # NOTE(review): the 'else:' before this fallback appears to be
            # missing from this view.
            exc_info = sys.exc_info()
        raise DownloadError(message, exc_info)
    # ignoreerrors is set: just remember a non-zero exit code and continue.
    self._download_retcode = 1
def report_warning(self, message):
    # NOTE(review): docstring delimiters missing from this view.
    Print the message to stderr, it will be prefixed with 'WARNING:'
    If stderr is a tty file the 'WARNING:' will be colored
    # ANSI yellow on ttys; Windows consoles do not interpret these codes.
    if self._err_file.isatty() and os.name != 'nt':
        _msg_header = u'\033[0;33mWARNING:\033[0m'
        # NOTE(review): the 'else:' for the plain-header branch is missing
        # from this view.
        _msg_header = u'WARNING:'
    warning_message = u'%s %s' % (_msg_header, message)
    self.to_stderr(warning_message)
def report_error(self, message, tb=None):
    # NOTE(review): docstring delimiters missing from this view.
    Do the same as trouble, but prefixes the message with 'ERROR:', colored
    in red if stderr is a tty file.
    # ANSI red on ttys; Windows consoles do not interpret these codes.
    if self._err_file.isatty() and os.name != 'nt':
        _msg_header = u'\033[0;31mERROR:\033[0m'
        # NOTE(review): the 'else:' for the plain-header branch is missing
        # from this view.
        _msg_header = u'ERROR:'
    error_message = u'%s %s' % (_msg_header, message)
    self.trouble(error_message, tb)
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded."""
    # The filename may not be representable in the console encoding; fall
    # back to a generic message. (The 'try:' line is missing from this view.)
    self.to_screen(u'[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        self.to_screen(u'[download] The file has already been downloaded')
389 def increment_downloads(self):
390 """Increment the ordinal that assigns a number to each file."""
391 self._num_downloads += 1
def prepare_filename(self, info_dict):
    """Generate the output filename."""
    # Work on a copy so the caller's info_dict is not polluted with
    # template-only fields. (The enclosing 'try:' is missing from this view.)
    template_dict = dict(info_dict)

    template_dict['epoch'] = int(time.time())
    autonumber_size = self.params.get('autonumber_size')
    if autonumber_size is None:
        # NOTE(review): the default autonumber width assignment is missing
        # from this view.
    autonumber_templ = u'%0' + str(autonumber_size) + u'd'
    template_dict['autonumber'] = autonumber_templ % self._num_downloads
    if template_dict.get('playlist_index') is not None:
        # Zero-pad so lexical filename order matches playlist order.
        template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']

    # Strip characters unsafe for filenames from every template value.
    sanitize = lambda k, v: sanitize_filename(
        restricted=self.params.get('restrictfilenames'),
    template_dict = dict((k, sanitize(k, v))
                         for k, v in template_dict.items()
    # Any field missing from the dict renders as the literal string 'NA'.
    template_dict = collections.defaultdict(lambda: u'NA', template_dict)

    tmpl = os.path.expanduser(self.params['outtmpl'])
    filename = tmpl % template_dict
    except ValueError as err:
        self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
def _match_entry(self, info_dict):
    """ Returns None iff the file should be downloaded """
    # Each failed check returns a human-readable skip reason string.
    video_title = info_dict.get('title', info_dict.get('id', u'video'))
    if 'title' in info_dict:
        # This can happen when we're just evaluating the playlist
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        # NOTE(review): upstream guards the regex calls with
        # 'if matchtitle:' / 'if rejecttitle:'; those lines are missing
        # from this view.
        if not re.search(matchtitle, title, re.IGNORECASE):
            return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if re.search(rejecttitle, title, re.IGNORECASE):
            return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    date = info_dict.get('upload_date', None)
    # NOTE(review): the 'if date is not None:' guard appears to be missing
    # from this view.
    dateRange = self.params.get('daterange', DateRange())
    if date not in dateRange:
        return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    # View-count limits only apply when the extractor reported a count.
    view_count = info_dict.get('view_count', None)
    if view_count is not None:
        min_views = self.params.get('min_views')
        if min_views is not None and view_count < min_views:
            return u'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
        max_views = self.params.get('max_views')
        if max_views is not None and view_count > max_views:
            return u'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
    age_limit = self.params.get('age_limit')
    if age_limit is not None:
        # Skip when the viewer's declared age is below the video's limit.
        if age_limit < info_dict.get('age_limit', 0):
            return u'Skipping "' + title + '" because it is age restricted'
    if self.in_download_archive(info_dict):
        return u'%s has already been recorded in archive' % video_title
460 def add_extra_info(info_dict, extra_info):
461 '''Set the keys from extra_info in info dict if they are missing'''
462 for key, value in extra_info.items():
463 info_dict.setdefault(key, value)
def extract_info(self, url, download=True, ie_key=None, extra_info={},
    # NOTE(review): the signature's continuation line and the docstring
    # delimiters are missing from this view. Also note extra_info={} is a
    # mutable default; it is only read here, never mutated.
    Returns a list with a dictionary for each video we find.
    If 'download', also downloads the videos.
    extra_info is a dict containing the extra values to add to each result

    # A specific extractor was requested by key; otherwise all registered
    # extractors would be candidates.
    ies = [self.get_info_extractor(ie_key)]

    if not ie.suitable(url):

        self.report_warning(u'The program functionality for this site has been marked as broken, '
                            u'and will probably not work.')

    ie_result = ie.extract(url)
    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        '_type': 'compat_list',
        'entries': ie_result,
    # Record provenance: which extractor produced this and from which URL.
    self.add_extra_info(ie_result,
        'extractor': ie.IE_NAME,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    return self.process_ie_result(ie_result, download, extra_info)
    except ExtractorError as de:  # An error we somewhat expected
        self.report_error(compat_str(de), de.format_traceback())
    except Exception as e:
        # Unexpected error: only swallow it when ignoreerrors is set.
        if self.params.get('ignoreerrors', False):
            self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
    self.report_error(u'no suitable InfoExtractor: %s' % url)
def process_ie_result(self, ie_result, download=True, extra_info={}):
    # NOTE(review): docstring delimiters missing from this view; the bare
    # lines below are the original docstring text.
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).
    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    result_type = ie_result.get('_type', 'video')  # If not given we suppose it's a video, support the default old system
    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        def make_result(embedded_info):
            # Overlay selected fields of the embedded video's info onto a
            # copy of the embedding page's result.
            new_result = ie_result.copy()
            for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                      'entries', 'ie_key', 'duration',
                      'subtitles', 'annotations', 'format',
                      'thumbnail', 'thumbnails'):
                if f in embedded_info:
                    new_result[f] = embedded_info[f]
        new_result = make_result(info)

        # The merge above must have resolved the transparent reference.
        assert new_result.get('_type') != 'url_transparent'
        if new_result.get('_type') == 'compat_list':
            new_result['entries'] = [
                make_result(e) for e in new_result['entries']]

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type == 'playlist':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen(u'[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        # Apply the user's playliststart/playlistend window (1-based).
        n_all_entries = len(ie_result['entries'])
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', None)
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            # NOTE(review): the 'playlistend = None' assignment appears to
            # be missing from this view.

        entries = ie_result['entries'][playliststart:playlistend]
        n_entries = len(entries)

            u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
            (ie_result['extractor'], playlist, n_all_entries, n_entries))

        for i, entry in enumerate(entries, 1):
            self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
            # Per-entry extra info inherited from the playlist result.
                'playlist': playlist,
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],

            # Skip entries filtered out by title/date/view-count/archive rules.
            reason = self._match_entry(entry)
            if reason is not None:
                self.to_screen(u'[download] ' + reason)

            entry_result = self.process_ie_result(entry,
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
    elif result_type == 'compat_list':
        # Old-style list result: stamp provenance onto each entry, then
        # process them individually.
            self.add_extra_info(r,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],

        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
    raise Exception('Invalid result type: %s' % result_type)
def select_format(self, format_spec, available_formats):
    # Pick one format from available_formats according to format_spec
    # ('best' maps to the last entry, 'worst' to the first).
    if format_spec == 'best' or format_spec is None:
        return available_formats[-1]
    elif format_spec == 'worst':
        return available_formats[0]

    # A bare known extension selects by container; anything else is
    # treated as a format_id.
    extensions = [u'mp4', u'flv', u'webm', u'3gp']
    if format_spec in extensions:
        filter_f = lambda f: f['ext'] == format_spec
        # NOTE(review): the 'else:' for the format_id branch is missing
        # from this view.
        filter_f = lambda f: f['format_id'] == format_spec
    matches = list(filter(filter_f, available_formats))
    # NOTE(review): the tail that returns the best match (or None) is
    # missing from this view.
def process_video_result(self, info_dict, download=True):
    # Resolve format selection for a single video result and hand each
    # chosen format to process_info().
    assert info_dict.get('_type', 'video') == 'video'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    # This extractors handle format selection themselves
    if info_dict['extractor'] in [u'Youku']:
        self.process_info(info_dict)

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
        formats = info_dict['formats']

    # We check that all the formats have the format and format_id fields
    for (i, format) in enumerate(formats):
        if format.get('format_id') is None:
            format['format_id'] = compat_str(i)
        if format.get('format') is None:
            # Synthesize a human-readable format description.
            format['format'] = u'{id} - {res}{note}'.format(
                id=format['format_id'],
                res=self.format_resolution(format),
                note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
        # Automatically determine file extension if missing
        if 'ext' not in format:
            format['ext'] = determine_ext(format['url'])

    # format_limit caps quality: drop everything after the limit format.
    format_limit = self.params.get('format_limit', None)
    formats = list(takewhile_inclusive(
        lambda f: f['format_id'] != format_limit, formats

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # wich can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats', None):
        self.list_formats(info_dict)

    req_format = self.params.get('format', 'best')
    if req_format is None:
    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
        # We can accept formats requestd in the format: 34/5/best, we pick
        # the first that is available, starting from left
        req_formats = req_format.split('/')
        for rf in req_formats:
            selected_format = self.select_format(rf, formats)
            if selected_format is not None:
                formats_to_download = [selected_format]
    if not formats_to_download:
        raise ExtractorError(u'requested format not available',

    if len(formats_to_download) > 1:
        self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
    for format in formats_to_download:
        # Each selected format is processed as its own merged info dict.
        new_info = dict(info_dict)
        new_info.update(format)
        self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
def process_info(self, info_dict):
    """Process a single resolved IE result."""

    assert info_dict.get('_type', 'video') == 'video'
    # We increment the download count here to match the previous behaviour.
    self.increment_downloads()

    # Preserve the full title; the working title is truncated for filesystems.
    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + u'...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if not 'format' in info_dict:
        info_dict['format'] = info_dict['ext']

    # Skip entries filtered out by title/date/view-count/archive rules.
    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen(u'[download] ' + reason)

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads > int(max_downloads):
            raise MaxDownloadsReached()

    filename = self.prepare_filename(info_dict)

    # Forced printing: emit the requested fields to stdout.
    if self.params.get('forcetitle', False):
        self.to_stdout(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        self.to_stdout(info_dict['id'])
    if self.params.get('forceurl', False):
        # For RTMP URLs, also include the playpath
        self.to_stdout(info_dict['url'] + info_dict.get('play_path', u''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        self.to_stdout(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        self.to_stdout(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    if self.params.get('forceformat', False):
        self.to_stdout(info_dict['format'])
    if self.params.get('forcejson', False):
        info_dict['_filename'] = filename
        self.to_stdout(json.dumps(info_dict))

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):

    # Make sure the destination directory exists before writing anything.
    dn = os.path.dirname(encodeFilename(filename))
    if dn != '' and not os.path.exists(dn):
    except (OSError, IOError) as err:
        self.report_error(u'unable to create directory ' + compat_str(err))

    if self.params.get('writedescription', False):
        descfn = filename + u'.description'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen(u'[info] Video description is already present')
            self.to_screen(u'[info] Writing video description to: ' + descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(info_dict['description'])
        except (KeyError, TypeError):
            # The extractor provided no (usable) description.
            self.report_warning(u'There\'s no description to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write description file ' + descfn)

    if self.params.get('writeannotations', False):
        annofn = filename + u'.annotations.xml'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen(u'[info] Video annotations are already present')
            self.to_screen(u'[info] Writing video annotations to: ' + annofn)
            with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                annofile.write(info_dict['annotations'])
        except (KeyError, TypeError):
            self.report_warning(u'There are no annotations to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write annotations file: ' + annofn)

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
            sub_filename = subtitles_filename(filename, sub_lang, sub_format)
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                self.to_screen(u'[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
                with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
            except (OSError, IOError):
                # NOTE(review): 'descfn' here looks like a copy-paste slip
                # from the description branch — should presumably name
                # sub_filename; confirm before changing.
                self.report_error(u'Cannot write subtitles file ' + descfn)

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + u'.info.json'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen(u'[info] Video description metadata is already present')
            self.to_screen(u'[info] Writing video description metadata as JSON to: ' + infofn)
            write_json_file(info_dict, encodeFilename(infofn))
        except (OSError, IOError):
            self.report_error(u'Cannot write metadata to JSON file ' + infofn)

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
            thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen(u'[%s] %s: Thumbnail is already present' %
                               (info_dict['extractor'], info_dict['id']))
                self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                               (info_dict['extractor'], info_dict['id']))
                uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                with open(thumb_filename, 'wb') as thumbf:
                    shutil.copyfileobj(uf, thumbf)
                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_filename))
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                # Thumbnail failure is non-fatal; the video download proceeds.
                self.report_warning(u'Unable to download thumbnail "%s": %s' %
                                    (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            # Delegate the actual transfer to the protocol-appropriate
            # downloader, forwarding the registered progress hooks.
            fd = get_suitable_downloader(info_dict)(self, self.params)
            for ph in self._progress_hooks:
                fd.add_progress_hook(ph)
            success = fd.download(filename, info_dict)
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self.report_error(u'unable to download video data: %s' % str(err))
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))

            self.post_process(filename, info_dict)
        except (PostProcessingError) as err:
            self.report_error(u'postprocessing: %s' % str(err))

    # Remember this download so future runs can skip it (--download-archive).
    self.record_download_archive(info_dict)
def download(self, url_list):
    """Download a given list of URLs."""
    # A fixed (template-free) output name can only hold a single file.
    if (len(url_list) > 1 and
            '%' not in self.params['outtmpl']
            and self.params.get('max_downloads') != 1):
        raise SameFileError(self.params['outtmpl'])

    # NOTE(review): the 'for url in url_list:' / 'try:' lines are missing
    # from this view.
    # It also downloads the videos
    self.extract_info(url)
    except UnavailableVideoError:
        self.report_error(u'unable to download video')
    except MaxDownloadsReached:
        self.to_screen(u'[info] Maximum number of downloaded files reached.')

    return self._download_retcode
def download_with_info_file(self, info_filename):
    # Re-run processing from a previously written .info.json file.
    with io.open(info_filename, 'r', encoding='utf-8') as f:
        # NOTE(review): the JSON parse into 'info' is missing from this view.
    self.process_ie_result(info, download=True)
    except DownloadError:
        # The saved info may be stale; retry from the original page URL.
        webpage_url = info.get('webpage_url')
        if webpage_url is not None:
            self.report_warning(u'The info failed to download, trying with "%s"' % webpage_url)
            return self.download([webpage_url])
    return self._download_retcode
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file."""
    # NOTE(review): the copy of ie_info into 'info', the 'keep_video'
    # initialization and the loop over the post-processor chain are
    # missing from this view.
    info['filepath'] = filename
    keep_video_wish, new_info = pp.run(info)
    if keep_video_wish is not None:
        # This PP has an explicit opinion about keeping the original file.
        keep_video = keep_video_wish
    elif keep_video is None:
        # No clear decision yet, let IE decide
        keep_video = keep_video_wish
    except PostProcessingError as e:
        self.report_error(e.msg)
    # Delete the original only if a PP asked for it and -k was not given.
    if keep_video is False and not self.params.get('keepvideo', False):
        self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
        os.remove(encodeFilename(filename))
    except (IOError, OSError):
        self.report_warning(u'Unable to remove downloaded video file')
961 def _make_archive_id(self, info_dict):
962 # Future-proof against any change in case
963 # and backwards compatibility with prior versions
964 extractor = info_dict.get('extractor_key')
965 if extractor is None:
966 if 'id' in info_dict:
967 extractor = info_dict.get('ie_key') # key in a playlist
968 if extractor is None:
969 return None # Incomplete video information
970 return extractor.lower() + u' ' + info_dict['id']
972 def in_download_archive(self, info_dict):
973 fn = self.params.get('download_archive')
977 vid_id = self._make_archive_id(info_dict)
979 return False # Incomplete video information
982 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
983 for line in archive_file:
984 if line.strip() == vid_id:
986 except IOError as ioe:
987 if ioe.errno != errno.ENOENT:
991 def record_download_archive(self, info_dict):
992 fn = self.params.get('download_archive')
995 vid_id = self._make_archive_id(info_dict)
997 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
998 archive_file.write(vid_id + u'\n')
1001 def format_resolution(format, default='unknown'):
1002 if format.get('vcodec') == 'none':
1004 if format.get('resolution') is not None:
1005 return format['resolution']
1006 if format.get('height') is not None:
1007 if format.get('width') is not None:
1008 res = u'%sx%s' % (format['width'], format['height'])
1010 res = u'%sp' % format['height']
1011 elif format.get('width') is not None:
1012 res = u'?x%d' % format['width']
1017 def list_formats(self, info_dict):
1018 def format_note(fdict):
1020 if f.get('ext') in ['f4f', 'f4m']:
1021 res += u'(unsupported) '
1022 if fdict.get('format_note') is not None:
1023 res += fdict['format_note'] + u' '
1024 if fdict.get('tbr') is not None:
1025 res += u'%4dk ' % fdict['tbr']
1026 if (fdict.get('vcodec') is not None and
1027 fdict.get('vcodec') != 'none'):
1028 res += u'%-5s@' % fdict['vcodec']
1029 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1031 if fdict.get('vbr') is not None:
1032 res += u'%4dk' % fdict['vbr']
1033 if fdict.get('acodec') is not None:
1036 res += u'%-5s' % fdict['acodec']
1037 elif fdict.get('abr') is not None:
1041 if fdict.get('abr') is not None:
1042 res += u'@%3dk' % fdict['abr']
1043 if fdict.get('filesize') is not None:
1046 res += format_bytes(fdict['filesize'])
1049 def line(format, idlen=20):
1050 return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
1051 format['format_id'],
1053 self.format_resolution(format),
1054 format_note(format),
1057 formats = info_dict.get('formats', [info_dict])
1058 idlen = max(len(u'format code'),
1059 max(len(f['format_id']) for f in formats))
1060 formats_s = [line(f, idlen) for f in formats]
1061 if len(formats) > 1:
1062 formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
1063 formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
1065 header_line = line({
1066 'format_id': u'format code', 'ext': u'extension',
1067 'resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
1068 self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
1069 (info_dict['id'], header_line, u"\n".join(formats_s)))
1071 def urlopen(self, req):
1072 """ Start an HTTP download """
1073 return self._opener.open(req)
1075 def print_debug_header(self):
1076 if not self.params.get('verbose'):
1078 write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
1080 sp = subprocess.Popen(
1081 ['git', 'rev-parse', '--short', 'HEAD'],
1082 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1083 cwd=os.path.dirname(os.path.abspath(__file__)))
1084 out, err = sp.communicate()
1085 out = out.decode().strip()
1086 if re.match('[0-9a-f]+', out):
1087 write_string(u'[debug] Git HEAD: ' + out + u'\n')
1093 write_string(u'[debug] Python version %s - %s' %
1094 (platform.python_version(), platform_name()) + u'\n')
1097 for handler in self._opener.handlers:
1098 if hasattr(handler, 'proxies'):
1099 proxy_map.update(handler.proxies)
1100 write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
1102 def _setup_opener(self):
1103 timeout_val = self.params.get('socket_timeout')
1104 timeout = 600 if timeout_val is None else float(timeout_val)
1106 opts_cookiefile = self.params.get('cookiefile')
1107 opts_proxy = self.params.get('proxy')
1109 if opts_cookiefile is None:
1110 self.cookiejar = compat_cookiejar.CookieJar()
1112 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1114 if os.access(opts_cookiefile, os.R_OK):
1115 self.cookiejar.load()
1117 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1119 if opts_proxy is not None:
1120 if opts_proxy == '':
1123 proxies = {'http': opts_proxy, 'https': opts_proxy}
1125 proxies = compat_urllib_request.getproxies()
1126 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1127 if 'http' in proxies and 'https' not in proxies:
1128 proxies['https'] = proxies['http']
1129 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1131 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1132 https_handler = make_HTTPS_handler(
1133 self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1134 ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1135 opener = compat_urllib_request.build_opener(
1136 https_handler, proxy_handler, cookie_processor, ydlh)
1137 # Delete the default user-agent header, which would otherwise apply in
1138 # cases where our custom HTTP handler doesn't come into play
1139 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1140 opener.addheaders = []
1141 self._opener = opener
1143 # TODO remove this global modification
1144 compat_urllib_request.install_opener(opener)
1145 socket.setdefaulttimeout(timeout)