2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import
28 compat_urllib_request,
47 UnavailableVideoError,
52 from .extractor import get_info_extractor, gen_extractors
53 from .FileDownloader import FileDownloader
54 from .version import __version__
57 class YoutubeDL(object):
60 YoutubeDL objects are the ones responsible for downloading the
61 actual video file and writing it to disk if the user has requested
62 it, among some other tasks. In most cases there should be one per
63 program. As, given a video URL, the downloader doesn't know how to
64 extract all the needed information, task that InfoExtractors do, it
65 has to pass the URL to one of them.
67 For this, YoutubeDL objects have a method that allows
68 InfoExtractors to be registered in a given order. When it is passed
69 a URL, the YoutubeDL object hands it to the first InfoExtractor it
70 finds that reports being able to handle it. The InfoExtractor extracts
71 all the information about the video or videos the URL refers to, and
72 YoutubeDL processes the extracted information, possibly using a File
73 Downloader to download the video.
75 YoutubeDL objects accept a lot of parameters. In order not to saturate
76 the object constructor with arguments, it receives a dictionary of
77 options instead. These options are available through the params
78 attribute for the InfoExtractors to use. The YoutubeDL also
79 registers itself as the downloader in charge for the InfoExtractors
80 that are added to it, so this is a "mutual registration".
84 username: Username for authentication purposes.
85 password: Password for authentication purposes.
86 videopassword: Password for accessing a video.
87 usenetrc: Use netrc for authentication instead.
88 verbose: Print additional info to stdout.
89 quiet: Do not print messages to stdout.
90 forceurl: Force printing final URL.
91 forcetitle: Force printing title.
92 forceid: Force printing ID.
93 forcethumbnail: Force printing thumbnail URL.
94 forcedescription: Force printing description.
95 forcefilename: Force printing final filename.
96 forcejson: Force printing info_dict as JSON.
97 simulate: Do not download the video files.
98 format: Video format code.
99 format_limit: Highest quality format to try.
100 outtmpl: Template for output names.
101 restrictfilenames: Do not allow "&" and spaces in file names
102 ignoreerrors: Do not stop on download errors.
103 nooverwrites: Prevent overwriting files.
104 playliststart: Playlist item to start at.
105 playlistend: Playlist item to end at.
106 matchtitle: Download only matching titles.
107 rejecttitle: Reject downloads for matching titles.
108 logger: Log messages to a logging.Logger instance.
109 logtostderr: Log messages to stderr instead of stdout.
110 writedescription: Write the video description to a .description file
111 writeinfojson: Write the video metadata to a .info.json file
112 writeannotations: Write the video annotations to a .annotations.xml file
113 writethumbnail: Write the thumbnail image to a file
114 writesubtitles: Write the video subtitles to a file
115 writeautomaticsub: Write the automatic subtitles to a file
116 allsubtitles: Downloads all the subtitles of the video
117 (requires writesubtitles or writeautomaticsub)
118 listsubtitles: Lists all available subtitles for the video
119 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
120 subtitleslangs: List of languages of the subtitles to download
121 keepvideo: Keep the video file after post-processing
122 daterange: A DateRange object, download only if the upload_date is in the range.
123 skip_download: Skip the actual download of the video file
124 cachedir: Location of the cache files in the filesystem.
125 None to disable filesystem cache.
126 noplaylist: Download single video instead of a playlist if in doubt.
127 age_limit: An integer representing the user's age in years.
128 Unsuitable videos for the given age are skipped.
129 downloadarchive: File name of a file where all downloads are recorded.
130 Videos already present in the file are not downloaded
132 cookiefile: File name where cookies should be read from and dumped to.
133 nocheckcertificate:Do not verify SSL certificates
134 proxy: URL of the proxy server to use
136 The following parameters are not used by YoutubeDL itself, they are used by
138 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
139 noresizebuffer, retries, continuedl, noprogress, consoletitle
145 _download_retcode = None
146 _num_downloads = None
149 def __init__(self, params={}):
150 """Create a YoutubeDL object with the given options."""
152 self._ies_instances = {}
154 self._progress_hooks = []
155 self._download_retcode = 0
156 self._num_downloads = 0
157 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
159 if (sys.version_info >= (3,) and sys.platform != 'win32' and
160 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
161 and not params['restrictfilenames']):
162 # On Python 3, the Unicode filesystem API will throw errors (#1474)
164 u'Assuming --restrict-filenames since file system encoding '
165 u'cannot encode all charactes. '
166 u'Set the LC_ALL environment variable to fix this.')
167 params['restrictfilenames'] = True
170 self.fd = FileDownloader(self, self.params)
172 if '%(stitle)s' in self.params.get('outtmpl', ''):
173 self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
177 def add_info_extractor(self, ie):
178 """Add an InfoExtractor object to the end of the list."""
180 self._ies_instances[ie.ie_key()] = ie
181 ie.set_downloader(self)
183 def get_info_extractor(self, ie_key):
185 Get an instance of an IE with name ie_key, it will try to get one from
186 the _ies list, if there's no instance it will create a new one and add
187 it to the extractor list.
189 ie = self._ies_instances.get(ie_key)
191 ie = get_info_extractor(ie_key)()
192 self.add_info_extractor(ie)
def add_default_info_extractors(self):
    """
    Register every InfoExtractor produced by gen_extractors(), in the
    order they are yielded, through add_info_extractor.
    """
    for extractor in gen_extractors():
        self.add_info_extractor(extractor)
202 def add_post_processor(self, pp):
203 """Add a PostProcessor object to the end of the chain."""
205 pp.set_downloader(self)
207 def to_screen(self, message, skip_eol=False):
208 """Print message to stdout if not in quiet mode."""
209 if self.params.get('logger'):
210 self.params['logger'].debug(message)
211 elif not self.params.get('quiet', False):
212 terminator = [u'\n', u''][skip_eol]
213 output = message + terminator
214 write_string(output, self._screen_file)
216 def to_stderr(self, message):
217 """Print message to stderr."""
218 assert type(message) == type(u'')
219 if self.params.get('logger'):
220 self.params['logger'].error(message)
222 output = message + u'\n'
223 if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
224 output = output.encode(preferredencoding())
225 sys.stderr.write(output)
227 def to_console_title(self, message):
228 if not self.params.get('consoletitle', False):
230 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
231 # c_wchar_p() might not be necessary if `message` is
232 # already of type unicode()
233 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
234 elif 'TERM' in os.environ:
235 write_string(u'\033]0;%s\007' % message, self._screen_file)
237 def save_console_title(self):
238 if not self.params.get('consoletitle', False):
240 if 'TERM' in os.environ:
241 # Save the title on stack
242 write_string(u'\033[22;0t', self._screen_file)
244 def restore_console_title(self):
245 if not self.params.get('consoletitle', False):
247 if 'TERM' in os.environ:
248 # Restore the title from stack
249 write_string(u'\033[23;0t', self._screen_file)
252 self.save_console_title()
255 def __exit__(self, *args):
256 self.restore_console_title()
258 if self.params.get('cookiefile') is not None:
259 self.cookiejar.save()
261 def fixed_template(self):
262 """Checks if the output template is fixed."""
263 return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
265 def trouble(self, message=None, tb=None):
266 """Determine action to take when a download problem appears.
268 Depending on if the downloader has been configured to ignore
269 download errors or not, this method may throw an exception or
270 not when errors are found, after printing the message.
272 tb, if given, is additional traceback information.
274 if message is not None:
275 self.to_stderr(message)
276 if self.params.get('verbose'):
278 if sys.exc_info()[0]: # if .trouble has been called from an except block
280 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
281 tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
282 tb += compat_str(traceback.format_exc())
284 tb_data = traceback.format_list(traceback.extract_stack())
285 tb = u''.join(tb_data)
287 if not self.params.get('ignoreerrors', False):
288 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
289 exc_info = sys.exc_info()[1].exc_info
291 exc_info = sys.exc_info()
292 raise DownloadError(message, exc_info)
293 self._download_retcode = 1
295 def report_warning(self, message):
297 Print the message to stderr, it will be prefixed with 'WARNING:'
298 If stderr is a tty file the 'WARNING:' will be colored
300 if sys.stderr.isatty() and os.name != 'nt':
301 _msg_header = u'\033[0;33mWARNING:\033[0m'
303 _msg_header = u'WARNING:'
304 warning_message = u'%s %s' % (_msg_header, message)
305 self.to_stderr(warning_message)
307 def report_error(self, message, tb=None):
309 Do the same as trouble, but prefixes the message with 'ERROR:', colored
310 in red if stderr is a tty file.
312 if sys.stderr.isatty() and os.name != 'nt':
313 _msg_header = u'\033[0;31mERROR:\033[0m'
315 _msg_header = u'ERROR:'
316 error_message = u'%s %s' % (_msg_header, message)
317 self.trouble(error_message, tb)
319 def report_writedescription(self, descfn):
320 """ Report that the description file is being written """
321 self.to_screen(u'[info] Writing video description to: ' + descfn)
323 def report_writesubtitles(self, sub_filename):
324 """ Report that the subtitles file is being written """
325 self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
327 def report_writeinfojson(self, infofn):
328 """ Report that the metadata file has been written """
329 self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
331 def report_writeannotations(self, annofn):
332 """ Report that the annotations file has been written. """
333 self.to_screen(u'[info] Writing video annotations to: ' + annofn)
335 def report_file_already_downloaded(self, file_name):
336 """Report file has already been fully downloaded."""
338 self.to_screen(u'[download] %s has already been downloaded' % file_name)
339 except UnicodeEncodeError:
340 self.to_screen(u'[download] The file has already been downloaded')
342 def increment_downloads(self):
343 """Increment the ordinal that assigns a number to each file."""
344 self._num_downloads += 1
346 def prepare_filename(self, info_dict):
347 """Generate the output filename."""
349 template_dict = dict(info_dict)
351 template_dict['epoch'] = int(time.time())
352 autonumber_size = self.params.get('autonumber_size')
353 if autonumber_size is None:
355 autonumber_templ = u'%0' + str(autonumber_size) + u'd'
356 template_dict['autonumber'] = autonumber_templ % self._num_downloads
357 if template_dict.get('playlist_index') is not None:
358 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
360 sanitize = lambda k, v: sanitize_filename(
361 u'NA' if v is None else compat_str(v),
362 restricted=self.params.get('restrictfilenames'),
364 template_dict = dict((k, sanitize(k, v))
365 for k, v in template_dict.items())
367 tmpl = os.path.expanduser(self.params['outtmpl'])
368 filename = tmpl % template_dict
370 except KeyError as err:
371 self.report_error(u'Erroneous output template')
373 except ValueError as err:
374 self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
377 def _match_entry(self, info_dict):
378 """ Returns None iff the file should be downloaded """
380 if 'title' in info_dict:
381 # This can happen when we're just evaluating the playlist
382 title = info_dict['title']
383 matchtitle = self.params.get('matchtitle', False)
385 if not re.search(matchtitle, title, re.IGNORECASE):
386 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
387 rejecttitle = self.params.get('rejecttitle', False)
389 if re.search(rejecttitle, title, re.IGNORECASE):
390 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
391 date = info_dict.get('upload_date', None)
393 dateRange = self.params.get('daterange', DateRange())
394 if date not in dateRange:
395 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
396 age_limit = self.params.get('age_limit')
397 if age_limit is not None:
398 if age_limit < info_dict.get('age_limit', 0):
399 return u'Skipping "' + title + '" because it is age restricted'
400 if self.in_download_archive(info_dict):
401 return (u'%s has already been recorded in archive'
402 % info_dict.get('title', info_dict.get('id', u'video')))
def add_extra_info(info_dict, extra_info):
    '''Copy into info_dict every key of extra_info it does not have yet.

    Keys already present in info_dict keep their current value, even
    when that value is None. info_dict is modified in place.
    '''
    for key in extra_info:
        if key not in info_dict:
            info_dict[key] = extra_info[key]
411 def extract_info(self, url, download=True, ie_key=None, extra_info={}):
413 Returns a list with a dictionary for each video we find.
414 If 'download', also downloads the videos.
415 extra_info is a dict containing the extra values to add to each result
419 ies = [self.get_info_extractor(ie_key)]
424 if not ie.suitable(url):
428 self.report_warning(u'The program functionality for this site has been marked as broken, '
429 u'and will probably not work.')
432 ie_result = ie.extract(url)
433 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
435 if isinstance(ie_result, list):
436 # Backwards compatibility: old IE result format
438 '_type': 'compat_list',
439 'entries': ie_result,
441 self.add_extra_info(ie_result,
443 'extractor': ie.IE_NAME,
445 'extractor_key': ie.ie_key(),
447 return self.process_ie_result(ie_result, download, extra_info)
448 except ExtractorError as de: # An error we somewhat expected
449 self.report_error(compat_str(de), de.format_traceback())
451 except Exception as e:
452 if self.params.get('ignoreerrors', False):
453 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
458 self.report_error(u'no suitable InfoExtractor: %s' % url)
460 def process_ie_result(self, ie_result, download=True, extra_info={}):
462 Take the result of the ie(may be modified) and resolve all unresolved
463 references (URLs, playlist items).
465 It will also download the videos if 'download'.
466 Returns the resolved ie_result.
469 result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
470 if result_type == 'video':
471 self.add_extra_info(ie_result, extra_info)
472 return self.process_video_result(ie_result, download=download)
473 elif result_type == 'url':
474 # We have to add extra_info to the results because it may be
475 # contained in a playlist
476 return self.extract_info(ie_result['url'],
478 ie_key=ie_result.get('ie_key'),
479 extra_info=extra_info)
480 elif result_type == 'playlist':
482 # We process each entry in the playlist
483 playlist = ie_result.get('title', None) or ie_result.get('id', None)
484 self.to_screen(u'[download] Downloading playlist: %s' % playlist)
486 playlist_results = []
488 n_all_entries = len(ie_result['entries'])
489 playliststart = self.params.get('playliststart', 1) - 1
490 playlistend = self.params.get('playlistend', -1)
492 if playlistend == -1:
493 entries = ie_result['entries'][playliststart:]
495 entries = ie_result['entries'][playliststart:playlistend]
497 n_entries = len(entries)
499 self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
500 (ie_result['extractor'], playlist, n_all_entries, n_entries))
502 for i, entry in enumerate(entries, 1):
503 self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
505 'playlist': playlist,
506 'playlist_index': i + playliststart,
507 'extractor': ie_result['extractor'],
508 'webpage_url': ie_result['webpage_url'],
509 'extractor_key': ie_result['extractor_key'],
512 reason = self._match_entry(entry)
513 if reason is not None:
514 self.to_screen(u'[download] ' + reason)
517 entry_result = self.process_ie_result(entry,
520 playlist_results.append(entry_result)
521 ie_result['entries'] = playlist_results
523 elif result_type == 'compat_list':
525 self.add_extra_info(r,
527 'extractor': ie_result['extractor'],
528 'webpage_url': ie_result['webpage_url'],
529 'extractor_key': ie_result['extractor_key'],
532 ie_result['entries'] = [
533 self.process_ie_result(_fixup(r), download, extra_info)
534 for r in ie_result['entries']
538 raise Exception('Invalid result type: %s' % result_type)
540 def select_format(self, format_spec, available_formats):
541 if format_spec == 'best' or format_spec is None:
542 return available_formats[-1]
543 elif format_spec == 'worst':
544 return available_formats[0]
546 extensions = [u'mp4', u'flv', u'webm', u'3gp']
547 if format_spec in extensions:
548 filter_f = lambda f: f['ext'] == format_spec
550 filter_f = lambda f: f['format_id'] == format_spec
551 matches = list(filter(filter_f, available_formats))
556 def process_video_result(self, info_dict, download=True):
557 assert info_dict.get('_type', 'video') == 'video'
559 if 'playlist' not in info_dict:
560 # It isn't part of a playlist
561 info_dict['playlist'] = None
562 info_dict['playlist_index'] = None
564 # These extractors handle format selection themselves
565 if info_dict['extractor'] in [u'youtube', u'Youku']:
567 self.process_info(info_dict)
570 # We now pick which formats have to be downloaded
571 if info_dict.get('formats') is None:
572 # There's only one format available
573 formats = [info_dict]
575 formats = info_dict['formats']
577 # We check that all the formats have the format and format_id fields
578 for (i, format) in enumerate(formats):
579 if format.get('format_id') is None:
580 format['format_id'] = compat_str(i)
581 if format.get('format') is None:
582 format['format'] = u'{id} - {res}{note}'.format(
583 id=format['format_id'],
584 res=self.format_resolution(format),
585 note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
587 # Automatically determine file extension if missing
588 if 'ext' not in format:
589 format['ext'] = determine_ext(format['url'])
591 if self.params.get('listformats', None):
592 self.list_formats(info_dict)
595 format_limit = self.params.get('format_limit', None)
597 formats = list(takewhile_inclusive(
598 lambda f: f['format_id'] != format_limit, formats
600 if self.params.get('prefer_free_formats'):
601 def _free_formats_key(f):
603 ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
606 # We only compare the extension if they have the same height and width
607 return (f.get('height'), f.get('width'), ext_ord)
608 formats = sorted(formats, key=_free_formats_key)
610 req_format = self.params.get('format', 'best')
611 if req_format is None:
613 formats_to_download = []
614 # The -1 is for supporting YoutubeIE
615 if req_format in ('-1', 'all'):
616 formats_to_download = formats
618 # We can accept formats requested in the format: 34/5/best, we pick
619 # the first that is available, starting from left
620 req_formats = req_format.split('/')
621 for rf in req_formats:
622 selected_format = self.select_format(rf, formats)
623 if selected_format is not None:
624 formats_to_download = [selected_format]
626 if not formats_to_download:
627 raise ExtractorError(u'requested format not available',
631 if len(formats_to_download) > 1:
632 self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
633 for format in formats_to_download:
634 new_info = dict(info_dict)
635 new_info.update(format)
636 self.process_info(new_info)
637 # We update the info dict with the best quality format (backwards compatibility)
638 info_dict.update(formats_to_download[-1])
641 def process_info(self, info_dict):
642 """Process a single resolved IE result."""
644 assert info_dict.get('_type', 'video') == 'video'
645 # We increment the download count here to match the previous behaviour.
646 self.increment_downloads()
648 info_dict['fulltitle'] = info_dict['title']
649 if len(info_dict['title']) > 200:
650 info_dict['title'] = info_dict['title'][:197] + u'...'
652 # Keep for backwards compatibility
653 info_dict['stitle'] = info_dict['title']
655 if not 'format' in info_dict:
656 info_dict['format'] = info_dict['ext']
658 reason = self._match_entry(info_dict)
659 if reason is not None:
660 self.to_screen(u'[download] ' + reason)
663 max_downloads = self.params.get('max_downloads')
664 if max_downloads is not None:
665 if self._num_downloads > int(max_downloads):
666 raise MaxDownloadsReached()
668 filename = self.prepare_filename(info_dict)
671 if self.params.get('forcetitle', False):
672 compat_print(info_dict['fulltitle'])
673 if self.params.get('forceid', False):
674 compat_print(info_dict['id'])
675 if self.params.get('forceurl', False):
676 # For RTMP URLs, also include the playpath
677 compat_print(info_dict['url'] + info_dict.get('play_path', u''))
678 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
679 compat_print(info_dict['thumbnail'])
680 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
681 compat_print(info_dict['description'])
682 if self.params.get('forcefilename', False) and filename is not None:
683 compat_print(filename)
684 if self.params.get('forceformat', False):
685 compat_print(info_dict['format'])
686 if self.params.get('forcejson', False):
687 compat_print(json.dumps(info_dict))
689 # Do nothing else if in simulate mode
690 if self.params.get('simulate', False):
697 dn = os.path.dirname(encodeFilename(filename))
698 if dn != '' and not os.path.exists(dn):
700 except (OSError, IOError) as err:
701 self.report_error(u'unable to create directory ' + compat_str(err))
704 if self.params.get('writedescription', False):
706 descfn = filename + u'.description'
707 self.report_writedescription(descfn)
708 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
709 descfile.write(info_dict['description'])
710 except (KeyError, TypeError):
711 self.report_warning(u'There\'s no description to write.')
712 except (OSError, IOError):
713 self.report_error(u'Cannot write description file ' + descfn)
716 if self.params.get('writeannotations', False):
718 annofn = filename + u'.annotations.xml'
719 self.report_writeannotations(annofn)
720 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
721 annofile.write(info_dict['annotations'])
722 except (KeyError, TypeError):
723 self.report_warning(u'There are no annotations to write.')
724 except (OSError, IOError):
725 self.report_error(u'Cannot write annotations file: ' + annofn)
728 subtitles_are_requested = any([self.params.get('writesubtitles', False),
729 self.params.get('writeautomaticsub')])
731 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
732 # subtitles download errors are already managed as troubles in relevant IE
733 # that way it will silently go on when used with unsupporting IE
734 subtitles = info_dict['subtitles']
735 sub_format = self.params.get('subtitlesformat', 'srt')
736 for sub_lang in subtitles.keys():
737 sub = subtitles[sub_lang]
741 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
742 self.report_writesubtitles(sub_filename)
743 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
745 except (OSError, IOError):
746 self.report_error(u'Cannot write subtitles file ' + descfn)
749 if self.params.get('writeinfojson', False):
750 infofn = os.path.splitext(filename)[0] + u'.info.json'
751 self.report_writeinfojson(infofn)
753 json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
754 write_json_file(json_info_dict, encodeFilename(infofn))
755 except (OSError, IOError):
756 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
759 if self.params.get('writethumbnail', False):
760 if info_dict.get('thumbnail') is not None:
761 thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
762 thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
763 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
764 (info_dict['extractor'], info_dict['id']))
766 uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
767 with open(thumb_filename, 'wb') as thumbf:
768 shutil.copyfileobj(uf, thumbf)
769 self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
770 (info_dict['extractor'], info_dict['id'], thumb_filename))
771 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
772 self.report_warning(u'Unable to download thumbnail "%s": %s' %
773 (info_dict['thumbnail'], compat_str(err)))
775 if not self.params.get('skip_download', False):
776 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
780 success = self.fd._do_download(filename, info_dict)
781 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
782 self.report_error(u'unable to download video data: %s' % str(err))
784 except (OSError, IOError) as err:
785 raise UnavailableVideoError(err)
786 except (ContentTooShortError, ) as err:
787 self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
792 self.post_process(filename, info_dict)
793 except (PostProcessingError) as err:
794 self.report_error(u'postprocessing: %s' % str(err))
797 self.record_download_archive(info_dict)
799 def download(self, url_list):
800 """Download a given list of URLs."""
801 if len(url_list) > 1 and self.fixed_template():
802 raise SameFileError(self.params['outtmpl'])
806 #It also downloads the videos
807 self.extract_info(url)
808 except UnavailableVideoError:
809 self.report_error(u'unable to download video')
810 except MaxDownloadsReached:
811 self.to_screen(u'[info] Maximum number of downloaded files reached.')
814 return self._download_retcode
816 def post_process(self, filename, ie_info):
817 """Run all the postprocessors on the given file."""
819 info['filepath'] = filename
823 keep_video_wish, new_info = pp.run(info)
824 if keep_video_wish is not None:
826 keep_video = keep_video_wish
827 elif keep_video is None:
828 # No clear decision yet, let IE decide
829 keep_video = keep_video_wish
830 except PostProcessingError as e:
831 self.report_error(e.msg)
832 if keep_video is False and not self.params.get('keepvideo', False):
834 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
835 os.remove(encodeFilename(filename))
836 except (IOError, OSError):
837 self.report_warning(u'Unable to remove downloaded video file')
839 def _make_archive_id(self, info_dict):
840 # Future-proof against any change in case
841 # and backwards compatibility with prior versions
842 extractor = info_dict.get('extractor')
843 if extractor is None:
844 if 'id' in info_dict:
845 extractor = info_dict.get('ie_key') # key in a playlist
846 if extractor is None:
847 return None # Incomplete video information
848 return extractor.lower() + u' ' + info_dict['id']
850 def in_download_archive(self, info_dict):
851 fn = self.params.get('download_archive')
855 vid_id = self._make_archive_id(info_dict)
857 return False # Incomplete video information
860 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
861 for line in archive_file:
862 if line.strip() == vid_id:
864 except IOError as ioe:
865 if ioe.errno != errno.ENOENT:
869 def record_download_archive(self, info_dict):
870 fn = self.params.get('download_archive')
873 vid_id = self._make_archive_id(info_dict)
875 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
876 archive_file.write(vid_id + u'\n')
879 def format_resolution(format, default='unknown'):
880 if format.get('_resolution') is not None:
881 return format['_resolution']
882 if format.get('height') is not None:
883 if format.get('width') is not None:
884 res = u'%sx%s' % (format['width'], format['height'])
886 res = u'%sp' % format['height']
891 def list_formats(self, info_dict):
892 def format_note(fdict):
894 if fdict.get('format_note') is not None:
895 res += fdict['format_note'] + u' '
896 if fdict.get('vcodec') is not None:
897 res += u'%-5s' % fdict['vcodec']
898 elif fdict.get('vbr') is not None:
900 if fdict.get('vbr') is not None:
901 res += u'@%4dk' % fdict['vbr']
902 if fdict.get('acodec') is not None:
905 res += u'%-5s' % fdict['acodec']
906 elif fdict.get('abr') is not None:
910 if fdict.get('abr') is not None:
911 res += u'@%3dk' % fdict['abr']
912 if fdict.get('filesize') is not None:
915 res += format_bytes(fdict['filesize'])
918 def line(format, idlen=20):
919 return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
922 self.format_resolution(format),
926 formats = info_dict.get('formats', [info_dict])
927 idlen = max(len(u'format code'),
928 max(len(f['format_id']) for f in formats))
929 formats_s = [line(f, idlen) for f in formats]
931 formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
932 formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
935 'format_id': u'format code', 'ext': u'extension',
936 '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
937 self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
938 (info_dict['id'], header_line, u"\n".join(formats_s)))
940 def urlopen(self, req):
941 """ Start an HTTP download """
942 return self._opener.open(req)
944 def print_debug_header(self):
945 if not self.params.get('verbose'):
947 write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
949 sp = subprocess.Popen(
950 ['git', 'rev-parse', '--short', 'HEAD'],
951 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
952 cwd=os.path.dirname(os.path.abspath(__file__)))
953 out, err = sp.communicate()
954 out = out.decode().strip()
955 if re.match('[0-9a-f]+', out):
956 write_string(u'[debug] Git HEAD: ' + out + u'\n')
962 write_string(u'[debug] Python version %s - %s' %
963 (platform.python_version(), platform_name()) + u'\n')
966 for handler in self._opener.handlers:
967 if hasattr(handler, 'proxies'):
968 proxy_map.update(handler.proxies)
969 write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
971 def _setup_opener(self, timeout=300):
972 opts_cookiefile = self.params.get('cookiefile')
973 opts_proxy = self.params.get('proxy')
975 if opts_cookiefile is None:
976 self.cookiejar = compat_cookiejar.CookieJar()
978 self.cookiejar = compat_cookiejar.MozillaCookieJar(
980 if os.access(opts_cookiefile, os.R_OK):
981 self.cookiejar.load()
983 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
985 if opts_proxy is not None:
989 proxies = {'http': opts_proxy, 'https': opts_proxy}
991 proxies = compat_urllib_request.getproxies()
992 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
993 if 'http' in proxies and 'https' not in proxies:
994 proxies['https'] = proxies['http']
995 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
996 https_handler = make_HTTPS_handler(
997 self.params.get('nocheckcertificate', False))
998 opener = compat_urllib_request.build_opener(
999 https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
1000 # Delete the default user-agent header, which would otherwise apply in
1001 # cases where our custom HTTP handler doesn't come into play
1002 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1003 opener.addheaders = []
1004 self._opener = opener
1006 # TODO remove this global modification
1007 compat_urllib_request.install_opener(opener)
1008 socket.setdefaulttimeout(timeout)