From: Philipp Hagemeister Date: Sun, 5 Jan 2014 02:16:19 +0000 (+0100) Subject: Merge remote-tracking branch 'rzhxeo/blip2' X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=commitdiff_plain;h=67c20aebb77ead363fc5386f073cdc64fbd451ce;hp=531147dd5e8ed96671186cca9cd40e954c249366;p=youtube-dl.git Merge remote-tracking branch 'rzhxeo/blip2' --- diff --git a/README.md b/README.md index 91e18e372..0070617d4 100644 --- a/README.md +++ b/README.md @@ -34,9 +34,11 @@ which means you can modify it, redistribute it or use it however you like. empty string (--proxy "") for direct connection --no-check-certificate Suppress HTTPS certificate validation. --cache-dir DIR Location in the filesystem where youtube-dl can - store downloaded information permanently. By + store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache - /youtube-dl . + /youtube-dl . At the moment, only YouTube player + files (for videos with obfuscated signatures) are + cached, but that may change. --no-cache-dir Disable filesystem caching --bidi-workaround Work around terminals that lack bidirectional text support. Requires bidiv or fribidi @@ -335,3 +337,7 @@ In particular, every site support request issue should only pertain to services ### Is anyone going to need the feature? Only post features that you (or an incapicated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them. + +### Is your question about youtube-dl? + +It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different or even the reporter's own application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug. diff --git a/setup.py b/setup.py index 653ca9a73..bf7e35e3e 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from __future__ import print_function +from __future__ import print_function, unicode_literals import pkg_resources import sys diff --git a/test/test_unicode_literals.py b/test/test_unicode_literals.py new file mode 100644 index 000000000..94497054a --- /dev/null +++ b/test/test_unicode_literals.py @@ -0,0 +1,40 @@ +from __future__ import unicode_literals + +import io +import os +import re +import unittest + +rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + + +class TestUnicodeLiterals(unittest.TestCase): + def test_all_files(self): + print('Skipping this test (not yet fully implemented)') + return + + for dirpath, _, filenames in os.walk(rootDir): + for basename in filenames: + if not basename.endswith('.py'): + continue + fn = os.path.join(dirpath, basename) + with io.open(fn, encoding='utf-8') as inf: + code = inf.read() + + if "'" not in code and '"' not in code: + continue + imps = 'from __future__ import unicode_literals' + self.assertTrue( + imps in code, + ' %s missing in %s' % (imps, fn)) + + m = re.search(r'(?<=\s)u[\'"](?!\)|,|$)', code) + if m is not None: + self.assertTrue( + m is None, + 'u present in %s, around %s' % ( + fn, code[m.start() - 10:m.end() + 10])) + + +if __name__ == '__main__': + unittest.main() diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py index 69aedf87a..f6be275ff 100644 --- a/youtube_dl/PostProcessor.py +++ b/youtube_dl/PostProcessor.py @@ -10,6 +10,7 @@ from .utils import ( PostProcessingError, shell_quote, subtitles_filename, + prepend_extension, ) @@ -84,10 +85,10 @@ class FFmpegPostProcessor(PostProcessor): files_cmd = [] for path in input_paths: - files_cmd.extend(['-i', encodeFilename(path)]) + files_cmd.extend(['-i', encodeFilename(path, True)]) cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y'] + files_cmd + opts + - [encodeFilename(self._ffmpeg_filename_argument(out_path))]) + [encodeFilename(self._ffmpeg_filename_argument(out_path), True)]) if self._downloader.params.get('verbose', False): self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd)) @@ -120,7 +121,10 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): if not self._exes['ffprobe'] and not self._exes['avprobe']: raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.') try: - cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))] + cmd = [ + self._exes['avprobe'] or self._exes['ffprobe'], + '-show_streams', + encodeFilename(self._ffmpeg_filename_argument(path), True)] handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE) output = handle.communicate()[0] if handle.wait() != 0: @@ -496,16 +500,22 @@ class FFmpegMetadataPP(FFmpegPostProcessor): return True, info filename = info['filepath'] - ext = os.path.splitext(filename)[1][1:] - temp_filename = filename + u'.temp' + temp_filename = prepend_extension(filename, 'temp') options = ['-c', 'copy'] for (name, value) in metadata.items(): options.extend(['-metadata', '%s=%s' % (name, value)]) - options.extend(['-f', ext]) self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename) self.run_ffmpeg(filename, temp_filename, options) os.remove(encodeFilename(filename)) os.rename(encodeFilename(temp_filename), encodeFilename(filename)) return True, info + + +class FFmpegMergerPP(FFmpegPostProcessor): + def run(self, info): + filename = info['filepath'] + args = ['-c', 'copy'] + self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args) + return True, info diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 08037deda..3786799da 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from __future__ import absolute_import +from __future__ import absolute_import, unicode_literals import collections import errno @@ -51,9 +51,11 @@ from .utils import ( write_json_file, write_string, YoutubeDLHandler, + prepend_extension, ) from .extractor import get_info_extractor, gen_extractors from .downloader import get_suitable_downloader +from .PostProcessor import FFmpegMergerPP from .version import __version__ @@ -200,7 +202,7 @@ class YoutubeDL(object): self._output_channel = os.fdopen(master, 'rb') except OSError as ose: if ose.errno == 2: - self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.') + self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.') else: raise @@ -209,13 +211,13 @@ class YoutubeDL(object): and not params['restrictfilenames']): # On Python 3, the Unicode filesystem API will throw errors (#1474) self.report_warning( - u'Assuming --restrict-filenames since file system encoding ' - u'cannot encode all charactes. ' - u'Set the LC_ALL environment variable to fix this.') + 'Assuming --restrict-filenames since file system encoding ' + 'cannot encode all charactes. ' + 'Set the LC_ALL environment variable to fix this.') self.params['restrictfilenames'] = True if '%(stitle)s' in self.params.get('outtmpl', ''): - self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.') + self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.') self._setup_opener() @@ -258,13 +260,13 @@ class YoutubeDL(object): return message assert hasattr(self, '_output_process') - assert type(message) == type(u'') - line_count = message.count(u'\n') + 1 - self._output_process.stdin.write((message + u'\n').encode('utf-8')) + assert type(message) == type('') + line_count = message.count('\n') + 1 + self._output_process.stdin.write((message + '\n').encode('utf-8')) self._output_process.stdin.flush() - res = u''.join(self._output_channel.readline().decode('utf-8') + res = ''.join(self._output_channel.readline().decode('utf-8') for _ in range(line_count)) - return res[:-len(u'\n')] + return res[:-len('\n')] def to_screen(self, message, skip_eol=False): """Print message to stdout if not in quiet mode.""" @@ -276,19 +278,19 @@ class YoutubeDL(object): self.params['logger'].debug(message) elif not check_quiet or not self.params.get('quiet', False): message = self._bidi_workaround(message) - terminator = [u'\n', u''][skip_eol] + terminator = ['\n', ''][skip_eol] output = message + terminator write_string(output, self._screen_file) def to_stderr(self, message): """Print message to stderr.""" - assert type(message) == type(u'') + assert type(message) == type('') if self.params.get('logger'): self.params['logger'].error(message) else: message = self._bidi_workaround(message) - output = message + u'\n' + output = message + '\n' write_string(output, self._err_file) def to_console_title(self, message): @@ -299,21 +301,21 @@ class YoutubeDL(object): # already of type unicode() ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message)) elif 'TERM' in os.environ: - write_string(u'\033]0;%s\007' % message, self._screen_file) + write_string('\033]0;%s\007' % message, self._screen_file) def save_console_title(self): if not self.params.get('consoletitle', False): return if 'TERM' in os.environ: # Save the title on stack - write_string(u'\033[22;0t', self._screen_file) + write_string('\033[22;0t', self._screen_file) def restore_console_title(self): if not self.params.get('consoletitle', False): return if 'TERM' in os.environ: # Restore the title from stack - write_string(u'\033[23;0t', self._screen_file) + write_string('\033[23;0t', self._screen_file) def __enter__(self): self.save_console_title() @@ -339,13 +341,13 @@ class YoutubeDL(object): if self.params.get('verbose'): if tb is None: if sys.exc_info()[0]: # if .trouble has been called from an except block - tb = u'' + tb = '' if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: - tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info)) + tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info)) tb += compat_str(traceback.format_exc()) else: tb_data = traceback.format_list(traceback.extract_stack()) - tb = u''.join(tb_data) + tb = ''.join(tb_data) self.to_stderr(tb) if not self.params.get('ignoreerrors', False): if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: @@ -361,10 +363,10 @@ class YoutubeDL(object): If stderr is a tty file the 'WARNING:' will be colored ''' if self._err_file.isatty() and os.name != 'nt': - _msg_header = u'\033[0;33mWARNING:\033[0m' + _msg_header = '\033[0;33mWARNING:\033[0m' else: - _msg_header = u'WARNING:' - warning_message = u'%s %s' % (_msg_header, message) + _msg_header = 'WARNING:' + warning_message = '%s %s' % (_msg_header, message) self.to_stderr(warning_message) def report_error(self, message, tb=None): @@ -373,18 +375,18 @@ class YoutubeDL(object): in red if stderr is a tty file. ''' if self._err_file.isatty() and os.name != 'nt': - _msg_header = u'\033[0;31mERROR:\033[0m' + _msg_header = '\033[0;31mERROR:\033[0m' else: - _msg_header = u'ERROR:' - error_message = u'%s %s' % (_msg_header, message) + _msg_header = 'ERROR:' + error_message = '%s %s' % (_msg_header, message) self.trouble(error_message, tb) def report_file_already_downloaded(self, file_name): """Report file has already been fully downloaded.""" try: - self.to_screen(u'[download] %s has already been downloaded' % file_name) + self.to_screen('[download] %s has already been downloaded' % file_name) except UnicodeEncodeError: - self.to_screen(u'[download] The file has already been downloaded') + self.to_screen('[download] The file has already been downloaded') def increment_downloads(self): """Increment the ordinal that assigns a number to each file.""" @@ -399,61 +401,61 @@ class YoutubeDL(object): autonumber_size = self.params.get('autonumber_size') if autonumber_size is None: autonumber_size = 5 - autonumber_templ = u'%0' + str(autonumber_size) + u'd' + autonumber_templ = '%0' + str(autonumber_size) + 'd' template_dict['autonumber'] = autonumber_templ % self._num_downloads if template_dict.get('playlist_index') is not None: - template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index'] + template_dict['playlist_index'] = '%05d' % template_dict['playlist_index'] sanitize = lambda k, v: sanitize_filename( compat_str(v), restricted=self.params.get('restrictfilenames'), - is_id=(k == u'id')) + is_id=(k == 'id')) template_dict = dict((k, sanitize(k, v)) for k, v in template_dict.items() if v is not None) - template_dict = collections.defaultdict(lambda: u'NA', template_dict) + template_dict = collections.defaultdict(lambda: 'NA', template_dict) tmpl = os.path.expanduser(self.params['outtmpl']) filename = tmpl % template_dict return filename except ValueError as err: - self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')') + self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')') return None def _match_entry(self, info_dict): """ Returns None iff the file should be downloaded """ - video_title = info_dict.get('title', info_dict.get('id', u'video')) + video_title = info_dict.get('title', info_dict.get('id', 'video')) if 'title' in info_dict: # This can happen when we're just evaluating the playlist title = info_dict['title'] matchtitle = self.params.get('matchtitle', False) if matchtitle: if not re.search(matchtitle, title, re.IGNORECASE): - return u'"' + title + '" title did not match pattern "' + matchtitle + '"' + return '"' + title + '" title did not match pattern "' + matchtitle + '"' rejecttitle = self.params.get('rejecttitle', False) if rejecttitle: if re.search(rejecttitle, title, re.IGNORECASE): - return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' + return '"' + title + '" title matched reject pattern "' + rejecttitle + '"' date = info_dict.get('upload_date', None) if date is not None: dateRange = self.params.get('daterange', DateRange()) if date not in dateRange: - return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) + return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) view_count = info_dict.get('view_count', None) if view_count is not None: min_views = self.params.get('min_views') if min_views is not None and view_count < min_views: - return u'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views) + return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views) max_views = self.params.get('max_views') if max_views is not None and view_count > max_views: - return u'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views) + return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views) age_limit = self.params.get('age_limit') if age_limit is not None: if age_limit < info_dict.get('age_limit', 0): - return u'Skipping "' + title + '" because it is age restricted' + return 'Skipping "' + title + '" because it is age restricted' if self.in_download_archive(info_dict): - return u'%s has already been recorded in archive' % video_title + return '%s has already been recorded in archive' % video_title return None @staticmethod @@ -480,8 +482,8 @@ class YoutubeDL(object): continue if not ie.working(): - self.report_warning(u'The program functionality for this site has been marked as broken, ' - u'and will probably not work.') + self.report_warning('The program functionality for this site has been marked as broken, ' + 'and will probably not work.') try: ie_result = ie.extract(url) @@ -514,7 +516,7 @@ class YoutubeDL(object): else: raise else: - self.report_error(u'no suitable InfoExtractor: %s' % url) + self.report_error('no suitable InfoExtractor: %s' % url) def process_ie_result(self, ie_result, download=True, extra_info={}): """ @@ -565,7 +567,7 @@ class YoutubeDL(object): elif result_type == 'playlist': # We process each entry in the playlist playlist = ie_result.get('title', None) or ie_result.get('id', None) - self.to_screen(u'[download] Downloading playlist: %s' % playlist) + self.to_screen('[download] Downloading playlist: %s' % playlist) playlist_results = [] @@ -580,11 +582,11 @@ class YoutubeDL(object): n_entries = len(entries) self.to_screen( - u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" % + "[%s] playlist '%s': Collected %d video ids (downloading %d of them)" % (ie_result['extractor'], playlist, n_all_entries, n_entries)) for i, entry in enumerate(entries, 1): - self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries)) + self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries)) extra = { 'playlist': playlist, 'playlist_index': i + playliststart, @@ -596,7 +598,7 @@ class YoutubeDL(object): reason = self._match_entry(entry) if reason is not None: - self.to_screen(u'[download] ' + reason) + self.to_screen('[download] ' + reason) continue entry_result = self.process_ie_result(entry, @@ -629,7 +631,7 @@ class YoutubeDL(object): elif format_spec == 'worst': return available_formats[0] else: - extensions = [u'mp4', u'flv', u'webm', u'3gp'] + extensions = ['mp4', 'flv', 'webm', '3gp'] if format_spec in extensions: filter_f = lambda f: f['ext'] == format_spec else: @@ -648,7 +650,7 @@ class YoutubeDL(object): info_dict['playlist_index'] = None # This extractors handle format selection themselves - if info_dict['extractor'] in [u'Youku']: + if info_dict['extractor'] in ['Youku']: if download: self.process_info(info_dict) return info_dict @@ -665,10 +667,10 @@ class YoutubeDL(object): if format.get('format_id') is None: format['format_id'] = compat_str(i) if format.get('format') is None: - format['format'] = u'{id} - {res}{note}'.format( + format['format'] = '{id} - {res}{note}'.format( id=format['format_id'], res=self.format_resolution(format), - note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '', + note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '', ) # Automatically determine file extension if missing if 'ext' not in format: @@ -704,17 +706,27 @@ class YoutubeDL(object): # the first that is available, starting from left req_formats = req_format.split('/') for rf in req_formats: - selected_format = self.select_format(rf, formats) + if re.match(r'.+?\+.+?', rf) is not None: + # Two formats have been requested like '137+139' + format_1, format_2 = rf.split('+') + formats_info = (self.select_format(format_1, formats), + self.select_format(format_2, formats)) + if all(formats_info): + selected_format = {'requested_formats': formats_info} + else: + selected_format = None + else: + selected_format = self.select_format(rf, formats) if selected_format is not None: formats_to_download = [selected_format] break if not formats_to_download: - raise ExtractorError(u'requested format not available', + raise ExtractorError('requested format not available', expected=True) if download: if len(formats_to_download) > 1: - self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download))) + self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download))) for format in formats_to_download: new_info = dict(info_dict) new_info.update(format) @@ -732,7 +744,7 @@ class YoutubeDL(object): info_dict['fulltitle'] = info_dict['title'] if len(info_dict['title']) > 200: - info_dict['title'] = info_dict['title'][:197] + u'...' + info_dict['title'] = info_dict['title'][:197] + '...' # Keep for backwards compatibility info_dict['stitle'] = info_dict['title'] @@ -742,7 +754,7 @@ class YoutubeDL(object): reason = self._match_entry(info_dict) if reason is not None: - self.to_screen(u'[download] ' + reason) + self.to_screen('[download] ' + reason) return max_downloads = self.params.get('max_downloads') @@ -759,7 +771,7 @@ class YoutubeDL(object): self.to_stdout(info_dict['id']) if self.params.get('forceurl', False): # For RTMP URLs, also include the playpath - self.to_stdout(info_dict['url'] + info_dict.get('play_path', u'')) + self.to_stdout(info_dict['url'] + info_dict.get('play_path', '')) if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None: self.to_stdout(info_dict['thumbnail']) if self.params.get('forcedescription', False) and info_dict.get('description') is not None: @@ -786,37 +798,37 @@ class YoutubeDL(object): if dn != '' and not os.path.exists(dn): os.makedirs(dn) except (OSError, IOError) as err: - self.report_error(u'unable to create directory ' + compat_str(err)) + self.report_error('unable to create directory ' + compat_str(err)) return if self.params.get('writedescription', False): - descfn = filename + u'.description' + descfn = filename + '.description' if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)): - self.to_screen(u'[info] Video description is already present') + self.to_screen('[info] Video description is already present') else: try: - self.to_screen(u'[info] Writing video description to: ' + descfn) + self.to_screen('[info] Writing video description to: ' + descfn) with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: descfile.write(info_dict['description']) except (KeyError, TypeError): - self.report_warning(u'There\'s no description to write.') + self.report_warning('There\'s no description to write.') except (OSError, IOError): - self.report_error(u'Cannot write description file ' + descfn) + self.report_error('Cannot write description file ' + descfn) return if self.params.get('writeannotations', False): - annofn = filename + u'.annotations.xml' + annofn = filename + '.annotations.xml' if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)): - self.to_screen(u'[info] Video annotations are already present') + self.to_screen('[info] Video annotations are already present') else: try: - self.to_screen(u'[info] Writing video annotations to: ' + annofn) + self.to_screen('[info] Writing video annotations to: ' + annofn) with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile: annofile.write(info_dict['annotations']) except (KeyError, TypeError): - self.report_warning(u'There are no annotations to write.') + self.report_warning('There are no annotations to write.') except (OSError, IOError): - self.report_error(u'Cannot write annotations file: ' + annofn) + self.report_error('Cannot write annotations file: ' + annofn) return subtitles_are_requested = any([self.params.get('writesubtitles', False), @@ -834,45 +846,45 @@ class YoutubeDL(object): try: sub_filename = subtitles_filename(filename, sub_lang, sub_format) if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)): - self.to_screen(u'[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format)) + self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format)) else: - self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename) + self.to_screen('[info] Writing video subtitles to: ' + sub_filename) with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: subfile.write(sub) except (OSError, IOError): - self.report_error(u'Cannot write subtitles file ' + descfn) + self.report_error('Cannot write subtitles file ' + descfn) return if self.params.get('writeinfojson', False): - infofn = os.path.splitext(filename)[0] + u'.info.json' + infofn = os.path.splitext(filename)[0] + '.info.json' if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)): - self.to_screen(u'[info] Video description metadata is already present') + self.to_screen('[info] Video description metadata is already present') else: - self.to_screen(u'[info] Writing video description metadata as JSON to: ' + infofn) + self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn) try: write_json_file(info_dict, encodeFilename(infofn)) except (OSError, IOError): - self.report_error(u'Cannot write metadata to JSON file ' + infofn) + self.report_error('Cannot write metadata to JSON file ' + infofn) return if self.params.get('writethumbnail', False): if info_dict.get('thumbnail') is not None: - thumb_format = determine_ext(info_dict['thumbnail'], u'jpg') - thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format + thumb_format = determine_ext(info_dict['thumbnail'], 'jpg') + thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)): - self.to_screen(u'[%s] %s: Thumbnail is already present' % + self.to_screen('[%s] %s: Thumbnail is already present' % (info_dict['extractor'], info_dict['id'])) else: - self.to_screen(u'[%s] %s: Downloading thumbnail ...' % + self.to_screen('[%s] %s: Downloading thumbnail ...' % (info_dict['extractor'], info_dict['id'])) try: uf = compat_urllib_request.urlopen(info_dict['thumbnail']) with open(thumb_filename, 'wb') as thumbf: shutil.copyfileobj(uf, thumbf) - self.to_screen(u'[%s] %s: Writing thumbnail to: %s' % + self.to_screen('[%s] %s: Writing thumbnail to: %s' % (info_dict['extractor'], info_dict['id'], thumb_filename)) except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self.report_warning(u'Unable to download thumbnail "%s": %s' % + self.report_warning('Unable to download thumbnail "%s": %s' % (info_dict['thumbnail'], compat_str(err))) if not self.params.get('skip_download', False): @@ -880,24 +892,41 @@ class YoutubeDL(object): success = True else: try: - fd = get_suitable_downloader(info_dict)(self, self.params) - for ph in self._progress_hooks: - fd.add_progress_hook(ph) - success = fd.download(filename, info_dict) + def dl(name, info): + fd = get_suitable_downloader(info)(self, self.params) + for ph in self._progress_hooks: + fd.add_progress_hook(ph) + return fd.download(name, info) + if info_dict.get('requested_formats') is not None: + downloaded = [] + success = True + for f in info_dict['requested_formats']: + new_info = dict(info_dict) + new_info.update(f) + fname = self.prepare_filename(new_info) + fname = prepend_extension(fname, 'f%s' % f['format_id']) + downloaded.append(fname) + partial_success = dl(fname, new_info) + success = success and partial_success + info_dict['__postprocessors'] = [FFmpegMergerPP(self)] + info_dict['__files_to_merge'] = downloaded + else: + # Just a single file + success = dl(filename, info_dict) except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self.report_error(u'unable to download video data: %s' % str(err)) + self.report_error('unable to download video data: %s' % str(err)) return except (OSError, IOError) as err: raise UnavailableVideoError(err) except (ContentTooShortError, ) as err: - self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) + self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) return if success: try: self.post_process(filename, info_dict) except (PostProcessingError) as err: - self.report_error(u'postprocessing: %s' % str(err)) + self.report_error('postprocessing: %s' % str(err)) return self.record_download_archive(info_dict) @@ -914,9 +943,9 @@ class YoutubeDL(object): #It also downloads the videos self.extract_info(url) except UnavailableVideoError: - self.report_error(u'unable to download video') + self.report_error('unable to download video') except MaxDownloadsReached: - self.to_screen(u'[info] Maximum number of downloaded files reached.') + self.to_screen('[info] Maximum number of downloaded files reached.') raise return self._download_retcode @@ -929,7 +958,7 @@ class YoutubeDL(object): except DownloadError: webpage_url = info.get('webpage_url') if webpage_url is not None: - self.report_warning(u'The info failed to download, trying with "%s"' % webpage_url) + self.report_warning('The info failed to download, trying with "%s"' % webpage_url) return self.download([webpage_url]) else: raise @@ -940,7 +969,11 @@ class YoutubeDL(object): info = dict(ie_info) info['filepath'] = filename keep_video = None - for pp in self._pps: + pps_chain = [] + if ie_info.get('__postprocessors') is not None: + pps_chain.extend(ie_info['__postprocessors']) + pps_chain.extend(self._pps) + for pp in pps_chain: try: keep_video_wish, new_info = pp.run(info) if keep_video_wish is not None: @@ -953,10 +986,10 @@ class YoutubeDL(object): self.report_error(e.msg) if keep_video is False and not self.params.get('keepvideo', False): try: - self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename) + self.to_screen('Deleting original file %s (pass -k to keep)' % filename) os.remove(encodeFilename(filename)) except (IOError, OSError): - self.report_warning(u'Unable to remove downloaded video file') + self.report_warning('Unable to remove downloaded video file') def _make_archive_id(self, info_dict): # Future-proof against any change in case @@ -967,7 +1000,7 @@ class YoutubeDL(object): extractor = info_dict.get('ie_key') # key in a playlist if extractor is None: return None # Incomplete video information - return extractor.lower() + u' ' + info_dict['id'] + return extractor.lower() + ' ' + info_dict['id'] def in_download_archive(self, info_dict): fn = self.params.get('download_archive') @@ -995,7 +1028,7 @@ class YoutubeDL(object): vid_id = self._make_archive_id(info_dict) assert vid_id with locked_file(fn, 'a', encoding='utf-8') as archive_file: - archive_file.write(vid_id + u'\n') + archive_file.write(vid_id + '\n') @staticmethod def format_resolution(format, default='unknown'): @@ -1005,49 +1038,49 @@ class YoutubeDL(object): return format['resolution'] if format.get('height') is not None: if format.get('width') is not None: - res = u'%sx%s' % (format['width'], format['height']) + res = '%sx%s' % (format['width'], format['height']) else: - res = u'%sp' % format['height'] + res = '%sp' % format['height'] elif format.get('width') is not None: - res = u'?x%d' % format['width'] + res = '?x%d' % format['width'] else: res = default return res def list_formats(self, info_dict): def format_note(fdict): - res = u'' + res = '' if fdict.get('ext') in ['f4f', 'f4m']: - res += u'(unsupported) ' + res += '(unsupported) ' if fdict.get('format_note') is not None: - res += fdict['format_note'] + u' ' + res += fdict['format_note'] + ' ' if fdict.get('tbr') is not None: - res += u'%4dk ' % fdict['tbr'] + res += '%4dk ' % fdict['tbr'] if (fdict.get('vcodec') is not None and fdict.get('vcodec') != 'none'): - res += u'%-5s@' % fdict['vcodec'] + res += '%-5s@' % fdict['vcodec'] elif fdict.get('vbr') is not None and fdict.get('abr') is not None: - res += u'video@' + res += 'video@' if fdict.get('vbr') is not None: - res += u'%4dk' % fdict['vbr'] + res += '%4dk' % fdict['vbr'] if fdict.get('acodec') is not None: if res: - res += u', ' - res += u'%-5s' % fdict['acodec'] + res += ', ' + res += '%-5s' % fdict['acodec'] elif fdict.get('abr') is not None: if res: - res += u', ' + res += ', ' res += 'audio' if fdict.get('abr') is not None: - res += u'@%3dk' % fdict['abr'] + res += '@%3dk' % fdict['abr'] if fdict.get('filesize') is not None: if res: - res += u', ' + res += ', ' res += format_bytes(fdict['filesize']) return res def line(format, idlen=20): - return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % ( + return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % ( format['format_id'], format['ext'], self.format_resolution(format), @@ -1055,7 +1088,7 @@ class YoutubeDL(object): )) formats = info_dict.get('formats', [info_dict]) - idlen = max(len(u'format code'), + idlen = max(len('format code'), max(len(f['format_id']) for f in formats)) formats_s = [line(f, idlen) for f in formats] if len(formats) > 1: @@ -1063,10 +1096,10 @@ class YoutubeDL(object): formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)' header_line = line({ - 'format_id': u'format code', 'ext': u'extension', - 'resolution': u'resolution', 'format_note': u'note'}, idlen=idlen) - self.to_screen(u'[info] Available formats for %s:\n%s\n%s' % - (info_dict['id'], header_line, u"\n".join(formats_s))) + 'format_id': 'format code', 'ext': 'extension', + 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen) + self.to_screen('[info] Available formats for %s:\n%s\n%s' % + (info_dict['id'], header_line, '\n'.join(formats_s))) def urlopen(self, req): """ Start an HTTP download """ @@ -1075,7 +1108,7 @@ class YoutubeDL(object): def print_debug_header(self): if not self.params.get('verbose'): return - write_string(u'[debug] youtube-dl version ' + __version__ + u'\n') + write_string('[debug] youtube-dl version ' + __version__ + '\n') try: sp = subprocess.Popen( ['git', 'rev-parse', '--short', 'HEAD'], @@ -1084,20 +1117,20 @@ class YoutubeDL(object): out, err = sp.communicate() out = out.decode().strip() if re.match('[0-9a-f]+', out): - write_string(u'[debug] Git HEAD: ' + out + u'\n') + write_string('[debug] Git HEAD: ' + out + '\n') except: try: sys.exc_clear() except: pass - write_string(u'[debug] Python version %s - %s' % - (platform.python_version(), platform_name()) + u'\n') + write_string('[debug] Python version %s - %s' % + (platform.python_version(), platform_name()) + '\n') proxy_map = {} for handler in self._opener.handlers: if hasattr(handler, 'proxies'): proxy_map.update(handler.proxies) - write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n') + write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n') def _setup_opener(self): timeout_val = self.params.get('socket_timeout') diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 2e81556da..b29cf6758 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -186,7 +186,7 @@ def parseOpts(overrideArguments=None): general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') general.add_option( '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR', - help='Location in the filesystem where youtube-dl can store downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl .') + help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.') general.add_option( '--no-cache-dir', action='store_const', const=None, dest='cachedir', help='Disable filesystem caching') diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 5319a9292..21d564dba 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -95,12 +95,18 @@ from .ivi import ( from .jeuxvideo import JeuxVideoIE from .jukebox import JukeboxIE from .justintv import JustinTVIE +from .jpopsukitv import JpopsukiIE from .kankan import KankanIE from .keezmovies import KeezMoviesIE from .kickstarter import KickStarterIE from .keek import KeekIE from .liveleak import LiveLeakIE from .livestream import LivestreamIE, LivestreamOriginalIE +from .lynda import ( + LyndaIE, + LyndaCourseIE +) +from .macgamestore import MacGameStoreIE from .mdr import MDRIE from .metacafe import MetacafeIE from .metacritic import MetacriticIE diff --git a/youtube_dl/extractor/collegehumor.py b/youtube_dl/extractor/collegehumor.py index b27c1dfc5..d10b7bd0c 100644 --- a/youtube_dl/extractor/collegehumor.py +++ b/youtube_dl/extractor/collegehumor.py @@ -1,82 +1,68 @@ +from __future__ import unicode_literals + +import json import re from .common import InfoExtractor -from ..utils import ( - compat_urllib_parse_urlparse, - determine_ext, - - ExtractorError, -) class CollegeHumorIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P[0-9]+)/?(?P.*)$' _TESTS = [{ - u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe', - u'file': u'6902724.mp4', - u'md5': u'1264c12ad95dca142a9f0bf7968105a0', - u'info_dict': { - u'title': u'Comic-Con Cosplay Catastrophe', - u'description': u'Fans get creative this year at San Diego. Too creative. And yes, that\'s really Joss Whedon.', + 'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe', + 'file': '6902724.mp4', + 'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd', + 'info_dict': { + 'title': 'Comic-Con Cosplay Catastrophe', + 'description': 'Fans get creative this year at San Diego. Too', + 'age_limit': 13, }, }, { - u'url': u'http://www.collegehumor.com/video/3505939/font-conference', - u'file': u'3505939.mp4', - u'md5': u'c51ca16b82bb456a4397987791a835f5', - u'info_dict': { - u'title': u'Font Conference', - u'description': u'This video wasn\'t long enough, so we made it double-spaced.', + 'url': 'http://www.collegehumor.com/video/3505939/font-conference', + 'file': '3505939.mp4', + 'md5': '72fa701d8ef38664a4dbb9e2ab721816', + 'info_dict': { + 'title': 'Font Conference', + 'description': 'This video wasn\'t long enough, so we made it double-spaced.', + 'age_limit': 10, }, }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - if mobj is None: - raise ExtractorError(u'Invalid URL: %s' % url) video_id = mobj.group('videoid') - info = { - 'id': video_id, - 'uploader': None, - 'upload_date': None, - } - - self.report_extraction(video_id) - xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id - mdoc = self._download_xml(xmlUrl, video_id, - u'Downloading info XML', - u'Unable to download video info XML') + jsonUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id + '.json' + data = json.loads(self._download_webpage( + jsonUrl, video_id, 'Downloading info JSON')) + vdata = data['video'] - try: - videoNode = mdoc.findall('./video')[0] - youtubeIdNode = videoNode.find('./youtubeID') - if youtubeIdNode is not None: - return self.url_result(youtubeIdNode.text, 'Youtube') - info['description'] = videoNode.findall('./description')[0].text - info['title'] = videoNode.findall('./caption')[0].text - info['thumbnail'] = videoNode.findall('./thumbnail')[0].text - next_url = videoNode.findall('./file')[0].text - except IndexError: - raise ExtractorError(u'Invalid metadata XML file') - - if next_url.endswith(u'manifest.f4m'): - manifest_url = next_url + '?hdcore=2.10.3' - adoc = self._download_xml(manifest_url, video_id, - u'Downloading XML manifest', - u'Unable to download video info XML') - - try: - video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text - except IndexError: - raise ExtractorError(u'Invalid manifest file') - url_pr = compat_urllib_parse_urlparse(info['thumbnail']) - info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','') - info['ext'] = 'mp4' + AGE_LIMITS = {'nc17': 18, 'r': 18, 'pg13': 13, 'pg': 10, 'g': 0} + rating = vdata.get('rating') + if rating: + age_limit = AGE_LIMITS.get(rating.lower()) else: - # Old-style direct links - info['url'] = next_url - info['ext'] = determine_ext(info['url']) + age_limit = None # None = No idea + + PREFS = {'high_quality': 2, 'low_quality': 0} + formats = [] + for format_key in ('mp4', 'webm'): + for qname, qurl in vdata[format_key].items(): + formats.append({ + 'format_id': format_key + '_' + qname, + 'url': qurl, + 'format': format_key, + 'preference': PREFS.get(qname), + }) + self._sort_formats(formats) - return info + return { + 'id': video_id, + 'title': vdata['title'], + 'description': vdata.get('description'), + 'thumbnail': vdata.get('thumbnail'), + 'formats': formats, + 'age_limit': age_limit, + } diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index a54ce3ee7..27bd8256e 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -12,7 +12,9 @@ from ..utils import ( class ComedyCentralIE(MTVServicesInfoExtractor): - _VALID_URL = r'https?://(?:www.)?comedycentral.com/(video-clips|episodes|cc-studios)/(?P.*)' + _VALID_URL = r'''(?x)https?://(?:www.)?comedycentral.com/ + (video-clips|episodes|cc-studios|video-collections) + /(?P<title>.*)''' _FEED_URL = u'http://comedycentral.com/feeds/mrss/' _TEST = { diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index f34d36cb0..f498bcf6f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -69,7 +69,8 @@ class InfoExtractor(object): download, lower-case. "http", "https", "rtsp", "rtmp" or so. * preference Order number of this format. If this field is - present, the formats get sorted by this field. + present and not None, the formats get sorted + by this field. -1 for default (order by other properties), -2 or smaller for less than default. url: Final video URL. @@ -377,7 +378,7 @@ class InfoExtractor(object): @staticmethod def _og_regexes(prop): content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')' - property_re = r'property=[\'"]og:%s[\'"]' % re.escape(prop) + property_re = r'(?:name|property)=[\'"]og:%s[\'"]' % re.escape(prop) template = r'<meta[^>]+?%s[^>]+?%s' return [ template % (property_re, content_re), diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index b0e4300bd..0b11d1f10 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -10,7 +10,7 @@ from ..utils import ( class DreiSatIE(InfoExtractor): IE_NAME = '3sat' - _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/index\.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$' + _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$' _TEST = { u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983", u'file': u'36983.mp4', diff --git a/youtube_dl/extractor/jpopsukitv.py b/youtube_dl/extractor/jpopsukitv.py new file mode 100644 index 000000000..aad782578 --- /dev/null +++ b/youtube_dl/extractor/jpopsukitv.py @@ -0,0 +1,73 @@ +# coding=utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + unified_strdate, +) + + +class JpopsukiIE(InfoExtractor): + IE_NAME = 'jpopsuki.tv' + _VALID_URL = r'https?://(?:www\.)?jpopsuki\.tv/video/(.*?)/(?P<id>\S+)' + + _TEST = { + 'url': 'http://www.jpopsuki.tv/video/ayumi-hamasaki---evolution/00be659d23b0b40508169cdee4545771', + 'md5': '88018c0c1a9b1387940e90ec9e7e198e', + 'file': '00be659d23b0b40508169cdee4545771.mp4', + 'info_dict': { + 'id': '00be659d23b0b40508169cdee4545771', + 'title': 'ayumi hamasaki - evolution', + 'description': 'Release date: 2001.01.31\r\n浜崎あゆみ - evolution', + 'thumbnail': 'http://www.jpopsuki.tv/cache/89722c74d2a2ebe58bcac65321c115b2.jpg', + 'uploader': 'plama_chan', + 'uploader_id': '404', + 'upload_date': '20121101' + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + + video_url = 'http://www.jpopsuki.tv' + self._html_search_regex( + r'<source src="(.*?)" type', webpage, 'video url') + + video_title = self._og_search_title(webpage) + description = self._og_search_description(webpage) + thumbnail = self._og_search_thumbnail(webpage) + uploader = self._html_search_regex( + r'<li>from: <a href="/user/view/user/(.*?)/uid/', + webpage, 'video uploader', fatal=False) + uploader_id = self._html_search_regex( + r'<li>from: <a href="/user/view/user/\S*?/uid/(\d*)', + webpage, 'video uploader_id', fatal=False) + upload_date = self._html_search_regex( + r'<li>uploaded: (.*?)</li>', webpage, 'video upload_date', + fatal=False) + if upload_date is not None: + upload_date = unified_strdate(upload_date) + view_count_str = self._html_search_regex( + r'<li>Hits: ([0-9]+?)</li>', webpage, 'video view_count', + fatal=False) + comment_count_str = self._html_search_regex( + r'<h2>([0-9]+?) comments</h2>', webpage, 'video comment_count', + fatal=False) + + return { + 'id': video_id, + 'url': video_url, + 'title': video_title, + 'description': description, + 'thumbnail': thumbnail, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'upload_date': upload_date, + 'view_count': int_or_none(view_count_str), + 'comment_count': int_or_none(comment_count_str), + } diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py new file mode 100644 index 000000000..592ed747a --- /dev/null +++ b/youtube_dl/extractor/lynda.py @@ -0,0 +1,102 @@ +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class LyndaIE(InfoExtractor): + IE_NAME = 'lynda' + IE_DESC = 'lynda.com videos' + _VALID_URL = r'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html' + + _TEST = { + 'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html', + 'file': '114408.mp4', + 'md5': 'ecfc6862da89489161fb9cd5f5a6fac1', + u"info_dict": { + 'title': 'Using the exercise files', + 'duration': 68 + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group(1) + + page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, + video_id, 'Downloading video JSON') + video_json = json.loads(page) + + if 'Status' in video_json and video_json['Status'] == 'NotFound': + raise ExtractorError('Video %s does not exist' % video_id, expected=True) + + if video_json['HasAccess'] is False: + raise ExtractorError('Video %s is only available for members' % video_id, expected=True) + + video_id = video_json['ID'] + duration = video_json['DurationInSeconds'] + title = video_json['Title'] + + formats = [{'url': fmt['Url'], + 'ext': fmt['Extension'], + 'width': fmt['Width'], + 'height': fmt['Height'], + 'filesize': fmt['FileSize'], + 'format_id': fmt['Resolution'] + } for fmt in video_json['Formats']] + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'duration': duration, + 'formats': formats + } + + +class LyndaCourseIE(InfoExtractor): + IE_NAME = 'lynda:course' + IE_DESC = 'lynda.com online courses' + + # Course link equals to welcome/introduction video link of same course + # We will recognize it as course link + _VALID_URL = r'https?://(?:www|m)\.lynda\.com/(?P<coursepath>[^/]+/[^/]+/(?P<courseid>\d+))-\d\.html' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + course_path = mobj.group('coursepath') + course_id = mobj.group('courseid') + + page = self._download_webpage('http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id, + course_id, 'Downloading course JSON') + course_json = json.loads(page) + + if 'Status' in course_json and course_json['Status'] == 'NotFound': + raise ExtractorError('Course %s does not exist' % course_id, expected=True) + + unaccessible_videos = 0 + videos = [] + + for chapter in course_json['Chapters']: + for video in chapter['Videos']: + if video['HasAccess'] is not True: + unaccessible_videos += 1 + continue + videos.append(video['ID']) + + if unaccessible_videos > 0: + self._downloader.report_warning('%s videos are only available for members and will not be downloaded' % unaccessible_videos) + + entries = [ + self.url_result('http://www.lynda.com/%s/%s-4.html' % + (course_path, video_id), + 'Lynda') + for video_id in videos] + + course_title = course_json['Title'] + + return self.playlist_result(entries, course_id, course_title) diff --git a/youtube_dl/extractor/macgamestore.py b/youtube_dl/extractor/macgamestore.py new file mode 100644 index 000000000..b818cf50c --- /dev/null +++ b/youtube_dl/extractor/macgamestore.py @@ -0,0 +1,43 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class MacGameStoreIE(InfoExtractor): + IE_NAME = 'macgamestore' + IE_DESC = 'MacGameStore trailers' + _VALID_URL = r'https?://www\.macgamestore\.com/mediaviewer\.php\?trailer=(?P<id>\d+)' + + _TEST = { + 'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450', + 'file': '2450.m4v', + 'md5': '8649b8ea684b6666b4c5be736ecddc61', + 'info_dict': { + 'title': 'Crow', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id, 'Downloading trailer page') + + if re.search(r'>Missing Media<', webpage) is not None: + raise ExtractorError('Trailer %s does not exist' % video_id, expected=True) + + video_title = self._html_search_regex( + r'<title>MacGameStore: (.*?) Trailer', webpage, 'title') + + video_url = self._html_search_regex( + r'(?s)', + webpage, 'video URL') + + return { + 'id': video_id, + 'url': video_url, + 'title': video_title + } diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 125d81551..7c54ea0f4 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -53,7 +53,7 @@ class MixcloudIE(InfoExtractor): info = json.loads(json_data) preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url') - song_url = preview_url.replace('/previews/', '/cloudcasts/originals/') + song_url = preview_url.replace('/previews/', '/c/originals/') template_url = re.sub(r'(stream\d*)', 'stream%d', song_url) final_song_url = self._get_url(template_url) if final_song_url is None: diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py index 71abd5013..e9ff8d1af 100644 --- a/youtube_dl/extractor/pornhd.py +++ b/youtube_dl/extractor/pornhd.py @@ -5,7 +5,7 @@ from ..utils import compat_urllib_parse class PornHdIE(InfoExtractor): - _VALID_URL = r'(?:http://)?(?:www\.)?pornhd\.com/videos/(?P[0-9]+)/(?P.+)' + _VALID_URL = r'(?:http://)?(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P[0-9]+)/(?P.+)' _TEST = { u'url': u'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video', u'file': u'1962.flv', diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index e22ff9c38..951e977bd 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -29,7 +29,7 @@ class SoundcloudIE(InfoExtractor): (?!sets/)(?P[\w\d-]+)/? (?P<token>[^?]+?)?(?:[?].*)?$) |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)) - |(?P<widget>w\.soundcloud\.com/player/?.*?url=.*) + |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*) ) ''' IE_NAME = u'soundcloud' @@ -193,7 +193,7 @@ class SoundcloudIE(InfoExtractor): if track_id is not None: info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID full_title = track_id - elif mobj.group('widget'): + elif mobj.group('player'): query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) return self.url_result(query['url'][0], ie='Soundcloud') else: diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index 584550455..bc31c2e64 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -44,6 +44,7 @@ class WistiaIE(InfoExtractor): 'height': a['height'], 'filesize': a['size'], 'ext': a['ext'], + 'preference': 1 if atype == 'original' else None, }) self._sort_formats(formats) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index da5143c8e..536504e7e 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -500,12 +500,13 @@ def unescapeHTML(s): result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s) return result -def encodeFilename(s): + +def encodeFilename(s, for_subprocess=False): """ @param s The name of the file """ - assert type(s) == type(u'') + assert type(s) == compat_str # Python 3 has a Unicode API if sys.version_info >= (3, 0): @@ -515,12 +516,18 @@ def encodeFilename(s): # Pass u'' directly to use Unicode APIs on Windows 2000 and up # (Detecting Windows NT 4 is tricky because 'major >= 4' would # match Windows 9x series as well. Besides, NT 4 is obsolete.) - return s + if not for_subprocess: + return s + else: + # For subprocess calls, encode with locale encoding + # Refer to http://stackoverflow.com/a/9951851/35070 + encoding = preferredencoding() else: encoding = sys.getfilesystemencoding() - if encoding is None: - encoding = 'utf-8' - return s.encode(encoding, 'ignore') + if encoding is None: + encoding = 'utf-8' + return s.encode(encoding, 'ignore') + def decodeOption(optval): if optval is None: @@ -850,12 +857,22 @@ def platform_name(): def write_string(s, out=None): if out is None: out = sys.stderr - assert type(s) == type(u'') + assert type(s) == compat_str if ('b' in getattr(out, 'mode', '') or sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr s = s.encode(preferredencoding(), 'ignore') - out.write(s) + try: + out.write(s) + except UnicodeEncodeError: + # In Windows shells, this can fail even when the codec is just charmap!? + # See https://wiki.python.org/moin/PrintFails#Issue + if sys.platform == 'win32' and hasattr(out, 'encoding'): + s = s.encode(out.encoding, 'ignore').decode(out.encoding) + out.write(s) + else: + raise + out.flush() @@ -1071,7 +1088,7 @@ def fix_xml_all_ampersand(xml_str): def setproctitle(title): - assert isinstance(title, type(u'')) + assert isinstance(title, compat_str) try: libc = ctypes.cdll.LoadLibrary("libc.so.6") except OSError: @@ -1119,3 +1136,8 @@ def parse_duration(s): if m.group('hours'): res += int(m.group('hours')) * 60 * 60 return res + + +def prepend_extension(filename, ext): + name, real_ext = os.path.splitext(filename) + return u'{0}.{1}{2}'.format(name, ext, real_ext) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 332913b31..bf5fc8212 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.12.26' +__version__ = '2014.01.03'