From: Philipp Hagemeister Date: Tue, 7 Jan 2014 04:44:44 +0000 (+0100) Subject: Merge remote-tracking branch 'epitron/metadata-pp' X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=commitdiff_plain;h=5f263296eaa72ddca232d734a2625bcd85771908;hp=-c;p=youtube-dl.git Merge remote-tracking branch 'epitron/metadata-pp' Conflicts: youtube_dl/PostProcessor.py --- 5f263296eaa72ddca232d734a2625bcd85771908 diff --combined README.md index e5d626cc7,5fa0103df..45cfbec5a --- a/README.md +++ b/README.md @@@ -34,16 -34,12 +34,16 @@@ which means you can modify it, redistri empty string (--proxy "") for direct connection --no-check-certificate Suppress HTTPS certificate validation. --cache-dir DIR Location in the filesystem where youtube-dl can - store downloaded information permanently. By + store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache - /youtube-dl . + /youtube-dl . At the moment, only YouTube player + files (for videos with obfuscated signatures) are + cached, but that may change. --no-cache-dir Disable filesystem caching + --socket-timeout None Time to wait before giving up, in seconds --bidi-workaround Work around terminals that lack bidirectional - text support. Requires fribidi executable in PATH + text support. Requires bidiv or fribidi + executable in PATH ## Video Selection: --playlist-start NUMBER playlist video to start at (default is 1) @@@ -193,7 -189,9 +193,9 @@@ processed files are overwritten by default --embed-subs embed subtitles in the video (only for mp4 videos) - --add-metadata add metadata to the files + --add-metadata write metadata to the video file + --xattrs write metadata to the video file's xattrs (using + dublin core and xdg standards) # CONFIGURATION @@@ -338,7 -336,3 +340,7 @@@ In particular, every site support reque ### Is anyone going to need the feature? Only post features that you (or an incapicated friend you can personally talk to) require. 
Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them. + +### Is your question about youtube-dl? + +It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different or even the reporter's own application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug. diff --combined youtube_dl/PostProcessor.py index f6be275ff,da95f1a87..481c07a94 --- a/youtube_dl/PostProcessor.py +++ b/youtube_dl/PostProcessor.py @@@ -10,7 -10,6 +10,7 @@@ from .utils import PostProcessingError, shell_quote, subtitles_filename, + prepend_extension, ) @@@ -63,6 -62,7 +63,7 @@@ class FFmpegPostProcessorError(PostProc class AudioConversionError(PostProcessingError): pass + class FFmpegPostProcessor(PostProcessor): def __init__(self,downloader=None): PostProcessor.__init__(self, downloader) @@@ -85,10 -85,10 +86,10 @@@ files_cmd = [] for path in input_paths: - files_cmd.extend(['-i', encodeFilename(path)]) + files_cmd.extend(['-i', encodeFilename(path, True)]) cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y'] + files_cmd + opts + - [encodeFilename(self._ffmpeg_filename_argument(out_path))]) + [encodeFilename(self._ffmpeg_filename_argument(out_path), True)]) if self._downloader.params.get('verbose', False): self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd)) @@@ -108,6 -108,7 +109,7 @@@ return u'./' + fn return fn + class FFmpegExtractAudioPP(FFmpegPostProcessor): def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False): FFmpegPostProcessor.__init__(self, downloader) @@@ -121,10 -122,7 +123,10 @@@ if not 
self._exes['ffprobe'] and not self._exes['avprobe']: raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.') try: - cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))] + cmd = [ + self._exes['avprobe'] or self._exes['ffprobe'], + '-show_streams', + encodeFilename(self._ffmpeg_filename_argument(path), True)] handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE) output = handle.communicate()[0] if handle.wait() != 0: @@@ -236,6 -234,7 +238,7 @@@ information['filepath'] = new_path return self._nopostoverwrites,information + class FFmpegVideoConvertor(FFmpegPostProcessor): def __init__(self, downloader=None,preferedformat=None): super(FFmpegVideoConvertor, self).__init__(downloader) @@@ -500,11 -499,13 +503,11 @@@ class FFmpegMetadataPP(FFmpegPostProces return True, info filename = info['filepath'] - ext = os.path.splitext(filename)[1][1:] - temp_filename = filename + u'.temp' + temp_filename = prepend_extension(filename, 'temp') options = ['-c', 'copy'] for (name, value) in metadata.items(): options.extend(['-metadata', '%s=%s' % (name, value)]) - options.extend(['-f', ext]) self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename) self.run_ffmpeg(filename, temp_filename, options) @@@ -513,9 -514,118 +516,125 @@@ return True, info +class FFmpegMergerPP(FFmpegPostProcessor): + def run(self, info): + filename = info['filepath'] + args = ['-c', 'copy'] + self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args) + return True, info ++ + class XAttrMetadataPP(PostProcessor): + + # + # More info about extended attributes for media: + # http://freedesktop.org/wiki/CommonExtendedAttributes/ + # http://www.freedesktop.org/wiki/PhreedomDraft/ + # http://dublincore.org/documents/usageguide/elements.shtml + # + # TODO: + # * capture youtube keywords and put them in 'user.dublincore.subject' (comma-separated) + 
# * figure out which xattrs can be used for 'duration', 'thumbnail', 'resolution' + # + + def run(self, info): + """ Set extended attributes on downloaded file (if xattr support is found). """ + + from .utils import hyphenate_date + + # This mess below finds the best xattr tool for the job and creates a + # "write_xattr" function. + try: + # try the pyxattr module... + import xattr + def write_xattr(path, key, value): + return xattr.setxattr(path, key, value) + + except ImportError: + + if os.name == 'posix': + def which(bin): + for dir in os.environ["PATH"].split(":"): + path = os.path.join(dir, bin) + if os.path.exists(path): + return path + + user_has_setfattr = which("setfattr") + user_has_xattr = which("xattr") + + if user_has_setfattr or user_has_xattr: + + def write_xattr(path, key, value): + import errno + potential_errors = { + # setfattr: /tmp/blah: Operation not supported + "Operation not supported": errno.EOPNOTSUPP, + # setfattr: ~/blah: No such file or directory + # xattr: No such file: ~/blah + "No such file": errno.ENOENT, + } + + if user_has_setfattr: + cmd = ['setfattr', '-n', key, '-v', value, path] + elif user_has_xattr: + cmd = ['xattr', '-w', key, value, path] + + try: + output = subprocess.check_output(cmd, stderr=subprocess.STDOUT) + except subprocess.CalledProcessError as e: + errorstr = e.output.strip().decode() + for potential_errorstr, potential_errno in potential_errors.items(): + if errorstr.find(potential_errorstr) > -1: + e = OSError(potential_errno, potential_errorstr) + e.__cause__ = None + raise e + raise # Reraise unhandled error + + else: + # On Unix, and can't find pyxattr, setfattr, or xattr. + if sys.platform.startswith('linux'): + self._downloader.report_error("Couldn't find a tool to set the xattrs. Install either the python 'pyxattr' or 'xattr' modules, or the GNU 'attr' package (which contains the 'setfattr' tool).") + elif sys.platform == 'darwin': + self._downloader.report_error("Couldn't find a tool to set the xattrs. 
Install either the python 'xattr' module, or the 'xattr' binary.") + else: + # Write xattrs to NTFS Alternate Data Streams: http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29 + def write_xattr(path, key, value): + assert(key.find(":") < 0) + assert(path.find(":") < 0) + assert(os.path.exists(path)) + + f = open(path+":"+key, "w") + f.write(value) + f.close() + + # Write the metadata to the file's xattrs + self._downloader.to_screen('[metadata] Writing metadata to file\'s xattrs...') + + filename = info['filepath'] + + try: + xattr_mapping = { + 'user.xdg.referrer.url': 'webpage_url', + # 'user.xdg.comment': 'description', + 'user.dublincore.title': 'title', + 'user.dublincore.date': 'upload_date', + 'user.dublincore.description': 'description', + 'user.dublincore.contributor': 'uploader', + 'user.dublincore.format': 'format', + } + + for xattrname, infoname in xattr_mapping.items(): + + value = info.get(infoname) + + if value: + if infoname == "upload_date": + value = hyphenate_date(value) + + write_xattr(filename, xattrname, value) + + return True, info + + except OSError: + self._downloader.report_error("This filesystem doesn't support extended attributes. 
(You may have to enable them in your /etc/fstab)") + return False, info + diff --combined youtube_dl/__init__.py index edaf1f1cd,03f98f504..ba243d4d2 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@@ -38,13 -38,13 +38,14 @@@ __authors__ = 'Takuya Tsuchida', 'Sergey M.', 'Michael Orlitzky', + 'Chris Gahan', ) __license__ = 'Public Domain' import codecs import getpass +import locale import optparse import os import random @@@ -79,6 -79,7 +80,7 @@@ from .PostProcessor import FFmpegVideoConvertor, FFmpegExtractAudioPP, FFmpegEmbedSubtitlePP, + XAttrMetadataPP, ) @@@ -186,16 -187,16 +188,16 @@@ def parseOpts(overrideArguments=None) general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') general.add_option( '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR', - help='Location in the filesystem where youtube-dl can store downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl .') + help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.') general.add_option( '--no-cache-dir', action='store_const', const=None, dest='cachedir', help='Disable filesystem caching') general.add_option( '--socket-timeout', dest='socket_timeout', - type=float, default=None, help=optparse.SUPPRESS_HELP) + type=float, default=None, help=u'Time to wait before giving up, in seconds') general.add_option( '--bidi-workaround', dest='bidi_workaround', action='store_true', - help=u'Work around terminals that lack bidirectional text support. Requires fribidi executable in PATH') + help=u'Work around terminals that lack bidirectional text support. 
Requires bidiv or fribidi executable in PATH') selection.add_option( @@@ -334,9 -335,7 +336,9 @@@ verbosity.add_option('--youtube-print-sig-code', action='store_true', dest='youtube_print_sig_code', default=False, help=optparse.SUPPRESS_HELP) - + verbosity.add_option('--print-traffic', + dest='debug_printtraffic', action='store_true', default=False, + help=optparse.SUPPRESS_HELP) filesystem.add_option('-t', '--title', action='store_true', dest='usetitle', help='use title in file name (default)', default=False) @@@ -415,7 -414,9 +417,9 @@@ postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False, help='embed subtitles in the video (only for mp4 videos)') postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False, - help='add metadata to the files') + help='write metadata to the video file') + postproc.add_option('--xattrs', action='store_true', dest='xattrs', default=False, + help='write metadata to the video file\'s xattrs (using dublin core and xdg standards)') parser.add_option_group(general) @@@ -476,8 -477,6 +480,8 @@@ write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n') write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n') write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n') + write_string(u'[debug] Encodings: locale %r, fs %r, out %r, pref: %r\n' % + (locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, preferredencoding())) return parser, opts, args @@@ -522,8 -521,6 +526,8 @@@ def _real_main(argv=None) sys.exit(u'ERROR: batch file could not be read') all_urls = batchurls + args all_urls = [url.strip() for url in all_urls] + _enc = preferredencoding() + all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls] extractors = gen_extractors() @@@ -700,7 -697,6 +704,7 @@@ 'proxy': opts.proxy, 'socket_timeout': opts.socket_timeout, 
'bidi_workaround': opts.bidi_workaround, + 'debug_printtraffic': opts.debug_printtraffic, } with YoutubeDL(ydl_opts) as ydl: @@@ -717,6 -713,8 +721,8 @@@ ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo)) if opts.embedsubtitles: ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat)) + if opts.xattrs: + ydl.add_post_processor(XAttrMetadataPP()) # Update version if opts.update_self: diff --combined youtube_dl/utils.py index 0b0d1eb90,20ebea38c..a509f8e2f --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@@ -500,13 -500,12 +500,13 @@@ def unescapeHTML(s) result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s) return result -def encodeFilename(s): + +def encodeFilename(s, for_subprocess=False): """ @param s The name of the file """ - assert type(s) == type(u'') + assert type(s) == compat_str # Python 3 has a Unicode API if sys.version_info >= (3, 0): @@@ -516,18 -515,12 +516,18 @@@ # Pass u'' directly to use Unicode APIs on Windows 2000 and up # (Detecting Windows NT 4 is tricky because 'major >= 4' would # match Windows 9x series as well. Besides, NT 4 is obsolete.) 
- return s + if not for_subprocess: + return s + else: + # For subprocess calls, encode with locale encoding + # Refer to http://stackoverflow.com/a/9951851/35070 + encoding = preferredencoding() else: encoding = sys.getfilesystemencoding() - if encoding is None: - encoding = 'utf-8' - return s.encode(encoding, 'ignore') + if encoding is None: + encoding = 'utf-8' + return s.encode(encoding, 'ignore') + def decodeOption(optval): if optval is None: @@@ -546,8 -539,7 +546,8 @@@ def formatSeconds(secs) else: return '%d' % secs -def make_HTTPS_handler(opts_no_check_certificate): + +def make_HTTPS_handler(opts_no_check_certificate, **kwargs): if sys.version_info < (3, 2): import httplib @@@ -568,7 -560,7 +568,7 @@@ class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler): def https_open(self, req): return self.do_open(HTTPSConnectionV3, req) - return HTTPSHandlerV3() + return HTTPSHandlerV3(**kwargs) else: context = ssl.SSLContext(ssl.PROTOCOL_SSLv3) context.verify_mode = (ssl.CERT_NONE @@@ -579,7 -571,7 +579,7 @@@ context.load_default_certs() except AttributeError: pass # Python < 3.4 - return compat_urllib_request.HTTPSHandler(context=context) + return compat_urllib_request.HTTPSHandler(context=context, **kwargs) class ExtractorError(Exception): """Error during info extraction.""" @@@ -764,7 -756,6 +764,7 @@@ def unified_strdate(date_str) '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S', + '%Y-%m-%d %H:%M:%S', '%d.%m.%Y %H:%M', '%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S.%fZ', @@@ -818,6 -809,15 +818,15 @@@ def date_from_str(date_str) return today + delta return datetime.datetime.strptime(date_str, "%Y%m%d").date() + def hyphenate_date(date_str): + """ + Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format""" + match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str) + if match is not None: + return '-'.join(match.groups()) + else: + return date_str + class DateRange(object): """Represents a time interval between two dates""" def __init__(self, start=None, end=None): @@@ 
-858,22 -858,12 +867,22 @@@ def platform_name() def write_string(s, out=None): if out is None: out = sys.stderr - assert type(s) == type(u'') + assert type(s) == compat_str if ('b' in getattr(out, 'mode', '') or sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr s = s.encode(preferredencoding(), 'ignore') - out.write(s) + try: + out.write(s) + except UnicodeEncodeError: + # In Windows shells, this can fail even when the codec is just charmap!? + # See https://wiki.python.org/moin/PrintFails#Issue + if sys.platform == 'win32' and hasattr(out, 'encoding'): + s = s.encode(out.encoding, 'ignore').decode(out.encoding) + out.write(s) + else: + raise + out.flush() @@@ -1027,9 -1017,9 +1036,9 @@@ def smuggle_url(url, data) return url + u'#' + sdata -def unsmuggle_url(smug_url): +def unsmuggle_url(smug_url, default=None): if not '#__youtubedl_smuggle' in smug_url: - return smug_url, None + return smug_url, default url, _, sdata = smug_url.rpartition(u'#') jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0] data = json.loads(jsond) @@@ -1089,7 -1079,7 +1098,7 @@@ def fix_xml_all_ampersand(xml_str) def setproctitle(title): - assert isinstance(title, type(u'')) + assert isinstance(title, compat_str) try: libc = ctypes.cdll.LoadLibrary("libc.so.6") except OSError: @@@ -1117,28 -1107,3 +1126,28 @@@ def url_basename(url) class HEADRequest(compat_urllib_request.Request): def get_method(self): return "HEAD" + + +def int_or_none(v): + return v if v is None else int(v) + + +def parse_duration(s): + if s is None: + return None + + m = re.match( + r'(?:(?:(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)$', s) + if not m: + return None + res = int(m.group('secs')) + if m.group('mins'): + res += int(m.group('mins')) * 60 + if m.group('hours'): + res += int(m.group('hours')) * 60 * 60 + return res + + +def prepend_extension(filename, ext): + name, real_ext = os.path.splitext(filename) + return u'{0}.{1}{2}'.format(name, ext, real_ext)