X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube-dl;h=6cb58f1a8c886c38b71135c81ea17881218ca27f;hb=1eff9ac0c58196b9da4909b63e590304a1d55801;hp=e304df5572d02c6841542182761cb75121512367;hpb=10e7194db16c769cd8da98a8d541f7f5452afe84;p=youtube-dl.git diff --git a/youtube-dl b/youtube-dl index e304df557..6cb58f1a8 100755 --- a/youtube-dl +++ b/youtube-dl @@ -15,7 +15,7 @@ __author__ = ( ) __license__ = 'Public Domain' -__version__ = '2011.09.16' +__version__ = '2011.09.27' UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl' @@ -766,7 +766,8 @@ class FileDownloader(object): try: infof = open(infofn, 'wb') try: - json.dump(info_dict, infof) + json_info_dict = dict((k,v) for k,v in info_dict.iteritems() if not k in ('urlhandle',)) + json.dump(json_info_dict, infof) finally: infof.close() except (OSError, IOError): @@ -775,8 +776,7 @@ class FileDownloader(object): if not self.params.get('skip_download', False): try: - success,add_data = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None)) - info_dict.update(add_data) + success = self._do_download(filename, info_dict) except (OSError, IOError), err: raise UnavailableVideoError except (urllib2.URLError, httplib.HTTPException, socket.error), err: @@ -865,7 +865,10 @@ class FileDownloader(object): self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval) return False - def _do_download(self, filename, url, player_url): + def _do_download(self, filename, info_dict): + url = info_dict['url'] + player_url = info_dict.get('player_url', None) + # Check file already present if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False): self.report_file_already_downloaded(filename) @@ -903,6 +906,8 @@ class FileDownloader(object): while count <= retries: # Establish connection try: + if count == 0 and 'urlhandle' in info_dict: + data = info_dict['urlhandle'] data = urllib2.urlopen(request) break except (urllib2.HTTPError, ), err: @@ -980,10 +985,13 @@ class FileDownloader(object): block_size = self.best_block_size(after - before, len(data_block)) # Progress message - percent_str = self.calc_percent(byte_counter, data_len) - eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len) speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len) - self.report_progress(percent_str, data_len_str, speed_str, eta_str) + if data_len is None: + self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA') + else: + percent_str = self.calc_percent(byte_counter, data_len) + eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len) + self.report_progress(percent_str, data_len_str, speed_str, eta_str) # Apply rate limit self.slow_down(start, byte_counter - resume_len) @@ -998,11 +1006,10 @@ class FileDownloader(object): self.try_rename(tmpfilename, filename) # Update file modification time - filetime = None if self.params.get('updatetime', True): - filetime = self.try_utime(filename, data.info().get('last-modified', None)) + info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None)) - return True, {'filetime': filetime} + return True class InfoExtractor(object): @@ -1078,13 +1085,13 @@ class InfoExtractor(object): class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" - _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$' + _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?!view_play_list|my_playlists|artist|playlist)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$' _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' # Listed in order of quality - _available_formats = ['38', '37', '45', '22', '43', '35', '34', '18', '6', '5', '17', '13'] + _available_formats = ['38', '37', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13'] _video_extensions = { '13': '3gp', '17': 'mp4', @@ -1093,6 +1100,7 @@ class YoutubeIE(InfoExtractor): '37': 'mp4', '38': 'video', # You actually don't know if this will be MOV, AVI or whatever '43': 'webm', + '44': 'webm', '45': 'webm', } IE_NAME = u'youtube' @@ -2427,7 +2435,7 @@ class YahooSearchIE(InfoExtractor): class YoutubePlaylistIE(InfoExtractor): """Information Extractor for YouTube playlists.""" - _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*' + _VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*' _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en' _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*' @@ -2501,7 +2509,7 @@ class YoutubePlaylistIE(InfoExtractor): class YoutubeUserIE(InfoExtractor): """Information Extractor for YouTube users.""" - _VALID_URL = r'(?:(?:(?:http://)?(?:\w+\.)?youtube.com/user/)|ytuser:)([A-Za-z0-9_-]+)' + _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)' _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s' _GDATA_PAGE_SIZE = 50 _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d' @@ -2589,7 +2597,7 @@ class YoutubeUserIE(InfoExtractor): class DepositFilesIE(InfoExtractor): """Information extractor for depositfiles.com""" - _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)' + _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)' IE_NAME = u'DepositFiles' def __init__(self, downloader=None): @@ -2666,7 +2674,7 @@ class DepositFilesIE(InfoExtractor): class FacebookIE(InfoExtractor): """Information Extractor for Facebook""" - _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook.com/video/video.php\?(?:.*?)v=(?P\d+)(?:.*)' + _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/video/video\.php\?(?:.*?)v=(?P\d+)(?:.*)' _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&' _NETRC_MACHINE = 'facebook' _available_formats = ['highqual', 'lowqual'] @@ -2890,7 +2898,11 @@ class BlipTVIE(InfoExtractor): def report_extraction(self, file_id): """Report information extraction.""" - self._downloader.to_screen(u'[blip.tv] %s: Extracting information' % file_id) + self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, file_id)) + + def report_direct_download(self, title): + """Report information extraction.""" + self._downloader.to_screen(u'[%s] %s: Direct download detected' % (self.IE_NAME, title)) def _simplify_title(self, title): res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) @@ -2910,43 +2922,64 @@ class BlipTVIE(InfoExtractor): json_url = url + cchar + 'skin=json&version=2&no_wrap=1' request = urllib2.Request(json_url) self.report_extraction(mobj.group(1)) + info = None try: - json_code = urllib2.urlopen(request).read() + urlh = urllib2.urlopen(request) + if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download + basename = url.split('/')[-1] + title,ext = os.path.splitext(basename) + ext = ext.replace('.', '') + self.report_direct_download(title) + info = { + 'id': title, + 'url': url, + 'title': title, + 'stitle': self._simplify_title(title), + 'ext': ext, + 'urlhandle': urlh + } except (urllib2.URLError, httplib.HTTPException, socket.error), err: self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) return - try: - json_data = json.loads(json_code) - if 'Post' in json_data: - data = json_data['Post'] - else: - data = json_data - - upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') - video_url = data['media']['url'] - umobj = re.match(self._URL_EXT, video_url) - if umobj is None: - raise ValueError('Can not determine filename extension') - ext = umobj.group(1) + if info is None: # Regular URL + try: + json_code = urlh.read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % str(err)) + return - self._downloader.increment_downloads() + try: + json_data = json.loads(json_code) + if 'Post' in json_data: + data = json_data['Post'] + else: + data = json_data + + upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') + video_url = data['media']['url'] + umobj = re.match(self._URL_EXT, video_url) + if umobj is None: + raise ValueError('Can not determine filename extension') + ext = umobj.group(1) + + info = { + 'id': data['item_id'], + 'url': video_url, + 'uploader': data['display_name'], + 'upload_date': upload_date, + 'title': data['title'], + 'stitle': self._simplify_title(data['title']), + 'ext': ext, + 'format': data['media']['mimeType'], + 'thumbnail': data['thumbnailUrl'], + 'description': data['description'], + 'player_url': data['embedUrl'] + } + except (ValueError,KeyError), err: + self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err)) + return - info = { - 'id': data['item_id'], - 'url': video_url, - 'uploader': data['display_name'], - 'upload_date': upload_date, - 'title': data['title'], - 'stitle': self._simplify_title(data['title']), - 'ext': ext, - 'format': data['media']['mimeType'], - 'thumbnail': data['thumbnailUrl'], - 'description': data['description'], - 'player_url': data['embedUrl'] - } - except (ValueError,KeyError), err: - self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err)) - return + self._downloader.increment_downloads() try: self._downloader.process_info(info) @@ -3012,7 +3045,6 @@ class MyVideoIE(InfoExtractor): video_title = sanitize_title(video_title) try: - print(video_url) self._downloader.process_info({ 'id': video_id, 'url': video_url, @@ -3171,7 +3203,7 @@ class ComedyCentralIE(InfoExtractor): class EscapistIE(InfoExtractor): """Information extractor for The Escapist """ - _VALID_URL = r'^(https?://)?(www\.)escapistmagazine.com/videos/view/(?P[^/]+)/(?P[^/?]+)[/?].*$' + _VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P[^/]+)/(?P[^/?]+)[/?]?.*$' IE_NAME = u'escapist' def report_extraction(self, showName): @@ -3346,12 +3378,14 @@ class FFmpegExtractAudioPP(PostProcessor): more_opts = [] if self._preferredcodec == 'best' or self._preferredcodec == filecodec: - if filecodec == 'aac' or filecodec == 'mp3': + if filecodec in ['aac', 'mp3', 'vorbis']: # Lossless if possible acodec = 'copy' extension = filecodec if filecodec == 'aac': more_opts = ['-f', 'adts'] + if filecodec == 'vorbis': + extension = 'ogg' else: # MP3 otherwise. acodec = 'libmp3lame' @@ -3361,13 +3395,15 @@ class FFmpegExtractAudioPP(PostProcessor): more_opts += ['-ab', self._preferredquality] else: # We convert the audio (lossy) - acodec = {'mp3': 'libmp3lame', 'aac': 'aac'}[self._preferredcodec] + acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'vorbis': 'libvorbis'}[self._preferredcodec] extension = self._preferredcodec more_opts = [] if self._preferredquality is not None: more_opts += ['-ab', self._preferredquality] if self._preferredcodec == 'aac': more_opts += ['-f', 'adts'] + if self._preferredcodec == 'vorbis': + extension = 'ogg' (prefix, ext) = os.path.splitext(path) new_path = prefix + '.' + extension @@ -3408,6 +3444,11 @@ def updateSelf(downloader, filename): try: urlh = urllib.urlopen(UPDATE_URL) newcontent = urlh.read() + + vmatch = re.search("__version__ = '([^']+)'", newcontent) + if vmatch is not None and vmatch.group(1) == __version__: + downloader.to_screen('youtube-dl is up-to-date (' + __version__ + ')') + return finally: urlh.close() except (IOError, OSError), err: @@ -3422,7 +3463,7 @@ def updateSelf(downloader, filename): except (IOError, OSError), err: sys.exit('ERROR: unable to overwrite current version') - downloader.to_screen('Updated youtube-dl. Restart to use the new version.') + downloader.to_screen('Updated youtube-dl. Restart youtube-dl to use the new version.') def parseOpts(): # Deferred imports @@ -3562,15 +3603,18 @@ def parseOpts(): action='store_true', dest='autonumber', help='number downloaded files starting from 00000', default=False) filesystem.add_option('-o', '--output', - dest='outtmpl', metavar='TEMPLATE', help='output filename template') + dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, and %% for a literal percent') filesystem.add_option('-a', '--batch-file', dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') filesystem.add_option('-w', '--no-overwrites', action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) filesystem.add_option('-c', '--continue', action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False) + filesystem.add_option('--no-continue', + action='store_false', dest='continue_dl', + help='do not resume partially downloaded files (restart from beginning)') filesystem.add_option('--cookies', - dest='cookiefile', metavar='FILE', help='file to dump cookie jar to') + dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in') filesystem.add_option('--no-part', action='store_true', dest='nopart', help='do not use .part files', default=False) filesystem.add_option('--no-mtime', @@ -3587,7 +3631,7 @@ def parseOpts(): postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False, help='convert video files to audio-only files (requires ffmpeg and ffprobe)') postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best', - help='"best", "aac" or "mp3"; best by default') + help='"best", "aac", "vorbis" or "mp3"; best by default') postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='128K', help='ffmpeg audio bitrate specification, 128k by default') postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False, @@ -3614,12 +3658,12 @@ def gen_extractors(): google_ie = GoogleIE() yahoo_ie = YahooIE() return [ - youtube_ie, - MetacafeIE(youtube_ie), - DailymotionIE(), YoutubePlaylistIE(youtube_ie), YoutubeUserIE(youtube_ie), YoutubeSearchIE(youtube_ie), + youtube_ie, + MetacafeIE(youtube_ie), + DailymotionIE(), google_ie, GoogleSearchIE(google_ie), PhotobucketIE(), @@ -3721,7 +3765,7 @@ def main(): except (TypeError, ValueError), err: parser.error(u'invalid playlist end number specified') if opts.extractaudio: - if opts.audioformat not in ['best', 'aac', 'mp3']: + if opts.audioformat not in ['best', 'aac', 'mp3', 'vorbis']: parser.error(u'invalid audio format specified') # File downloader