From: Jaime Marquínez Ferrándiz Date: Mon, 27 Jan 2014 11:21:00 +0000 (+0100) Subject: Merge remote-tracking branch 'sahutd/master' X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=commitdiff_plain;h=96d7b8873ad47c1f52193e84fc6f8cfe12891aa7;hp=53bfd6b24c48ae052b73e9ab19a9c9906d57fa44;p=youtube-dl.git Merge remote-tracking branch 'sahutd/master' --- diff --git a/setup.py b/setup.py index 1f45159cd..03e7b358e 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,9 @@ from __future__ import print_function +import os.path import pkg_resources +import warnings import sys try: @@ -44,12 +46,24 @@ py2exe_params = { if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe': params = py2exe_params else: + files_spec = [ + ('etc/bash_completion.d', ['youtube-dl.bash-completion']), + ('share/doc/youtube_dl', ['README.txt']), + ('share/man/man1', ['youtube-dl.1']) + ] + root = os.path.dirname(os.path.abspath(__file__)) + data_files = [] + for dirname, files in files_spec: + resfiles = [] + for fn in files: + if not os.path.exists(fn): + warnings.warn('Skipping file %s since it is not present. Type make to build all automatically generated files.' % fn) + else: + resfiles.append(fn) + data_files.append((dirname, resfiles)) + params = { - 'data_files': [ # Installing system-wide would require sudo... - ('etc/bash_completion.d', ['youtube-dl.bash-completion']), - ('share/doc/youtube_dl', ['README.txt']), - ('share/man/man1', ['youtube-dl.1']) - ] + 'data_files': data_files, } if setuptools_available: params['entry_points'] = {'console_scripts': ['youtube-dl = youtube_dl:main']} diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 294fccb44..08cf2f934 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -40,6 +40,7 @@ __authors__ = ( 'Michael Orlitzky', 'Chris Gahan', 'Saimadhav Heblikar', + 'Mike Col', ) __license__ = 'Public Domain' diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py index 0d9eb0001..aaa92bc75 100644 --- a/youtube_dl/downloader/__init__.py +++ b/youtube_dl/downloader/__init__.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + from .common import FileDownloader from .hls import HlsFD from .http import HttpFD @@ -12,10 +14,11 @@ from ..utils import ( def get_suitable_downloader(info_dict): """Get the downloader class that can handle the info dict.""" url = info_dict['url'] + protocol = info_dict.get('protocol') if url.startswith('rtmp'): return RtmpFD - if determine_ext(url) == u'm3u8': + if (protocol == 'm3u8') or (protocol is None and determine_ext(url) == 'm3u8'): return HlsFD if url.startswith('mms') or url.startswith('rtsp'): return MplayerFD diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index ba3d4ac0e..192baa9b8 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -84,6 +84,7 @@ from .googlesearch import GoogleSearchIE from .hark import HarkIE from .hotnewhiphop import HotNewHipHopIE from .howcast import HowcastIE +from .huffpost import HuffPostIE from .hypem import HypemIE from .ign import IGNIE, OneUPIE from .imdb import ( @@ -107,6 +108,7 @@ from .keezmovies import KeezMoviesIE from .khanacademy import KhanAcademyIE from .kickstarter import KickStarterIE from .keek import KeekIE +from .la7 import LA7IE from .liveleak import LiveLeakIE from .livestream import LivestreamIE, LivestreamOriginalIE from .lynda import ( @@ -114,6 +116,7 @@ from .lynda import ( LyndaCourseIE ) from .macgamestore import MacGameStoreIE +from .malemotion import MalemotionIE from .mdr import MDRIE from .metacafe import MetacafeIE from .metacritic import MetacriticIE @@ -220,7 +223,6 @@ from .vine import VineIE from .viki import VikiIE from .vk import VKIE from .wat import WatIE -from .websurg import WeBSurgIE from .weibo import WeiboIE from .wimp import WimpIE from .wistia import WistiaIE diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 3cf742a3b..f7478d459 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -71,7 +71,7 @@ class InfoExtractor(object): * player_url SWF Player URL (used for rtmpdump). * protocol The protocol that will be used for the actual download, lower-case. - "http", "https", "rtsp", "rtmp" or so. + "http", "https", "rtsp", "rtmp", "m3u8" or so. * preference Order number of this format. If this field is present and not None, the formats get sorted by this field. @@ -466,6 +466,9 @@ class InfoExtractor(object): return RATING_TABLE.get(rating.lower(), None) def _sort_formats(self, formats): + if not formats: + raise ExtractorError(u'No video formats found') + def _formats_key(f): # TODO remove the following workaround from ..utils import determine_ext diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index e1933837d..829e5894f 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -332,10 +332,16 @@ class GenericIE(InfoExtractor): # Look for embedded Facebook player mobj = re.search( - r']+?src=(["\'])(?Phttps://www.facebook.com/video/embed.+?)\1', webpage) + r']+?src=(["\'])(?Phttps://www\.facebook\.com/video/embed.+?)\1', webpage) if mobj is not None: return self.url_result(mobj.group('url'), 'Facebook') + # Look for embedded Huffington Post player + mobj = re.search( + r']+?src=(["\'])(?Phttps?://embed\.live.huffingtonpost\.com/.+?)\1', webpage) + if mobj is not None: + return self.url_result(mobj.group('url'), 'HuffPost') + # Start with something easy: JW Player in SWFObject mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) if mobj is None: diff --git a/youtube_dl/extractor/huffpost.py b/youtube_dl/extractor/huffpost.py new file mode 100644 index 000000000..0d1ea6802 --- /dev/null +++ b/youtube_dl/extractor/huffpost.py @@ -0,0 +1,82 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + parse_duration, + unified_strdate, +) + + +class HuffPostIE(InfoExtractor): + IE_DESC = 'Huffington Post' + _VALID_URL = r'''(?x) + https?://(embed\.)?live\.huffingtonpost\.com/ + (?: + r/segment/[^/]+/| + HPLEmbedPlayer/\?segmentId= + ) + (?P[0-9a-f]+)''' + + _TEST = { + 'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677', + 'file': '52dd3e4b02a7602131000677.mp4', + 'md5': '55f5e8981c1c80a64706a44b74833de8', + 'info_dict': { + 'title': 'Legalese It! with @MikeSacksHP', + 'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more. ', + 'duration': 1549, + 'upload_date': '20140124', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + api_url = 'http://embed.live.huffingtonpost.com/api/segments/%s.json' % video_id + data = self._download_json(api_url, video_id)['data'] + + video_title = data['title'] + duration = parse_duration(data['running_time']) + upload_date = unified_strdate(data['schedule']['starts_at']) + description = data.get('description') + + thumbnails = [] + for url in data['images'].values(): + m = re.match('.*-([0-9]+x[0-9]+)\.', url) + if not m: + continue + thumbnails.append({ + 'url': url, + 'resolution': m.group(1), + }) + + formats = [{ + 'format': key, + 'format_id': key.replace('/', '.'), + 'ext': 'mp4', + 'url': url, + 'vcodec': 'none' if key.startswith('audio/') else None, + } for key, url in data['sources']['live'].items()] + if data.get('fivemin_id'): + fid = data['fivemin_id'] + fcat = str(int(fid) // 100 + 1) + furl = 'http://avideos.5min.com/2/' + fcat[-3:] + '/' + fcat + '/' + fid + '.mp4' + formats.append({ + 'format': 'fivemin', + 'url': furl, + 'preference': 1, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video_title, + 'description': description, + 'formats': formats, + 'duration': duration, + 'upload_date': upload_date, + 'thumbnails': thumbnails, + } diff --git a/youtube_dl/extractor/la7.py b/youtube_dl/extractor/la7.py new file mode 100644 index 000000000..6d61f9a90 --- /dev/null +++ b/youtube_dl/extractor/la7.py @@ -0,0 +1,62 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + parse_duration, +) + + +class LA7IE(InfoExtractor): + IE_NAME = 'la7.tv' + _VALID_URL = r'''(?x) + https?://(?:www\.)?la7\.tv/ + (?: + richplayer/\?assetid=| + \?contentId= + ) + (?P[0-9]+)''' + + _TEST = { + 'url': 'http://www.la7.tv/richplayer/?assetid=50355319', + 'file': '50355319.mp4', + 'md5': 'ec7d1f0224d20ba293ab56cf2259651f', + 'info_dict': { + 'title': 'IL DIVO', + 'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti e Flavio Bucci', + 'duration': 6254, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + xml_url = 'http://www.la7.tv/repliche/content/index.php?contentId=%s' % video_id + doc = self._download_xml(xml_url, video_id) + + video_title = doc.find('title').text + description = doc.find('description').text + duration = parse_duration(doc.find('duration').text) + thumbnail = doc.find('img').text + view_count = int(doc.find('views').text) + + prefix = doc.find('.//fqdn').text.strip().replace('auto:', 'http:') + + formats = [{ + 'format': vnode.find('quality').text, + 'tbr': int(vnode.find('quality').text), + 'url': vnode.find('fms').text.strip().replace('mp4:', prefix), + } for vnode in doc.findall('.//videos/video')] + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video_title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats, + 'view_count': view_count, + } diff --git a/youtube_dl/extractor/malemotion.py b/youtube_dl/extractor/malemotion.py new file mode 100644 index 000000000..62e99091d --- /dev/null +++ b/youtube_dl/extractor/malemotion.py @@ -0,0 +1,58 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, +) + +class MalemotionIE(InfoExtractor): + _VALID_URL = r'^(?:https?://)?malemotion\.com/video/(.+?)\.(?P.+?)(#|$)' + _TEST = { + 'url': 'http://malemotion.com/video/bien-dur.10ew', + 'file': '10ew.mp4', + 'md5': 'b3cc49f953b107e4a363cdff07d100ce', + 'info_dict': { + "title": "Bien dur", + "age_limit": 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group("id") + + webpage = self._download_webpage(url, video_id) + + self.report_extraction(video_id) + + # Extract video URL + video_url = compat_urllib_parse.unquote( + self._search_regex(r'(.*?).*?)\.tumblr\.com/((post)|(video))/(?P\d*)/(.*?)' _TEST = { - u'url': u'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', - u'file': u'54196191430.mp4', - u'md5': u'479bb068e5b16462f5176a6828829767', - u'info_dict': { - u"title": u"tatiana maslany news" + 'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', + 'file': '54196191430.mp4', + 'md5': '479bb068e5b16462f5176a6828829767', + 'info_dict': { + "title": "tatiana maslany news" } } @@ -28,18 +30,20 @@ class TumblrIE(InfoExtractor): re_video = r'src=\\x22(?Phttp://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P.*?)\\x22' % (blog, video_id) video = re.search(re_video, webpage) if video is None: - raise ExtractorError(u'Unable to extract video') + raise ExtractorError('Unable to extract video') video_url = video.group('video_url') ext = video.group('ext') - video_thumbnail = self._search_regex(r'posters(.*?)\[\\x22(?P.*?)\\x22', - webpage, u'thumbnail', fatal=False) # We pick the first poster - if video_thumbnail: video_thumbnail = video_thumbnail.replace('\\', '') + video_thumbnail = self._search_regex( + r'posters.*?\[\\x22(.*?)\\x22', + webpage, 'thumbnail', fatal=False) # We pick the first poster + if video_thumbnail: + video_thumbnail = video_thumbnail.replace('\\\\/', '/') # The only place where you can get a title, it's not complete, # but searching in other places doesn't work for all videos video_title = self._html_search_regex(r'(?P<title>.*?)(?: \| Tumblr)?', - webpage, u'title', flags=re.DOTALL) + webpage, 'title', flags=re.DOTALL) return [{'id': video_id, 'url': video_url, diff --git a/youtube_dl/extractor/websurg.py b/youtube_dl/extractor/websurg.py deleted file mode 100644 index 43953bfdd..000000000 --- a/youtube_dl/extractor/websurg.py +++ /dev/null @@ -1,59 +0,0 @@ -# coding: utf-8 - -import re - -from ..utils import ( - compat_urllib_request, - compat_urllib_parse -) - -from .common import InfoExtractor - -class WeBSurgIE(InfoExtractor): - IE_NAME = u'websurg.com' - _VALID_URL = r'http://.*?\.websurg\.com/MEDIA/\?noheader=1&doi=(.*)' - - _TEST = { - u'url': u'http://www.websurg.com/MEDIA/?noheader=1&doi=vd01en4012', - u'file': u'vd01en4012.mp4', - u'params': { - u'skip_download': True, - }, - u'skip': u'Requires login information', - } - - _LOGIN_URL = 'http://www.websurg.com/inc/login/login_div.ajax.php?login=1' - - def _real_initialize(self): - - login_form = { - 'username': self._downloader.params['username'], - 'password': self._downloader.params['password'], - 'Submit': 1 - } - - request = compat_urllib_request.Request( - self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) - request.add_header( - 'Content-Type', 'application/x-www-form-urlencoded;charset=utf-8') - compat_urllib_request.urlopen(request).info() - webpage = self._download_webpage(self._LOGIN_URL, '', 'Logging in') - - if webpage != 'OK': - self._downloader.report_error( - u'Unable to log in: bad username/password') - - def _real_extract(self, url): - video_id = re.match(self._VALID_URL, url).group(1) - - webpage = self._download_webpage(url, video_id) - - url_info = re.search(r'streamer="(.*?)" src="(.*?)"', webpage) - - return {'id': video_id, - 'title': self._og_search_title(webpage), - 'description': self._og_search_description(webpage), - 'ext' : 'mp4', - 'url' : url_info.group(1) + '/' + url_info.group(2), - 'thumbnail': self._og_search_thumbnail(webpage) - } diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 0701961a5..dd3c37007 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.01.23.4' +__version__ = '2014.01.27.1'