X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fdailymotion.py;h=1a41c0db181c43c9c35d24988b9fd118c248f399;hb=5b251628e9f45c89c1becb3f62c4212874eb74ea;hp=47d58330b6cfd5b1df32374e9debb11a89347554;hpb=4669393070e3ea620cc19a1490af2bd51ec0ee35;p=youtube-dl.git diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 47d58330b..1a41c0db1 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -52,6 +52,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'ext': 'mp4', 'uploader': 'IGN', 'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News', + 'upload_date': '20150306', + 'duration': 74, } }, # Vevo video @@ -85,7 +87,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - url = 'http://www.dailymotion.com/video/%s' % video_id + url = 'https://www.dailymotion.com/video/%s' % video_id # Retrieve video webpage to extract further information request = self._build_request(url) @@ -106,11 +108,11 @@ class DailymotionIE(DailymotionBaseInfoExtractor): age_limit = self._rta_search(webpage) video_upload_date = None - mobj = re.search(r'
([0-9]{2})-([0-9]{2})-([0-9]{4})
', webpage) + mobj = re.search(r'', webpage) if mobj is not None: - video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1) + video_upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3) - embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id + embed_url = 'https://www.dailymotion.com/embed/video/%s' % video_id embed_request = self._build_request(embed_url) embed_page = self._download_webpage( embed_request, video_id, 'Downloading embed page') @@ -163,6 +165,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'thumbnail': info['thumbnail_url'], 'age_limit': age_limit, 'view_count': view_count, + 'duration': info['duration'] } def _get_subtitles(self, video_id, webpage): @@ -224,7 +227,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): class DailymotionUserIE(DailymotionPlaylistIE): IE_NAME = 'dailymotion:user' - _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P[^/]+)' + _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?:(?:old/)?user/)?(?P[^/]+)$' _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s' _TESTS = [{ 'url': 'https://www.dailymotion.com/user/nqtv', @@ -238,7 +241,8 @@ class DailymotionUserIE(DailymotionPlaylistIE): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) user = mobj.group('user') - webpage = self._download_webpage(url, user) + webpage = self._download_webpage( + 'https://www.dailymotion.com/user/%s' % user, user) full_user = unescapeHTML(self._html_search_regex( r'' % re.escape(user), webpage, 'user')) @@ -249,3 +253,53 @@ class DailymotionUserIE(DailymotionPlaylistIE): 'title': full_user, 'entries': self._extract_entries(user), } + + +class DailymotionCloudIE(DailymotionBaseInfoExtractor): + _VALID_URL_PREFIX = r'http://api\.dmcloud\.net/(?:player/)?embed/' + _VALID_URL = r'%s[^/]+/(?P[^/?]+)' % _VALID_URL_PREFIX + _VALID_EMBED_URL = r'%s[^/]+/[^\'"]+' % _VALID_URL_PREFIX + + _TESTS = [{ + # From http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html + # Tested at FranceTvInfo_2 + 'url': 'http://api.dmcloud.net/embed/4e7343f894a6f677b10006b4/556e03339473995ee145930c?auth=1464865870-0-jyhsm84b-ead4c701fb750cf9367bf4447167a3db&autoplay=1', + 'only_matching': True, + }, { + # http://www.francetvinfo.fr/societe/larguez-les-amarres-le-cobaturage-se-developpe_980101.html + 'url': 'http://api.dmcloud.net/player/embed/4e7343f894a6f677b10006b4/559545469473996d31429f06?auth=1467430263-0-90tglw2l-a3a4b64ed41efe48d7fccad85b8b8fda&autoplay=1', + 'only_matching': True, + }] + + @classmethod + def _extract_dmcloud_url(self, webpage): + mobj = re.search(r']+src=[\'"](%s)[\'"]' % self._VALID_EMBED_URL, webpage) + if mobj: + return mobj.group(1) + + mobj = re.search( + r']+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=[\'"](%s)[\'"]' % self._VALID_EMBED_URL, + webpage) + if mobj: + return mobj.group(1) + + def _real_extract(self, url): + video_id = self._match_id(url) + + request = self._build_request(url) + webpage = self._download_webpage(request, video_id) + + title = self._html_search_regex(r'([^>]+)', webpage, 'title') + + video_info = self._parse_json(self._search_regex( + r'var\s+info\s*=\s*([^;]+);', webpage, 'video info'), video_id) + + # TODO: parse ios_url, which is in fact a manifest + video_url = video_info['mp4_url'] + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'thumbnail': video_info.get('thumbnail_url'), + }