X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;ds=sidebyside;f=youtube_dl%2Fextractor%2Fsafari.py;h=10c7f39b0402b785369d8bfce6936ba1521b993f;hb=e9c8999ede2e07c64f2b592b12158d763dc933c8;hp=a602af6928d2a9d054fc8670342a6ddf7d9ef4da;hpb=c571dea9532e468ba294b933d16d9366baf825d5;p=youtube-dl.git diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py index a602af692..10c7f39b0 100644 --- a/youtube_dl/extractor/safari.py +++ b/youtube_dl/extractor/safari.py @@ -4,16 +4,15 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from .brightcove import BrightcoveIE +from .brightcove import BrightcoveLegacyIE -from ..compat import ( - compat_urllib_parse, - compat_urllib_request, -) from ..utils import ( ExtractorError, + sanitized_Request, smuggle_url, std_headers, + urlencode_postdata, + update_url_query, ) @@ -22,28 +21,30 @@ class SafariBaseIE(InfoExtractor): _SUCCESSFUL_LOGIN_REGEX = r']*>Sign Out' _NETRC_MACHINE = 'safari' - _API_BASE = 'https://www.safaribooksonline.com/api/v1/book' + _API_BASE = 'https://www.safaribooksonline.com/api/v1' _API_FORMAT = 'json' LOGGED_IN = False def _real_initialize(self): - # We only need to log in once for courses or individual videos - if not self.LOGGED_IN: - self._login() - SafariBaseIE.LOGGED_IN = True + self._login() def _login(self): + # We only need to log in once for courses or individual videos + if self.LOGGED_IN: + return + (username, password) = self._get_login_info() if username is None: - self.raise_login_required('safaribooksonline.com account is required') + return - headers = std_headers + headers = std_headers.copy() if 'Referer' not in headers: headers['Referer'] = self._LOGIN_URL + login_page_request = sanitized_Request(self._LOGIN_URL, headers=headers) login_page = self._download_webpage( - self._LOGIN_URL, None, + login_page_request, None, 'Downloading login form') csrf = self._html_search_regex( @@ -58,8 +59,8 @@ class SafariBaseIE(InfoExtractor): 'next': '', } - request = compat_urllib_request.Request( - self._LOGIN_URL, compat_urllib_parse.urlencode(login_form), headers=headers) + request = sanitized_Request( + self._LOGIN_URL, urlencode_postdata(login_form), headers=headers) login_page = self._download_webpage( request, None, 'Logging in as %s' % username) @@ -68,6 +69,8 @@ class SafariBaseIE(InfoExtractor): 'Login failed; make sure your credentials are correct and try again.', expected=True) + SafariBaseIE.LOGGED_IN = True + self.to_screen('Login successful') @@ -87,13 +90,15 @@ class SafariIE(SafariBaseIE): _TESTS = [{ 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html', - 'md5': '5b0c4cc1b3c1ba15dda7344085aa5592', + 'md5': 'dcc5a425e79f2564148652616af1f2a3', 'info_dict': { - 'id': '2842601850001', + 'id': '0_qbqx90ic', 'ext': 'mp4', - 'title': 'Introduction', + 'title': 'Introduction to Hadoop Fundamentals LiveLessons', + 'timestamp': 1437758058, + 'upload_date': '20150724', + 'uploader_id': 'stork', }, - 'skip': 'Requires safaribooksonline account credentials', }, { 'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', 'only_matching': True, @@ -108,15 +113,30 @@ class SafariIE(SafariBaseIE): course_id = mobj.group('course_id') part = mobj.group('part') - webpage = self._download_webpage( - '%s/%s/chapter-content/%s.html' % (self._API_BASE, course_id, part), - part) + webpage = self._download_webpage(url, '%s/%s' % (course_id, part)) + reference_id = self._search_regex(r'data-reference-id="([^"]+)"', webpage, 'kaltura reference id') + partner_id = self._search_regex(r'data-partner-id="([^"]+)"', webpage, 'kaltura widget id') + ui_id = self._search_regex(r'data-ui-id="([^"]+)"', webpage, 'kaltura uiconf id') + + query = { + 'wid': '_%s' % partner_id, + 'uiconf_id': ui_id, + 'flashvars[referenceId]': reference_id, + } - bc_url = BrightcoveIE._extract_brightcove_url(webpage) - if not bc_url: - raise ExtractorError('Could not extract Brightcove URL from %s' % url, expected=True) + if self.LOGGED_IN: + kaltura_session = self._download_json( + '%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id), + course_id, 'Downloading kaltura session JSON', + 'Unable to download kaltura session JSON', fatal=False) + if kaltura_session: + session = kaltura_session.get('session') + if session: + query['flashvars[ks]'] = session - return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'Brightcove') + return self.url_result(update_url_query( + 'https://cdnapisec.kaltura.com/html5/html5lib/v2.37.1/mwEmbedFrame.php', query), + 'Kaltura') class SafariCourseIE(SafariBaseIE): @@ -142,7 +162,7 @@ class SafariCourseIE(SafariBaseIE): course_id = self._match_id(url) course_json = self._download_json( - '%s/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT), + '%s/book/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT), course_id, 'Downloading course JSON') if 'chapters' not in course_json: