X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Ffacebook.py;h=cdb093262a00387386a0a3243554f46386276733;hb=998895dffac2170b7d49b0478561db05cc0730ca;hp=49231d3e9e74a82e96121424ed8043607dab8b54;hpb=5080cbf9fd26441256390f169d24ab3259c740f7;p=youtube-dl.git diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 49231d3e9..cdb093262 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -38,7 +38,8 @@ class FacebookIE(InfoExtractor): story\.php )\?(?:.*?)(?:v|video_id|story_fbid)=| [^/]+/videos/(?:[^/]+/)?| - [^/]+/posts/ + [^/]+/posts/| + groups/[^/]+/permalink/ )| facebook: ) @@ -123,8 +124,26 @@ class FacebookIE(InfoExtractor): }, { 'url': 'facebook:544765982287235', 'only_matching': True, + }, { + 'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/', + 'only_matching': True, }] + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r']+?src=(["\'])(?Phttps://www\.facebook\.com/video/embed.+?)\1', webpage) + if mobj is not None: + return mobj.group('url') + + # Facebook API embed + # see https://developers.facebook.com/docs/plugins/embedded-video-player + mobj = re.search(r'''(?x)]+ + class=(?P[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+ + data-href=(?P[\'"])(?P(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''', webpage) + if mobj is not None: + return mobj.group('url') + def _login(self): (useremail, password) = self._get_login_info() if useremail is None: @@ -200,12 +219,25 @@ class FacebookIE(InfoExtractor): BEFORE = '{swf.addParam(param[0], param[1]);});' AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});' - m = re.search(re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER), webpage) - if m: - swf_params = m.group(1).replace('\\\\', '\\').replace('\\"', '"') + PATTERN = re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER) + + for m in re.findall(PATTERN, webpage): + swf_params = m.replace('\\\\', '\\').replace('\\"', '"') data = dict(json.loads(swf_params)) params_raw = compat_urllib_parse_unquote(data['params']) - video_data = json.loads(params_raw)['video_data'] + video_data_candidate = json.loads(params_raw)['video_data'] + for _, f in video_data_candidate.items(): + if not f: + continue + if isinstance(f, dict): + f = [f] + if not isinstance(f, list): + continue + if f[0].get('video_id') == video_id: + video_data = video_data_candidate + break + if video_data: + break def video_data_list2dict(video_data): ret = {} @@ -235,6 +267,8 @@ class FacebookIE(InfoExtractor): formats = [] for format_id, f in video_data.items(): + if f and isinstance(f, dict): + f = [f] if not f or not isinstance(f, list): continue for quality in ('sd', 'hd'): @@ -290,8 +324,8 @@ class FacebookIE(InfoExtractor): if '/posts/' in url: entries = [ - self.url_result('facebook:%s' % video_id, FacebookIE.ie_key()) - for video_id in self._parse_json( + self.url_result('facebook:%s' % vid, FacebookIE.ie_key()) + for vid in self._parse_json( self._search_regex( r'(["\'])video_ids\1\s*:\s*(?P\[.+?\])', webpage, 'video ids', group='ids'),