ExtractorError,
unsmuggle_url,
+ unescapeHTML,
)
class BrightcoveIE(InfoExtractor):
_VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
_FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
- _PLAYLIST_URL_TEMPLATE = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s'
_TESTS = [
{
'description': 'md5:363109c02998fee92ec02211bd8000df',
'uploader': 'National Ballet of Canada',
},
- },
+ }
]
@classmethod
object_str = object_str.replace('<--', '<!--')
object_str = fix_xml_ampersands(object_str)
- object_doc = xml.etree.ElementTree.fromstring(object_str)
+ object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
if fv_el is not None:
@classmethod
def _extract_brightcove_url(cls, webpage):
- """Try to extract the brightcove url from the wepbage, returns None
+ """Try to extract the brightcove url from the webpage, returns None
if it can't be found
"""
- m_brightcove = re.search(
+ # Delegate to _extract_brightcove_urls and keep only the first result;
+ # an empty list is mapped to None.
+ urls = cls._extract_brightcove_urls(webpage)
+ return urls[0] if urls else None
+
+ @classmethod
+ def _extract_brightcove_urls(cls, webpage):
+ """Return a list of all Brightcove URLs from the webpage
+
+ If an og:video meta URL containing 'playerKey' is found, it is returned
+ as a single-element list. Otherwise one URL is built for every matching
+ <object> embed; the list is empty when nothing matches.
+ """
+
+ # First try the og:video <meta> tag.
+ url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
+ if url_m:
+ url = unescapeHTML(url_m.group(1))
+ # Some sites don't add the playerKey, we can't download with such a url, for example:
+ # http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
+ if 'playerKey' in url:
+ return [url]
+
+ # Fall back to scanning for <object> embeds. The pattern deliberately has
+ # no capturing group, so re.findall returns the whole matched <object>
+ # markup for each hit rather than the contents of a group.
+ matches = re.findall(
r'''(?sx)<object
(?:
- [^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1 |
+ [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
[^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
).+?</object>''',
webpage)
- if m_brightcove is not None:
- return cls._build_brighcove_url(m_brightcove.group())
- else:
- return None
+ return [cls._build_brighcove_url(m) for m in matches]
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
return self._extract_video_info(video_info)
def _get_playlist_info(self, player_key):
- playlist_info = self._download_webpage(self._PLAYLIST_URL_TEMPLATE % player_key,
- player_key, 'Downloading playlist information')
+ info_url = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' % player_key
+ playlist_info = self._download_webpage(
+ info_url, player_key, 'Downloading playlist information')
json_data = json.loads(playlist_info)
if 'videoList' not in json_data:
def _extract_video_info(self, video_info):
info = {
'id': compat_str(video_info['id']),
- 'title': video_info['displayName'],
+ 'title': video_info['displayName'].strip(),
'description': video_info.get('shortDescription'),
'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
'uploader': video_info.get('publisherName'),