ExtractorError,
unsmuggle_url,
+ unescapeHTML,
)
'description': 'md5:363109c02998fee92ec02211bd8000df',
'uploader': 'National Ballet of Canada',
},
- },
- {
- # https://github.com/rg3/youtube-dl/issues/2253
- 'url': 'http://v.thestar.com/services/player/bcpid2071349530001?bckey=AQ~~,AAAAuO4KaJE~,gatFNwSKdGDmDpIYqNJ-fTHn_c4z_LH_&bctid=3101154703001',
- 'file': '3101154703001.mp4',
- 'md5': '0ba9446db037002366bab3b3eb30c88c',
- 'info_dict': {
- 'title': 'Still no power',
- 'uploader': 'thestar.com',
- 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
- }
}
]
object_str = object_str.replace('<--', '<!--')
object_str = fix_xml_ampersands(object_str)
- object_doc = xml.etree.ElementTree.fromstring(object_str)
+ object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
if fv_el is not None:
@classmethod
def _extract_brightcove_url(cls, webpage):
- """Try to extract the brightcove url from the wepbage, returns None
+ """Try to extract the brightcove url from the webpage, returns None
if it can't be found
"""
+ urls = cls._extract_brightcove_urls(webpage)
+ return urls[0] if urls else None
+
+ @classmethod
+ def _extract_brightcove_urls(cls, webpage):
+ """Return a list of all Brightcove URLs from the webpage"""
url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
if url_m:
- return url_m.group(1)
+ url = unescapeHTML(url_m.group(1))
+ # Some sites omit playerKey from this url, and we can't download without it, for example:
+ # http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
+ if 'playerKey' in url:
+ return [url]
- m_brightcove = re.search(
+ matches = re.findall(
r'''(?sx)<object
(?:
- [^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1 |
+ [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
[^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
).+?</object>''',
webpage)
- if m_brightcove is not None:
- return cls._build_brighcove_url(m_brightcove.group())
- else:
- return None
+ return [cls._build_brighcove_url(m) for m in matches]
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})