X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fgeneric.py;h=764070635160d96487baa53b4370b28e8a957316;hb=2de957c7e10763dbed5729254e045c06c30562d1;hp=da016f7ee86635c114bb4328742971c8082e5491;hpb=18b4e04f1c663e0ea695f6501b860f85af9d7ca1;p=youtube-dl.git diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index da016f7ee..764070635 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -8,11 +8,13 @@ from ..utils import ( compat_urllib_error, compat_urllib_parse, compat_urllib_request, + compat_urlparse, ExtractorError, ) from .brightcove import BrightcoveIE + class GenericIE(InfoExtractor): IE_DESC = u'Generic downloader that works on some sites' _VALID_URL = r'.*' @@ -23,21 +25,10 @@ class GenericIE(InfoExtractor): u'file': u'13601338388002.mp4', u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89', u'info_dict': { - u"uploader": u"www.hodiho.fr", + u"uploader": u"www.hodiho.fr", u"title": u"R\u00e9gis plante sa Jeep" } }, - { - u'url': u'http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/', - u'file': u'2371591881001.mp4', - u'md5': u'9e80619e0a94663f0bdc849b4566af19', - u'note': u'Test Brightcove downloads and detection in GenericIE', - u'info_dict': { - u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”', - u'uploader': u'8TV', - u'description': u'md5:a950cc4285c43e44d763d036710cd9cd', - } - }, ] def report_download_webpage(self, video_id): @@ -107,6 +98,11 @@ class GenericIE(InfoExtractor): return new_url def _real_extract(self, url): + parsed_url = compat_urlparse.urlparse(url) + if not parsed_url.scheme: + self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http') + return self.url_result('http://' + url) + try: new_url = self._test_redirect(url) if new_url: @@ -124,7 +120,7 @@ class GenericIE(InfoExtractor): raise ExtractorError(u'Invalid URL: %s' % url) self.report_extraction(video_id) - # Look for BrigthCove: + # Look for BrightCove: m_brightcove = re.search(r'', webpage, re.DOTALL) if m_brightcove is not None: self.to_screen(u'Brightcove video detected.') @@ -151,7 +147,7 @@ class GenericIE(InfoExtractor): mobj = re.search(r'.*?.*?