X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fgeneric.py;h=7a14c98f9b6ef9d550606c72c330d0730ec1233e;hb=dabc127362ddfe88996e72b7e0d5cd2e4f239c98;hp=3c56daa02eccb9e08ed0d1b0840bf99cf9b8ef5c;hpb=c0d0b01f0e12ce23f7a751ef05e52dabd3e4c1e7;p=youtube-dl.git diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 3c56daa02..7a14c98f9 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -11,6 +11,7 @@ from ..utils import ( compat_urlparse, ExtractorError, + HEADRequest, smuggle_url, unescapeHTML, unified_strdate, @@ -109,21 +110,18 @@ class GenericIE(InfoExtractor): def _send_head(self, url): """Check if it is a redirect, like url shorteners, in case return the new url.""" - class HeadRequest(compat_urllib_request.Request): - def get_method(self): - return "HEAD" class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler): """ Subclass the HTTPRedirectHandler to make it use our - HeadRequest also on the redirected URL + HEADRequest also on the redirected URL """ def redirect_request(self, req, fp, code, msg, headers, newurl): if code in (301, 302, 303, 307): newurl = newurl.replace(' ', '%20') newheaders = dict((k,v) for k,v in req.headers.items() if k.lower() not in ("content-length", "content-type")) - return HeadRequest(newurl, + return HEADRequest(newurl, headers=newheaders, origin_req_host=req.get_origin_req_host(), unverifiable=True) @@ -152,7 +150,7 @@ class GenericIE(InfoExtractor): compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]: opener.add_handler(handler()) - response = opener.open(HeadRequest(url)) + response = opener.open(HEADRequest(url)) if response is None: raise ExtractorError(u'Invalid URL protocol') return response @@ -224,7 +222,7 @@ class GenericIE(InfoExtractor): self.to_screen(u'Brightcove video detected.') return self.url_result(bc_url, 'Brightcove') - # Look for embedded Vimeo player + # Look for embedded (iframe) Vimeo player mobj = re.search( r']+?src="(https?://player.vimeo.com/video/.+?)"', webpage) if mobj: @@ -232,9 +230,18 @@ class GenericIE(InfoExtractor): surl = smuggle_url(player_url, {'Referer': url}) return self.url_result(surl, 'Vimeo') + # Look for embedded (swf embed) Vimeo player + mobj = re.search( + r']+?src="(https?://(?:www\.)?vimeo.com/moogaloop.swf.+?)"', webpage) + if mobj: + return self.url_result(mobj.group(1), 'Vimeo') + # Look for embedded YouTube player - matches = re.findall( - r']+?src=(["\'])(?P(?:https?:)?//(?:www\.)?youtube\.com/embed/.+?)\1', webpage) + matches = re.findall(r'''(?x) + (?:]+?src=|embedSWF\(\s*) + (["\'])(?P(?:https?:)?//(?:www\.)?youtube\.com/ + (?:embed|v)/.+?) + \1''', webpage) if matches: urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube') for tuppl in matches] @@ -293,6 +300,11 @@ class GenericIE(InfoExtractor): if mobj is not None: return OoyalaIE._build_url_result(mobj.group(1)) + # Look for Aparat videos + mobj = re.search(r'