X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fpornhub.py;h=20976c101d97f53a58b3599ded30f7bfb1d4bcd5;hb=d9cb92c84058bce2c222b7a634608d7a16addcb2;hp=6d57e1d35eb567b26f65dea8cc74c653f955c08f;hpb=6c3760292c9d20f891395111bea97f401270d86b;p=youtube-dl.git diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 6d57e1d35..20976c101 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -25,7 +25,15 @@ from ..aes import ( class PornHubIE(InfoExtractor): - _VALID_URL = r'https?://(?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)(?P[0-9a-z]+)' + IE_DESC = 'PornHub and Thumbzilla' + _VALID_URL = r'''(?x) + https?:// + (?: + (?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)| + (?:www\.)?thumbzilla\.com/video/ + ) + (?P[0-9a-z]+) + ''' _TESTS = [{ 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', 'md5': '1e19b41231a02eba417839222ac9d58e', @@ -63,8 +71,24 @@ class PornHubIE(InfoExtractor): 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d', 'only_matching': True, }, { + # removed at the request of cam4.com 'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862', 'only_matching': True, + }, { + # removed at the request of the copyright owner + 'url': 'http://www.pornhub.com/view_video.php?viewkey=788152859', + 'only_matching': True, + }, { + # removed by uploader + 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph572716d15a111', + 'only_matching': True, + }, { + # private video + 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph56fd731fce6b7', + 'only_matching': True, + }, { + 'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex', + 'only_matching': True, }] @classmethod @@ -87,8 +111,8 @@ class PornHubIE(InfoExtractor): webpage = self._download_webpage(req, video_id) error_msg = self._html_search_regex( - r'(?s)
(.*?)
', - webpage, 'error message', default=None) + r'(?s)]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P.+?)', + webpage, 'error message', default=None, group='error') if error_msg: error_msg = re.sub(r'\s+', ' ', error_msg) raise ExtractorError(