From: Sergey M․ <dstftw@gmail.com>
Date: Tue, 22 Jan 2019 20:51:29 +0000 (+0700)
Subject: [pornhub] Bypass scrape detection (closes #5930)
X-Git-Url: https://git.cielonegro.org/gitweb.cgi?a=commitdiff_plain;h=278d061a0c5eae20963c0a6df4b9b13fd1537186;p=youtube-dl.git

[pornhub] Bypass scrape detection (closes #5930)
---

diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index e377de196..f5f3e6593 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -10,7 +10,9 @@ from .common import InfoExtractor
 from ..compat import (
     compat_HTTPError,
     compat_str,
+    compat_urllib_request,
 )
+from .openload import PhantomJSwrapper
 from ..utils import (
     ExtractorError,
     int_or_none,
@@ -126,6 +128,49 @@ class PornHubIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    def _download_webpage_handle(self, *args, **kwargs):
+        """Download a page, getting past the scrape-detection challenge.
+
+        Accepts the same arguments as the parent implementation and forwards
+        them unchanged; the first positional argument must be the URL or
+        request object.  When the response matches the JavaScript challenge
+        page (it sets an ``RNKEY`` cookie and reloads itself), the page is
+        handed to PhantomJS and the original request is then repeated.
+
+        Returns the parent's result: a ``(webpage, urlh)`` tuple, or a falsy
+        value when the parent reports a non-fatal download failure.
+        """
+        def dl(*args, **kwargs):
+            return super(PornHubIE, self)._download_webpage_handle(*args, **kwargs)
+
+        ret = dl(*args, **kwargs)
+
+        # With fatal=False the parent returns a falsy value instead of a
+        # tuple; pass it through rather than crashing on the unpacking below.
+        if not ret:
+            return ret
+
+        webpage, urlh = ret
+
+        if any(re.search(p, webpage) for p in (
+                r'<body\b[^>]+\bonload=["\']go\(\)',
+                r'document\.cookie\s*=\s*["\']RNKEY=',
+                r'document\.location\.reload\(true\)')):
+            url_or_request = args[0]
+            url = (url_or_request.get_full_url()
+                   if isinstance(url_or_request, compat_urllib_request.Request)
+                   else url_or_request)
+            phantom = PhantomJSwrapper(self, required_version='2.0')
+            # Let PhantomJS process the challenge page before retrying
+            # (presumably this is what obtains the RNKEY cookie - confirm
+            # against PhantomJSwrapper.get).
+            phantom.get(url, html=webpage)
+            # The retry may itself fail non-fatally, so return its result
+            # as-is instead of unpacking it.
+            return dl(*args, **kwargs)
+
+        return webpage, urlh
+
     @staticmethod
     def _extract_urls(webpage):
         return re.findall(