]> gitweb @ CieloNegro.org - youtube-dl.git/blobdiff - youtube_dl/extractor/youtube.py
[xhamster] Fix and improve
[youtube-dl.git] / youtube_dl / extractor / youtube.py
index e038c7752219e4dc9ee2fed84269595188b1b361..02c5ede745beeb97fb71fe57abab7b963db2266f 100644 (file)
@@ -34,6 +34,7 @@ from ..utils import (
     unified_strdate,
     orderedSet,
     write_json_file,
+    uppercase_escape,
 )
 
 class YoutubeBaseInfoExtractor(InfoExtractor):
@@ -136,14 +137,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                          (?:https?://|//)?                                    # http(s):// or protocol-independent URL (optional)
                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
                             (?:www\.)?deturl\.com/www\.youtube\.com/|
-                            (?:www\.)?pwnyoutube\.com|
+                            (?:www\.)?pwnyoutube\.com/|
+                            (?:www\.)?yourepeat\.com/|
                             tube\.majestyc\.net/|
                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                          (?:                                                  # the various things that can precede the ID:
                              (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                              |(?:                                             # or the v= param in all its forms
-                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
+                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                  (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                  v=
@@ -1590,11 +1592,10 @@ class YoutubeChannelIE(InfoExtractor):
             # Download all channel pages using the json-based channel_ajax query
             for pagenum in itertools.count(1):
                 url = self._MORE_PAGES_URL % (pagenum, channel_id)
-                page = self._download_webpage(url, channel_id,
-                                              u'Downloading page #%s' % pagenum)
-    
-                page = json.loads(page)
-    
+                page = self._download_json(
+                    url, channel_id, note=u'Downloading page #%s' % pagenum,
+                    transform_source=uppercase_escape)
+
                 ids_in_page = self.extract_videos_from_page(page['content_html'])
                 video_ids.extend(ids_in_page)
     
@@ -1694,7 +1695,8 @@ class YoutubeSearchIE(SearchInfoExtractor):
             api_response = data['data']
 
             if 'items' not in api_response:
-                raise ExtractorError(u'[youtube] No video results')
+                raise ExtractorError(
+                    u'[youtube] No video results', expected=True)
 
             new_ids = list(video['id'] for video in api_response['items'])
             video_ids += new_ids
@@ -1814,7 +1816,7 @@ class YoutubeTruncatedURLIE(InfoExtractor):
     IE_NAME = 'youtube:truncated_url'
     IE_DESC = False  # Do not list
     _VALID_URL = r'''(?x)
-        (?:https?://)?[^/]+/watch\?feature=[a-z_]+$|
+        (?:https?://)?[^/]+/watch\?(?:feature=[a-z_]+)?$|
         (?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$
     '''