X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyoutube.py;h=2c51a0b47f134ddf0478210b97731893f76c998c;hb=70e322695db9d67ba3ab35bf660d92be5e7e55f1;hp=3c8f7f7a2a65720e72c9efeb0fbf017202902de5;hpb=ad3bc6acd5d6724875b9fa59f9b5cdb9b904ec91;p=youtube-dl.git diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 3c8f7f7a2..2c51a0b47 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1138,6 +1138,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): # upload date upload_date = None mobj = re.search(r'(?s)id="eow-date.*?>(.*?)', video_webpage) + if mobj is None: + mobj = re.search( + r'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live) on (.*?)', + video_webpage) if mobj is not None: upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split()) upload_date = unified_strdate(upload_date) @@ -1145,7 +1149,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): m_cat_container = get_element_by_id("eow-category", video_webpage) if m_cat_container: category = self._html_search_regex( - r'(?s)(.*?)', m_cat_container, 'cateory', + r'(?s)(.*?)', m_cat_container, 'category', default=None) video_categories = None if category is None else [category] else: @@ -1410,11 +1414,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): title_span = (search_title('playlist-title') or search_title('title long-title') or search_title('title')) title = clean_html(title_span) - video_re = r'''(?x)data-video-username="(.*?)".*? + video_re = r'''(?x)data-video-username=".*?".*? href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id) - matches = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL)) - # Some of the videos may have been deleted, their username field is empty - ids = [video_id for (username, video_id) in matches if username] + ids = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL)) url_results = self._ids_to_results(ids) return self.playlist_result(url_results, playlist_id, title) @@ -1771,9 +1773,12 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): feed_entries.extend( self.url_result(video_id, 'Youtube', video_id=video_id) for video_id in ids) - if info['paging'] is None: + mobj = re.search( + r'data-uix-load-more-href="/?[^"]+paging=(?P\d+)', + feed_html) + if mobj is None: break - paging = info['paging'] + paging = mobj.group('paging') return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE) class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):