X-Git-Url: http://git.cielonegro.org/gitweb.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyoutube.py;h=2c51a0b47f134ddf0478210b97731893f76c998c;hb=70e322695db9d67ba3ab35bf660d92be5e7e55f1;hp=1f3aa4322c274e226b6e5945874e8627724a79cf;hpb=e399853d0c5784257ffcb6fba147d0b47d3f9bb6;p=youtube-dl.git diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 1f3aa4322..2c51a0b47 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -242,7 +242,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): u"uploader": u"Philipp Hagemeister", u"uploader_id": u"phihag", u"upload_date": u"20121002", - u"description": u"test chars: \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ." + u"description": u"test chars: \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .", + u"categories": [u'Science & Technology'], } }, { @@ -1136,11 +1137,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): # upload date upload_date = None - mobj = re.search(r'id="eow-date.*?>(.*?)', video_webpage, re.DOTALL) + mobj = re.search(r'(?s)id="eow-date.*?>(.*?)', video_webpage) + if mobj is None: + mobj = re.search( + r'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live) on (.*?)', + video_webpage) if mobj is not None: upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split()) upload_date = unified_strdate(upload_date) + m_cat_container = get_element_by_id("eow-category", video_webpage) + if m_cat_container: + category = self._html_search_regex( + r'(?s)(.*?)', m_cat_container, 'category', + default=None) + video_categories = None if category is None else [category] + else: + video_categories = None + # description video_description = get_element_by_id("eow-description", video_webpage) if video_description: @@ -1347,6 +1361,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'title': video_title, 'thumbnail': video_thumbnail, 'description': video_description, + 'categories': video_categories, 'subtitles': video_subtitles, 'duration': video_duration, 'age_limit': 18 if age_gate else 0, @@ -1399,11 +1414,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): title_span = (search_title('playlist-title') or search_title('title long-title') or search_title('title')) title = clean_html(title_span) - video_re = r'''(?x)data-video-username="(.*?)".*? + video_re = r'''(?x)data-video-username=".*?".*? href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id) - matches = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL)) - # Some of the videos may have been deleted, their username field is empty - ids = [video_id for (username, video_id) in matches if username] + ids = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL)) url_results = self._ids_to_results(ids) return self.playlist_result(url_results, playlist_id, title) @@ -1760,9 +1773,12 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): feed_entries.extend( self.url_result(video_id, 'Youtube', video_id=video_id) for video_id in ids) - if info['paging'] is None: + mobj = re.search( + r'data-uix-load-more-href="/?[^"]+paging=(?P\d+)', + feed_html) + if mobj is None: break - paging = info['paging'] + paging = mobj.group('paging') return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE) class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):