[youtube] Fix categories and improve tags extraction

author Sergey M․ <dstftw@gmail.com>
Mon, 15 Jun 2020 20:13:39 +0000 (03:13 +0700)
committer Sergey M․ <dstftw@gmail.com>
Mon, 15 Jun 2020 20:13:39 +0000 (03:13 +0700)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index ce2212a7caa13111e1d2835fa65be371a4937847..53dccdf0bf793568d78f4291029092d6b216b7b9 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -2356,17 +2356,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          m_cat_container = self._search_regex(
              r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
              video_webpage, 'categories', default=None)
+        category = None
          if m_cat_container:
              category = self._html_search_regex(
                  r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
                  default=None)
-            video_categories = None if category is None else [category]
-        else:
-            video_categories = None
+        if not category:
+            category = try_get(
+                microformat, lambda x: x['category'], compat_str)
+        video_categories = None if category is None else [category]
  
          video_tags = [
              unescapeHTML(m.group('content'))
              for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
+        if not video_tags:
+            video_tags = try_get(video_details, lambda x: x['keywords'], list)
  
          def _extract_count(count_name):
              return str_to_int(self._search_regex(
author	Sergey M․ <dstftw@gmail.com>
	Mon, 15 Jun 2020 20:13:39 +0000 (03:13 +0700)
committer	Sergey M․ <dstftw@gmail.com>
	Mon, 15 Jun 2020 20:13:39 +0000 (03:13 +0700)