[24video] Fix uploader extraction

[youtube-dl.git] / youtube_dl / extractor / nrk.py
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py

index c2a8202dd181f3c747e5d26633cb40c7ad3fa764..cc70c295014f95fcb7e74f2f009889b5ca135663 100644 (file)
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
  import re
  
  from .common import InfoExtractor
-from ..compat import compat_str
  from ..utils import (
      ExtractorError,
      float_or_none,
@@ -77,17 +76,25 @@ class NRKIE(InfoExtractor):
  
  
  class NRKPlaylistIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?nrk\.no/(?:[^/]+/)*(?P<id>[^/]+)'
+    _VALID_URL = r'http://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P<id>[^/]+)'
  
-    _TEST = {
+    _TESTS = [{
          'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
          'info_dict': {
              'id': 'gjenopplev-den-historiske-solformorkelsen-1.12270763',
              'title': 'Gjenopplev den historiske solformørkelsen',
              'description': 'md5:c2df8ea3bac5654a26fc2834a542feed',
          },
-        'playlist_mincount': 2,
-    }
+        'playlist_count': 2,
+    }, {
+        'url': 'http://www.nrk.no/kultur/bok/rivertonprisen-til-karin-fossum-1.12266449',
+        'info_dict': {
+            'id': 'rivertonprisen-til-karin-fossum-1.12266449',
+            'title': 'Rivertonprisen til Karin Fossum',
+            'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
+        },
+        'playlist_count': 5,
+    }]
  
      def _real_extract(self, url):
          playlist_id = self._match_id(url)
@@ -97,7 +104,8 @@ class NRKPlaylistIE(InfoExtractor):
          entries = [
              self.url_result('nrk:%s' % video_id, 'NRK')
              for video_id in re.findall(
-                r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="(\d+)"', webpage)
+                r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"',
+                webpage)
          ]
  
          playlist_title = self._og_search_title(webpage)
@@ -191,20 +199,10 @@ class NRKTVIE(InfoExtractor):
          url = "%s%s" % (baseurl, subtitlesurl)
          self._debug_print('%s: Subtitle url: %s' % (video_id, url))
          captions = self._download_xml(
-            url, video_id, 'Downloading subtitles',
-            transform_source=lambda s: s.replace(r'<br />', '\r\n'))
+            url, video_id, 'Downloading subtitles')
          lang = captions.get('lang', 'no')
-        ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}'))
-        srt = ''
-        for pos, p in enumerate(ps):
-            begin = parse_duration(p.get('begin'))
-            duration = parse_duration(p.get('dur'))
-            starttime = self._subtitles_timecode(begin)
-            endtime = self._subtitles_timecode(begin + duration)
-            srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text)
          return {lang: [
              {'ext': 'ttml', 'url': url},
-            {'ext': 'srt', 'data': srt},
          ]}
  
      def _extract_f4m(self, manifest_url, video_id):