gitweb @ CieloNegro.org - youtube-dl.git/commitdiff
[movingimage] Adapt to the new domain name and fix extraction
author: Yen Chi Hsuan <yan12125@gmail.com>
Thu, 1 Sep 2016 08:58:16 +0000 (16:58 +0800)
committer: Yen Chi Hsuan <yan12125@gmail.com>
Thu, 1 Sep 2016 08:58:16 +0000 (16:58 +0800)
Closes #10466

ChangeLog
youtube_dl/extractor/extractors.py
youtube_dl/extractor/movingimage.py [moved from youtube_dl/extractor/ssa.py with 65% similarity]

index 0f8076d96d0794d81cf71784f905647b45500280..877e8112e4462a9cc643fa86e71b408d847f023b 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+version <unreleased>
+
+Extractors
+* [movingimage] Fix for the new site name (#10466)
+
+
 version 2016.08.31
 
 Extractors
index 21efa96b2364d8a79646802843267852a802481b..8d0688f5398f3e438a36787279d33604c0d9ccf8 100644 (file)
@@ -486,6 +486,7 @@ from .motherless import MotherlessIE
 from .motorsport import MotorsportIE
 from .movieclips import MovieClipsIE
 from .moviezine import MoviezineIE
 from .motorsport import MotorsportIE
 from .movieclips import MovieClipsIE
 from .moviezine import MoviezineIE
+from .movingimage import MovingImageIE
 from .msn import MSNIE
 from .mtv import (
     MTVIE,
 from .msn import MSNIE
 from .mtv import (
     MTVIE,
@@ -806,7 +807,6 @@ from .srgssr import (
     SRGSSRPlayIE,
 )
 from .srmediathek import SRMediathekIE
     SRGSSRPlayIE,
 )
 from .srmediathek import SRMediathekIE
-from .ssa import SSAIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .steam import SteamIE
 from .streamable import StreamableIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .steam import SteamIE
 from .streamable import StreamableIE
similarity index 65%
rename from youtube_dl/extractor/ssa.py
rename to youtube_dl/extractor/movingimage.py
index 54d1843f2200d0cef7fa2e7b192f673d316c5f18..bb789c32edb45e78e9806faaae169af09826135e 100644 (file)
@@ -7,22 +7,19 @@ from ..utils import (
 )
 
 
 )
 
 
-class SSAIE(InfoExtractor):
-    _VALID_URL = r'https?://ssa\.nls\.uk/film/(?P<id>\d+)'
+class MovingImageIE(InfoExtractor):
+    _VALID_URL = r'https?://movingimage\.nls\.uk/film/(?P<id>\d+)'
     _TEST = {
     _TEST = {
-        'url': 'http://ssa.nls.uk/film/3561',
+        'url': 'http://movingimage.nls.uk/film/3561',
+        'md5': '4caa05c2b38453e6f862197571a7be2f',
         'info_dict': {
             'id': '3561',
         'info_dict': {
             'id': '3561',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'SHETLAND WOOL',
             'description': 'md5:c5afca6871ad59b4271e7704fe50ab04',
             'duration': 900,
             'thumbnail': 're:^https?://.*\.jpg$',
         },
             'title': 'SHETLAND WOOL',
             'description': 'md5:c5afca6871ad59b4271e7704fe50ab04',
             'duration': 900,
             'thumbnail': 're:^https?://.*\.jpg$',
         },
-        'params': {
-            # rtmp download
-            'skip_download': True,
-        },
     }
 
     def _real_extract(self, url):
     }
 
     def _real_extract(self, url):
@@ -30,10 +27,9 @@ class SSAIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
 
         webpage = self._download_webpage(url, video_id)
 
-        streamer = self._search_regex(
-            r"'streamer'\s*,\S*'(rtmp[^']+)'", webpage, 'streamer')
-        play_path = self._search_regex(
-            r"'file'\s*,\s*'([^']+)'", webpage, 'file').rpartition('.')[0]
+        formats = self._extract_m3u8_formats(
+            self._html_search_regex(r'file\s*:\s*"([^"]+)"', webpage, 'm3u8 manifest URL'),
+            video_id, ext='mp4', entry_protocol='m3u8_native')
 
         def search_field(field_name, fatal=False):
             return self._search_regex(
 
         def search_field(field_name, fatal=False):
             return self._search_regex(
@@ -44,13 +40,11 @@ class SSAIE(InfoExtractor):
         description = unescapeHTML(search_field('Description'))
         duration = parse_duration(search_field('Running time'))
         thumbnail = self._search_regex(
         description = unescapeHTML(search_field('Description'))
         duration = parse_duration(search_field('Running time'))
         thumbnail = self._search_regex(
-            r"'image'\s*,\s*'([^']+)'", webpage, 'thumbnails', fatal=False)
+            r"image\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
 
         return {
             'id': video_id,
 
         return {
             'id': video_id,
-            'url': streamer,
-            'play_path': play_path,
-            'ext': 'flv',
+            'formats': formats,
             'title': title,
             'description': description,
             'duration': duration,
             'title': title,
             'description': description,
             'duration': duration,