Add tests to MySpass

author Philipp Hagemeister <phihag@phihag.de>
Mon, 18 Feb 2013 17:45:09 +0000 (18:45 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Mon, 18 Feb 2013 17:45:09 +0000 (18:45 +0100)
diff --cc test/tests.json

index a6782ed4c6cd742b192be083b8f98f9a80a7c368,5c46af2c8896af2a0bc58ca8fd0f827f7ee2e0e5..a3c31ae51e0646cef3315258f9a82e7d80995273
--- 1/test/tests.json
--- 2/test/tests.json
+++ b/test/tests.json
@@@ -286,14 -286,5 +286,23 @@@
         "title": "test chars: \"'/\\ä<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de ."
       }
   
+ +  },
+ +  {
+ +    "name": "TED",
+ +    "url": "http://www.ted.com/talks/dan_dennett_on_our_consciousness.html",
+ +    "file": "102.mp4",
+ +    "md5": "7bc087e71d16f18f9b8ab9fa62a8a031",
+ +    "info_dict": {
+ +        "title": "Dan Dennett: The illusion of consciousness"
+ +    }
++  },
++  {
++    "name": "MySpass",
++    "url": "http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/",
++    "file": "11741.mp4",
++    "md5": "0b49f4844a068f8b33f4b7c88405862b",
++    "info_dict": {
++        "title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2"
++    }
     }
   ]
diff --cc youtube_dl/InfoExtractors.py

index 086aa5da3eeae202caf7fa743029b6863c969048,57d5e9d363ddb61df4c9d1722e54fa811bd871a2..fe9bd97d0070dfbbee5b0b6904d048ab1935a5b2
--- 1/youtube_dl/InfoExtractors.py
--- 2/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@@ -3967,31 -3967,64 +3967,87 @@@ class KeekIE(InfoExtractor)
                   'uploader': uploader
           }
           return [info]
-         
+ 
- -class MyspassIE(InfoExtractor):
+ +class TEDIE(InfoExtractor):
+ +    _VALID_URL=r'http://www.ted.com/talks/(?P<videoName>\w+)'
+ +    def _real_extract(self, url):
+ +        m=re.match(self._VALID_URL, url)
+ +        videoName=m.group('videoName')
+ +        webpage=self._download_webpage(url, 0, 'Downloading \"%s\" page' % videoName)
+ +        #If the url includes the language we get the title translated
+ +        title_RE=r'<h1><span id="altHeadline" >(?P<title>[\s\w:/\.\?=\+-\\\']*)</span></h1>'
+ +        title=re.search(title_RE, webpage).group('title')
+ +        info_RE=r'''<script\ type="text/javascript">var\ talkDetails\ =(.*?)
+ +                        "id":(?P<videoID>[\d]+).*?
+ +                        "mediaSlug":"(?P<mediaSlug>[\w\d]+?)"'''
+ +        info_match=re.search(info_RE,webpage,re.VERBOSE)
+ +        video_id=info_match.group('videoID')
+ +        mediaSlug=info_match.group('mediaSlug')
+ +        video_url='http://download.ted.com/talks/%s.mp4' % mediaSlug
+ +        info = {
+ +                'id':video_id,
+ +                'url':video_url,
+ +                'ext': 'mp4',
+ +                'title': title
+ +        }
+ +        return [info]
+ +
++class MySpassIE(InfoExtractor):
+     _VALID_URL = r'http://www.myspass.de/.*'
- -    IE_NAME = u'myspass'
+     
+     def _real_extract(self, url):
+         META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s'
- -        
++
+         # video id is the last path element of the URL
+         # usually there is a trailing slash, so also try the second but last
+         url_path = compat_urllib_parse_urlparse(url).path
+         url_parent_path, video_id = os.path.split(url_path)
+         if not video_id:
+             _, video_id = os.path.split(url_parent_path)
+         
+         # get metadata
+         metadata_url = META_DATA_URL_TEMPLATE % video_id
+         metadata_text = self._download_webpage(metadata_url, video_id)
+         metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))
+         
+         # extract values from metadata
+         url_flv_el = metadata.find('url_flv')
+         if url_flv_el is None:
+             self._downloader.trouble(u'ERROR: unable to extract download url')
+             return
+         video_url = url_flv_el.text
+         extension = os.path.splitext(video_url)[1][1:]
+         title_el = metadata.find('title')
+         if title_el is None:
+             self._downloader.trouble(u'ERROR: unable to extract title')
+             return
+         title = title_el.text
+         format_id_el = metadata.find('format_id')
+         if format_id_el is None:
+             format = ext
+         else:
+             format = format_id_el.text
+         description_el = metadata.find('description')
+         if description_el is not None:
+             description = description_el.text
+         else:
+             description = None
+         imagePreview_el = metadata.find('imagePreview')
+         if imagePreview_el is not None:
+             thumbnail = imagePreview_el.text
+         else:
+             thumbnail = None
+         info = {
+             'id': video_id,
+             'url': video_url,
+             'title': title,
+             'ext': extension,
+             'format': format,
+             'thumbnail': thumbnail,
+             'description': description
+         }
+         return [info]
+ 
   def gen_extractors():
       """ Return a list of an instance of every supported extractor.
       The order does matter; the first extractor matched is the one handling the URL.
@@@ -4039,7 -4072,7 +4095,8 @@@
           RBMARadioIE(),
           EightTracksIE(),
           KeekIE(),
- -        MyspassIE(),
+ +        TEDIE(),
++        MySpassIE(),
           GenericIE()
       ]
author	Philipp Hagemeister <phihag@phihag.de>
	Mon, 18 Feb 2013 17:45:09 +0000 (18:45 +0100)
committer	Philipp Hagemeister <phihag@phihag.de>
	Mon, 18 Feb 2013 17:45:09 +0000 (18:45 +0100)
		1	2
test/tests.json	patch \|	diff1 \|	diff2 \|	blob \| history
youtube_dl/InfoExtractors.py	patch \|	diff1 \|	diff2 \|	blob \| history