]> gitweb @ CieloNegro.org - youtube-dl.git/blob - youtube_dl/extractor/playfm.py
[cbs] extract smpte and vtt subtitles
[youtube-dl.git] / youtube_dl / extractor / playfm.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import compat_str
8 from ..utils import (
9     ExtractorError,
10     int_or_none,
11     parse_iso8601,
12 )
13
14
class PlayFMIE(InfoExtractor):
    """Information extractor for recordings published on play.fm.

    The recording metadata is obtained from the ``v2api.play.fm`` JSON
    endpoint, addressed by the slug portion of the page URL.
    """
    IE_NAME = 'play.fm'
    _VALID_URL = r'https?://(?:www\.)?play\.fm/(?P<slug>(?:[^/]+/)+(?P<id>[^/]+))/?(?:$|[?#])'

    _TEST = {
        'url': 'https://www.play.fm/dan-drastic/sven-tasnadi-leipzig-electronic-music-batofar-paris-fr-2014-07-12',
        'md5': 'c505f8307825a245d0c7ad1850001f22',
        'info_dict': {
            'id': '71276',
            'ext': 'mp3',
            'title': 'Sven Tasnadi - LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12',
            'description': '',
            'duration': 5627,
            'timestamp': 1406033781,
            'upload_date': '20140722',
            'uploader': 'Dan Drastic',
            'uploader_id': '71170',
            'view_count': int,
            'comment_count': int,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the recording addressed by *url*.

        The URL is matched against ``_VALID_URL``; its ``slug`` group selects
        the API resource and its ``id`` group (the last path component) is
        used as a provisional id until the API response supplies one.

        Returns a dict with the keys ``id``, ``url``, ``title``,
        ``description``, ``duration``, ``timestamp``, ``uploader``,
        ``uploader_id``, ``view_count``, ``comment_count`` and
        ``categories``.

        Raises ``ExtractorError`` (marked as expected) when the response
        carries an ``error`` object, and ``KeyError`` when the mandatory
        ``audio`` or ``title`` fields are missing from the response.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        slug = mobj.group('slug')

        recordings = self._download_json(
            'http://v2api.play.fm/recordings/slug/%s' % slug, video_id)

        error = recordings.get('error')
        if isinstance(error, dict):
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, error.get('message')),
                expected=True)

        # Mandatory fields: a missing key is allowed to raise.
        audio_url = recordings['audio']
        title = recordings['title']
        # Prefer the id reported by the API over the URL-derived one.
        video_id = compat_str(recordings.get('id') or video_id)

        # ``dict.get(key, default)`` only applies the default when the key is
        # absent; an explicit JSON null would come back as None, so normalise
        # the nested containers before dereferencing them.
        page = recordings.get('page')
        if not isinstance(page, dict):
            page = {}
        tags = recordings.get('tags')
        if not isinstance(tags, list):
            tags = []

        # Only stringify a real id; compat_str(None) would yield 'None'.
        page_id = page.get('id')
        uploader_id = compat_str(page_id) if page_id is not None else None

        categories = [
            tag['name'] for tag in tags
            if isinstance(tag, dict) and tag.get('name')]

        return {
            'id': video_id,
            'url': audio_url,
            'title': title,
            'description': recordings.get('description'),
            'duration': int_or_none(recordings.get('recordingDuration')),
            'timestamp': parse_iso8601(recordings.get('created_at')),
            'uploader': page.get('title'),
            'uploader_id': uploader_id,
            'view_count': int_or_none(recordings.get('playCount')),
            'comment_count': int_or_none(recordings.get('commentCount')),
            'categories': categories,
        }