2 from __future__ import unicode_literals
7 from .common import InfoExtractor
14 class FranceCultureIE(InfoExtractor):
15 _VALID_URL = r'(?P<baseurl>http://(?:www\.)?franceculture\.fr/)player/reecouter\?play=(?P<id>[0-9]+)'
17 'url': 'http://www.franceculture.fr/player/reecouter?play=4795174',
21 'title': 'Rendez-vous au pays des geeks',
23 'uploader': 'Colette Fellous',
24 'upload_date': '20140301',
26 'thumbnail': r're:^http://www\.franceculture\.fr/.*/images/player/Carnet-nomade\.jpg$',
27 'description': 'Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche des « geeks », une enquête menée aux Etats-Unis dans la S ...',
31 def _real_extract(self, url):
32 mobj = re.match(self._VALID_URL, url)
33 video_id = mobj.group('id')
34 baseurl = mobj.group('baseurl')
36 webpage = self._download_webpage(url, video_id)
37 params_code = self._search_regex(
38 r"<param name='movie' value='/sites/all/modules/rf/rf_player/swf/loader.swf\?([^']+)' />",
39 webpage, 'parameter code')
40 params = compat_parse_qs(params_code)
41 video_url = compat_urlparse.urljoin(baseurl, params['urlAOD'][0])
43 title = self._html_search_regex(
44 r'<h1 class="title[^"]+">(.+?)</h1>', webpage, 'title')
45 uploader = self._html_search_regex(
46 r'(?s)<div id="emission".*?<span class="author">(.*?)</span>',
47 webpage, 'uploader', fatal=False)
48 thumbnail_part = self._html_search_regex(
49 r'(?s)<div id="emission".*?<img src="([^"]+)"', webpage,
50 'thumbnail', fatal=False)
51 if thumbnail_part is None:
54 thumbnail = compat_urlparse.urljoin(baseurl, thumbnail_part)
55 description = self._html_search_regex(
56 r'(?s)<p class="desc">(.*?)</p>', webpage, 'description')
58 info = json.loads(params['infoData'][0])[0]
59 duration = info.get('media_length')
60 upload_date_candidate = info.get('media_section5')
63 if (upload_date_candidate is not None and
64 re.match(r'[0-9]{8}$', upload_date_candidate))
70 'vcodec': 'none' if video_url.lower().endswith('.mp3') else None,
73 'upload_date': upload_date,
75 'thumbnail': thumbnail,
76 'description': description,