[tvp] support for tvp.info vue pages

This commit is contained in:
Lauren Liberda 2021-04-09 17:07:59 +02:00 committed by Dominika
parent e293203eba
commit 8a84a62b70

View file

@ -52,7 +52,7 @@ class TVPIE(InfoExtractor):
'age_limit': 12,
},
}, {
# TVPlayer 2 in client-side rendered website (regional)
# TVPlayer 2 in client-side rendered website (regional; window.__newsData)
'url': 'https://warszawa.tvp.pl/25804446/studio-yayo',
'md5': '883c409691c6610bdc8f464fab45a9a9',
'info_dict': {
@ -62,6 +62,14 @@ class TVPIE(InfoExtractor):
'upload_date': '20160616',
'timestamp': 1466075700,
}
}, {
# TVPlayer 2 in client-side rendered website (tvp.info; window.__videoData)
'url': 'https://www.tvp.info/52880236/09042021-0800',
'info_dict': {
'id': '52880236',
'ext': 'mp4',
'title': '09.04.2021, 08:00',
},
}, {
# client-side rendered (regional) program (playlist) page
'url': 'https://opole.tvp.pl/9660819/rozmowa-dnia',
@ -146,13 +154,10 @@ class TVPIE(InfoExtractor):
'thumbnails': thumbnails,
}
def _real_extract(self, url):
page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id)
if '//s.tvp.pl/files/portale-v4/regiony-tvp-pl' in webpage:
# vue client-side rendered sites (all regional pages)
def _handle_vuejs_page(self, url, webpage, page_id):
# vue client-side rendered sites (all regional pages + tvp.info)
video_data = self._search_regex([
r'window\.__newsData\s*=\s*({(?:.|\s)+?});',
r'window\.__(?:news|video)Data\s*=\s*({(?:.|\s)+?});',
], webpage, 'video data', default=None)
if video_data:
return self._extract_vue_video(
@ -191,8 +196,20 @@ class TVPIE(InfoExtractor):
'entries': entries,
}
raise ExtractorError('Could not extract video/website data')
else:
# classic server-site rendered sites
def _real_extract(self, url):
page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id)
# regional pages are always vue.js
if '//s.tvp.pl/files/portale-v4/regiony-tvp-pl' in webpage:
return self._handle_vuejs_page(url, webpage, page_id)
# some tvp.info pages are vue.js, some are not
if 'window.__videoData' in webpage or 'window.__websiteData' in webpage:
return self._handle_vuejs_page(url, webpage, page_id)
# classic server-side rendered sites
video_id = self._search_regex([
r'<iframe[^>]+src="[^"]*?embed\.php\?(?:[^&]+&)*ID=(\d+)',
r'<iframe[^>]+src="[^"]*?object_id=(\d+)',