[medialaan] add support DPG Media MyChannels based websites

closes #14871
closes #15597
closes #16106
closes #16489
This commit is contained in:
Remita Amine 2021-02-26 16:20:26 +01:00 committed by Laura Liberda
parent 8d47c811f1
commit 6a18fcbd8a
4 changed files with 156 additions and 229 deletions

View file

@ -1517,6 +1517,7 @@ from .vrv import (
VRVSeriesIE, VRVSeriesIE,
) )
from .vshare import VShareIE from .vshare import VShareIE
from .vtm import VTMIE
from .medialaan import MedialaanIE from .medialaan import MedialaanIE
from .vube import VubeIE from .vube import VubeIE
from .vuclip import VuClipIE from .vuclip import VuClipIE

View file

@ -134,6 +134,7 @@ from .xnews import XLinkIE
from .libsyn import LibsynIE from .libsyn import LibsynIE
from .pulsembed import PulsEmbedIE from .pulsembed import PulsEmbedIE
from .arcpublishing import ArcPublishingIE from .arcpublishing import ArcPublishingIE
from .medialaan import MedialaanIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -2276,6 +2277,20 @@ class GenericIE(InfoExtractor):
'duration': 1581, 'duration': 1581,
}, },
}, },
{
# MyChannels SDK embed
# https://www.24kitchen.nl/populair/deskundige-dit-waarom-sommigen-gevoelig-zijn-voor-voedselallergieen
'url': 'https://www.demorgen.be/nieuws/burgemeester-rotterdam-richt-zich-in-videoboodschap-tot-relschoppers-voelt-het-goed~b0bcfd741/',
'md5': '90c0699c37006ef18e198c032d81739c',
'info_dict': {
'id': '194165',
'ext': 'mp4',
'title': 'Burgemeester Aboutaleb spreekt relschoppers toe',
'timestamp': 1611740340,
'upload_date': '20210127',
'duration': 159,
},
},
] ]
def report_following_redirect(self, new_url): def report_following_redirect(self, new_url):
@ -2515,6 +2530,9 @@ class GenericIE(InfoExtractor):
webpage = self._webpage_read_content( webpage = self._webpage_read_content(
full_response, url, video_id, prefix=first_bytes) full_response, url, video_id, prefix=first_bytes)
if '<title>DPG Media Privacy Gate</title>' in webpage:
webpage = self._download_webpage(url, video_id)
self.report_extraction(video_id) self.report_extraction(video_id)
# Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest? # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
@ -2692,6 +2710,7 @@ class GenericIE(InfoExtractor):
LibsynIE, LibsynIE,
VHXEmbedIE, VHXEmbedIE,
ArcPublishingIE, ArcPublishingIE,
MedialaanIE,
): ):
try: try:
ie_key = embie.ie_key() ie_key = embie.ie_key()

View file

@ -2,268 +2,113 @@ from __future__ import unicode_literals
import re import re
from .gigya import GigyaBaseIE from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
extract_attributes,
int_or_none, int_or_none,
parse_duration, mimetype2ext,
try_get, parse_iso8601,
unified_timestamp,
) )
class MedialaanIE(GigyaBaseIE): class MedialaanIE(InfoExtractor):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?:www\.|nieuws\.)?
(?: (?:
(?P<site_id>vtm|q2|vtmkzoom)\.be/ (?:embed\.)?mychannels.video/embed/|
(?: embed\.mychannels\.video/(?:s(?:dk|cript)/)?production/|
video(?:/[^/]+/id/|/?\?.*?\baid=)| (?:www\.)?(?:
(?:[^/]+/)* (?:
) 7sur7|
demorgen|
hln|
joe|
qmusic
)\.be|
(?:
[abe]d|
bndestem|
destentor|
gelderlander|
pzc|
tubantia|
volkskrant
)\.nl
)/video/(?:[^/]+/)*[^/?&#]+~p
) )
(?P<id>[^/?#&]+) (?P<id>\d+)
''' '''
_NETRC_MACHINE = 'medialaan'
_APIKEY = '3_HZ0FtkMW_gOyKlqQzW5_0FHRC7Nd5XpXJZcDdXY4pk5eES2ZWmejRW5egwVm4ug-'
_SITE_TO_APP_ID = {
'vtm': 'vtm_watch',
'q2': 'q2',
'vtmkzoom': 'vtmkzoom',
}
_TESTS = [{ _TESTS = [{
# vod 'url': 'https://www.bndestem.nl/video/de-terugkeer-van-ally-de-aap-en-wie-vertrekt-er-nog-bij-nac~p193993',
'url': 'http://vtm.be/video/volledige-afleveringen/id/vtm_20170219_VM0678361_vtmwatch',
'info_dict': { 'info_dict': {
'id': 'vtm_20170219_VM0678361_vtmwatch', 'id': '193993',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Allemaal Chris afl. 6', 'title': 'De terugkeer van Ally de Aap en wie vertrekt er nog bij NAC?',
'description': 'md5:4be86427521e7b07e0adb0c9c554ddb2', 'timestamp': 1611663540,
'timestamp': 1487533280, 'upload_date': '20210126',
'upload_date': '20170219', 'duration': 238,
'duration': 2562,
'series': 'Allemaal Chris',
'season': 'Allemaal Chris',
'season_number': 1,
'season_id': '256936078124527',
'episode': 'Allemaal Chris afl. 6',
'episode_number': 6,
'episode_id': '256936078591527',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Requires account credentials',
}, { }, {
# clip 'url': 'https://www.gelderlander.nl/video/kanalen/degelderlander~c320/series/snel-nieuws~s984/noodbevel-in-doetinchem-politie-stuurt-mensen-centrum-uit~p194093',
'url': 'http://vtm.be/video?aid=168332',
'info_dict': {
'id': '168332',
'ext': 'mp4',
'title': '"Veronique liegt!"',
'description': 'md5:1385e2b743923afe54ba4adc38476155',
'timestamp': 1489002029,
'upload_date': '20170308',
'duration': 96,
},
}, {
# vod
'url': 'http://vtm.be/video/volledige-afleveringen/id/257107153551000',
'only_matching': True, 'only_matching': True,
}, { }, {
# vod 'url': 'https://embed.mychannels.video/sdk/production/193993?options=TFTFF_default',
'url': 'http://vtm.be/video?aid=163157',
'only_matching': True, 'only_matching': True,
}, { }, {
# vod 'url': 'https://embed.mychannels.video/script/production/193993',
'url': 'http://www.q2.be/video/volledige-afleveringen/id/2be_20170301_VM0684442_q2',
'only_matching': True, 'only_matching': True,
}, { }, {
# clip 'url': 'https://embed.mychannels.video/production/193993',
'url': 'http://vtmkzoom.be/k3-dansstudio/een-nieuw-seizoen-van-k3-dansstudio',
'only_matching': True, 'only_matching': True,
}, { }, {
# http/s redirect 'url': 'https://mychannels.video/embed/193993',
'url': 'https://vtmkzoom.be/video?aid=45724', 'only_matching': True,
'info_dict': {
'id': '257136373657000',
'ext': 'mp4',
'title': 'K3 Dansstudio Ushuaia afl.6',
},
'params': {
'skip_download': True,
},
'skip': 'Requires account credentials',
}, { }, {
# nieuws.vtm.be 'url': 'https://embed.mychannels.video/embed/193993',
'url': 'https://nieuws.vtm.be/stadion/stadion/genk-nog-moeilijk-programma',
'only_matching': True, 'only_matching': True,
}] }]
def _real_initialize(self): @staticmethod
self._logged_in = False def _extract_urls(webpage):
entries = []
def _login(self): for element in re.findall(r'(<div[^>]+data-mychannels-type="video"[^>]*>)', webpage):
username, password = self._get_login_info() mychannels_id = extract_attributes(element).get('data-mychannels-id')
if username is None: if mychannels_id:
self.raise_login_required() entries.append('https://mychannels.video/embed/' + mychannels_id)
return entries
auth_data = {
'APIKey': self._APIKEY,
'sdk': 'js_6.1',
'format': 'json',
'loginID': username,
'password': password,
}
auth_info = self._gigya_login(auth_data)
self._uid = auth_info['UID']
self._uid_signature = auth_info['UIDSignature']
self._signature_timestamp = auth_info['signatureTimestamp']
self._logged_in = True
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) production_id = self._match_id(url)
video_id, site_id = mobj.group('id', 'site_id') production = self._download_json(
'https://embed.mychannels.video/sdk/production/' + production_id,
production_id, query={'options': 'UUUU_default'})['productions'][0]
title = production['title']
webpage = self._download_webpage(url, video_id) formats = []
for source in (production.get('sources') or []):
config = self._parse_json( src = source.get('src')
self._search_regex( if not src:
r'videoJSConfig\s*=\s*JSON\.parse\(\'({.+?})\'\);', continue
webpage, 'config', default='{}'), video_id, ext = mimetype2ext(source.get('type'))
transform_source=lambda s: s.replace( if ext == 'm3u8':
'\\\\', '\\').replace(r'\"', '"').replace(r"\'", "'")) formats.extend(self._extract_m3u8_formats(
src, production_id, 'mp4', 'm3u8_native',
vod_id = config.get('vodId') or self._search_regex( m3u8_id='hls', fatal=False))
(r'\\"vodId\\"\s*:\s*\\"(.+?)\\"',
r'"vodId"\s*:\s*"(.+?)"',
r'<[^>]+id=["\']vod-(\d+)'),
webpage, 'video_id', default=None)
# clip, no authentication required
if not vod_id:
player = self._parse_json(
self._search_regex(
r'vmmaplayer\(({.+?})\);', webpage, 'vmma player',
default=''),
video_id, transform_source=lambda s: '[%s]' % s, fatal=False)
if player:
video = player[-1]
if video['videoUrl'] in ('http', 'https'):
return self.url_result(video['url'], MedialaanIE.ie_key())
info = {
'id': video_id,
'url': video['videoUrl'],
'title': video['title'],
'thumbnail': video.get('imageUrl'),
'timestamp': int_or_none(video.get('createdDate')),
'duration': int_or_none(video.get('duration')),
}
else: else:
info = self._parse_html5_media_entries( formats.append({
url, webpage, video_id, m3u8_id='hls')[0] 'ext': ext,
info.update({ 'url': src,
'id': video_id,
'title': self._html_search_meta('description', webpage),
'duration': parse_duration(self._html_search_meta('duration', webpage)),
}) })
# vod, authentication required self._sort_formats(formats)
else:
if not self._logged_in:
self._login()
settings = self._parse_json( return {
self._search_regex( 'id': production_id,
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', 'title': title,
webpage, 'drupal settings', default='{}'), 'formats': formats,
video_id) 'thumbnail': production.get('posterUrl'),
'timestamp': parse_iso8601(production.get('publicationDate'), ' '),
def get(container, item): 'duration': int_or_none(production.get('duration')) or None,
return try_get( }
settings, lambda x: x[container][item],
compat_str) or self._search_regex(
r'"%s"\s*:\s*"([^"]+)' % item, webpage, item,
default=None)
app_id = get('vod', 'app_id') or self._SITE_TO_APP_ID.get(site_id, 'vtm_watch')
sso = get('vod', 'gigyaDatabase') or 'vtm-sso'
data = self._download_json(
'http://vod.medialaan.io/api/1.0/item/%s/video' % vod_id,
video_id, query={
'app_id': app_id,
'user_network': sso,
'UID': self._uid,
'UIDSignature': self._uid_signature,
'signatureTimestamp': self._signature_timestamp,
})
formats = self._extract_m3u8_formats(
data['response']['uri'], video_id, entry_protocol='m3u8_native',
ext='mp4', m3u8_id='hls')
self._sort_formats(formats)
info = {
'id': vod_id,
'formats': formats,
}
api_key = get('vod', 'apiKey')
channel = get('medialaanGigya', 'channel')
if api_key:
videos = self._download_json(
'http://vod.medialaan.io/vod/v2/videos', video_id, fatal=False,
query={
'channels': channel,
'ids': vod_id,
'limit': 1,
'apikey': api_key,
})
if videos:
video = try_get(
videos, lambda x: x['response']['videos'][0], dict)
if video:
def get(container, item, expected_type=None):
return try_get(
video, lambda x: x[container][item], expected_type)
def get_string(container, item):
return get(container, item, compat_str)
info.update({
'series': get_string('program', 'title'),
'season': get_string('season', 'title'),
'season_number': int_or_none(get('season', 'number')),
'season_id': get_string('season', 'id'),
'episode': get_string('episode', 'title'),
'episode_number': int_or_none(get('episode', 'number')),
'episode_id': get_string('episode', 'id'),
'duration': int_or_none(
video.get('duration')) or int_or_none(
video.get('durationMillis'), scale=1000),
'title': get_string('episode', 'title'),
'description': get_string('episode', 'text'),
'timestamp': unified_timestamp(get_string(
'publication', 'begin')),
})
if not info.get('title'):
info['title'] = try_get(
config, lambda x: x['videoConfig']['title'],
compat_str) or self._html_search_regex(
r'\\"title\\"\s*:\s*\\"(.+?)\\"', webpage, 'title',
default=None) or self._og_search_title(webpage)
if not info.get('description'):
info['description'] = self._html_search_regex(
r'<div[^>]+class="field-item\s+even">\s*<p>(.+?)</p>',
webpage, 'description', default=None)
return info

View file

@ -0,0 +1,62 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_iso8601,
try_get,
)
class VTMIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?vtm\.be/([^/?&#]+)~v(?P<id>[0-9a-f]{8}(?:-[0-9a-f]{4}){3}-[0-9a-f]{12})'
_TEST = {
'url': 'https://vtm.be/gast-vernielt-genkse-hotelkamer~ve7534523-279f-4b4d-a5c9-a33ffdbe23e1',
'md5': '37dca85fbc3a33f2de28ceb834b071f8',
'info_dict': {
'id': '192445',
'ext': 'mp4',
'title': 'Gast vernielt Genkse hotelkamer',
'timestamp': 1611060180,
'upload_date': '20210119',
'duration': 74,
# TODO: fix url _type result processing
# 'series': 'Op Interventie',
}
}
def _real_extract(self, url):
uuid = self._match_id(url)
video = self._download_json(
'https://omc4vm23offuhaxx6hekxtzspi.appsync-api.eu-west-1.amazonaws.com/graphql',
uuid, query={
'query': '''{
getComponent(type: Video, uuid: "%s") {
... on Video {
description
duration
myChannelsVideo
program {
title
}
publishedAt
title
}
}
}''' % uuid,
}, headers={
'x-api-key': 'da2-lz2cab4tfnah3mve6wiye4n77e',
})['data']['getComponent']
return {
'_type': 'url',
'id': uuid,
'title': video.get('title'),
'url': 'http://mychannels.video/embed/%d' % video['myChannelsVideo'],
'description': video.get('description'),
'timestamp': parse_iso8601(video.get('publishedAt')),
'duration': int_or_none(video.get('duration')),
'series': try_get(video, lambda x: x['program']['title']),
'ie_key': 'Medialaan',
}