x-link (x-news.pl embeds) extractor

This commit is contained in:
Laura Liberda 2021-01-31 01:26:49 +01:00
parent a3816f69be
commit 87fad4b7eb
3 changed files with 69 additions and 0 deletions

View file

@@ -1492,6 +1492,7 @@ from .ximalaya import (
XimalayaAlbumIE XimalayaAlbumIE
) )
from .xminus import XMinusIE from .xminus import XMinusIE
from .xnews import XLinkIE
from .xnxx import XNXXIE from .xnxx import XNXXIE
from .xstream import XstreamIE from .xstream import XstreamIE
from .xtube import XTubeUserIE, XTubeIE from .xtube import XTubeUserIE, XTubeIE

View file

@@ -122,6 +122,7 @@ from .kinja import KinjaEmbedIE
from .onnetwork import OnNetworkLoaderIE from .onnetwork import OnNetworkLoaderIE
from .embetty import EmbettyIE from .embetty import EmbettyIE
from .rtlnl import RtlNlIE from .rtlnl import RtlNlIE
from .xnews import XLinkIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@@ -2587,6 +2588,7 @@ class GenericIE(InfoExtractor):
TeachableIE, # must be before Wistia TeachableIE, # must be before Wistia
WistiaIE, WistiaIE,
SVTIE, SVTIE,
XLinkIE,
): ):
try: try:
ie_key = embie.ie_key() ie_key = embie.ie_key()

View file

@@ -0,0 +1,66 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
js_to_json,
parse_duration,
smuggle_url,
unsmuggle_url,
)
class XLinkIE(InfoExtractor):
    """Extractor for get.x-link.pl embed pages (x-news.pl embeds).

    The embed page is requested with a ``Referer`` header taken from data
    smuggled into the URL by ``_extract_urls``. When no referer was smuggled
    a warning is reported and the request is still attempted.
    """

    IE_NAME = 'x-link'
    IE_DESC = 'x-news.pl embeds'
    _VALID_URL = r'https?://get\.x-link\.pl/(?:[a-f\d]{8}-(?:[a-f\d]{4}-){3}[a-f\d]{12}),(?P<id>[a-f\d]{8}-(?:[a-f\d]{4}-){3}[a-f\d]{12}),embed\.html'
    _TESTS = [{
        'url': 'https://get.x-link.pl/6fc656ab-ee92-d813-6afd-59863a7ccbdd,7186de52-4c89-5d64-7508-fca6a4f2d3b9,embed.html#__youtubedl_smuggle=%7B%22referer%22%3A+%22https%3A%2F%2Fgazetawroclawska.pl%2Fsklepy-w-galeriach-handlowych-otwarte-od-poniedzialku-w-rezimie-sanitarnym-co-trzeba-wiedziec%2Far%2Fc3-15417477%22%7D',
        'info_dict': {
            'id': '7186de52-4c89-5d64-7508-fca6a4f2d3b9',
            'ext': 'mp4',
            'title': 'Luzowanie obostrzeń: Od 1 lutego otwarte galerie handlowe i muzea, nie będzie też godzin dla seniorów',
        },
    }]

    @staticmethod
    def _extract_urls(webpage, url=None):
        """Return the x-link embed URLs found in *webpage*.

        Every ``<script ... data-url="...">`` pointing at a get.x-link.pl
        embed page is collected. *url* (the address of the embedding page)
        is smuggled into each result under the ``referer`` key so that
        ``_real_extract`` can send it as the ``Referer`` header.
        """
        return [
            smuggle_url(mobj.group('url'), {'referer': url})
            for mobj in re.finditer(
                r'<script\b[^>]+\bdata-url=(["\'])(?P<url>https?://get\.x-link\.pl/(?:[a-f\d]{8}-(?:[a-f\d]{4}-){3}[a-f\d]{12},){2}embed\.html)[^"\']*?\1',
                webpage)]

    def _real_extract(self, url):
        """Download the embed page and build the info dict for the video."""
        # Split the smuggled data off first so that the page is requested
        # with the clean embed URL rather than with the smuggle fragment.
        url, smuggled = unsmuggle_url(url, {})
        video_id = self._match_id(url)

        headers = {}
        referer = smuggled.get('referer')
        if referer is None:
            self.report_warning("Referer not smuggled, will probably fail")
        else:
            headers['Referer'] = referer.encode('utf-8')

        webpage = self._download_webpage(url, video_id, headers=headers)

        # The metadata is the first object of the array literal passed to
        # initConsent(); it is a JS object, hence the js_to_json transform.
        data = self._parse_json(
            self._search_regex(
                r'initConsent\(\[({.+?})],', webpage, 'video data'),
            video_id, transform_source=js_to_json)

        # Only protocol-relative values ("//host/path") get the "https:"
        # prefix; values that already carry a scheme are left untouched.
        thumbnails = [
            {'url': self._proto_relative_url(data[key], 'https:')}
            for key in ('thumbnail', 'poster')
            if data.get(key)]

        return {
            'id': video_id,
            'url': self._proto_relative_url(data['src'], 'https:'),
            'title': data['title'],
            'thumbnails': thumbnails,
            'duration': parse_duration(data.get('videoDuration')),
        }