[clip.rs] fix extraction
This commit is contained in:
parent
b55552ad1a
commit
4cfa7883a3
|
@ -2,14 +2,16 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .pulsembed import PulseVideoIE
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
from .pulsembed import PulseVideoIE, PulsEmbedIE
|
||||||
|
|
||||||
|
|
||||||
class ClipRsIE(InfoExtractor):
|
class ClipRsIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?clip\.rs/(?P<id>[^/]+)/\d+'
|
_VALID_URL = r'https?://(?:www\.)?clip\.rs/(?P<id>[^/]+)/\d+'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.clip.rs/premijera-frajle-predstavljaju-novi-spot-za-pesmu-moli-me-moli/3732',
|
'url': 'https://www.clip.rs/premijera-frajle-predstavljaju-novi-spot-za-pesmu-moli-me-moli/3732',
|
||||||
'md5': 'c412d57815ba07b56f9edc7b5d6a14e5',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1488842.1399140381',
|
'id': '1488842.1399140381',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
@ -19,16 +21,42 @@ class ClipRsIE(InfoExtractor):
|
||||||
'timestamp': 1459850243,
|
'timestamp': 1459850243,
|
||||||
'upload_date': '20160405',
|
'upload_date': '20160405',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.clip.rs/u-novom-sadu-se-sinoc-desio-jedan-zimski-neum-svi-su-zaboravili-na-koronu-uhvatili-se-u-kolo-i-nastao-je-hit-video/15686',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2210721.1689293351',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'U Novom Sadu se sinoć desio jedan zimski Neum: Svi su zaboravili na koronu, uhvatili se u kolo i nastao je HIT VIDEO',
|
||||||
|
'description': 'md5:b1d7d6c0b029b922f06a2a08c9761852',
|
||||||
|
'timestamp': 1609405068,
|
||||||
|
'upload_date': '20201231',
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
info_dict = {}
|
||||||
|
|
||||||
return {
|
mvp_id = PulseVideoIE._search_mvp_id(webpage, default=None)
|
||||||
|
if mvp_id:
|
||||||
|
info_dict.update({
|
||||||
|
'url': 'pulsevideo:%s' % PulseVideoIE._search_mvp_id(webpage),
|
||||||
|
'ie_key': PulseVideoIE.ie_key(),
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
entries = PulsEmbedIE._extract_entries(webpage)
|
||||||
|
if not entries:
|
||||||
|
raise ExtractorError('Video ID not found on webpage')
|
||||||
|
if len(entries) > 1:
|
||||||
|
raise ExtractorError('More than 1 PulsEmbed')
|
||||||
|
info_dict.update(entries[0])
|
||||||
|
|
||||||
|
info_dict.update({
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': 'pulsevideo:%s' % PulseVideoIE._search_mvp_id(webpage),
|
'title': self._og_search_title(webpage),
|
||||||
'ie_key': PulseVideoIE.ie_key(),
|
'description': self._og_search_description(webpage),
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
}
|
})
|
||||||
|
return info_dict
|
||||||
|
|
|
@ -43,6 +43,10 @@ class PulseVideoIE(InfoExtractor):
|
||||||
r'data-(?:params-)?mvp=["\'](\d+\.\d+)', webpage)
|
r'data-(?:params-)?mvp=["\'](\d+\.\d+)', webpage)
|
||||||
if mvp:
|
if mvp:
|
||||||
return mvp.group(1)
|
return mvp.group(1)
|
||||||
|
mvp = re.search(
|
||||||
|
r'\sid=(["\']?)mvp:(\d+\.\d+)\1', webpage)
|
||||||
|
if mvp:
|
||||||
|
return mvp.group(2)
|
||||||
if default != NO_DEFAULT:
|
if default != NO_DEFAULT:
|
||||||
return default
|
return default
|
||||||
raise ExtractorError('Could not extract mvp')
|
raise ExtractorError('Could not extract mvp')
|
||||||
|
@ -222,7 +226,16 @@ class PulsEmbedIE(InfoExtractor):
|
||||||
'url': smuggle_url('pulsembed:%s' % embed.group('id'), {'referer': url}),
|
'url': smuggle_url('pulsembed:%s' % embed.group('id'), {'referer': url}),
|
||||||
'ie_key': 'PulsEmbed',
|
'ie_key': 'PulsEmbed',
|
||||||
})
|
})
|
||||||
return entries
|
|
||||||
|
ids = []
|
||||||
|
|
||||||
|
def dedupe(entry):
|
||||||
|
if entry['url'] not in ids:
|
||||||
|
ids.append(entry['url'])
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
return list(filter(dedupe, entries))
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
Loading…
Reference in a new issue