aliexpress product video extractor

This commit is contained in:
selfisekai 2020-11-07 04:13:19 +01:00
parent 84ea897ae8
commit ca3abff9ea
2 changed files with 48 additions and 1 deletions

View file

@ -10,6 +10,7 @@ from ..utils import (
class AliExpressLiveIE(InfoExtractor): class AliExpressLiveIE(InfoExtractor):
IE_NAME = 'aliexpress:live'
_VALID_URL = r'https?://live\.aliexpress\.com/live/(?P<id>\d+)' _VALID_URL = r'https?://live\.aliexpress\.com/live/(?P<id>\d+)'
_TEST = { _TEST = {
'url': 'https://live.aliexpress.com/live/2800002704436634', 'url': 'https://live.aliexpress.com/live/2800002704436634',
@ -51,3 +52,46 @@ class AliExpressLiveIE(InfoExtractor):
'timestamp': float_or_none(data.get('startTimeLong'), scale=1000), 'timestamp': float_or_none(data.get('startTimeLong'), scale=1000),
'formats': formats, 'formats': formats,
} }
class AliExpressProductIE(InfoExtractor):
    # Extracts the video attached to an AliExpress product page.
    IE_NAME = 'aliexpress:product'
    # Accepts the bare domain, "www." and two-letter subdomains (e.g. "pl.")
    # on both .com and .ru; the numeric product id is captured as "id".
    _VALID_URL = r'https?://(?:(?:www|[a-z]{2})\.)?aliexpress\.(?:com|ru)/item/(?P<id>\d+)\.html'
    _TESTS = [{
        'url': 'https://pl.aliexpress.com/item/4000570726711.html',
        'info_dict': {
            'id': '249591332087',
            'title': str,  # depends on IP location
            'ext': 'mp4',
        },
    }, {
        'url': 'https://www.aliexpress.com/item/4000813110155.html',
        'info_dict': {
            'id': '274294663774',
            'title': str,  # depends on IP location
            'ext': 'mp4',
        },
    }]

    def _real_extract(self, url):
        # The product id is only used to label the page download; the
        # resulting entry is identified by the video id found in the page.
        pid = self._match_id(url)
        webpage = self._download_webpage(url, pid, 'Downloading product page')

        # Both values are read straight from the page source and may be
        # serialized either as a quoted string or as a bare number.
        vid = self._search_regex(
            r'"videoId"\s*:\s*"?(\d+)"?',
            webpage, 'video id')
        uid = self._search_regex(
            r'"videoUid"\s*:\s*"?(?P<uid>\d+)"?',
            webpage, 'video uid')

        og_title = self._og_search_title(webpage)
        # Use the text between the first two "|" separators of og:title as
        # the product name, falling back to the whole og:title when that
        # layout is absent. The capture keeps any padding around the
        # separators, so strip it to avoid a title with stray whitespace.
        title = self._search_regex(
            r'^.*?\|(.+?)\|', og_title, 'product title',
            default=og_title).strip()

        return {
            # NOTE(review): the meaning of the fixed path segments
            # (p/1/e/6/t/10301) is not known; this template was only
            # observed to work for the test URLs.
            'url': 'https://cloud.video.taobao.com/play/u/%s/p/1/e/6/t/10301/%s.mp4' % (uid, vid),
            'id': vid,
            'title': title,
        }

View file

@ -42,7 +42,10 @@ from .animeondemand import AnimeOnDemandIE
from .anvato import AnvatoIE from .anvato import AnvatoIE
from .aol import AolIE from .aol import AolIE
from .allocine import AllocineIE from .allocine import AllocineIE
from .aliexpress import AliExpressLiveIE from .aliexpress import (
AliExpressLiveIE,
AliExpressProductIE,
)
from .apa import APAIE from .apa import APAIE
from .aparat import AparatIE from .aparat import AparatIE
from .appleconnect import AppleConnectIE from .appleconnect import AppleConnectIE