[instagram] Improve extraction (closes #22880)

This commit is contained in:
Sergey M․ 2021-02-26 15:38:42 +01:00 committed by Dominika
parent 22c3b77c77
commit 6e617eb2e8

View file

@@ -122,9 +122,9 @@ class InstagramIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
(video_url, description, thumbnail, timestamp, uploader,
(media, video_url, description, thumbnail, timestamp, uploader,
uploader_id, like_count, comment_count, comments, height,
width) = [None] * 11
width) = [None] * 12
shared_data = self._parse_json(
self._search_regex(
@@ -137,15 +137,17 @@ class InstagramIE(InfoExtractor):
(lambda x: x['entry_data']['PostPage'][0]['graphql']['shortcode_media'],
lambda x: x['entry_data']['PostPage'][0]['media']),
dict)
# _sharedData.entry_data.PostPage is empty when authenticated (see
# https://github.com/hdl-org/haruhi-dl/pull/22880)
if not media:
additional_data = self._parse_json(
self._search_regex(r'window\.__additionalDataLoaded\(\'[^\']+\',\s*({.+?})\);',
self._search_regex(
r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*({.+?})\s*\)\s*;',
webpage, 'additional data', default='{}'),
video_id, fatal=False)
if additional_data:
media = try_get(
additional_data,
lambda x: x['graphql']['shortcode_media'],
additional_data, lambda x: x['graphql']['shortcode_media'],
dict)
if media:
video_url = media.get('video_url')