[utils] Recognize units with full names in parse_filename

Reference: https://en.wikipedia.org/wiki/Template:Quantities_of_bytes
This commit is contained in:
Yen Chi Hsuan 2016-08-20 00:12:32 +08:00
parent a9a3b4a081
commit 70852b47ca
No known key found for this signature in database
GPG key ID: 3FDDD575826C5C30
3 changed files with 21 additions and 1 deletions

View file

@ -1,7 +1,9 @@
version <unreleased>
Core
* Support m3u8 manifests in HTML5 multimedia tags
+ Recognize file size strings with full unit names (for example "8.5
megabytes")
+ Support m3u8 manifests in HTML5 multimedia tags
* Fix js_to_json(): correct octal or hexadecimal number detection
Extractors

View file

@ -823,6 +823,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(parse_filesize('1.2tb'), 1200000000000)
self.assertEqual(parse_filesize('1,24 KB'), 1240)
self.assertEqual(parse_filesize('1,24 kb'), 1240)
self.assertEqual(parse_filesize('8.5 megabytes'), 8500000)
def test_parse_count(self):
self.assertEqual(parse_count(None), None)

View file

@ -1504,46 +1504,63 @@ def parse_filesize(s):
_UNIT_TABLE = {
'B': 1,
'b': 1,
'bytes': 1,
'KiB': 1024,
'KB': 1000,
'kB': 1024,
'Kb': 1000,
'kb': 1000,
'kilobytes': 1000,
'kibibytes': 1024,
'MiB': 1024 ** 2,
'MB': 1000 ** 2,
'mB': 1024 ** 2,
'Mb': 1000 ** 2,
'mb': 1000 ** 2,
'megabytes': 1000 ** 2,
'mebibytes': 1024 ** 2,
'GiB': 1024 ** 3,
'GB': 1000 ** 3,
'gB': 1024 ** 3,
'Gb': 1000 ** 3,
'gb': 1000 ** 3,
'gigabytes': 1000 ** 3,
'gibibytes': 1024 ** 3,
'TiB': 1024 ** 4,
'TB': 1000 ** 4,
'tB': 1024 ** 4,
'Tb': 1000 ** 4,
'tb': 1000 ** 4,
'terabytes': 1000 ** 4,
'tebibytes': 1024 ** 4,
'PiB': 1024 ** 5,
'PB': 1000 ** 5,
'pB': 1024 ** 5,
'Pb': 1000 ** 5,
'pb': 1000 ** 5,
'petabytes': 1000 ** 5,
'pebibytes': 1024 ** 5,
'EiB': 1024 ** 6,
'EB': 1000 ** 6,
'eB': 1024 ** 6,
'Eb': 1000 ** 6,
'eb': 1000 ** 6,
'exabytes': 1000 ** 6,
'exbibytes': 1024 ** 6,
'ZiB': 1024 ** 7,
'ZB': 1000 ** 7,
'zB': 1024 ** 7,
'Zb': 1000 ** 7,
'zb': 1000 ** 7,
'zettabytes': 1000 ** 7,
'zebibytes': 1024 ** 7,
'YiB': 1024 ** 8,
'YB': 1000 ** 8,
'yB': 1024 ** 8,
'Yb': 1000 ** 8,
'yb': 1000 ** 8,
'yottabytes': 1000 ** 8,
'yobibytes': 1024 ** 8,
}
return lookup_unit_table(_UNIT_TABLE, s)