Skip to content

Commit c052a16

Browse files
committed
[JSInterp] Add tests and relevant functionality from yt-dlp
* thx seproDev, bashonly: yt-dlp/yt-dlp#12760, yt-dlp/yt-dlp#12761:
  - Improve nested attribute support
  - Pass global stack when extracting objects
  - interpret_statement: Match attribute before indexing
  - Fix assignment to array elements with nested brackets
  - Add new signature tests
  - Invalidate JS function cache
  - Avoid testdata dupes now that we cache by URL
* rework nsig function name search
* fully fixes #33102
* update cache required versions
* update program version
1 parent bd2ded5 commit c052a16

File tree

5 files changed

+129
-22
lines changed

5 files changed

+129
-22
lines changed

test/test_jsinterp.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,7 @@ def test_assignments(self):
180180
self._test('function f(){var x = 20; x = 30 + 1; return x;}', 31)
181181
self._test('function f(){var x = 20; x += 30 + 1; return x;}', 51)
182182
self._test('function f(){var x = 20; x -= 30 + 1; return x;}', -11)
183+
self._test('function f(){var x = 2; var y = ["a", "b"]; y[x%y["length"]]="z"; return y}', ['z', 'b'])
183184

184185
def test_comments(self):
185186
self._test('''
@@ -552,6 +553,8 @@ def test_split(self):
552553
test_result = list('test')
553554
tests = [
554555
'function f(a, b){return a.split(b)}',
556+
'function f(a, b){return a["split"](b)}',
557+
'function f(a, b){let x = ["split"]; return a[x[0]](b)}',
555558
'function f(a, b){return String.prototype.split.call(a, b)}',
556559
'function f(a, b){return String.prototype.split.apply(a, [b])}',
557560
]
@@ -602,6 +605,9 @@ def test_slice(self):
602605
self._test('function f(){return "012345678".slice(-1, 1)}', '')
603606
self._test('function f(){return "012345678".slice(-3, -1)}', '67')
604607

608+
def test_splice(self):
609+
self._test('function f(){var T = ["0", "1", "2"]; T["splice"](2, 1, "0")[0]; return T }', ['0', '1', '0'])
610+
605611
def test_pop(self):
606612
# pop
607613
self._test('function f(){var a = [0, 1, 2, 3, 4, 5, 6, 7, 8]; return [a.pop(), a]}',
@@ -636,6 +642,16 @@ def test_forEach(self):
636642
'return [ret.length, ret[0][0], ret[1][1], ret[0][2]]}',
637643
[2, 4, 1, [4, 2]])
638644

645+
def test_extract_function(self):
646+
jsi = JSInterpreter('function a(b) { return b + 1; }')
647+
func = jsi.extract_function('a')
648+
self.assertEqual(func([2]), 3)
649+
650+
def test_extract_function_with_global_stack(self):
651+
jsi = JSInterpreter('function c(d) { return d + e + f + g; }')
652+
func = jsi.extract_function('c', {'e': 10}, {'f': 100, 'g': 1000})
653+
self.assertEqual(func([1]), 1111)
654+
639655

640656
if __name__ == '__main__':
641657
unittest.main()

test/test_youtube_signature.py

Lines changed: 62 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,11 +94,51 @@
9494
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
9595
'0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpz2ICs6EVdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
9696
),
97+
(
98+
'https://www.youtube.com/s/player/363db69b/player_ias_tce.vflset/en_US/base.js',
99+
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
100+
'0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpz2ICs6EVdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
101+
),
97102
(
98103
'https://www.youtube.com/s/player/4fcd6e4a/player_ias.vflset/en_US/base.js',
99104
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
100105
'wAOAOq0QJ8ARAIgXmPlOPSBkkUs1bYFYlJCfe29xx8q7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
101106
),
107+
(
108+
'https://www.youtube.com/s/player/4fcd6e4a/player_ias_tce.vflset/en_US/base.js',
109+
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
110+
'wAOAOq0QJ8ARAIgXmPlOPSBkkUs1bYFYlJCfe29xx8q7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
111+
),
112+
(
113+
'https://www.youtube.com/s/player/20830619/player_ias.vflset/en_US/base.js',
114+
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
115+
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
116+
),
117+
(
118+
'https://www.youtube.com/s/player/20830619/player_ias_tce.vflset/en_US/base.js',
119+
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
120+
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
121+
),
122+
(
123+
'https://www.youtube.com/s/player/20830619/player-plasma-ias-phone-en_US.vflset/base.js',
124+
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
125+
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
126+
),
127+
(
128+
'https://www.youtube.com/s/player/20830619/player-plasma-ias-tablet-en_US.vflset/base.js',
129+
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
130+
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
131+
),
132+
(
133+
'https://www.youtube.com/s/player/8a8ac953/player_ias_tce.vflset/en_US/base.js',
134+
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
135+
'IAOAOq0QJ8wRAAgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_E2u-m37KtXJoOySqa0',
136+
),
137+
(
138+
'https://www.youtube.com/s/player/8a8ac953/tv-player-es6.vflset/tv-player-es6.js',
139+
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
140+
'IAOAOq0QJ8wRAAgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_E2u-m37KtXJoOySqa0',
141+
),
102142
]
103143

104144
_NSIG_TESTS = [
@@ -272,7 +312,7 @@
272312
),
273313
(
274314
'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
275-
'W9HJZKktxuYoDTqW', 'larxUlagTRAcSw',
315+
'ir9-V6cdbCiyKxhr', '2PL7ZDYAALMfmA',
276316
),
277317
(
278318
'https://www.youtube.com/s/player/363db69b/player_ias.vflset/en_US/base.js',
@@ -286,6 +326,26 @@
286326
'https://www.youtube.com/s/player/4fcd6e4a/tv-player-ias.vflset/tv-player-ias.js',
287327
'o_L251jm8yhZkWtBW', 'lXoxI3XvToqn6A',
288328
),
329+
(
330+
'https://www.youtube.com/s/player/20830619/tv-player-ias.vflset/tv-player-ias.js',
331+
'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4',
332+
),
333+
(
334+
'https://www.youtube.com/s/player/20830619/player-plasma-ias-phone-en_US.vflset/base.js',
335+
'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4',
336+
),
337+
(
338+
'https://www.youtube.com/s/player/20830619/player-plasma-ias-tablet-en_US.vflset/base.js',
339+
'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4',
340+
),
341+
(
342+
'https://www.youtube.com/s/player/8a8ac953/player_ias_tce.vflset/en_US/base.js',
343+
'MiBYeXx_vRREbiCCmh', 'RtZYMVvmkE0JE',
344+
),
345+
(
346+
'https://www.youtube.com/s/player/8a8ac953/tv-player-es6.vflset/tv-player-es6.js',
347+
'MiBYeXx_vRREbiCCmh', 'RtZYMVvmkE0JE',
348+
),
289349
]
290350

291351

@@ -335,7 +395,7 @@ def make_tfunc(url, sig_input, expected_sig):
335395
test_id = re.sub(r'[/.-]', '_', m.group('id') or m.group('compat_id'))
336396

337397
def test_func(self):
338-
basename = 'player-{0}-{1}.js'.format(name, test_id)
398+
basename = 'player-{0}.js'.format(test_id)
339399
fn = os.path.join(self.TESTDATA_DIR, basename)
340400

341401
if not os.path.exists(fn):

youtube_dl/extractor/youtube.py

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1652,7 +1652,7 @@ def _extract_signature_function(self, video_id, player_url, example_sig):
16521652
assert os.path.basename(func_id) == func_id
16531653

16541654
self.write_debug('Extracting signature function {0}'.format(func_id))
1655-
cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
1655+
cache_spec, code = self.cache.load('youtube-sigfuncs', func_id, min_ver='2025.04.07'), None
16561656

16571657
if not cache_spec:
16581658
code = self._load_player(video_id, player_url, player_id)
@@ -1813,6 +1813,10 @@ def _decrypt_nsig(self, n, video_id, player_url):
18131813
return ret
18141814

18151815
def _extract_n_function_name(self, jscode):
1816+
func_name, idx = None, None
1817+
# these special cases are redundant and probably obsolete (2025-04):
1818+
# they make the tests run ~10% faster without fallback warnings
1819+
r"""
18161820
func_name, idx = self._search_regex(
18171821
# (y=NuD(),Mw(k),q=k.Z[y]||null)&&(q=narray[idx](q),k.set(y,q),k.V||NuD(''))}};
18181822
# (R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}};
@@ -1839,9 +1843,28 @@ def _extract_n_function_name(self, jscode):
18391843
\(\s*[\w$]+\s*\)
18401844
''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'),
18411845
default=(None, None))
1846+
"""
1847+
1848+
if not func_name:
1849+
# nfunc=function(x){...}|function nfunc(x); ...
1850+
# ... var y=[nfunc]|y[idx]=nfunc);
1851+
# obvious REs hang, so use a two-stage tactic
1852+
for m in re.finditer(r'''(?x)
1853+
[\n;]var\s(?:(?:(?!,).)+,|\s)*?(?!\d)[\w$]+(?:\[(?P<idx>\d+)\])?\s*=\s*
1854+
(?(idx)|\[\s*)(?P<nfunc>(?!\d)[\w$]+)(?(idx)|\s*\])
1855+
\s*?[;\n]
1856+
''', jscode):
1857+
func_name = self._search_regex(
1858+
r'[;,]\s*(function\s+)?({0})(?(1)|\s*=\s*function)\s*\((?!\d)[\w$]+\)\s*\{1}(?!\s*return\s)'.format(
1859+
re.escape(m.group('nfunc')), '{'),
1860+
jscode, 'Initial JS player n function name (2)', group=2, default=None)
1861+
if func_name:
1862+
idx = m.group('idx')
1863+
break
1864+
18421865
# thx bashonly: yt-dlp/yt-dlp/pull/10611
18431866
if not func_name:
1844-
self.report_warning('Falling back to generic n function search')
1867+
self.report_warning('Falling back to generic n function search', only_once=True)
18451868
return self._search_regex(
18461869
r'''(?xs)
18471870
(?:(?<=[^\w$])|^) # instead of \b, which ignores $
@@ -1855,14 +1878,14 @@ def _extract_n_function_name(self, jscode):
18551878
return func_name
18561879

18571880
return self._search_json(
1858-
r'var\s+{0}\s*='.format(re.escape(func_name)), jscode,
1881+
r'(?<![\w-])var\s(?:(?:(?!,).)+,|\s)*?{0}\s*='.format(re.escape(func_name)), jscode,
18591882
'Initial JS player n function list ({0}.{1})'.format(func_name, idx),
1860-
func_name, contains_pattern=r'\[[\s\S]+\]', end_pattern='[,;]',
1883+
func_name, contains_pattern=r'\[.+\]', end_pattern='[,;]',
18611884
transform_source=js_to_json)[int(idx)]
18621885

18631886
def _extract_n_function_code(self, video_id, player_url):
18641887
player_id = self._extract_player_info(player_url)
1865-
func_code = self.cache.load('youtube-nsig', player_id)
1888+
func_code = self.cache.load('youtube-nsig', player_id, min_ver='2025.04.07')
18661889
jscode = func_code or self._load_player(video_id, player_url)
18671890
jsi = JSInterpreter(jscode)
18681891

youtube_dl/jsinterp.py

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -303,8 +303,6 @@ def _js_typeof(expr):
303303
('!', _js_unary_op(lambda x: _js_ternary(x, if_true=False, if_false=True))),
304304
)
305305

306-
_OPERATOR_RE = '|'.join(map(lambda x: re.escape(x[0]), _OPERATORS + _LOG_OPERATORS))
307-
308306
_COMP_OPERATORS = (
309307
('===', _js_id_op(operator.is_)),
310308
('!==', _js_id_op(operator.is_not)),
@@ -316,9 +314,12 @@ def _js_typeof(expr):
316314
('>', _js_comp_op(operator.gt)),
317315
)
318316

317+
_OPERATOR_RE = '|'.join(map(lambda x: re.escape(x[0]), _OPERATORS + _LOG_OPERATORS + _SC_OPERATORS))
318+
319319
_NAME_RE = r'[a-zA-Z_$][\w$]*'
320320
_MATCHING_PARENS = dict(zip(*zip('()', '{}', '[]')))
321321
_QUOTES = '\'"/'
322+
_NESTED_BRACKETS = r'[^[\]]+(?:\[[^[\]]+(?:\[[^\]]+\])?\])?'
322323

323324

324325
class JS_Break(ExtractorError):
@@ -1088,15 +1089,18 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
10881089

10891090
m = re.match(r'''(?x)
10901091
(?P<assign>
1091-
(?P<out>{_NAME_RE})(?:\[(?P<out_idx>(?:.+?\]\s*\[)*.+?)\])?\s*
1092+
(?P<out>{_NAME_RE})(?P<out_idx>(?:\[{_NESTED_BRACKETS}\])+)?\s*
10921093
(?P<op>{_OPERATOR_RE})?
10931094
=(?!=)(?P<expr>.*)$
10941095
)|(?P<return>
10951096
(?!if|return|true|false|null|undefined|NaN|Infinity)(?P<name>{_NAME_RE})$
1096-
)|(?P<indexing>
1097-
(?P<in>{_NAME_RE})\[(?P<in_idx>(?:.+?\]\s*\[)*.+?)\]$
10981097
)|(?P<attribute>
1099-
(?P<var>{_NAME_RE})(?:(?P<nullish>\?)?\.(?P<member>[^(]+)|\[(?P<member2>[^\]]+)\])\s*
1098+
(?P<var>{_NAME_RE})(?:
1099+
(?P<nullish>\?)?\.(?P<member>[^(]+)|
1100+
\[(?P<member2>{_NESTED_BRACKETS})\]
1101+
)\s*
1102+
)|(?P<indexing>
1103+
(?P<in>{_NAME_RE})(?P<in_idx>\[.+\])$
11001104
)|(?P<function>
11011105
(?P<fname>{_NAME_RE})\((?P<args>.*)\)$
11021106
)'''.format(**globals()), expr)
@@ -1111,10 +1115,11 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
11111115
elif left_val in (None, JS_Undefined):
11121116
raise self.Exception('Cannot index undefined variable ' + m.group('out'), expr=expr)
11131117

1114-
indexes = re.split(r'\]\s*\[', m.group('out_idx'))
1115-
for i, idx in enumerate(indexes, 1):
1118+
indexes = md['out_idx']
1119+
while indexes:
1120+
idx, indexes = self._separate_at_paren(indexes)
11161121
idx = self.interpret_expression(idx, local_vars, allow_recursion)
1117-
if i < len(indexes):
1122+
if indexes:
11181123
left_val = self._index(left_val, idx)
11191124
if isinstance(idx, float):
11201125
idx = int(idx)
@@ -1159,7 +1164,9 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
11591164

11601165
if md.get('indexing'):
11611166
val = local_vars[m.group('in')]
1162-
for idx in re.split(r'\]\s*\[', m.group('in_idx')):
1167+
indexes = m.group('in_idx')
1168+
while indexes:
1169+
idx, indexes = self._separate_at_paren(indexes)
11631170
idx = self.interpret_expression(idx, local_vars, allow_recursion)
11641171
val = self._index(val, idx)
11651172
return val, should_return
@@ -1204,7 +1211,7 @@ def eval_method(variable, member):
12041211
if obj is JS_Undefined:
12051212
try:
12061213
if variable not in self._objects:
1207-
self._objects[variable] = self.extract_object(variable)
1214+
self._objects[variable] = self.extract_object(variable, local_vars)
12081215
obj = self._objects[variable]
12091216
except self.Exception:
12101217
if not nullish:
@@ -1215,7 +1222,7 @@ def eval_method(variable, member):
12151222

12161223
# Member access
12171224
if arg_str is None:
1218-
return self._index(obj, member)
1225+
return self._index(obj, member, nullish)
12191226

12201227
# Function call
12211228
argvals = [
@@ -1400,7 +1407,7 @@ def interpret_iter(self, list_txt, local_vars, allow_recursion):
14001407
for v in self._separate(list_txt):
14011408
yield self.interpret_expression(v, local_vars, allow_recursion)
14021409

1403-
def extract_object(self, objname):
1410+
def extract_object(self, objname, *global_stack):
14041411
_FUNC_NAME_RE = r'''(?:{n}|"{n}"|'{n}')'''.format(n=_NAME_RE)
14051412
obj = {}
14061413
fields = next(filter(None, (
@@ -1421,7 +1428,8 @@ def extract_object(self, objname):
14211428
fields):
14221429
argnames = self.build_arglist(f.group('args'))
14231430
name = remove_quotes(f.group('key'))
1424-
obj[name] = function_with_repr(self.build_function(argnames, f.group('code')), 'F<{0}>'.format(name))
1431+
obj[name] = function_with_repr(
1432+
self.build_function(argnames, f.group('code'), *global_stack), 'F<{0}>'.format(name))
14251433

14261434
return obj
14271435

youtube_dl/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
from __future__ import unicode_literals
22

3-
__version__ = '2021.12.17'
3+
__version__ = '2025.04.07'

0 commit comments

Comments
 (0)