@@ -182,15 +182,27 @@ async def _compile(
182182
async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]:
    """Compile the trampoline plus one stencil per generated executor case.

    Every ``case NAME: { ... }`` block found in the generated executor-cases
    file is spliced into its own copy of the template source, written to a
    temporary working directory, and handed to ``self._compile``. All of the
    compile jobs (including the trampoline) are scheduled in one task group.

    Returns:
        A mapping from task name (``"trampoline"`` or the case's opname) to
        the result of the corresponding ``self._compile`` call.
    """
    generated_cases = PYTHON_EXECUTOR_CASES_C_H.read_text()
    # Two capture groups, so each match is a (full case text, opname) pair.
    # DOTALL lets the non-greedy ".*?" span the lines of the case body up to
    # the first line that is exactly eight spaces followed by "}".
    cases_and_opnames = sorted(
        re.findall(
            r"\n {8}(case (\w+): \{\n.*?\n {8}\})", generated_cases, flags=re.DOTALL
        )
    )
    tasks = []
    with tempfile.TemporaryDirectory() as tempdir:
        work = pathlib.Path(tempdir).resolve()
        async with asyncio.TaskGroup() as group:
            coro = self._compile("trampoline", TOOLS_JIT / "trampoline.c", work)
            tasks.append(group.create_task(coro, name="trampoline"))
            # Read the template once; it is the same for every case.
            template = TOOLS_JIT_TEMPLATE_C.read_text()
            for case, opname in cases_and_opnames:
                # Write out a copy of the template with *only* this case
                # inserted. This is about twice as fast as #include'ing all
                # of executor_cases.c.h each time we compile (since the C
                # compiler wastes a bunch of time parsing the dead code for
                # all of the other cases):
                c = work / f"{opname}.c"
                c.write_text(template.replace("CASE", case))
                coro = self._compile(opname, c, work)
                tasks.append(group.create_task(coro, name=opname))
    # The task group has exited by this point, so every task has finished
    # and its result can be read directly.
    return {task.get_name(): task.result() for task in tasks}
196208
0 commit comments