LLVM 20 Debugging¶
With `fastmath=True`, LLVM 15 and LLVM 20 generate different code when `get_inverse_doppler_factor` is inlined into `line_scatter_event`. The results differ by 1-2 ULP, and those small differences then cascade through subsequent calculations.
[1]:
import os, struct, re
# Restrict Numba to one thread. This is set here, ahead of the tardis import in
# the next cell -- NOTE(review): presumably Numba reads this when it is first
# imported, so confirm nothing has loaded numba earlier in the session.
os.environ["NUMBA_NUM_THREADS"] = "1"
import numpy as np
import llvmlite.binding as binding
# Record the LLVM version llvmlite is bound to, so the outputs below can be
# attributed to a specific LLVM release.
print(f"LLVM {binding.llvm_version_info}")
def f2h(f):
    """Return the hex string of ``f`` encoded as a big-endian IEEE-754 double.

    ``f`` is first converted with ``float()``, so any value accepted by
    ``float()`` may be passed. The result is always 16 hex digits.
    """
    as_double = float(f)
    raw_bytes = struct.pack('!d', as_double)
    return raw_bytes.hex()
def ulp_diff(a, b):
    """Return the distance between two doubles in units in the last place.

    The result is the number of representable doubles one has to step
    through to get from ``a`` to ``b`` (0 when they are the same value).

    IEEE-754 doubles are stored sign-magnitude, so subtracting the raw
    signed 64-bit patterns is only meaningful when both values have the same
    sign. Each bit pattern is therefore first mapped onto an integer line
    whose order matches the numeric order of the floats; this makes
    ``+0.0`` and ``-0.0`` coincide and gives correct distances across zero.
    """
    def _ordered_bits(x):
        bits = struct.unpack('!q', struct.pack('!d', x))[0]
        if bits >= 0:
            return bits
        # Negative float: drop the sign bit to get the magnitude, then
        # mirror it below zero so larger magnitudes sort lower.
        return -(bits & 0x7FFFFFFFFFFFFFFF)

    return abs(_ordered_bits(a) - _ordered_bits(b))
LLVM (20, 1, 8)
[2]:
# Run minimal sim to trigger JIT
import tardis
from tardis.io.configuration.config_reader import Configuration
from tardis.simulation import Simulation

# Locate the small example configuration that lives inside the tardis package.
tardis_root = os.path.dirname(tardis.__file__)
config_path = os.path.join(
    tardis_root,
    "io", "configuration", "tests", "data", "tardis_configv1_verysimple.yml",
)
config = Configuration.from_yaml(config_path)

# Override the settings this notebook cares about: downbranch line
# interactions, a small packet count, and no virtual packets.
config["plasma"]["line_interaction_type"] = "downbranch"
for option, value in (
    ("no_of_packets", 100),
    ("last_no_of_packets", 100),
    ("no_of_virtual_packets", 0),
):
    config["montecarlo"][option] = value

# Atomic data file: TARDIS_ATOM_DATA wins, otherwise fall back to a path under
# the user's home directory.
fallback_atom_data = os.path.expanduser(
    "~/Downloads/tardis-data/kurucz_cd23_chianti_H_He.h5"
)
config.atom_data = os.environ.get("TARDIS_ATOM_DATA", fallback_atom_data)

sim = Simulation.from_config(config)
sim.iterate(no_of_packets=100, no_of_virtual_packets=0)
print("JIT done")
Initializing tabulator and plotly panel extensions for widgets to work
Number of density points larger than number of shells. Assuming inner point irrelevant
model_isotope_time_0 is not set in the configuration. Isotopic mass fractions will not be decayed and is assumed to be correct for the time_explosion. THIS IS NOT RECOMMENDED!
/home/aryaatharva18/tardis-main/tardis/tardis/opacities/macro_atom/macroatom_solver.py:624: FutureWarning:
DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.
/home/aryaatharva18/tardis-main/tardis/tardis/transport/montecarlo/modes/classic/montecarlo_transport.py:161: NumbaTypeSafetyWarning:
unsafe cast from uint64 to int64. Precision may be lost.
JIT done
IR for line_scatter_event¶
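After inlining, LLVM 15 keeps the reciprocal `fdiv 1.0, x` and then multiplies it by `y` with an `fmul`, whereas LLVM 20 folds the pair into a single `fdiv y, x`. The first form rounds twice (once for the reciprocal, once for the product) and the second rounds once, so the two can differ in the last bits. Look for the `fdiv` instructions in the output below.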
[3]:
from tardis.transport.montecarlo.interaction_event_callers import line_scatter_event

# inspect_llvm needs a compiled signature; fail with a clear message instead of
# an IndexError if the function has not been compiled yet.
if not line_scatter_event.signatures:
    raise RuntimeError(
        "line_scatter_event has no compiled signatures; "
        "run the simulation cell first to trigger JIT compilation"
    )
sig = line_scatter_event.signatures[0]
ir = line_scatter_event.inspect_llvm(sig)

# One match per assigned instruction: (result name, opcode, text after opcode).
# - The name is matched lazily so it does not swallow the whitespace before '='.
# - An optional tail/musttail/notail marker is skipped so that
#   "tail call fast double @llvm.sqrt.f64(...)" is recognised as a call.
arith_re = re.compile(
    r'^\s+(%.+?)\s*=\s*(?:(?:tail|musttail|notail)\s+)?'
    r'(fmul|fadd|fsub|fdiv|fneg|call)\s+(.+)$',
    re.MULTILINE,
)
# Calls only count as FP arithmetic when they target one of these intrinsics.
fp_intrinsic_re = re.compile(r'@llvm\.(?:sqrt|fma|fmuladd)\.')
# Fast-math flag keywords that may follow the opcode.
FAST_MATH_FLAGS = frozenset(
    ("fast", "nnan", "ninf", "nsz", "arcp", "contract", "afn", "reassoc")
)


def fast_math_flags(rest):
    """Return the set of fast-math flags at the start of an instruction tail.

    ``rest`` is the text that follows the opcode. Leading whitespace-separated
    tokens are collected for as long as they are fast-math flag keywords.
    """
    flags = set()
    for token in rest.split():
        if token not in FAST_MATH_FLAGS:
            break
        flags.add(token)
    return flags


ops = [
    (name, op, rest)
    for name, op, rest in arith_re.findall(ir)
    if op != 'call' or fp_intrinsic_re.search(rest)
]
flag_sets = [fast_math_flags(rest) for _, _, rest in ops]
# "fast" means the full flag set; "reduced" means some flags but not all.
fast_n = sum(1 for flags in flag_sets if 'fast' in flags)
reassoc_n = sum(1 for flags in flag_sets if flags and 'fast' not in flags)
strict_n = len(ops) - fast_n - reassoc_n
print(f"FP ops: {len(ops)} total, {fast_n} fast, {reassoc_n} reduced-flags")
if strict_n:
    print(f"({strict_n} FP ops carry no fast-math flags)")
print()
print("fdiv instructions:")
for name, op, rest in ops:
    if op == 'fdiv':
        print(f" {name} = {op} {rest}")
FP ops: 52 total, 51 fast, 1 reduced-flags
fdiv instructions:
%.30.i = fdiv fast double %0, %arg.time_explosion
%storemerge.i = fdiv reassoc arcp contract afn double %.7.i.i, %.15.i.i
%.11.i.i.i = fdiv fast double 1.000000e+00, %.7.i.i10
%.15.i.i.i = fdiv fast double %.7.i2.i, %sqrt.i.i7
%.22.i.i = fdiv fast double %.126, %.9.i.i
%.29.i.i = fdiv fast double %.23.i.i, %.25.i.i
%.30.i3 = fdiv fast double %3, %arg.time_explosion
%.176 = fdiv fast double %storemerge.i, %.7.i.i10
%.17618 = fdiv fast double %4, %sqrt.i.i7
%.30.i.i = fdiv fast double %1, %arg.time_explosion
%.11.i.i.i = fdiv fast double 1.000000e+00, %.7.i.i.i
%.15.i.i.i = fdiv fast double %.7.i2.i.i, %sqrt.i.i.i
%.22.i.i = fdiv fast double %.54.i, %.9.i.i
%.29.i.i = fdiv fast double %.23.i.i, %.25.i.i
[4]:
# Show context around each fdiv
# Match only lines whose assigned instruction is an fdiv, not every assignment
# line that happens to contain the text "fdiv" (e.g. in an operand name).
fdiv_re = re.compile(r'=\s*fdiv\b')
CONTEXT_LINES = 2  # IR lines shown before and after each hit
lines = ir.split('\n')
for i, line in enumerate(lines):
    if fdiv_re.search(line) is None:
        continue
    start = max(0, i - CONTEXT_LINES)
    end = min(len(lines), i + CONTEXT_LINES + 1)
    print(f"--- IR line {i+1} ---")
    for j in range(start, end):
        # Pad the non-hit marker to the width of ">>>" so the line-number
        # column stays aligned.
        marker = ">>>" if j == i else "   "
        print(f"{marker} {j+1:5d}: {lines[j]}")
    print()
--- IR line 103 ---
101: %.55 = zext i1 %.13 to i8
102: %0 = fmul fast double %.39, 0x3DC2567F4ED09FE8
>>> 103: %.30.i = fdiv fast double %0, %arg.time_explosion
104: %.6.i.i = fmul fast double %.30.i, %.48
105: %.7.i.i = fsub fast double 1.000000e+00, %.6.i.i
--- IR line 110 ---
108: %sqrt.i.i = tail call fast double @llvm.sqrt.f64(double %.10.i.i)
109: %.15.i.i = select i1 %.13, double %sqrt.i.i, double 1.000000e+00
>>> 110: %storemerge.i = fdiv reassoc arcp contract afn double %.7.i.i, %.15.i.i
111: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %.4.i)
112: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %excinfo.1.i)
--- IR line 120 ---
118:
119: B60.endif.i.i: ; preds = %B0.endif.endif.endif
>>> 120: %.11.i.i.i = fdiv fast double 1.000000e+00, %.7.i.i10
121: br label %B36.endif.i
122:
--- IR line 124 ---
122:
123: B82.endif.i.i: ; preds = %B0.endif.endif.endif.thread
>>> 124: %.15.i.i.i = fdiv fast double %.7.i2.i, %sqrt.i.i7
125: br label %B36.endif.i
126:
--- IR line 155 ---
153: B182.endif.i: ; preds = %B36.endif.i
154: %.9.i.i = fmul fast double %arg.time_explosion, 0x421BEB9BF3A00000
>>> 155: %.22.i.i = fdiv fast double %.126, %.9.i.i
156: %.23.i.i = fadd fast double %.22.i.i, %.31.i
157: %.24.i.i = fmul fast double %.22.i.i, %.31.i
--- IR line 159 ---
157: %.24.i.i = fmul fast double %.22.i.i, %.31.i
158: %.25.i.i = fadd fast double %.24.i.i, 1.000000e+00
>>> 159: %.29.i.i = fdiv fast double %.23.i.i, %.25.i.i
160: %sunkaddr = getelementptr inbounds i8, ptr %arg.r_packet.1, i64 8
161: store double %.29.i.i, ptr %sunkaddr, align 8, !noalias !25
--- IR line 201 ---
199: %.126 = load double, ptr %arg.r_packet.1, align 8
200: %3 = fmul fast double %.126, 0x3DC2567F4ED09FE8
>>> 201: %.30.i3 = fdiv fast double %3, %arg.time_explosion
202: %.6.i.i4 = fmul fast double %.30.i3, %.31.i
203: br i1 %2, label %B0.endif.endif.endif.thread, label %B0.endif.endif.endif
--- IR line 219 ---
217: %.173 = getelementptr inbounds nuw i8, ptr %arg.r_packet.1, i64 24
218: %.174 = load double, ptr %.173, align 8
>>> 219: %.176 = fdiv fast double %storemerge.i, %.7.i.i10
220: %.178 = fmul fast double %.174, %.176
221: store double %.178, ptr %.173, align 8
--- IR line 233 ---
231: %.17417 = load double, ptr %.17316, align 8
232: %4 = fmul fast double %.7.i2.i, %storemerge.i
>>> 233: %.17618 = fdiv fast double %4, %sqrt.i.i7
234: %.17819 = fmul fast double %.17417, %.17618
235: store double %.17819, ptr %.17316, align 8
--- IR line 714 ---
712: %.63.i = load double, ptr %.62.i, align 8, !noalias !31
713: %1 = fmul fast double %.54.i, 0x3DC2567F4ED09FE8
>>> 714: %.30.i.i = fdiv fast double %1, %arg.time_explosion
715: %.6.i.i.i = fmul fast double %.30.i.i, %.63.i
716: br i1 %0, label %B60.endif.i.i, label %B82.endif.i.i
--- IR line 720 ---
718: B60.endif.i.i: ; preds = %B528
719: %.7.i.i.i = fsub fast double 1.000000e+00, %.6.i.i.i
>>> 720: %.11.i.i.i = fdiv fast double 1.000000e+00, %.7.i.i.i
721: br label %B36.endif.i
722:
--- IR line 728 ---
726: %.10.i.i.i = fsub fast double 1.000000e+00, %.8.i.i.i
727: %sqrt.i.i.i = tail call fast double @llvm.sqrt.f64(double %.10.i.i.i)
>>> 728: %.15.i.i.i = fdiv fast double %.7.i2.i.i, %sqrt.i.i.i
729: br label %B36.endif.i
730:
--- IR line 758 ---
756: B182.endif.i: ; preds = %B36.endif.i
757: %.9.i.i = fmul fast double %arg.time_explosion, 0x421BEB9BF3A00000
>>> 758: %.22.i.i = fdiv fast double %.54.i, %.9.i.i
759: %.23.i.i = fadd fast double %.22.i.i, %.63.i
760: %.24.i.i = fmul fast double %.22.i.i, %.63.i
--- IR line 762 ---
760: %.24.i.i = fmul fast double %.22.i.i, %.63.i
761: %.25.i.i = fadd fast double %.24.i.i, 1.000000e+00
>>> 762: %.29.i.i = fdiv fast double %.23.i.i, %.25.i.i
763: %sunkaddr = getelementptr inbounds i8, ptr %arg.r_packet.1, i64 8
764: store double %.29.i.i, ptr %sunkaddr, align 8, !noalias !31