import sys
from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt

def _find(name):
    p = Path.cwd()
    for _ in range(6):
        if (p / name).exists():
            return p
        p = p.parent
    return Path.cwd()

PY_DIR = _find("fmg")
sys.path.insert(0, str(PY_DIR))
REPO = PY_DIR.parent
FIG_DIR = REPO / "paper" / "figures"
FIG_DIR.mkdir(parents=True, exist_ok=True)

import fmg
from fmg import schedules, gaussian, linalg, plotting
from fmg.seeding import set_seed

SEED = set_seed()
plt = plotting.setup_matplotlib()
print("fmg", fmg.__version__, "| seed", SEED, "| figures ->", FIG_DIR)

fmg 0.1.0 | seed 20260617 | figures -> /Users/eserie/galaxies/flow-matching-gaussians/.worktrees/task-20260617-32e5/paper/figures

S0 = np.diag([2.0, 1.0])
S1_comm = np.diag([1.0, 4.0])                      # diagonal -> commutes with S0
theta = np.pi / 6
R = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]])
S1_ncom = R @ np.diag([4.0, 1.0]) @ R.T            # rotated frame -> does NOT commute

for name, S1 in [("commuting", S1_comm), ("non-commuting", S1_ncom)]:
    linalg.assert_spd(S0, "S0"); linalg.assert_spd(S1, f"S1[{name}]")
    print(f"{name:14s}  ||[S0,S1]|| = {gaussian.commutator_norm(S0, S1):.4f}")

SCHEDULES = [schedules.linear(), schedules.variance_preserving(), schedules.cosine()]
SCHED_SI  = [schedules.linear(), schedules.variance_preserving(), schedules.cubic()]

commuting       ||[S0,S1]|| = 0.0000
non-commuting   ||[S0,S1]|| = 1.8371

def ellipse_panel(axes, S0, S1, sched_list, title):
    times = np.linspace(0, 1, 9)
    for ax, s in zip(axes, sched_list):
        for j, t in enumerate(times):
            St = gaussian.sigma_t(t, s, S0, S1)
            c = plt.cm.viridis(j / (len(times) - 1))
            lw = 2.2 if t in (0.0, 1.0) else 1.2
            plotting.cov_ellipse(ax, St, n_std=2.0, edgecolor=c, lw=lw)
        ax.set_title(f"{title}\n{s.name}")
        ax.set_aspect("equal"); ax.set_xlim(-5, 5); ax.set_ylim(-5, 5)

fig, axes = plt.subplots(1, 3, figsize=(13, 4.6))
ellipse_panel(axes, S0, S1_comm, SCHEDULES, "commuting")
fig.suptitle(r"Covariance ellipses $\Sigma_t=a^2\Sigma_0+b^2\Sigma_1$ — commuting pair")
plotting.savefig_pdf(fig, FIG_DIR / "ellipses_commuting_schedules.pdf")
plt.show()

fig, axes = plt.subplots(1, 3, figsize=(13, 4.6))
ellipse_panel(axes, S0, S1_ncom, SCHEDULES, "non-commuting")
fig.suptitle(r"Covariance ellipses $\Sigma_t=a^2\Sigma_0+b^2\Sigma_1$ — non-commuting pair (note the rotation)")
plotting.savefig_pdf(fig, FIG_DIR / "ellipses_noncommuting_schedules.pdf")
plt.show()

def path_vs_geodesic(ax, S0, S1, title):
    sched = schedules.linear()
    T_OT = gaussian.bures_ot_map(S0, S1)
    for j, t in enumerate(np.linspace(0, 1, 9)):
        St = gaussian.sigma_t(t, sched, S0, S1)
        Sg = gaussian.bures_geodesic_cov(t, S0, S1, T_OT)
        plotting.cov_ellipse(ax, St, n_std=2.0, edgecolor="#0072B2",
                             lw=1.0 + 1.2 * (t in (0.0, 1.0)), ls="-")
        plotting.cov_ellipse(ax, Sg, n_std=2.0, edgecolor="#D55E00",
                             lw=1.0 + 1.2 * (t in (0.0, 1.0)), ls="--")
    ax.plot([], [], color="#0072B2", label="interpolant $\\Sigma_t$ (linear)")
    ax.plot([], [], color="#D55E00", ls="--", label="Bures geodesic $\\Sigma_t^{\\rm geo}$")
    ax.set_title(title); ax.legend(loc="upper right")
    ax.set_aspect("equal"); ax.set_xlim(-5, 5); ax.set_ylim(-5, 5)

fig, axes = plt.subplots(1, 2, figsize=(12, 5.6))
path_vs_geodesic(axes[0], S0, S1_comm, "commuting: paths coincide")
path_vs_geodesic(axes[1], S0, S1_ncom, "non-commuting: independent bridge $\\neq$ optimal bridge")
fig.suptitle("Independent-coupling covariance path vs. the optimal-transport geodesic")
plotting.savefig_pdf(fig, FIG_DIR / "ellipses_interpolant_vs_geodesic.pdf")
plt.show()

def map_report(S0, S1, label):
    T_OT = gaussian.bures_ot_map(S0, S1)
    Ts = {s.name: gaussian.flow_map(s, S0, S1) for s in SCHED_SI}
    names = list(Ts)
    spread = max(np.linalg.norm(Ts[a] - Ts[b])
                 for a in names for b in names)
    T1 = Ts["linear"]
    Q, P = linalg.polar(T1)
    print(f"--- {label}  (||[S0,S1]|| = {gaussian.commutator_norm(S0, S1):.4f}) ---")
    print(f"  schedule spread  max||T1(s)-T1(s')||      = {spread:.2e}   (schedule-independence)")
    print(f"  symmetry defect  ||T1 - T1^T||/||T1||      = {gaussian.symmetry_defect(T1):.3e}")
    print(f"  distance to OT   ||T1 - T_OT||             = {np.linalg.norm(T1 - T_OT):.3e}")
    print(f"  pushforward res  ||T1 S0 T1^T - S1||/||S1|| = {gaussian.pushforward_residual(T1, S0, S1):.2e}")
    print(f"  polar rotation   angle(Q)                  = {np.degrees(linalg.rotation_angle(Q)):+.3f} deg")
    return T1, T_OT

T1c, T_OTc = map_report(S0, S1_comm, "commuting")
T1n, T_OTn = map_report(S0, S1_ncom, "non-commuting")

--- commuting  (||[S0,S1]|| = 0.0000) ---
  schedule spread  max||T1(s)-T1(s')||      = 6.26e-13   (schedule-independence)
  symmetry defect  ||T1 - T1^T||/||T1||      = 0.000e+00
  distance to OT   ||T1 - T_OT||             = 4.162e-13
  pushforward res  ||T1 S0 T1^T - S1||/||S1|| = 4.04e-13
  polar rotation   angle(Q)                  = +0.000 deg
--- non-commuting  (||[S0,S1]|| = 1.8371) ---
  schedule spread  max||T1(s)-T1(s')||      = 2.29e-13   (schedule-independence)
  symmetry defect  ||T1 - T1^T||/||T1||      = 1.988e-01
  distance to OT   ||T1 - T_OT||             = 2.011e-01
  pushforward res  ||T1 S0 T1^T - S1||/||S1|| = 2.05e-14
  polar rotation   angle(Q)                  = -5.977 deg

# Visualise WHERE the two maps send marked points (both land on the Sigma_1 ellipse).
def map_action(ax, S0, S1, T1, T_OT, title):
    th_dense = np.linspace(0, 2 * np.pi, 200)
    th_marks = np.linspace(0, 2 * np.pi, 16, endpoint=False)
    S0h = linalg.sym_sqrt(S0)
    base_curve = 2 * (S0h @ np.stack([np.cos(th_dense), np.sin(th_dense)]))
    base_marks = 2 * (S0h @ np.stack([np.cos(th_marks), np.sin(th_marks)]))
    tgt_curve = 2 * (linalg.sym_sqrt(S1) @ np.stack([np.cos(th_dense), np.sin(th_dense)]))
    fm = T1 @ base_marks
    ot = T_OT @ base_marks
    ax.plot(base_curve[0], base_curve[1], color="0.6", lw=1.3, label=r"$\Sigma_0$ ellipse")
    ax.plot(tgt_curve[0], tgt_curve[1], color="0.3", lw=1.3, ls=":", label=r"$\Sigma_1$ ellipse (shared image)")
    for i in range(len(th_marks)):
        ax.plot([ot[0, i], fm[0, i]], [ot[1, i], fm[1, i]], color="0.5", lw=0.8, zorder=1)
    ax.scatter(ot[0], ot[1], c="#D55E00", s=40, marker="D", zorder=3, label=r"$T_{\rm OT}$ image")
    ax.scatter(fm[0], fm[1], c="#0072B2", s=40, marker="o", zorder=3, label=r"$T_1$ image")
    ax.set_title(title); ax.legend(loc="upper right", fontsize=8)
    ax.set_aspect("equal"); ax.set_xlim(-7, 7); ax.set_ylim(-7, 7)

fig, axes = plt.subplots(1, 2, figsize=(12, 5.6))
map_action(axes[0], S0, S1_comm, T1c, T_OTc, "commuting: $T_1 = T_{\\rm OT}$ (segments vanish)")
map_action(axes[1], S0, S1_ncom, T1n, T_OTn, "non-commuting: points slid by the polar rotation")
fig.suptitle("Flow map vs. Bures OT map — same target ellipse, different point images")
plotting.savefig_pdf(fig, FIG_DIR / "flowmap_vs_otmap.pdf")
plt.show()

alphas = np.linspace(0.0, np.pi / 2, 60)
cs, ss, ds, spreads = [], [], [], []
for al in alphas:
    Ra = np.array([[np.cos(al), -np.sin(al)], [np.sin(al), np.cos(al)]])
    S1a = Ra @ np.diag([4.0, 1.0]) @ Ra.T
    T1 = gaussian.flow_map(schedules.linear(), S0, S1a)
    T1b = gaussian.flow_map(schedules.cubic(), S0, S1a)
    T_OT = gaussian.bures_ot_map(S0, S1a)
    cs.append(gaussian.commutator_norm(S0, S1a))
    ss.append(gaussian.symmetry_defect(T1))
    ds.append(np.linalg.norm(T1 - T_OT))
    spreads.append(np.linalg.norm(T1 - T1b))
cs, ss, ds, spreads = map(np.asarray, (cs, ss, ds, spreads))

fig, ax = plt.subplots(figsize=(7.5, 4.6))
ax.plot(cs, ss, "o-", ms=3, color="#0072B2", label=r"symmetry residual $s=\|T_1-T_1^\top\|/\|T_1\|$")
ax.plot(cs, ds, "s-", ms=3, color="#D55E00", label=r"map distance $d=\|T_1-T_{\rm OT}\|$")
ax.plot(cs, spreads + 1e-16, "^-", ms=3, color="#009E73", label=r"schedule spread $\|T_1^{\rm lin}-T_1^{\rm cub}\|$")
ax.set_xlabel(r"commutator norm $c=\|[\Sigma_0,\Sigma_1]\|$")
ax.set_ylabel("residual")
ax.set_title(r"$T_1=T_{\rm OT}\Leftrightarrow T_1$ symmetric $\Leftrightarrow[\Sigma_0,\Sigma_1]=0$")
ax.legend()
plotting.savefig_pdf(fig, FIG_DIR / "commutator_sweep.pdf")
plt.show()

print("at c=0:    s = %.2e,  d = %.2e" % (ss[0], ds[0]))
print("max over sweep:  s = %.3f,  d = %.3f" % (ss.max(), ds.max()))
print("schedule spread stays at machine zero over the whole sweep: max = %.2e" % spreads.max())

at c=0:    s = 0.00e+00,  d = 8.34e-14
max over sweep:  s = 0.214,  d = 0.222
schedule spread stays at machine zero over the whole sweep: max = 1.20e-12

Notebook B — Gaussian Covariance Flow and the Optimal-Transport Map¶

1. Two test pairs: commuting and non-commuting¶

2. Covariance ellipses evolving in time¶

3. Interpolant path vs. the Bures–Wasserstein geodesic¶

4. The flow map $T_1$ — schedule-independent, and equal to $T_{\rm OT}$ only when commuting¶

5. Commutator sweep — the theorem, falsifiable¶

6. Takeaways¶