The $\mathbf{A} \mathbf{B} \mathbf{A}^T$ Pattern: Transforming Covariance¶
This pattern appears everywhere in statistics and machine learning:
$$\mathbf{A} \mathbf{B} \mathbf{A}^T$$
It answers the fundamental question: "If I transform data by $\mathbf{A}$, how does its covariance change?"
The Core Theorem¶
If $\mathbf{x}$ has covariance $\mathbf{\Sigma}$ and we compute $\mathbf{y} = \mathbf{A}\mathbf{x}$, then:
$$\text{Cov}(\mathbf{y}) = \mathbf{A} \, \text{Cov}(\mathbf{x}) \, \mathbf{A}^T = \mathbf{A} \mathbf{\Sigma} \mathbf{A}^T$$
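In one dimension this is just the familiar scalar rule $\text{Var}(ax) = a^2\,\text{Var}(x)$; the $\mathbf{A}\mathbf{\Sigma}\mathbf{A}^T$ form is its matrix generalization. A quick sanity check (values illustrative):

```python
import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(0, 2.0, 100_000)   # Var(x) ≈ 4
a = 3.0
y = a * x                          # Var(y) = a² · Var(x) ≈ 36

print(np.var(x), np.var(y))
```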
Where It Appears¶
- Kalman filter: Prediction step $\mathbf{P}_{pred} = \mathbf{F} \mathbf{P} \mathbf{F}^T + \mathbf{Q}$
- Factor analysis: $\mathbf{\Sigma} = \mathbf{\Lambda} \mathbf{\Psi} \mathbf{\Lambda}^T + \text{noise}$
- Sensor fusion: Fused covariance = $\mathbf{A} \mathbf{\Sigma}_{sensors} \mathbf{A}^T$
- Bayesian linear regression: Posterior covariance transformation
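As a preview, the Kalman prediction step listed above is a direct application: propagate the state covariance $\mathbf{P}$ through the dynamics $\mathbf{F}$, then add process noise $\mathbf{Q}$. A minimal sketch with made-up values:

```python
import numpy as np

# Constant-velocity model: state = [position, velocity], dt = 1 (illustrative)
F = np.array([[1.0, 1.0],
              [0.0, 1.0]])
P = np.diag([0.5, 0.2])        # current state covariance (illustrative)
Q = 0.01 * np.eye(2)           # process noise (illustrative)

P_pred = F @ P @ F.T + Q       # the A B Aᵀ pattern plus additive noise
print(P_pred)
```

Note how the zero off-diagonal of `P` becomes nonzero: coupling position to velocity through `F` correlates their uncertainties.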
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse
np.random.seed(42)
plt.rcParams['figure.figsize'] = (12, 6)
plt.rcParams['font.size'] = 11
1. Why This Pattern?¶
Let's derive it from first principles.
Given: $\mathbf{x}$ is a random vector with $\text{Cov}(\mathbf{x}) = E[(\mathbf{x} - \mu)(\mathbf{x} - \mu)^T] = \mathbf{\Sigma}$
Transform: $\mathbf{y} = \mathbf{A}\mathbf{x}$
Question: What is $\text{Cov}(\mathbf{y})$?
\begin{align} \text{Cov}(\mathbf{y}) &= E[(\mathbf{y} - \mu_y)(\mathbf{y} - \mu_y)^T] \\ &= E[(\mathbf{A}\mathbf{x} - \mathbf{A}\mu_x)(\mathbf{A}\mathbf{x} - \mathbf{A}\mu_x)^T] \\ &= E[\mathbf{A}(\mathbf{x} - \mu_x)(\mathbf{x} - \mu_x)^T\mathbf{A}^T] \\ &= \mathbf{A} \, E[(\mathbf{x} - \mu_x)(\mathbf{x} - \mu_x)^T] \, \mathbf{A}^T \\ &= \mathbf{A} \mathbf{\Sigma} \mathbf{A}^T \end{align}
# Verify with simulation
n_samples = 10000
# Original distribution
mu = np.array([0, 0])
Sigma = np.array([[1.0, 0.5],
[0.5, 1.0]])
# Transformation matrix
A = np.array([[2, 1],
[0, 1.5]])
# Generate samples
X = np.random.multivariate_normal(mu, Sigma, n_samples)
# Transform
Y = (A @ X.T).T
# Compare theoretical vs empirical covariance
Sigma_Y_theoretical = A @ Sigma @ A.T
Sigma_Y_empirical = np.cov(Y.T)
print("Covariance Transformation: Σ_y = A Σ_x A.T")
print("="*50)
print(f"\nOriginal Σ_x:")
print(Sigma)
print(f"\nTransformation A:")
print(A)
print(f"\nTheoretical Σ_y = A Σ_x A.T:")
print(Sigma_Y_theoretical)
print(f"\nEmpirical Σ_y (from {n_samples:,} samples):")
print(Sigma_Y_empirical.round(3))
print(f"\nClose match: {np.allclose(Sigma_Y_theoretical, Sigma_Y_empirical, rtol=0.05)}")
Covariance Transformation: Σ_y = A Σ_x A.T
==================================================

Original Σ_x:
[[1.  0.5]
 [0.5 1. ]]

Transformation A:
[[2.  1. ]
 [0.  1.5]]

Theoretical Σ_y = A Σ_x A.T:
[[7.   3.  ]
 [3.   2.25]]

Empirical Σ_y (from 10,000 samples):
[[7.14  3.085]
 [3.085 2.287]]

Close match: True
def plot_covariance_ellipse(ax, mean, cov, n_std=2, **kwargs):
    """Plot an n_std covariance ellipse for a 2x2 covariance matrix."""
    eigenvalues, eigenvectors = np.linalg.eigh(cov)  # returned in ascending order
    # Sort descending so the ellipse width corresponds to the largest eigenvalue,
    # whose eigenvector also defines the rotation angle
    order = np.argsort(eigenvalues)[::-1]
    eigenvalues, eigenvectors = eigenvalues[order], eigenvectors[:, order]
    angle = np.degrees(np.arctan2(eigenvectors[1, 0], eigenvectors[0, 0]))
    width, height = 2 * n_std * np.sqrt(eigenvalues)
    ellipse = Ellipse(mean, width, height, angle=angle, **kwargs)
    ax.add_patch(ellipse)
    return ellipse
# Visualize the transformation
fig, axes = plt.subplots(1, 3, figsize=(16, 5))
# Original
ax1 = axes[0]
ax1.scatter(X[:500, 0], X[:500, 1], alpha=0.3, s=10, c='blue')
plot_covariance_ellipse(ax1, mu, Sigma, n_std=2, facecolor='blue', alpha=0.2, edgecolor='blue', linewidth=2)
ax1.set_xlim(-5, 8)
ax1.set_ylim(-5, 8)
ax1.set_aspect('equal')
ax1.set_title('Original: $\\mathbf{x} \\sim N(0, \\Sigma)$', fontsize=12)
ax1.grid(True, alpha=0.3)
ax1.axhline(y=0, color='black', linewidth=0.5)
ax1.axvline(x=0, color='black', linewidth=0.5)
# Arrow showing transformation
ax2 = axes[1]
ax2.text(0.5, 0.6, '$\\mathbf{y} = \\mathbf{A}\\mathbf{x}$', fontsize=20, ha='center', transform=ax2.transAxes)
ax2.text(0.5, 0.4, '$\\longrightarrow$', fontsize=40, ha='center', transform=ax2.transAxes)
ax2.text(0.5, 0.25, f'A = [[2, 1], [0, 1.5]]', fontsize=12, ha='center', transform=ax2.transAxes, family='monospace')
ax2.axis('off')
ax2.set_title('Linear Transformation', fontsize=12)
# Transformed
ax3 = axes[2]
ax3.scatter(Y[:500, 0], Y[:500, 1], alpha=0.3, s=10, c='red')
plot_covariance_ellipse(ax3, np.zeros(2), Sigma_Y_theoretical, n_std=2,
facecolor='red', alpha=0.2, edgecolor='red', linewidth=2)
ax3.set_xlim(-5, 8)
ax3.set_ylim(-5, 8)
ax3.set_aspect('equal')
ax3.set_title('Transformed: $\\mathbf{y} \\sim N(0, \\mathbf{A}\\Sigma\\mathbf{A}^T)$', fontsize=12)
ax3.grid(True, alpha=0.3)
ax3.axhline(y=0, color='black', linewidth=0.5)
ax3.axvline(x=0, color='black', linewidth=0.5)
plt.suptitle('The $\\mathbf{A}\\Sigma\\mathbf{A}^T$ Formula: Linear Transform Changes Covariance', fontsize=14)
plt.tight_layout()
plt.show()
Real-World Scenario: Sensor Fusion for Robot Localization¶
Scenario: A robot uses 3 sensors to estimate its position. Each sensor has different accuracy. We want to combine them optimally.
The fusion weights matrix $\mathbf{A}$ determines how to combine measurements, and $\mathbf{A} \mathbf{\Sigma}_{sensors} \mathbf{A}^T$ gives the uncertainty of the fused estimate.
# Sensor uncertainties (variance in position estimate)
# LiDAR: accurate, GPS: moderate, Wheel odometry: poor
var_lidar = 0.04 # σ = 0.2 meters
var_gps = 0.25 # σ = 0.5 meters
var_odom = 0.64 # σ = 0.8 meters
# Combined sensor covariance (assuming independence)
Sigma_sensors = np.diag([var_lidar, var_gps, var_odom])
print("Sensor Covariances (diagonal = independent sensors)")
print("="*50)
sensors = ['LiDAR', 'GPS', 'Odometry']
for name, var in zip(sensors, [var_lidar, var_gps, var_odom]):
print(f" {name:12}: σ² = {var:.2f} (σ = {np.sqrt(var):.2f} m)")
Sensor Covariances (diagonal = independent sensors)
==================================================
  LiDAR       : σ² = 0.04 (σ = 0.20 m)
  GPS         : σ² = 0.25 (σ = 0.50 m)
  Odometry    : σ² = 0.64 (σ = 0.80 m)
# Fusion strategy: weighted average based on inverse variance
# More accurate sensors get higher weight
variances = np.array([var_lidar, var_gps, var_odom])
weights = (1/variances) / np.sum(1/variances) # Normalize to sum to 1
# Fusion matrix A: (1x3) - combines 3 sensor readings into 1 estimate
A = weights.reshape(1, -1)
print("\nOptimal Fusion Weights (inverse variance weighting)")
print("="*50)
for name, w in zip(sensors, weights):
print(f" {name:12}: weight = {w:.3f}")
print(f"\nFusion matrix A = {A.round(3)}")
Optimal Fusion Weights (inverse variance weighting)
==================================================
  LiDAR       : weight = 0.818
  GPS         : weight = 0.131
  Odometry    : weight = 0.051

Fusion matrix A = [[0.818 0.131 0.051]]
# THE KEY FORMULA: Fused variance = A @ Σ_sensors @ A.T
Sigma_fused = A @ Sigma_sensors @ A.T
print("\nFused Uncertainty: Σ_fused = A @ Σ_sensors @ A.T")
print("="*50)
print(f"\nΣ_fused = {Sigma_fused[0,0]:.4f}")
print(f"σ_fused = {np.sqrt(Sigma_fused[0,0]):.3f} meters")
print(f"\nComparison:")
print(f" Best single sensor (LiDAR): σ = {np.sqrt(var_lidar):.3f} m")
print(f" Fused estimate: σ = {np.sqrt(Sigma_fused[0,0]):.3f} m ← BETTER!")
print(f"\nImprovement: {(1 - np.sqrt(Sigma_fused[0,0])/np.sqrt(var_lidar))*100:.1f}% more accurate than best sensor alone")
Fused Uncertainty: Σ_fused = A @ Σ_sensors @ A.T
==================================================

Σ_fused = 0.0327
σ_fused = 0.181 meters

Comparison:
  Best single sensor (LiDAR): σ = 0.200 m
  Fused estimate: σ = 0.181 m ← BETTER!

Improvement: 9.6% more accurate than best sensor alone
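Why inverse-variance weights? For any weight vector $\mathbf{w}$ summing to 1, the fused variance is $\mathbf{w}^T \mathbf{\Sigma}_{sensors} \mathbf{w}$ (the $1 \times 1$ case of $\mathbf{A}\mathbf{\Sigma}\mathbf{A}^T$), and inverse-variance weighting minimizes it, giving $1/\sum_i (1/\sigma_i^2)$. A quick check against random alternative weights, reusing the sensor variances above:

```python
import numpy as np

variances = np.array([0.04, 0.25, 0.64])            # LiDAR, GPS, odometry
w_opt = (1 / variances) / np.sum(1 / variances)      # inverse-variance weights
var_opt = w_opt @ np.diag(variances) @ w_opt         # A Σ Aᵀ with A = wᵀ

# Closed form: optimal fused variance is 1 / Σ(1/σᵢ²)
assert np.isclose(var_opt, 1 / np.sum(1 / variances))

# Any other normalized weights do at least as badly
rng = np.random.default_rng(0)
for _ in range(1000):
    w = rng.dirichlet(np.ones(3))                    # random weights summing to 1
    assert w @ np.diag(variances) @ w >= var_opt - 1e-12

print(f"optimal fused variance: {var_opt:.4f}")      # ≈ 0.0327, matching above
```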
# Visualize the fusion
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
# True position
true_pos = 10.0 # meters
# Simulate measurements
np.random.seed(42)
n_measurements = 50
measurements = np.column_stack([
np.random.normal(true_pos, np.sqrt(var_lidar), n_measurements),
np.random.normal(true_pos, np.sqrt(var_gps), n_measurements),
np.random.normal(true_pos, np.sqrt(var_odom), n_measurements)
])
# Fuse each measurement set
fused = measurements @ A.T
# Left: Individual sensor distributions
ax1 = axes[0]
x_range = np.linspace(8, 12, 200)
for i, (name, var, color) in enumerate(zip(sensors, variances, ['blue', 'green', 'orange'])):
pdf = 1/np.sqrt(2*np.pi*var) * np.exp(-(x_range - true_pos)**2 / (2*var))
ax1.fill_between(x_range, pdf, alpha=0.3, color=color, label=f'{name} (σ={np.sqrt(var):.2f}m)')
ax1.plot(x_range, pdf, color=color, linewidth=2)
# Fused distribution
pdf_fused = 1/np.sqrt(2*np.pi*Sigma_fused[0,0]) * np.exp(-(x_range - true_pos)**2 / (2*Sigma_fused[0,0]))
ax1.fill_between(x_range, pdf_fused, alpha=0.4, color='red', label=f'Fused (σ={np.sqrt(Sigma_fused[0,0]):.3f}m)')
ax1.plot(x_range, pdf_fused, 'r-', linewidth=3)
ax1.axvline(x=true_pos, color='black', linestyle='--', linewidth=2, label='True position')
ax1.set_xlabel('Position (m)')
ax1.set_ylabel('Probability Density')
ax1.set_title('Sensor Uncertainties vs Fused Estimate', fontsize=12)
ax1.legend()
ax1.grid(True, alpha=0.3)
# Right: Simulated measurements
ax2 = axes[1]
for i, (name, color) in enumerate(zip(sensors, ['blue', 'green', 'orange'])):
ax2.scatter(range(n_measurements), measurements[:, i], alpha=0.5, s=30, c=color, label=name)
ax2.scatter(range(n_measurements), fused, c='red', s=60, marker='*', label='Fused', zorder=5)
ax2.axhline(y=true_pos, color='black', linestyle='--', linewidth=2, label='True position')
ax2.set_xlabel('Measurement #')
ax2.set_ylabel('Position estimate (m)')
ax2.set_title('Individual Measurements vs Fused Estimates', fontsize=12)
ax2.legend(loc='upper right')
ax2.grid(True, alpha=0.3)
plt.suptitle('Sensor Fusion: $\\mathbf{A}\\Sigma\\mathbf{A}^T$ Combines Uncertainties Optimally', fontsize=14)
plt.tight_layout()
plt.show()
2. Factor Analysis: Latent Variables Create Correlations¶
In factor analysis, observed variables $\mathbf{x}$ are generated from latent factors $\mathbf{z}$:
$$\mathbf{x} = \mathbf{\Lambda} \mathbf{z} + \boldsymbol{\epsilon}$$
If $\text{Cov}(\mathbf{z}) = \mathbf{\Psi}$ (often identity), then:
$$\text{Cov}(\mathbf{x}) = \mathbf{\Lambda} \mathbf{\Psi} \mathbf{\Lambda}^T + \text{noise}$$
The $\mathbf{\Lambda} \mathbf{\Psi} \mathbf{\Lambda}^T$ pattern explains how a few factors create complex correlations!
# Factor analysis: 2 factors → 5 observed variables
# Example: "Intelligence" and "Effort" factors → test scores
# Loading matrix: how factors influence observed variables
Lambda = np.array([
[0.9, 0.1], # Math: mostly intelligence
[0.8, 0.2], # Physics: mostly intelligence
[0.7, 0.3], # Chemistry: intelligence + some effort
[0.3, 0.8], # History: mostly effort (memorization)
[0.2, 0.9], # Literature: mostly effort
])
# Factor covariance (identity = independent factors)
Psi = np.eye(2)
# Observed covariance from factors alone
Sigma_factors = Lambda @ Psi @ Lambda.T
# Add some idiosyncratic noise
noise_var = 0.1
Sigma_observed = Sigma_factors + noise_var * np.eye(5)
subjects = ['Math', 'Physics', 'Chemistry', 'History', 'Literature']
factors = ['Intelligence', 'Effort']
print("Factor Analysis: Test Score Correlations")
print("="*50)
print(f"\nLoading matrix Λ (how factors influence subjects):")
print(f"{'Subject':12} {'Intelligence':>12} {'Effort':>10}")
print("-"*35)
for i, subj in enumerate(subjects):
print(f"{subj:12} {Lambda[i,0]:>12.1f} {Lambda[i,1]:>10.1f}")
Factor Analysis: Test Score Correlations
==================================================

Loading matrix Λ (how factors influence subjects):
Subject      Intelligence     Effort
-----------------------------------
Math                  0.9        0.1
Physics               0.8        0.2
Chemistry             0.7        0.3
History               0.3        0.8
Literature            0.2        0.9
# Visualize
fig, axes = plt.subplots(1, 3, figsize=(16, 5))
# Loading matrix
ax1 = axes[0]
im1 = ax1.imshow(Lambda, cmap='RdBu_r', vmin=-1, vmax=1, aspect='auto')
ax1.set_xticks([0, 1])
ax1.set_xticklabels(factors)
ax1.set_yticks(range(5))
ax1.set_yticklabels(subjects)
ax1.set_title('Loading Matrix $\\Lambda$\n(Factor → Subject)', fontsize=11)
for i in range(5):
for j in range(2):
ax1.text(j, i, f'{Lambda[i,j]:.1f}', ha='center', va='center', fontsize=11)
plt.colorbar(im1, ax=ax1)
# Factor covariance (identity)
ax2 = axes[1]
im2 = ax2.imshow(Psi, cmap='RdBu_r', vmin=-1, vmax=1)
ax2.set_xticks([0, 1])
ax2.set_yticks([0, 1])
ax2.set_xticklabels(factors)
ax2.set_yticklabels(factors)
ax2.set_title('Factor Covariance $\\Psi$\n(Independent)', fontsize=11)
for i in range(2):
for j in range(2):
ax2.text(j, i, f'{Psi[i,j]:.0f}', ha='center', va='center', fontsize=12)
plt.colorbar(im2, ax=ax2)
# Resulting observed correlation
ax3 = axes[2]
# Convert to correlation for better interpretation
stds = np.sqrt(np.diag(Sigma_observed))
corr = Sigma_observed / np.outer(stds, stds)
im3 = ax3.imshow(corr, cmap='RdBu_r', vmin=-1, vmax=1)
ax3.set_xticks(range(5))
ax3.set_yticks(range(5))
ax3.set_xticklabels([s[:4] for s in subjects], rotation=45)
ax3.set_yticklabels(subjects)
ax3.set_title('Observed Correlation\n$\\Lambda\\Psi\\Lambda^T$ + noise', fontsize=11)
for i in range(5):
for j in range(5):
ax3.text(j, i, f'{corr[i,j]:.2f}', ha='center', va='center', fontsize=9)
plt.colorbar(im3, ax=ax3)
plt.suptitle('Factor Analysis: 2 Factors Create Complex 5×5 Correlation Structure', fontsize=14)
plt.tight_layout()
plt.show()
print("\nKey insight: Math-Physics correlation is high (0.87): both load on Intelligence")
print("History-Literature correlation is high (0.88): both load on Effort")
print("Math-Literature correlation is low (0.29): they load on different factors")
Key insight: Math-Physics correlation is high (0.87): both load on Intelligence
History-Literature correlation is high (0.88): both load on Effort
Math-Literature correlation is low (0.29): they load on different factors
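We can also verify the $\mathbf{\Lambda}\mathbf{\Psi}\mathbf{\Lambda}^T + \text{noise}$ structure empirically by actually generating data from the factor model. A self-contained sketch, reusing the loading matrix and noise variance from above:

```python
import numpy as np

rng = np.random.default_rng(0)
Lambda = np.array([[0.9, 0.1], [0.8, 0.2], [0.7, 0.3],
                   [0.3, 0.8], [0.2, 0.9]])           # loadings from above
noise_var = 0.1
n = 200_000

Z = rng.standard_normal((n, 2))                       # latent factors, Cov = I
eps = rng.normal(0, np.sqrt(noise_var), (n, 5))       # idiosyncratic noise
X = Z @ Lambda.T + eps                                # x = Λ z + ε, row per sample

Sigma_theory = Lambda @ Lambda.T + noise_var * np.eye(5)
print(np.abs(np.cov(X.T) - Sigma_theory).max())       # only small sampling error
```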
3. Properties of $\mathbf{A}\mathbf{B}\mathbf{A}^T$¶
Important properties:
- Symmetry preserved: If $\mathbf{B}$ is symmetric, so is $\mathbf{A}\mathbf{B}\mathbf{A}^T$
- Positive (semi-)definiteness preserved: If $\mathbf{B}$ is PSD, so is $\mathbf{A}\mathbf{B}\mathbf{A}^T$
- Rank: $\text{rank}(\mathbf{A}\mathbf{B}\mathbf{A}^T) \leq \min(\text{rank}(\mathbf{A}), \text{rank}(\mathbf{B}))$
# Demonstrate properties
np.random.seed(42)
# B is symmetric PSD (a covariance matrix)
temp = np.random.randn(3, 3)
B = temp @ temp.T # Always PSD
# A is a non-square transformation
A = np.random.randn(2, 3)
# Compute ABA.T
result = A @ B @ A.T
print("Properties of A @ B @ A.T")
print("="*50)
print(f"\nB ({B.shape[0]}×{B.shape[1]}):")
print(B.round(3))
print(f"B is symmetric: {np.allclose(B, B.T)}")
print(f"B eigenvalues (all ≥ 0 for PSD): {np.linalg.eigvalsh(B).round(3)}")
print(f"\nA ({A.shape[0]}×{A.shape[1]}):")
print(A.round(3))
print(f"\nA @ B @ A.T ({result.shape[0]}×{result.shape[1]}):")
print(result.round(3))
print(f"Result is symmetric: {np.allclose(result, result.T)}")
print(f"Result eigenvalues: {np.linalg.eigvalsh(result).round(3)}")
print(f"Result is PSD: {all(np.linalg.eigvalsh(result) >= -1e-10)}")
Properties of A @ B @ A.T
==================================================

B (3×3):
[[0.685 0.637 0.374]
 [0.637 2.429 2.335]
 [0.374 2.335 3.303]]
B is symmetric: True
B eigenvalues (all ≥ 0 for PSD): [0.283 0.789 5.346]

A (2×3):
[[ 0.543 -0.463 -0.466]
 [ 0.242 -1.913 -1.725]]

A @ B @ A.T (2×2):
[[ 1.938  7.72 ]
 [ 7.72  33.274]]
Result is symmetric: True
Result eigenvalues: [ 0.14  35.072]
Result is PSD: True
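The rank property deserves its own check: if $\mathbf{B}$ has low rank, so does $\mathbf{A}\mathbf{B}\mathbf{A}^T$, no matter how $\mathbf{A}$ mixes the coordinates. A small sketch with a rank-1 $\mathbf{B}$:

```python
import numpy as np

rng = np.random.default_rng(0)
v = rng.standard_normal(4)
B = np.outer(v, v)                 # rank-1 PSD matrix (v vᵀ)
A = rng.standard_normal((3, 4))    # generic full-rank 3×4 map

result = A @ B @ A.T               # equals (Av)(Av)ᵀ, so rank ≤ 1
print(np.linalg.matrix_rank(B), np.linalg.matrix_rank(result))
```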
4. Special Cases¶
| Pattern | Name | Meaning |
|---|---|---|
| $\mathbf{A} \mathbf{A}^T$ | Gram matrix (of the rows of $\mathbf{A}$) | Covariance when $\mathbf{B} = \mathbf{I}$ |
| $\mathbf{Q} \mathbf{B} \mathbf{Q}^T$ | Similarity transform (orthogonal) | Rotates the covariance |
| $\mathbf{D} \mathbf{B} \mathbf{D}^T$ | Diagonal scaling | Scales variances independently |
# Example: Orthogonal transformation (rotation) preserves eigenvalues
B = np.array([[3, 1], [1, 2]]) # Original covariance
# Rotation matrix (orthogonal)
theta = np.pi/4
Q = np.array([[np.cos(theta), -np.sin(theta)],
[np.sin(theta), np.cos(theta)]])
# Rotated covariance
B_rotated = Q @ B @ Q.T
print("Rotation Preserves Eigenvalues")
print("="*50)
print(f"\nOriginal B eigenvalues: {np.linalg.eigvalsh(B).round(4)}")
print(f"Rotated Q B Q.T eigenvalues: {np.linalg.eigvalsh(B_rotated).round(4)}")
print(f"\nEigenvalues preserved: {np.allclose(sorted(np.linalg.eigvalsh(B)), sorted(np.linalg.eigvalsh(B_rotated)))}")
Rotation Preserves Eigenvalues
==================================================

Original B eigenvalues: [1.382 3.618]
Rotated Q B Q.T eigenvalues: [1.382 3.618]

Eigenvalues preserved: True
# Visualize rotation of covariance ellipse
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
for ax, cov, title in [(axes[0], B, 'Original $\\Sigma$'),
(axes[1], B_rotated, f'Rotated $Q\\Sigma Q^T$ (θ={int(np.degrees(theta))}°)')]:
# Generate samples
samples = np.random.multivariate_normal([0, 0], cov, 500)
ax.scatter(samples[:, 0], samples[:, 1], alpha=0.3, s=15)
plot_covariance_ellipse(ax, [0, 0], cov, n_std=2,
facecolor='blue', alpha=0.2, edgecolor='blue', linewidth=2)
ax.set_xlim(-5, 5)
ax.set_ylim(-5, 5)
ax.set_aspect('equal')
ax.grid(True, alpha=0.3)
ax.axhline(y=0, color='black', linewidth=0.5)
ax.axvline(x=0, color='black', linewidth=0.5)
ax.set_title(title, fontsize=12)
plt.suptitle('Orthogonal $Q\\Sigma Q^T$: Rotates Ellipse, Preserves Eigenvalues', fontsize=14)
plt.tight_layout()
plt.show()
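The remaining special case from the table, $\mathbf{D}\mathbf{B}\mathbf{D}^T$ with diagonal $\mathbf{D}$, rescales each variable without rotating anything: variances pick up factors $d_i^2$ and covariances pick up $d_i d_j$. A quick check, reusing the same $\mathbf{B}$:

```python
import numpy as np

B = np.array([[3.0, 1.0], [1.0, 2.0]])   # covariance from above
D = np.diag([2.0, 0.5])                   # scale first variable by 2, second by 0.5

B_scaled = D @ B @ D.T

# Entry-wise: (D B Dᵀ)_ij = d_i · d_j · B_ij
d = np.diag(D)
assert np.allclose(B_scaled, np.outer(d, d) * B)
print(B_scaled)
```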
Key Takeaways¶
The formula: If $\mathbf{y} = \mathbf{A}\mathbf{x}$ and $\text{Cov}(\mathbf{x}) = \mathbf{\Sigma}$, then: $$\text{Cov}(\mathbf{y}) = \mathbf{A} \mathbf{\Sigma} \mathbf{A}^T$$
Intuition: Linear transformations "stretch" covariance structure
- $\mathbf{A}$ defines the transformation
- $\mathbf{A}^T$ ensures the result is still a valid covariance (symmetric, PSD)
Applications:
- Sensor fusion: Combine measurements optimally
- Factor analysis: Few factors → complex correlations
- Kalman filter: Propagate uncertainty through time
Properties preserved: Symmetry and positive semi-definiteness
Next: We'll explore the Schur complement — what happens when we condition on part of a multivariate Gaussian.