import numpy as np
import matplotlib.pyplot as plt
import time

np.random.seed(42)
np.set_printoptions(precision=4, suppress=True)

def generate_covariance_matrix(n):
    """Generate a realistic gene expression covariance matrix."""
    n_pathways = 10
    pathway_loadings = np.random.randn(n, n_pathways) * 0.1
    systematic_cov = pathway_loadings @ pathway_loadings.T
    idio_var = np.random.uniform(0.01, 0.05, n)
    return systematic_cov + np.diag(idio_var)

# Generate baseline data
N = 500
Sigma = generate_covariance_matrix(N)
Sigma_inv = np.linalg.inv(Sigma)

print(f"Gene panel: {N} genes")
print(f"Baseline covariance and its inverse: pre-computed")

Gene panel: 500 genes
Baseline covariance and its inverse: pre-computed

# FOXM1 regulatory profile
# FOXM1 is a key regulator of cell proliferation — it strongly activates
# genes involved in mitosis (e.g., CCNB1, CDK1, PLK1, AURKB)

u = np.random.randn(N) * 0.01  # Weak baseline effect on most genes

# Genes 400-500 are proliferation genes — FOXM1 strongly regulates them
u[400:] = np.random.uniform(0.05, 0.15, 100)

print("FOXM1 Regulatory Profile:")
print(f"  Genes 0–399 (non-proliferation):  mean |loading| = {np.mean(np.abs(u[:400])):.4f}")
print(f"  Genes 400–499 (proliferation):    mean |loading| = {np.mean(np.abs(u[400:])):.4f}")
print(f"\n  Ratio: proliferation genes are {np.mean(np.abs(u[400:])) / np.mean(np.abs(u[:400])):.0f}x more regulated by FOXM1")

FOXM1 Regulatory Profile:
  Genes 0–399 (non-proliferation):  mean |loading| = 0.0076
  Genes 400–499 (proliferation):    mean |loading| = 0.0990

  Ratio: proliferation genes are 13x more regulated by FOXM1

def sherman_morrison(A_inv, u, v=None):
    """
    Compute (A + u @ v^T)^{-1} using the Sherman-Morrison formula.
    If v is None, computes the symmetric case (A + u @ u^T)^{-1}.
    
    Parameters:
    -----------
    A_inv : ndarray (N x N) — inverse of the original matrix
    u : ndarray (N,) — rank-1 update vector
    v : ndarray (N,) or None — second vector (defaults to u for symmetric case)
    
    Returns:
    --------
    ndarray (N x N) — inverse of (A + u @ v^T)
    """
    if v is None:
        v = u
    
    # Step 1: w = A^{-1} @ u
    w = A_inv @ u
    
    # Step 2: z = A^{-1}^T @ v  (= A^{-1} @ v if A is symmetric)
    z = v @ A_inv
    
    # Step 3: scalar denominator
    alpha = 1 + v @ w
    
    # Step 4: rank-1 correction
    return A_inv - np.outer(w, z) / alpha

print("Sherman-Morrison function defined.")
print("Handles both symmetric (u @ u^T) and general (u @ v^T) rank-1 updates.")

Sherman-Morrison function defined.
Handles both symmetric (u @ u^T) and general (u @ v^T) rank-1 updates.

# Updated covariance: adding FOXM1's effect
Sigma_new = Sigma + np.outer(u, u)

# Method 1: Direct inversion
start = time.time()
inv_direct = np.linalg.inv(Sigma_new)
time_direct = time.time() - start

# Method 2: Sherman-Morrison
start = time.time()
inv_sm = sherman_morrison(Sigma_inv, u)
time_sm = time.time() - start

# Verify
max_diff = np.max(np.abs(inv_direct - inv_sm))

print("Single TF Update (FOXM1 Discovery):")
print("=" * 50)
print(f"Direct inversion:    {time_direct*1000:.2f} ms  (O(N³))")
print(f"Sherman-Morrison:    {time_sm*1000:.2f} ms  (O(N²))")
print(f"Speedup:             {time_direct/time_sm:.1f}x")
print(f"\nMax difference:      {max_diff:.2e} (numerically identical)")

Single TF Update (FOXM1 Discovery):
==================================================
Direct inversion:    3.72 ms  (O(N³))
Sherman-Morrison:    0.92 ms  (O(N²))
Speedup:             4.0x

Max difference:      1.75e-12 (numerically identical)

def compute_partial_correlations(Theta):
    """Compute partial correlation matrix from a precision matrix."""
    d = np.sqrt(np.diag(Theta))
    P = -Theta / np.outer(d, d)
    np.fill_diagonal(P, 1.0)
    return P

# Partial correlations before and after
P_before = compute_partial_correlations(Sigma_inv)
P_after = compute_partial_correlations(inv_sm)

# Changes in partial correlations
delta_P = P_after - P_before

# Compare changes in different gene groups
triu = np.triu_indices(N, k=1)

# Mask for proliferation-proliferation pairs (both genes in 400-499)
prolif_mask = (triu[0] >= 400) & (triu[1] >= 400)
# Mask for other-other pairs (both genes in 0-399)
other_mask = (triu[0] < 400) & (triu[1] < 400)
# Mask for cross pairs
cross_mask = ~prolif_mask & ~other_mask

print("Change in |Partial Correlations| by Gene Group:")
print("=" * 55)
print(f"{'Gene pair group':<30} {'Mean |Δρ|':>10} {'Max |Δρ|':>10}")
print("-" * 55)
print(f"{'Proliferation–Proliferation':<30} {np.mean(np.abs(delta_P[triu][prolif_mask])):>10.6f} "
      f"{np.max(np.abs(delta_P[triu][prolif_mask])):>10.6f}")
print(f"{'Other–Other':<30} {np.mean(np.abs(delta_P[triu][other_mask])):>10.6f} "
      f"{np.max(np.abs(delta_P[triu][other_mask])):>10.6f}")
print(f"{'Cross (Other–Proliferation)':<30} {np.mean(np.abs(delta_P[triu][cross_mask])):>10.6f} "
      f"{np.max(np.abs(delta_P[triu][cross_mask])):>10.6f}")
print(f"\nAs expected, the largest changes are among proliferation genes (FOXM1 targets).")

Change in |Partial Correlations| by Gene Group:
=======================================================
Gene pair group                 Mean |Δρ|   Max |Δρ|
-------------------------------------------------------
Proliferation–Proliferation      0.008554   0.036003
Other–Other                      0.000057   0.000985
Cross (Other–Proliferation)      0.000702   0.006158

As expected, the largest changes are among proliferation genes (FOXM1 targets).

fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# Plot 1: FOXM1 regulatory profile
ax1 = axes[0]
ax1.bar(range(N), np.abs(u), color=['#3498db']*400 + ['#e74c3c']*100, alpha=0.7, width=1.0)
ax1.set_xlabel('Gene Index', fontsize=12)
ax1.set_ylabel('|FOXM1 Loading|', fontsize=12)
ax1.set_title('FOXM1 Regulatory Profile', fontsize=14)
ax1.axvline(x=400, color='black', linestyle='--', alpha=0.5)
ax1.text(200, ax1.get_ylim()[1]*0.9, 'Other genes', ha='center', fontsize=10, color='#3498db')
ax1.text(450, ax1.get_ylim()[1]*0.9, 'Proliferation', ha='center', fontsize=10, color='#e74c3c')
ax1.grid(True, alpha=0.3)

# Plot 2: Change in partial correlations (heatmap of a subregion)
ax2 = axes[1]
# Show genes 390-500 to see the boundary
region = slice(390, 500)
im = ax2.imshow(np.abs(delta_P[region, region]), cmap='Reds', aspect='auto')
ax2.set_xlabel('Gene Index (390–499)', fontsize=12)
ax2.set_ylabel('Gene Index (390–499)', fontsize=12)
ax2.set_title('|Change in Partial Correlation|\n(Genes 390–499)', fontsize=14)
ax2.axhline(y=10, color='white', linestyle='--', linewidth=1.5)
ax2.axvline(x=10, color='white', linestyle='--', linewidth=1.5)
plt.colorbar(im, ax=ax2, label='|Δρ|')

# Plot 3: Distribution of changes by group
ax3 = axes[2]
ax3.hist(np.abs(delta_P[triu][other_mask]), bins=50, alpha=0.5, label='Other–Other',
         color='#3498db', density=True)
ax3.hist(np.abs(delta_P[triu][prolif_mask]), bins=50, alpha=0.5, label='Prolif.–Prolif.',
         color='#e74c3c', density=True)
ax3.set_xlabel('|Change in Partial Correlation|', fontsize=12)
ax3.set_ylabel('Density', fontsize=12)
ax3.set_title('Distribution of Network Changes', fontsize=14)
ax3.legend(fontsize=11)
ax3.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Simulate discovering 5 TFs one at a time
tf_names = ['FOXM1', 'E2F1', 'MYC', 'SOX2', 'KLF4']
tf_descriptions = [
    'Cell proliferation (genes 400–499)',
    'Cell cycle entry (genes 350–449)',
    'Growth / metabolism (genes 0–99)',
    'Stemness program (genes 200–299)',
    'Epithelial differentiation (genes 100–199)'
]

# Create regulatory profiles for each TF
tf_vectors = []
for i, (name, desc) in enumerate(zip(tf_names, tf_descriptions)):
    v = np.random.randn(N) * 0.01  # Weak baseline
    start_gene = [400, 350, 0, 200, 100][i]
    v[start_gene:start_gene+100] = np.random.uniform(0.05, 0.15, 100)
    tf_vectors.append(v)

# Method 1: Sequential Sherman-Morrison updates
start = time.time()
current_inv = Sigma_inv.copy()
for v in tf_vectors:
    current_inv = sherman_morrison(current_inv, v)
time_sequential = time.time() - start

# Method 2: Direct inversion of the final matrix
Sigma_all = Sigma.copy()
for v in tf_vectors:
    Sigma_all += np.outer(v, v)

start = time.time()
inv_direct_all = np.linalg.inv(Sigma_all)
time_direct_all = time.time() - start

# Verify equivalence
max_diff = np.max(np.abs(current_inv - inv_direct_all))

print("Sequential Discovery of 5 Transcription Factors:")
print("=" * 55)
for name, desc in zip(tf_names, tf_descriptions):
    print(f"  {name:<8} — {desc}")
print(f"\n{'Method':<30} {'Time':>10}")
print("-" * 45)
print(f"{'5x Sherman-Morrison':<30} {time_sequential*1000:>8.2f} ms")
print(f"{'Direct inversion (final)':<30} {time_direct_all*1000:>8.2f} ms")
print(f"{'Speedup':<30} {time_direct_all/time_sequential:>8.1f}x")
print(f"\nMax difference: {max_diff:.2e} (numerically identical)")

Sequential Discovery of 5 Transcription Factors:
=======================================================
  FOXM1    — Cell proliferation (genes 400–499)
  E2F1     — Cell cycle entry (genes 350–449)
  MYC      — Growth / metabolism (genes 0–99)
  SOX2     — Stemness program (genes 200–299)
  KLF4     — Epithelial differentiation (genes 100–199)

Method                               Time
---------------------------------------------
5x Sherman-Morrison                3.83 ms
Direct inversion (final)           3.22 ms
Speedup                             0.8x

Max difference: 9.77e-13 (numerically identical)

# Visualize cumulative network changes as TFs are added one by one
fig, axes = plt.subplots(1, 5, figsize=(22, 4))

current_inv = Sigma_inv.copy()
P_baseline = compute_partial_correlations(Sigma_inv)

for idx, (v, name, ax) in enumerate(zip(tf_vectors, tf_names, axes)):
    current_inv = sherman_morrison(current_inv, v)
    P_current = compute_partial_correlations(current_inv)
    delta = np.abs(P_current - P_baseline)
    
    im = ax.imshow(delta, cmap='Reds', aspect='auto', vmin=0, vmax=delta.max())
    ax.set_title(f'After +{name}\n({idx+1} TF{"s" if idx > 0 else ""})', fontsize=11)
    ax.set_xlabel('Gene', fontsize=9)
    if idx == 0:
        ax.set_ylabel('Gene', fontsize=9)
    plt.colorbar(im, ax=ax, fraction=0.046, pad=0.04)

plt.suptitle('Cumulative Change in |Partial Correlations| as TFs Are Discovered', 
             fontsize=14, y=1.02)
plt.tight_layout()
plt.show()

# Demonstrate that the denominator is always > 1 for symmetric updates
denominators = []
for _ in range(1000):
    u_test = np.random.randn(N) * 0.05
    denom = 1 + u_test @ Sigma_inv @ u_test
    denominators.append(denom)

denominators = np.array(denominators)

print("Denominator (1 + u^T Sigma^{-1} u) over 1000 random TF profiles:")
print(f"  Min:  {denominators.min():.4f}")
print(f"  Mean: {denominators.mean():.4f}")
print(f"  Max:  {denominators.max():.4f}")
print(f"\n  Always > 1? {(denominators > 1).all()}")
print("  -> Sherman-Morrison is always numerically stable for covariance updates.")

Denominator (1 + u^T Sigma^{-1} u) over 1000 random TF profiles:
  Min:  39.7958
  Mean: 50.1533
  Max:  62.0117

  Always > 1? True
  -> Sherman-Morrison is always numerically stable for covariance updates.

Woodbury (rank-$D$)	Sherman-Morrison (rank-1)
$X$ is $N \times D$	$u$ is $N \times 1$
$I_D + X^T\Sigma^{-1}X$ is $D \times D$	$1 + u^T\Sigma^{-1}u$ is a scalar
Requires inverting a $D \times D$ matrix	No matrix inversion needed — just divide by a scalar

Property	Detail
Complexity	$O(N^2)$ — two matrix-vector products + one outer product
Storage	Only need $A^{-1}$ and the update vector $u$
Stability	Always stable for symmetric PSD updates ($uu^T$)
Composable	Can apply multiple rank-1 updates sequentially

Sherman-Morrison Formula: Single Transcription Factor Discovery¶

Real-World Scenario: Updating a Gene Network When a New TF is Found¶

Deriving Sherman-Morrison from the Woodbury Formula¶

The Sherman-Morrison Formula¶

Scenario: Discovering a New Transcription Factor¶

Implementation: Sherman-Morrison Step by Step¶

Impact on the Gene Regulatory Network¶

Sequential Rank-1 Updates: Discovering TFs One at a Time¶

When Does Sherman-Morrison Fail?¶

Summary¶

The Sherman-Morrison Formula¶

Key Properties¶

Biology Takeaway¶