Matrix-Vector Multiplication: Geometric Transformations¶
When a matrix multiplies a vector, it transforms that vector:
$$\mathbf{y} = \mathbf{A} \mathbf{x}$$
This is one of the most powerful ideas in linear algebra. Matrices can:
- Rotate vectors
- Scale (stretch/shrink) vectors
- Shear (skew) vectors
- Project vectors onto subspaces
- Reflect vectors
Understanding this geometrically is the key to intuition about covariance transformations, neural networks, and much more.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import FancyArrowPatch, Circle, Polygon
from matplotlib.collections import PatchCollection
# Seed NumPy's global RNG and set the notebook-wide figure defaults
# (default figure size and base font size) in one update.
np.random.seed(42)
plt.rcParams.update({
    'figure.figsize': (12, 6),
    'font.size': 11,
})
1. Two Ways to Think About $\mathbf{A}\mathbf{x}$¶
View 1: Row-by-Row (Dot Products)¶
Each element of the output is a dot product of a row of $\mathbf{A}$ with $\mathbf{x}$:
$$\begin{pmatrix} a_{11} & a_{12} \\ a_{21} & a_{22} \end{pmatrix} \begin{pmatrix} x_1 \\ x_2 \end{pmatrix} = \begin{pmatrix} a_{11}x_1 + a_{12}x_2 \\ a_{21}x_1 + a_{22}x_2 \end{pmatrix}$$
View 2: Column-by-Column (Linear Combination)¶
The output is a weighted sum of columns of $\mathbf{A}$:
$$\mathbf{A}\mathbf{x} = x_1 \begin{pmatrix} a_{11} \\ a_{21} \end{pmatrix} + x_2 \begin{pmatrix} a_{12} \\ a_{22} \end{pmatrix}$$
# Compute A @ x three different ways and show that they agree.
A = np.array([[2, 1], [1, 3]])
x = np.array([3, 2])

# View 1: one dot product per row of A.
y1 = np.array([row @ x for row in A])

# View 2: the columns of A, each weighted by the matching entry of x.
y2 = sum(weight * column for weight, column in zip(x, A.T))

# Reference result from NumPy's matrix-vector product.
y = A @ x

# Everything after the matrix itself is plain text, so build it once.
report = [
    f"\nVector x = {x}",
    "\nView 1 (row dot products):",
    f" y[0] = [2, 1] · [3, 2] = 2×3 + 1×2 = {y1[0]}",
    f" y[1] = [1, 3] · [3, 2] = 1×3 + 3×2 = {y1[1]}",
    "\nView 2 (column combination):",
    " y = 3×[2, 1] + 2×[1, 3]",
    " = [6, 3] + [2, 6]",
    f" = {y2}",
    f"\nResult: y = {y}",
]

print("Matrix A:")
print(A)
print("\n".join(report))
Matrix A:
[[2 1]
[1 3]]
Vector x = [3 2]
View 1 (row dot products):
y[0] = [2, 1] · [3, 2] = 2×3 + 1×2 = 8
y[1] = [1, 3] · [3, 2] = 1×3 + 3×2 = 9
View 2 (column combination):
y = 3×[2, 1] + 2×[1, 3]
= [6, 3] + [2, 6]
= [8 9]
Result: y = [8 9]
# Visualize the column combination view.
# Reads A, x and y from the earlier cell.
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
ax1, ax2 = axes

# The green result arrow is drawn identically on both panels (only the label differs).
result_style = dict(head_width=0.25, head_length=0.15,
                    fc='green', ec='green', linewidth=3)

# ---- Left panel: columns of A, their scaled copies, and the tip-to-tail sum ----
col1, col2 = A[:, 0], A[:, 1]
scaled1, scaled2 = x[0] * col1, x[1] * col2

# Faint arrows: the raw (unscaled) columns.
for vec, colour, name in ((col1, 'blue', 'Column 1'),
                          (col2, 'red', 'Column 2')):
    ax1.arrow(0, 0, vec[0], vec[1], head_width=0.15, head_length=0.1,
              fc=colour, ec=colour, linewidth=1.5, alpha=0.5, label=name)

# Bold arrows: each column weighted by the matching entry of x.
for vec, colour, name in ((scaled1, 'blue', f'{x[0]}×Col1'),
                          (scaled2, 'red', f'{x[1]}×Col2')):
    ax1.arrow(0, 0, vec[0], vec[1], head_width=0.2, head_length=0.12,
              fc=colour, ec=colour, linewidth=2.5, label=name)

# Dashed copy of the second scaled column, starting at the tip of the first,
# so the two arrows chain tip-to-tail up to the result.
ax1.arrow(scaled1[0], scaled1[1], scaled2[0], scaled2[1],
          head_width=0.15, head_length=0.1,
          fc='red', ec='red', linewidth=1.5, linestyle='--', alpha=0.7)
ax1.arrow(0, 0, y[0], y[1], label='Result y = Ax', **result_style)

# ---- Right panel: input vector next to its image under A ----
ax2.arrow(0, 0, x[0], x[1], head_width=0.2, head_length=0.12,
          fc='purple', ec='purple', linewidth=2.5, label='Input x')
ax2.arrow(0, 0, y[0], y[1], label='Output y = Ax', **result_style)

# Curved gray arrow from near the tip of x towards y to suggest the mapping.
ax2.annotate('', xy=(y[0]*0.7, y[1]*0.7), xytext=(x[0]*0.9, x[1]*0.9),
             arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0.3',
                             color='gray', lw=2))
ax2.text(4, 4, 'A transforms', fontsize=11, color='gray')

# ---- Shared axes styling ----
panel_titles = (
    (ax1, 'Ax = $x_1$(Column 1) + $x_2$(Column 2)'),
    (ax2, 'Matrix A Transforms Vector x'),
)
for panel, heading in panel_titles:
    panel.set_xlim(-1, 10)
    panel.set_ylim(-1, 10)
    panel.set_aspect('equal')
    panel.grid(True, alpha=0.3)
    panel.axhline(y=0, color='black', linewidth=0.5)
    panel.axvline(x=0, color='black', linewidth=0.5)
    panel.legend(loc='upper left')
    panel.set_title(heading, fontsize=12)

plt.suptitle('Two Views of Matrix-Vector Multiplication', fontsize=14)
plt.tight_layout()
plt.show()
2. Geometric Transformations¶
Different matrices create different geometric effects. Let's visualize each type.
def plot_transformation(ax, A, title, original_color='blue', transformed_color='red'):
    """Draw the unit circle and the standard basis before and after applying ``A``.

    Parameters
    ----------
    ax : Axes-like object to draw on (``plot``, ``arrow``, ``legend``, ... are called on it).
    A : matrix applied as ``A @ points`` to 2-D column vectors (2x2 in the examples).
    title : text used as the axes title.
    original_color : colour for the untransformed circle and basis arrows.
    transformed_color : colour for the transformed circle and basis arrows.

    The view is fixed to [-2.5, 2.5] on both axes with an equal aspect ratio.
    """
    # Sample the unit circle as a 2 x 100 array of column vectors.
    angles = np.linspace(0, 2*np.pi, 100)
    unit_circle = np.array([np.cos(angles), np.sin(angles)])
    image = A @ unit_circle

    ax.plot(unit_circle[0], unit_circle[1], color=original_color,
            linewidth=2, alpha=0.5, label='Original')
    ax.plot(image[0], image[1], color=transformed_color,
            linewidth=2, label='Transformed')

    basis = (np.array([1, 0]), np.array([0, 1]))

    # Arrow shafts stop at 90% of the vector; the head is drawn beyond that point.
    for vec in basis:
        ax.arrow(0, 0, vec[0]*0.9, vec[1]*0.9, head_width=0.1,
                 fc=original_color, ec=original_color, alpha=0.5)

    # Images of the basis vectors under A (i.e. the columns of A).
    for vec in basis:
        mapped = A @ vec
        ax.arrow(0, 0, mapped[0]*0.9, mapped[1]*0.9, head_width=0.12,
                 fc=transformed_color, ec=transformed_color)

    ax.set_xlim(-2.5, 2.5)
    ax.set_ylim(-2.5, 2.5)
    ax.set_aspect('equal')
    ax.grid(True, alpha=0.3)
    ax.axhline(y=0, color='black', linewidth=0.5)
    ax.axvline(x=0, color='black', linewidth=0.5)
    ax.set_title(title, fontsize=11)
    ax.legend(loc='upper left', fontsize=8)
# Gallery of six transformation types, one per panel of a 2 x 3 grid.
fig, axes = plt.subplots(2, 3, figsize=(15, 10))

# Axis-aligned scaling.
A_scale = np.array([[2, 0], [0, 0.5]])

# Counter-clockwise rotation by 45 degrees.
theta = np.pi/4
cos_t, sin_t = np.cos(theta), np.sin(theta)
A_rotate = np.array([[cos_t, -sin_t],
                     [sin_t, cos_t]])

# Horizontal shear: x picks up half of y.
A_shear = np.array([[1, 0.5], [0, 1]])

# Mirror across the x-axis.
A_reflect = np.array([[1, 0], [0, -1]])

# Drop the y component.
A_project = np.array([[1, 0], [0, 0]])

# Scale first (right-hand factor), then rotate.
A_combined = A_rotate @ np.array([[1.5, 0], [0, 0.7]])

gallery = [
    (A_scale, 'Scaling\n(stretch x by 2, shrink y by 0.5)'),
    (A_rotate, 'Rotation (45°)'),
    (A_shear, 'Shear\n(horizontal shear)'),
    (A_reflect, 'Reflection\n(across x-axis)'),
    (A_project, 'Projection\n(onto x-axis)'),
    (A_combined, 'Combined\n(rotate + scale = ellipse)'),
]

# axes.flat walks the grid row by row, matching the order of the gallery.
for panel, (matrix, caption) in zip(axes.flat, gallery):
    plot_transformation(panel, matrix, caption)

plt.suptitle('Matrix Transformations: How Matrices Reshape Space', fontsize=14)
plt.tight_layout()
plt.show()
Real-World Scenario: Robot Arm Kinematics¶
Scenario: You're programming a 2D robot arm. The arm has two joints that can rotate. Each joint's position is computed using rotation matrices!
- Joint 1 rotates by angle $\theta_1$ from the base
- Joint 2 rotates by angle $\theta_2$ relative to the first link, so the second link points at the absolute angle $\theta_1 + \theta_2$
The end effector position is found by chaining matrix transformations.
def rotation_matrix(theta):
    """Return the 2x2 matrix [[cos, -sin], [sin, cos]] for ``theta`` in radians."""
    c, s = np.cos(theta), np.sin(theta)
    return np.array([[c, -s],
                     [s, c]])
def plot_robot_arm(theta1, theta2, L1=1.0, L2=0.8):
    """Compute the joint positions of a planar two-link arm.

    Despite the name, nothing is drawn here; callers plot the returned points.

    Parameters
    ----------
    theta1 : angle of the first link (radians), passed to ``rotation_matrix``.
    theta2 : angle of the second link relative to the first (radians).
    L1, L2 : lengths of the first and second link.

    Returns
    -------
    (base, arm1_end, arm2_end) : the origin, the end of link 1 (where
    joint 2 sits) and the end effector, each as a length-2 array.
    """
    origin = np.array([0, 0])

    # Link 1 starts out along +x and is rotated by theta1.
    elbow = origin + rotation_matrix(theta1) @ np.array([L1, 0])

    # Link 2 is rotated by the accumulated angle theta1 + theta2.
    tip = elbow + rotation_matrix(theta1 + theta2) @ np.array([L2, 0])

    return origin, elbow, tip
# Draw the arm in three joint configurations, one per panel.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

# (theta1, theta2, panel title)
configs = [
    (0, 0, "Straight (θ₁=0°, θ₂=0°)"),
    (np.pi/4, np.pi/3, "Bent (θ₁=45°, θ₂=60°)"),
    (np.pi/2, -np.pi/2, "Folded (θ₁=90°, θ₂=-90°)")
]

for ax, (t1, t2, title) in zip(axes, configs):
    base, joint, end = plot_robot_arm(t1, t2)

    # Links: thick blue for the first, slightly thinner red for the second.
    links = (
        (base, joint, 'b-', 8),
        (joint, end, 'r-', 6),
    )
    for start, stop, fmt, width in links:
        ax.plot([start[0], stop[0]], [start[1], stop[1]], fmt,
                linewidth=width, solid_capstyle='round')

    # Joint markers, shrinking from base to end effector; zorder keeps them on top.
    markers = (
        (base, 200, 'black'),
        (joint, 150, 'darkblue'),
        (end, 100, 'darkred'),
    )
    for point, size, colour in markers:
        ax.scatter([point[0]], [point[1]], s=size, c=colour, zorder=5)

    ax.set_xlim(-2, 2)
    ax.set_ylim(-0.5, 2)
    ax.set_aspect('equal')
    ax.grid(True, alpha=0.3)
    ax.set_title(f'{title}\nEnd position: ({end[0]:.2f}, {end[1]:.2f})', fontsize=11)
    ax.axhline(y=0, color='gray', linewidth=2)  # Ground

plt.suptitle('Robot Arm: Position = Chain of Rotation Matrices', fontsize=14)
plt.tight_layout()
plt.show()
# Scatter the end-effector position over a grid of joint angles.
fig, ax = plt.subplots(figsize=(8, 8))

# n_samples values per joint -> n_samples**2 configurations.
n_samples = 50
theta1_range = np.linspace(-np.pi/2, np.pi, n_samples)
theta2_range = np.linspace(-np.pi, np.pi, n_samples)

# Only the third return value (the end effector) is kept for each pair.
end_positions = np.array([
    plot_robot_arm(t1, t2)[2]
    for t1 in theta1_range
    for t2 in theta2_range
])
ax.scatter(end_positions[:, 0], end_positions[:, 1], s=2, alpha=0.5, c='blue')

# Overlay one example arm on top of the point cloud.
base, joint, end = plot_robot_arm(np.pi/3, np.pi/4)
for start, stop, fmt, width in ((base, joint, 'g-', 5), (joint, end, 'orange', 4)):
    ax.plot([start[0], stop[0]], [start[1], stop[1]], fmt,
            linewidth=width, solid_capstyle='round')
ax.scatter([base[0]], [base[1]], s=200, c='black', zorder=5, label='Base')
ax.scatter([end[0]], [end[1]], s=150, c='red', zorder=5, label='End effector')

ax.set_xlim(-2.2, 2.2)
ax.set_ylim(-2.2, 2.2)
ax.set_aspect('equal')
ax.grid(True, alpha=0.3)
ax.set_title('Robot Arm Workspace\n(all reachable positions)', fontsize=12)
ax.legend()
plt.tight_layout()
plt.show()

print("Key insight: Each joint rotation is a matrix multiplication!")
print("The end position = Base + R(θ₁)·arm1 + R(θ₁+θ₂)·arm2")
Key insight: Each joint rotation is a matrix multiplication! The end position = Base + R(θ₁)·arm1 + R(θ₁+θ₂)·arm2
3. Columns Show Where Basis Vectors Go¶
Key insight: The columns of $\mathbf{A}$ show where the standard basis vectors land:
- Column 1 = where $\mathbf{e}_1 = [1, 0]^T$ goes
- Column 2 = where $\mathbf{e}_2 = [0, 1]^T$ goes
This completely determines the transformation!
# Show that multiplying A by a standard basis vector picks out a column of A.
A = np.array([[2, -1], [1, 1.5]])
e1, e2 = np.array([1, 0]), np.array([0, 1])

print("Matrix A:")
print(A)

summary = [
    "\nWhere do basis vectors go?",
    f" A @ e1 = A @ [1,0] = {A @ e1} ← This is column 1 of A!",
    f" A @ e2 = A @ [0,1] = {A @ e2} ← This is column 2 of A!",
    "\nSo the columns of A literally tell you where [1,0] and [0,1] end up.",
]
print("\n".join(summary))
Matrix A: [[ 2. -1. ] [ 1. 1.5]] Where do basis vectors go? A @ e1 = A @ [1,0] = [2. 1.] ← This is column 1 of A! A @ e2 = A @ [0,1] = [-1. 1.5] ← This is column 2 of A! So the columns of A literally tell you where [1,0] and [0,1] end up.
# Visualize this insight: the unit square and standard basis (left) next to
# their images under A (right). Reads A, e1 and e2 from the earlier cell.
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# Before transformation
ax1 = axes[0]
# Shafts are drawn at 0.9 length; the arrow head extends beyond that point.
ax1.arrow(0, 0, 0.9, 0, head_width=0.1, head_length=0.08, fc='blue', ec='blue', linewidth=2.5)
ax1.arrow(0, 0, 0, 0.9, head_width=0.1, head_length=0.08, fc='red', ec='red', linewidth=2.5)
# Raw strings keep the mathtext backslashes literal; without the r prefix,
# '\m' is an invalid escape sequence and Python emits a SyntaxWarning.
ax1.text(1.1, 0, r'$\mathbf{e}_1 = [1, 0]$', fontsize=11, color='blue')
ax1.text(0.1, 1.1, r'$\mathbf{e}_2 = [0, 1]$', fontsize=11, color='red')

# Draw unit square (closed outline: the first corner is repeated at the end)
square = np.array([[0, 1, 1, 0, 0], [0, 0, 1, 1, 0]])
ax1.plot(square[0], square[1], 'g--', linewidth=1.5, alpha=0.7)
ax1.fill(square[0], square[1], alpha=0.1, color='green')
ax1.set_xlim(-0.5, 2.5)
ax1.set_ylim(-0.5, 2.5)
ax1.set_aspect('equal')
ax1.grid(True, alpha=0.3)
ax1.axhline(y=0, color='black', linewidth=0.5)
ax1.axvline(x=0, color='black', linewidth=0.5)
ax1.set_title('Before: Standard Basis', fontsize=12)

# After transformation
ax2 = axes[1]
Ae1 = A @ e1
Ae2 = A @ e2
ax2.arrow(0, 0, Ae1[0]*0.9, Ae1[1]*0.9, head_width=0.12, head_length=0.1, fc='blue', ec='blue', linewidth=2.5)
ax2.arrow(0, 0, Ae2[0]*0.9, Ae2[1]*0.9, head_width=0.12, head_length=0.1, fc='red', ec='red', linewidth=2.5)
# .tolist() yields plain Python floats, so the label reads "[2.0, 1.0]" on every
# NumPy version; list(Ae1) would show "np.float64(2.0)" entries under NumPy 2.
ax2.text(Ae1[0]+0.15, Ae1[1], rf'$A\mathbf{{e}}_1 = {Ae1.tolist()}$', fontsize=11, color='blue')
ax2.text(Ae2[0]+0.15, Ae2[1]+0.1, rf'$A\mathbf{{e}}_2 = {Ae2.tolist()}$', fontsize=11, color='red')

# Draw transformed square (parallelogram)
transformed_square = A @ square
ax2.plot(transformed_square[0], transformed_square[1], 'g--', linewidth=1.5, alpha=0.7)
ax2.fill(transformed_square[0], transformed_square[1], alpha=0.1, color='green')
ax2.set_xlim(-1.5, 3)
ax2.set_ylim(-0.5, 3)
ax2.set_aspect('equal')
ax2.grid(True, alpha=0.3)
ax2.axhline(y=0, color='black', linewidth=0.5)
ax2.axvline(x=0, color='black', linewidth=0.5)
ax2.set_title('After: Columns of A', fontsize=12)

plt.suptitle('Matrix Columns = Where Basis Vectors Land', fontsize=14)
plt.tight_layout()
plt.show()
<>:8: SyntaxWarning: invalid escape sequence '\m'
<>:9: SyntaxWarning: invalid escape sequence '\m'
<>:31: SyntaxWarning: invalid escape sequence '\m'
<>:32: SyntaxWarning: invalid escape sequence '\m'
<>:8: SyntaxWarning: invalid escape sequence '\m'
<>:9: SyntaxWarning: invalid escape sequence '\m'
<>:31: SyntaxWarning: invalid escape sequence '\m'
<>:32: SyntaxWarning: invalid escape sequence '\m'
/var/folders/34/4mb6rzb52l76jcqm_pjx3fph0000gn/T/ipykernel_51138/1821629422.py:8: SyntaxWarning: invalid escape sequence '\m'
ax1.text(1.1, 0, '$\mathbf{e}_1 = [1, 0]$', fontsize=11, color='blue')
/var/folders/34/4mb6rzb52l76jcqm_pjx3fph0000gn/T/ipykernel_51138/1821629422.py:9: SyntaxWarning: invalid escape sequence '\m'
ax1.text(0.1, 1.1, '$\mathbf{e}_2 = [0, 1]$', fontsize=11, color='red')
/var/folders/34/4mb6rzb52l76jcqm_pjx3fph0000gn/T/ipykernel_51138/1821629422.py:31: SyntaxWarning: invalid escape sequence '\m'
ax2.text(Ae1[0]+0.15, Ae1[1], f'$A\mathbf{{e}}_1 = {list(Ae1)}$', fontsize=11, color='blue')
/var/folders/34/4mb6rzb52l76jcqm_pjx3fph0000gn/T/ipykernel_51138/1821629422.py:32: SyntaxWarning: invalid escape sequence '\m'
ax2.text(Ae2[0]+0.15, Ae2[1]+0.1, f'$A\mathbf{{e}}_2 = {list(Ae2)}$', fontsize=11, color='red')
4. Special Matrices and Their Effects¶
| Matrix Type | Form | Effect |
|---|---|---|
| Identity | $\mathbf{I}$ | No change |
| Diagonal | $\text{diag}(d_1, d_2)$ | Scale each axis independently |
| Rotation | $\begin{pmatrix} \cos\theta & -\sin\theta \\ \sin\theta & \cos\theta \end{pmatrix}$ | Rotate by angle $\theta$ |
| Symmetric | $\mathbf{A} = \mathbf{A}^T$ | Scale along orthogonal eigenvector directions |
# A symmetric matrix acts as pure scaling along its eigenvector directions.
A_symmetric = np.array([[2, 1], [1, 2]])

# eigh is the symmetric/Hermitian solver; the eigenvectors are the columns of its second result.
eigenvalues, eigenvectors = np.linalg.eigh(A_symmetric)

print("Symmetric Matrix A:")
print(A_symmetric)
print(f"\nEigenvalues: {eigenvalues}")

# Transposing turns the eigenvector columns into rows we can iterate over.
for number, (scale, direction) in enumerate(zip(eigenvalues, eigenvectors.T), start=1):
    print(f"Eigenvector {number}: {direction.round(3)} (scaled by {scale})")

first_direction, second_direction = eigenvectors[:, 0], eigenvectors[:, 1]
print(f"\nEigenvectors are orthogonal: v1·v2 = {first_direction @ second_direction:.6f}")
Symmetric Matrix A: [[2 1] [1 2]] Eigenvalues: [1. 3.] Eigenvector 1: [-0.707 0.707] (scaled by 1.0) Eigenvector 2: [0.707 0.707] (scaled by 3.0) Eigenvectors are orthogonal: v1·v2 = 0.000000
# Plot the unit circle, its image under A_symmetric, and the eigenvectors
# before and after the map. Reads A_symmetric, eigenvalues and eigenvectors
# from the earlier cell.
fig, ax = plt.subplots(figsize=(8, 8))

# Unit circle as a 2 x 100 array of column vectors, and its image.
theta = np.linspace(0, 2*np.pi, 100)
circle = np.array([np.cos(theta), np.sin(theta)])
ellipse = A_symmetric @ circle
ax.plot(circle[0], circle[1], 'b-', linewidth=2, alpha=0.5, label='Original circle')
ax.plot(ellipse[0], ellipse[1], 'r-', linewidth=2, label='Transformed (ellipse)')

# Eigenvectors and their images; each image is the eigenvector times its eigenvalue.
v1, v2 = eigenvectors[:, 0], eigenvectors[:, 1]
Av1, Av2 = A_symmetric @ v1, A_symmetric @ v2

# Thin arrows: the eigenvectors themselves (these lie along the ellipse axes).
for vec, colour in ((v1, 'green'), (v2, 'purple')):
    ax.arrow(0, 0, vec[0]*0.9, vec[1]*0.9, head_width=0.1,
             fc=colour, ec=colour, linewidth=2)

# Thick translucent arrows: where A sends each eigenvector.
images = (
    (Av1, 'green', f'λ₁={eigenvalues[0]:.1f} × eigenvec 1'),
    (Av2, 'purple', f'λ₂={eigenvalues[1]:.1f} × eigenvec 2'),
)
for vec, colour, caption in images:
    ax.arrow(0, 0, vec[0]*0.9, vec[1]*0.9, head_width=0.15,
             fc=colour, ec=colour, linewidth=3, alpha=0.7, label=caption)

ax.set_xlim(-4, 4)
ax.set_ylim(-4, 4)
ax.set_aspect('equal')
ax.grid(True, alpha=0.3)
ax.axhline(y=0, color='black', linewidth=0.5)
ax.axvline(x=0, color='black', linewidth=0.5)
ax.legend(loc='upper left')
ax.set_title('Symmetric Matrix: Scales Along Eigenvector Axes\n(Circle → Ellipse with axes = eigenvectors)', fontsize=12)
plt.tight_layout()
plt.show()
Key Takeaways¶
Matrix-vector multiplication $\mathbf{Ax}$ transforms vector $\mathbf{x}$
Two equivalent views:
- Row view: each output element is a dot product
- Column view: output is a linear combination of columns
Columns of A show where basis vectors land
- Column $j$ = $\mathbf{A} \mathbf{e}_j$
- This completely determines the transformation
Geometric effects:
- Rotation: preserves lengths and angles
- Scaling: stretches/shrinks along axes
- Symmetric matrices: scale along orthogonal eigenvector directions
Applications: Robot kinematics, graphics, neural networks, coordinate transforms
Next: We'll explore matrix-matrix multiplication — composing transformations.