Week 4: Eigenproblems¶

In [1]:
using LinearAlgebra
using TestImages
using LaTeXStrings
using Plots
import Random

Single eigenvalues¶

[Exercise 1] Inverse and Rayleigh iteration¶

  1. Given an $N\times N$ matrix $A$ and a complex number $\mu \notin \sigma(A)$, implement the inverse iteration to approximate the eigenvalue of $A$ closest to $\mu$, together with an associated eigenvector, using as stopping criterion $$\|A\hat v - \hat\lambda\hat v\| \leq \varepsilon\|\hat v\|.$$

    The method will take as arguments the matrix $A$, the number $\mu$, the starting vector $x_0$, and as keyword arguments the tolerance $\varepsilon$ and a maximum number of iterations before aborting. Return a triplet $(\lambda_n,v_n,n)$ with the approximate (normalized) eigenpair and the number $n$ of iterations taken.

    Hint (click to display)
    • By functional calculus, $\lambda_j = \underset{\lambda\in \sigma(A)}{\mathrm{argmin}} |\lambda-\mu|$ if and only if $(\lambda_j-\mu)^{-1}$ is the dominant eigenvalue of $(A-\mu I_N)^{-1}$
    • It is not necessary (or efficient) to compute the inverse of $A-\mu I_N$ (read the documentation for LinearAlgebra.factorize)
    • The identity matrix need not be created explicitly; to construct $A - \mu I_N$, simply write A - μ*I (assuming that LinearAlgebra has been imported with using)
In [2]:
function inverse_iteration(A, μ, x₀; tol=1e-12, maxiter=100)
    x = copy(x₀)
    ### BEGIN SOLUTION
    M = factorize(A - μ*I)  # factorize A - μI once and reuse it at every iteration
    niter = 0
    while niter < maxiter
        niter += 1
        x .= M\x            # apply (A - μI)⁻¹ to the current iterate
        norm2_x = x'x
        norm_x = √norm2_x
        x /= norm_x         # normalize
        λ = x'A*x           # Rayleigh quotient: current eigenvalue estimate
        (norm(A*x - λ*x) ≤ tol) && return (λ,x,niter)  # residual test; ‖x‖ = 1 here
    end
    ### END SOLUTION
    return nothing
end
inverse_iteration (generic function with 1 method)
  2. Implement the Rayleigh quotient iteration, taking as arguments the matrix $A$, the initial $\mu_0$, the initial vector $x_0$, and as keywords the tolerance $\varepsilon$ and a maximum number of iterations. As above, return a triplet with the eigenpair and the number of iterations taken, using the same stopping criterion.

    Hint (click to display)
    • This method amounts to setting $\mu_k$ to the current estimate of the target eigenvalue, which is the Rayleigh quotient $\frac{v_k^* A v_k}{v_k^*v_k}$.
    • Again, no need to invert $A-\mu_k I_N$. Should you use LinearAlgebra.factorize?
In [3]:
function rayleigh_iteration(A, μ₀, x₀; tol=1e-12, maxiter=100)
    x = copy(x₀)
    μ = μ₀
    niter = 0
    ### BEGIN SOLUTION
    while niter < maxiter
        niter += 1
        x .= (A - μ*I) \ x  # a fresh factorization is needed here, since μ changes at every step
        norm2_x = x'x
        norm_x = √norm2_x
        x /= norm_x         # normalize
        μ = x'A*x           # update the shift with the Rayleigh quotient
        (norm(A*x - μ*x) ≤ tol) && return (μ,x,niter)  # same stopping criterion as above
    end
    ### END SOLUTION
    return nothing
end
rayleigh_iteration (generic function with 1 method)

Play with the value of $N$ in the code below (keep $N$ below 1000). There is no guarantee that the Rayleigh iteration method will converge to the eigenvalue closest to $\mu_0$, so you may have to run the cell a few times for this to happen. Is the Rayleigh quotient method faster in number of iterations? In wall-clock time? Why?
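
A rough sketch of where the wall-clock time goes (assuming the variables A, μ and x₀ defined in the test cell below): inverse iteration can factorize $A-\mu I$ once and reuse the factorization for every solve, whereas Rayleigh iteration must redo the factorization at each step because the shift $\mu_k$ changes.

    F = factorize(A - μ*I)   # one O(N³) factorization, done once in inverse iteration
    @time F \ x₀             # each subsequent solve costs only O(N²)
    @time (A - μ*I) \ x₀     # Rayleigh iteration pays a fresh O(N³) factorization at every step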

In [4]:
# Test code
Random.seed!(2025)
N = 1000
A = randn(N,N); A = A*A'/N
x₀ = randn(N); x₀ /= √(x₀'x₀)
μ = 2.0
ε = 1e-12

@time result = inverse_iteration(A,μ,x₀;tol=ε)
result === nothing ? println("Inverse iteration did not converge.") : begin λ,x,n = result ; println("Inverse iteration: eigenvalue $(λ) found in $n iterations.") end
@time result = rayleigh_iteration(A,μ,x₀;tol=ε)
result === nothing ? println("Rayleigh iteration did not converge.") : begin λ_r,x_r,n_r = result ; println("Rayleigh quotient iteration: eigenvalue $(λ_r) found in $n_r iterations.") end

### BEGIN HIDDEN TESTS
if abs(λ-λ_r) < 1/N^2 # A has about N eigenvalues in [0,4]
    @assert (abs(λ-λ_r) ≤ 2ε)
    @assert all(abs.((x'A*x_r)/(x'x_r) .- λ) .≤ ε)
end

@assert n_r ≤ n
### END HIDDEN TESTS
  1.210942 seconds (3.60 M allocations: 210.233 MiB, 1.70% gc time, 96.38% compilation time)
Inverse iteration: eigenvalue 1.9983782746955578 found in 27 iterations.
  0.170488 seconds (20.56 k allocations: 108.258 MiB, 18.93% gc time, 22.85% compilation time)
Rayleigh quotient iteration: eigenvalue 1.9873152321450813 found in 7 iterations.

Multiple eigenvalues¶

From now on, $A$ will be a Hermitian matrix.

[Exercise 2] Subspace iteration and SVD¶

The subspace iteration method is defined, given an initial condition $X_0\in\mathbb C^{n\times p}$, by the recursion: $$ X_{n+1} R_{n+1} = A X_n,$$ where $X_{n+1} R_{n+1}$ is the partial QR decomposition of $AX_n \in \mathbb C^{n\times p}$. One can show, under suitable conditions, that the columns of $X_n$ converge to the $p$ dominant eigenvectors of $A$ (see lecture notes).
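
To make the recursion concrete, here is one step written with the built-in LinearAlgebra.qr (a sketch only, with illustrative names Asmall and X; note that, unlike the convention requested below, the built-in factorization does not enforce a positive diagonal for $R$):

    # One step of the recursion X_{n+1} R_{n+1} = A X_n, using the built-in thin QR
    Asmall = Symmetric(randn(6, 6))     # small Hermitian test matrix
    X = randn(6, 2)                     # current iterate Xₙ with p = 2 columns
    F = qr(Asmall * X)
    Xnext, Rnext = Matrix(F.Q), F.R     # Matrix(F.Q) is the thin 6×2 factor with orthonormal columns
    @assert Xnext * Rnext ≈ Asmall * X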

  1. Implement the partial QR decomposition, i.e. a function myQR taking as argument an $n\times p$ matrix $M$ and returning $Q,R$, where $Q$ is an $n\times p$ matrix with orthonormal columns and $R$ is a $p\times p$ upper triangular matrix with positive diagonal entries.

    Hint (click to display)
    • Use induction on $p$, writing $$ Q = \begin{pmatrix} \widetilde Q &\mathbf{q}\end{pmatrix},\qquad R = \begin{pmatrix} \widetilde R & \mathbf{r} \\ 0 & \alpha\end{pmatrix}, \qquad M=\begin{pmatrix} \widetilde{M} & \mathbf m\end{pmatrix},$$ with $\widetilde Q,\widetilde M \in \mathbb C^{n\times (p-1)}$, $\widetilde R\in\mathbb C^{(p-1)\times (p-1)}$, $\mathbf r \in \mathbb C^{p-1}$, $\alpha\in \mathbb R_+$ and $\mathbf m\in \mathbb C^n$, assuming you know the decomposition $\widetilde Q \widetilde R = \widetilde M$ of the matrix consisting of the first $(p-1)$ columns of $M$ (the case $p=1$ is trivial).
    • To avoid unnecessary memory allocations, it is often useful in Julia to use view(A,ix...) instead of A[ix...] when performing array operations. To understand why, recall from Week 1 that Julia will by default allocate memory when slicing an array as an r-value B = A[ix...] (B is a copy of the slice of A), but not when slicing as an l-value. As a result, the following happens in Julia:

    A = randn(20000,20000)
    A[1:20000,1] === A[1:20000,1]
    # false

    To avoid this, the method view(A,ix...) returns a reference to the slice of A, which otherwise behaves like an ordinary array. For example:

    @time A[1:20000,1]'A[1,1:20000]
    # 0.000655 seconds (6 allocations: 312.625 KiB)
    @time view(A,1:20000,1)'view(A,1,1:20000)
    # 0.000369 seconds (4 allocations: 208 bytes)
    

    For convenience, Julia also provides the macro @views which transforms every expression of the form A[ix...] into one of the form view(A,ix...) in the code on which it operates:

    @views A[1:20000,1] === A[1:20000,1] # transforms a line of code
    # true

    @time @views A[1:20000,1]'A[1,1:20000]
    # 0.000289 seconds (4 allocations: 208 bytes)

    @views function f(M) # or a full block of code
        return M[1:20000,1]'M[1,1:20000]
    end
    @time f(A)
    # 0.000314 seconds (1 allocation: 16 bytes)
In [5]:
@views function myQR(M)
    n,p = size(M)

    @assert p <= n "Error: p > n"

    Q = zero(M)
    R = zeros(eltype(M),p,p)

    ### BEGIN SOLUTION

    for k = 1:p
        m = view(M, :, k)        # k-th column of M
        Q̃ = view(Q, :, 1:k-1)    # columns already orthonormalized
        r = view(R, 1:k-1, k)
        q = view(Q, :, k)

        r .= Q̃'m                 # coefficients of m on the previous columns
        q .= m - Q̃*r             # Gram-Schmidt step: remove those components
        α = sqrt(q'q)
        q ./= α                  # normalize; the (positive) norm goes on the diagonal of R
        R[k,k] = α
    end

    ### END SOLUTION

    return Q,R
end
myQR (generic function with 1 method)
In [6]:
# Test code
M = randn(ComplexF64,100,50)
M = big.(M)
Q,R = myQR(M)
@show norm(Q*R-M)

### BEGIN HIDDEN TESTS
@assert norm(Q*R-M) < 1e-50
@assert norm(Q'Q-I) < 1e-50
@assert all(isreal.(diag(R)))
@assert all(real.(diag(R)) .>= 0)
### END HIDDEN TESTS
norm(Q * R - M) = 1.036416049574850419425422173720991419736827080356812444774911952379079269252268e-75
  2. Implement the subspace iteration, taking as arguments the matrix $B$, the number of eigenvalues $p$, and a fixed number of iterations $n = $niter. Return the approximate eigenpairs $(\boldsymbol{\lambda}_n,X_n)$ after $n$ iterations.
In [7]:
function myEigen(B, p, niter)

    ### BEGIN SOLUTION
    n = size(B, 1)
    X = randn(n, p)        # random initial subspace X₀
    R = zeros(p, p)
    for i in 1:niter
        X, R = myQR(B*X)   # X_{k+1} R_{k+1} = B X_k
    end
    λs = diag(X'*B*X)      # approximate eigenvalues (Rayleigh quotients)
    ### END SOLUTION

    return λs, X
end
myEigen (generic function with 1 method)
In [8]:
@show myEigen([1. 2.; 2. 1.], 2, 100)[1]
@show myEigen([1. 2.; 2. 1.], 2, 100)[2]

### BEGIN HIDDEN TESTS
@assert begin n = 2; A = randn(n, n); myEigen(A'A, n, 100)[1] ≈ reverse(eigen(A'A).values) end
@assert begin n = 4; A = randn(n, n); myEigen(A'A, n, 100)[1] ≈ reverse(eigen(A'A).values) end
@assert begin A = randn(5, 5); q, r = qr(A); B = q*Diagonal(1:5)*q'; myEigen(B, 3, 100)[1] ≈ [5; 4; 3] end
### END HIDDEN TESTS
(myEigen([1.0 2.0; 2.0 1.0], 2, 100))[1] = [3.0, -1.0]
(myEigen([1.0 2.0; 2.0 1.0], 2, 100))[2] = [-0.7071067811865475 0.7071067811865476; -0.7071067811865475 -0.7071067811865476]

Recall the singular value decomposition of a matrix $B \in \mathbb C^{m\times n}$: $$B = U \Sigma V^*, \quad U\in \mathbb C^{m\times m},\quad \Sigma \in \mathbb R^{m\times n},\quad V\in \mathbb C^{n\times n},$$ where $U U^* = U^* U = I_m$, $V V^* = V^* V = I_n$, and $\Sigma$ is a rectangular diagonal matrix with non-negative real entries on the diagonal. The columns of $U$ (resp. $V$) are called the left (resp. right) singular vectors of $B$, and the diagonal entries of $\Sigma$ are the (non-negative) singular values.
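
As a quick sanity check of how these factors fit together, the decomposition can be verified on a small random matrix with the built-in LinearAlgebra.svd (the name Btest is purely illustrative; this is independent of the implementation requested below):

    Btest = randn(ComplexF64, 5, 3)
    F = svd(Btest, full=true)                  # full SVD: U is 5×5, V is 3×3
    @assert F.U*F.U' ≈ I && F.V*F.V' ≈ I       # unitary factors
    Σ = zeros(5, 3)
    Σ[1:3, 1:3] = Diagonal(F.S)                # rectangular diagonal Σ holding the singular values
    @assert F.U * Σ * F.V' ≈ Btest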

  3. Write a function mySVD(B, p, niter) that returns the p dominant singular values of a square matrix B (in a vector σs), together with the associated left and right singular vectors (in matrices Up and Vp).

    Hint (click to display)
    • Notice that if $B$ is a square matrix, $$ B B^* = U \Sigma^2 U^*, \qquad B^* B = V \Sigma^2 V^*. $$ Therefore, the left singular vectors of $B$ are the eigenvectors of $B B^*$, while the right singular vectors of $B$ are the eigenvectors of $B^* B$.

    • Once you have calculated the left and right singular vectors associated with the p dominant singular values, the singular values themselves can be obtained by extracting the diagonal from the matrix $$ \Sigma_p = U_p^* B V_p. $$ Here $U_p$ and $V_p$ are the matrices containing as columns the left and right singular vectors associated with the p dominant singular values, respectively.

In [9]:
function mySVD(B, p, niter)

    ### BEGIN SOLUTION
    λ₁, U = myEigen(B*B', p, niter)   # left singular vectors: dominant eigenvectors of BB*
    λ₂, V = myEigen(B'B, p, niter)    # right singular vectors: dominant eigenvectors of B*B
    σs = U'B*V                        # Σ_p = U_p' B V_p
    ### END SOLUTION
    return diag(σs), U, V
end
mySVD (generic function with 1 method)
In [10]:
n = 10
B = randn(n, n)
σs, U, V = mySVD(B, n, 1000)
@assert norm(U'U - I(n)) < 1e-10
@assert norm(V'V - I(n)) < 1e-10
@assert norm(U*Diagonal(σs)*V' - B) < 1e-10
  4. The singular value decomposition is very useful for compressing matrices. The idea of matrix compression based on SVD is the following: given $p \leqslant n$, a matrix $B \in \mathbb C^{n\times n}$ (we consider square matrices for simplicity) can be approximated by $$ \widetilde {B} := U_p \Sigma_p V_p^*, $$ where $\Sigma_p \in \mathbb R^{p \times p}$ is a diagonal matrix containing the $p$ dominant singular values of $B$ on its diagonal, and where $U_p \in \mathbb C^{n \times p}$ and $V_p \in \mathbb C^{n \times p}$ are rectangular matrices containing the associated left and right singular vectors, respectively.

    Since a grayscale image can be represented by a matrix containing the intensity values of all the pixels, this approach for compressing matrices can be used for compressing grayscale images. Use this method, i.e. calculate $\widetilde {B}$, for $p \in \{5, 10, 20, 30\}$, in order to compress the image woman_darkhair given below (other available test images are listed here), and plot the compressed image for these values of p.

    Remarks:

    • (For information only) In practice, instead of storing the full matrix $\widetilde {B}$, which contains $n^2$ entries, we can store only the matrices $U_p$, $\Sigma_p$ and $V_p$, which together contain only $(2n+1)p$ entries (we only count the $p$ diagonal entries of $\Sigma_p$). If $p \ll n$, then the memory required to store these matrices is much smaller than the memory required to store the initial matrix $B$. A concrete count for the image used below is worked out just after these remarks.

    • A function for drawing images based on the matrix of pixel intensity values is provided below, and serves as a test for your implementation.
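
    To make the count concrete: the test image used below is $512 \times 512$, so with $p = 20$ the compressed representation stores $(2 \cdot 512 + 1)\cdot 20 = 20\,500$ numbers instead of $512^2 = 262\,144$, i.e. a ratio of about $7.8\%$, consistent with the compression ratios printed by the test cell.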

In [11]:
A = testimage("woman_darkhair")

# Convert image to matrix of Float64
M = Float64.(A)
n = size(M,1)

# Function to plot a grayscale image from the matrix
# containing the intensity values of all the pixels
function plot_matrix(B, p)
    plot(Gray.(B), ticks=false, showaxis=false, title="p=$p")
end

plots = typeof(plot())[]

for p in [5,10,20,30]
    niter = 100
    σs, U, V = mySVD(M, p, niter)
    println("p = $p, compression ratio = ", ((2n+1)*p)/(n^2))
    push!(plots,plot_matrix(U*Diagonal(σs)*V', p))
end

push!(plots, plot_matrix(M, "n"))
plot(plots...)
p = 5, compression ratio = 0.019550323486328125
p = 10, compression ratio = 0.03910064697265625
p = 20, compression ratio = 0.0782012939453125
p = 30, compression ratio = 0.11730194091796875

Using sparse matrices for high-dimensional problems¶

[Exercise 3] PageRank algorithm¶

PageRank is an algorithm that assigns a score to the vertices of a directed graph. It was formerly used by major search engines to rank search results. In this context, the directed graph encodes the links between pages on the World Wide Web, with the vertices representing web pages and the edges representing connections between pages: there is an edge from page $i$ to page $j$ if page $i$ contains a hyperlink to page $j$.

Let us consider a directed graph $G(V, E)$ with vertices $V = \{1, \dotsc, n\}$ and edges $E$. The graph can be represented by its adjacency matrix $A \in \{0, 1\}^{n \times n}$, whose entries are given by $$ a_{ij} = \begin{cases} 1 & \text{if there is an edge from $i$ to $j$,} \\ 0 & \text{otherwise.} \end{cases} $$ The idea of the PageRank algorithm, in its simplest form, is to assign scores $r_i$ to the vertices by solving the following system of equations: $$ \tag{PageRank} \forall i \in V, \qquad r_i = \sum_{j \in \mathcal N(i)} \frac{r_j}{o_j}, $$ where $o_j$ is the out-degree of vertex $j$, i.e., the number of edges with origin $j$, and the sum runs over $\mathcal N(i)$, the set of incoming neighbors of vertex $i$, i.e., the vertices that have an edge pointing to $i$.

Let $\mathbf r = \begin{pmatrix} r_1 & \dots & r_n \end{pmatrix}^T$. It is straightforward to show that solving the system (PageRank) is equivalent to solving the following problem: $$ \tag{PageRank-vector} \mathbf r = A^T \begin{pmatrix} \frac{1}{o_1} & & \\ & \ddots & \\ & & \frac{1}{o_n} \end{pmatrix} \mathbf r =: A^T O^{-1} \mathbf r. $$ In other words, the problem boils down to finding an eigenvector with eigenvalue $1$ of the matrix $M = A^T O^{-1}$. Note that at this stage, we have proved neither the existence nor the uniqueness of a solution to this equation. The question of the uniqueness of a solution is related to the connectivity of the graph and will not be addressed here. However, we will demonstrate that a solution to the problem exists.
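
As a small concrete illustration (a hypothetical 3-node graph with edges 1→2, 1→3, 2→3, 3→1, built densely only for this example, with illustrative names A3, O3, M3):

    A3 = [0 1 1;
          0 0 1;
          1 0 0]                            # adjacency matrix of the 3-node graph
    O3 = Diagonal(vec(sum(A3, dims=2)))     # out-degrees o = (2, 1, 1) on the diagonal
    M3 = A3' * inv(O3)                      # M = Aᵀ O⁻¹
    @assert all(sum(M3, dims=1) .≈ 1)       # every column of M sums to 1
    @assert any(isapprox.(eigvals(M3), 1))  # λ = 1 is indeed an eigenvalue of M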

Remark. The matrix $O^{-1} A$ is the transition matrix of a random walk on the directed graph, where at each step a move is made to an outgoing neighbor, with equal probability for each of them. Solving (PageRank-vector) is equivalent to finding a stationary distribution of this random walk.

    • Note that $M$ is a left stochastic matrix, i.e., the sum of the elements in each column is equal to 1.

    • Prove that the eigenvalues of any matrix $B \in \mathbb R^{n \times n}$ coincide with those of $B^T$. You can use the fact that $\det(B) = \det(B^T)$.

    • Using the previous points, show that $1$ is an eigenvalue and that $\rho(M) = 1$. For the second part, find a subordinate matrix norm such that $\lVert M\rVert= 1$. This demonstrates the existence of a solution to (PageRank-vector), and also proves that $1$ is the dominant eigenvalue of $M$.
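
    For reference, a brief sketch of the argument: since $\det(B - \lambda I) = \det\big((B - \lambda I)^T\big) = \det(B^T - \lambda I)$, the matrices $B$ and $B^T$ have the same characteristic polynomial and hence the same eigenvalues. Because every column of $M$ sums to one, the vector $\mathbf 1 = (1, \dots, 1)^T$ satisfies $M^T \mathbf 1 = \mathbf 1$, so $1 \in \sigma(M^T) = \sigma(M)$. Finally, the subordinate norm induced by $\|\cdot\|_1$ is the maximum absolute column sum, so $\|M\|_1 = 1$ and therefore $\rho(M) \leq \|M\|_1 = 1$; combined with $1 \in \sigma(M)$, this gives $\rho(M) = 1$.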

We will apply PageRank to sort Wikipedia pages according to their importance. The following two cells download and parse the data into arrays. To limit the computation time, only the top 5% of articles by score have been kept.

In [12]:
import Downloads
import Tar

# URL where data can be downloaded
url = "https://urbain.vaes.uk/static/wikidata.tar"

# Download the data
filename = "wikidata.tar"
isfile(filename) || Downloads.download(url, filename)

# Extract data into directory `wikidata`
directoryname = "wikidata"
isdir(directoryname) || Tar.extract(filename, directoryname)
true
In [13]:
import CSV
import DataFrames

# Read nodes and edges into data frames
nodes_dataframe = CSV.read("$directoryname/names.csv", DataFrames.DataFrame)
edges_dataframe = CSV.read("$directoryname/edges.csv", DataFrames.DataFrame)

# Convert data to matrices
nodes = Matrix(nodes_dataframe)
edges = Matrix(edges_dataframe)

# The data structures should be self-explanatory
edges_dataframe
10722190×2 DataFrame (10722165 rows omitted)
      Row │ FromNode  ToNode
          │ Int64     Int64
──────────┼──────────────────
        1 │   175973       1
        2 │   130880       2
        3 │   145856       2
        4 │   159190       2
        5 │   159200       2
        6 │   159207       2
        7 │   159431       2
        8 │        4       3
        9 │        5       3
       10 │     6887       3
       11 │     6916       3
       12 │     6957       3
       13 │    11490       3
        ⋮ │        ⋮       ⋮
 10722179 │   170697  157057
 10722180 │   177550      45
 10722181 │    30983   29807
 10722182 │    76929  104616
 10722183 │    86508  120348
 10722184 │   128270  198551
 10722185 │   152419  116918
 10722186 │   156536  190926
 10722187 │   170697  112376
 10722188 │   177550  108047
 10722189 │   181302   84697
 10722190 │   189293  154081

How many nodes are there? Compute the memory required to store every entry of $M$ using the Float64 format.
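
A rough order-of-magnitude check (a sketch, assuming the nodes array loaded above):

    nn = length(nodes)                     # number of nodes
    println("nodes: ", nn)
    bytes = nn^2 * sizeof(Float64)         # memory to store M densely in Float64
    println("dense storage: ", round(bytes / 2^30, digits=1), " GiB")   # ≈ 300 GiB: a sparse format is needed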

  1. Implement a structure struct MySparseMatrix to represent a sparse matrix with Float64 entries (in the COO format), and a method *(M::MySparseMatrix,X::Vector{Float64}) to compute the product of a sparse matrix M by a vector X.
In [14]:
import Base.*

struct MySparseMatrix
    rows::Vector{Int}
    cols::Vector{Int}
    vals::Vector{Float64}
    m::Int
    n::Int
end

MySparseMatrix(R::Vector{Int},C::Vector{Int},V::Vector{Float64}) = MySparseMatrix(R,C,V,maximum(R),maximum(C))

function *(M::MySparseMatrix, X::Vector{Float64})
    @assert size(X, 1) == M.n "Incompatible dimensions: M has $(M.n) columns but X has $(length(X)) rows."
    ### BEGIN SOLUTION
    Y = zeros(M.m)
    # Accumulate the contribution of each stored (row, column, value) triplet
    @inbounds for (i,j,v) in zip(M.rows, M.cols, M.vals)
        Y[i] += v*X[j]
    end
    return Y
    ### END SOLUTION
end
* (generic function with 318 methods)

Test your code with the cell below:

In [15]:
m, n = 4, 3
R = [2, 2, 2, 3, 3]
C = [1, 2, 3, 1, 3]
V = [5., 6., 7., 8., 9.]
A = MySparseMatrix(R, C, V, m, n)
b = [1.; 1.; 1.]
@assert A*b == [0.; 18.; 17.; 0.] "Multiplication does not work!"
  2. Construct the left-stochastic matrix $M$:
In [16]:
nn, ne = length(nodes), size(edges, 1)

### BEGIN SOLUTION
# Count the number of outbound edges for each node
n_outbound = zeros(Int, nn)
for e in eachrow(edges)
    n_outbound[e[1]] += 1
end

# Build matrix
R, C = edges[:, 1], edges[:, 2]
V = 1 ./ n_outbound[R]
### END SOLUTION

M = MySparseMatrix(C, R, V)
MySparseMatrix([1, 2, 2, 2, 2, 2, 2, 3, 3, 3  …  29807, 104616, 120348, 198551, 116918, 190926, 112376, 108047, 84697, 154081], [175973, 130880, 145856, 159190, 159200, 159207, 159431, 4, 5, 6887  …  30983, 76929, 86508, 128270, 152419, 156536, 170697, 177550, 181302, 189293], [0.025, 0.007936507936507936, 0.012658227848101266, 0.0625, 0.017543859649122806, 0.02564102564102564, 0.0037174721189591076, 0.07692307692307693, 0.08333333333333333, 0.0125  …  0.1, 0.5, 1.0, 1.0, 1.0, 0.1, 0.1, 0.1, 1.0, 1.0], 199903, 199903)
  3. Implement the power iteration to compute the eigenvector associated with the principal eigenvalue of $M$. Since the eigenvalue $\lambda=1$ is known, you can use $\|Mr-r\| \leq \varepsilon\|r\|$ as a stopping criterion.
In [17]:
function power_iteration(M, x; ε=1e-12, maxiter=1000)
    ### BEGIN SOLUTION
    niter = 0
    while niter < maxiter
        niter += 1
        x = x / √(x'x)         # normalize the current iterate
        Mx = M*x
        λ = x'Mx               # Rayleigh-quotient estimate of the eigenvalue (here λ → 1)
        e = norm(Mx - λ*x)     # residual; ‖x‖ = 1, so this matches the stopping criterion
        x = Mx
        e ≤ ε && return x
    end
    return nothing
    ### END SOLUTION
    # Return only the eigenvector, not the eigenvalue
end
power_iteration (generic function with 1 method)
In [18]:
@assert [1, -1]'power_iteration([1. 2.; 2. 1.], [1., 0.]) |> abs < 1e-9
@assert [1, 0]'power_iteration([0. 0.; 0. 1.], [1., 1.]) |> abs < 1e-9
@assert [0, 1]'power_iteration([1. 0.; 0. .5], [1., 1.]) |> abs < 1e-9

The next cell runs PageRank:

In [19]:
x = ones(nn) / nn
x = @time power_iteration(M, x)

p = sortperm(x, rev=true)
sorted_nodes = view(nodes, p)

print(join(sorted_nodes[1:20], "\n"))
@assert sorted_nodes[1] == "United States"
@assert sorted_nodes[2] == "United Kingdom"
@assert sorted_nodes[3] == "World War II"
@assert sorted_nodes[4] == "Latin"
@assert sorted_nodes[5] == "France"
  6.187492 seconds (7.99 k allocations: 1.126 GiB, 6.16% gc time, 0.22% compilation time)
United States
United Kingdom
World War II
Latin
France
Germany
English language
China
Canada
India
Mathematics
Italy
Catholic Church
Australia
Greek language
Europe
England
World War I
London
Russia
  4. Write a function search(keyword) to perform a search in the database. Here's an example of what this function could return:

    julia> search("Newton")
    47-element Vector{String}:
     "Isaac Newton"
     "Newton (unit)"
     "Newton's laws of motion"
     …
    
    Hint (click to display)
    • The method filter(condition, itr) returns a filtered version of the iterable itr with only the elements x satisfying condition(x) == true remaining.
    • The method occursin(needle::String, haystack::String) returns true if and only if needle is a substring of haystack.

    Of course, the best way to understand these methods is to read the documentation.

In [20]:
function search(keyword)
    ### BEGIN SOLUTION
    filter(s -> occursin(keyword, s), sorted_nodes)
    ### END SOLUTION
end

search("Newton")
47-element Vector{String}:
 "Isaac Newton"
 "Newton (unit)"
 "Newton's laws of motion"
 "Newton's law of universal gravitation"
 "Newton's method"
 "Newtonian fluid"
 "Non-Newtonian fluid"
 "Newton metre"
 "Newton, Massachusetts"
 "Olivia Newton-John"
 ⋮
 "Robert Newton"
 "John Gilbert Newton Brown"
 "Newton's inequalities"
 "Newton's method in optimization"
 "Thandie Newton"
 "Wayne Newton"
 "Post-Newtonian expansion"
 "Quasi-Newton method"
 "Helmut Newton"