
Problem: I have implemented several step-size strategies (classic, Polyak, and Adagrad), but my subgradient algorithm either diverges or fails to converge.

Initially, I focused on the problem:

minimize over x ∈ ℝⁿ:   f(x) = ‖Ax − b‖∞ = max_{i=1,…,m} |aᵢᵀx − bᵢ|

I computed and implemented the subgradient (in Julia):

# Function to compute a subgradient of f(x) = ‖Ax − b‖∞ at x
function subgrad(A, b, x)
    tmp = A * x - b
    _, ind = findmax(abs.(tmp))   # index of a largest-magnitude residual
    return A[ind[1], :] * sign(tmp[ind[1]])
    # With ties, any convex combination of the active rows is also a subgradient
end
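As a quick sanity check (my own test with synthetic data, not part of the assignment), subgrad should satisfy the defining inequality f(y) ≥ f(x) + gᵀ(y − x) at arbitrary pairs of points:

using LinearAlgebra

# Any violation here would mean subgrad does not return a true subgradient
A_test = randn(10, 3)
b_test = randn(10)
f_test(x) = maximum(abs.(A_test * x - b_test))
for _ = 1:1000
    xt, yt = randn(3), randn(3)
    g = subgrad(A_test, b_test, xt)
    @assert f_test(yt) >= f_test(xt) + dot(g, yt - xt) - 1e-10
end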

Then I moved on to the deterministic minimization of a stochastic function, i.e. the sample-average problem:

minimize over x ∈ ℝⁿ:   f₁(x) = (1/M) Σ_{i=1}^{M} ‖(Ā + σNᵢ)x − (b̄ + σνᵢ)‖∞

a sample-average approximation of E‖Ãx − b̃‖∞, where Nᵢ and νᵢ have i.i.d. standard Gaussian entries.

I started by constructing my problem and generating noisy data:

using JLD

# Mean of the data
Abar = load("data2.jld")["Abar"]
bbar = load("data2.jld")["bbar"]

# Problem dimensions, sample size, and noise level
m, n = size(Abar)
M = 1000
sigma = 4

# Structure to store all M samples of A and b
struct F1samples
    A_noisy::Array{Float64, 3} # size (M, m, n)
    b_noisy::Array{Float64, 3} # size (M, m, 1)
end

function build_F1(Abar, bbar, sigma, M)
    # Abar, bbar : mean of the data, size (m, n) and (m, 1)
    # sigma : noise level
    # M: sample size
    m, n = size(Abar)
    A_noisy = zeros(M, m, n)
    b_noisy = zeros(M, m, 1)
    
    for i = 1:M
        A_noisy[i, :, :] = Abar + sigma * randn(m, n)
        b_noisy[i, :, :] = bbar + sigma * randn(m, 1)
    end
    return F1samples(A_noisy, b_noisy)
end
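For readers without data2.jld, a synthetic stand-in (the dimensions are hypothetical, my own choice) is enough to exercise the sampler:

using Random
Random.seed!(0)               # reproducibility of this sketch only
Abar_demo = randn(20, 5)      # stand-in for the real Abar
bbar_demo = randn(20, 1)      # stand-in for the real bbar
f1_demo = build_F1(Abar_demo, bbar_demo, 4, 1000)
size(f1_demo.A_noisy)         # (1000, 20, 5)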

Then I evaluated the function at a given x:

# Function evaluation
function fvals(f1data::F1samples, x)
    # f1data: structure to store all M samples
    # x : current vector  
    # return f1(x)
    
    A_noisy = f1data.A_noisy
    b_noisy = f1data.b_noisy
    
    sum_fi = 0
    for i = 1:M
        max_val = maximum(abs.(A_noisy[i, :, :] * x - b_noisy[i, :])) 
        sum_fi += max_val
    end
    return sum_fi / M
end
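One consistency check I find useful (my own addition): by Jensen's inequality E‖Ãx − b̃‖∞ ≥ ‖Āx − b̄‖∞, so for large M the sample-average objective should sit above the noiseless one at any fixed point:

f1 = build_F1(Abar, bbar, sigma, M)
x0 = randn(n, 1)
# Expect true with high probability for M = 1000
fvals(f1, x0) >= maximum(abs.(Abar * x0 - bbar))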

Next, I computed the averaged subgradient:

# Subgradient evaluation
function subgrads(f1data::F1samples, x)
    # f1data: structure to store all the data
    # x : current vector  
    # return subgradient of f1(x)
    
    A_noisy = f1data.A_noisy
    b_noisy = f1data.b_noisy
    
    sum_subg_fi = zeros(n, 1)
    for i = 1:M
        sum_subg_fi += subgrad(A_noisy[i, :, :], b_noisy[i, :], x)
    end

    return sum_subg_fi / M  # Averaging the subgradient
end
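The same subgradient-inequality test as before applies to the averaged subgradient, since f₁ is a finite average of convex max-of-abs terms (reusing the f1 built just above):

using LinearAlgebra

for _ = 1:20
    xt, yt = randn(n, 1), randn(n, 1)
    g = subgrads(f1, xt)
    @assert fvals(f1, yt) >= fvals(f1, xt) + dot(vec(g), vec(yt - xt)) - 1e-8
end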

Finally, I implemented my optimization algorithm:

using JLD, LinearAlgebra, Plots

# Load data
Abar = load("data2.jld")["Abar"]
bbar = load("data2.jld")["bbar"]

f1 = build_F1(Abar, bbar, sigma, M)

# Initialization
x = zeros(n, 1)
xbest = x
i = 1
fbest = 1e6                    # f_best^0, a deliberately large starting value
histo = Float64[]              # sequence of best function values
constants = range(1e-3, stop=1e2, length=5)  # candidate step-size scalings (unused below)

pas = 0       # step-size strategy: 0 = 1/k, 1 = 1/sqrt(k), 2 = Adagrad, 3 = Polyak
itermax = 500

# We store the value of f(x_p) at every iterate
values_fxp = Float64[]

xp = x
f_xp = fvals(f1, xp)

push!(values_fxp, f_xp)
push!(histo, fbest)

sum_sousgrad = zeros(n, 1)     # running sum of squared subgradients, for Adagrad

while i < itermax
    # Assignments below target top-level globals, hence the explicit declaration
    global i, xp, f_xp, fbest, xbest, sum_sousgrad
    i += 1

    sous_grad_p = subgrads(f1, xp)
    sum_sousgrad += sous_grad_p .^ 2   # accumulate squared subgradients for Adagrad

    if pas == 0
        step_size = 1 / i                  # classic 1/k
    elseif pas == 1
        step_size = 1 / sqrt(i + 1)        # 1/sqrt(k)
    elseif pas == 2                        # Adagrad: per-coordinate step
        eta = 1
        step_size = eta ./ sqrt.(sum_sousgrad .+ 1e-8)
    else                                   # Polyak, using fbest as an estimate of f*
        step_size = (f_xp - fbest) / (norm(sous_grad_p, 2)^2 + 1e-8)
    end

    xp = xp .- step_size .* sous_grad_p
    f_xp = fvals(f1, xp)

    push!(values_fxp, f_xp)
    if f_xp < fbest                        # track the best iterate seen so far
        fbest = f_xp
        xbest = xp
    end
    push!(histo, fbest)
end

println("x* = ", xbest)
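To inspect the behaviour, plotting both recorded sequences makes divergence versus stalling visible at a glance (presumably what `using Plots` was for; the labels and log scale below are my own choices):

# Raw objective values per iterate and the running best, on a log scale
plot(1:length(values_fxp), values_fxp, label = "f(x_p)",
     xlabel = "iteration", ylabel = "objective", yscale = :log10)
plot!(1:length(histo), histo, label = "f_best")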

Issue: My subgradient algorithm either diverges or does not converge. What could be wrong? Is there an issue with my step-size strategies, subgradient calculation, or data generation? Any insights would be greatly appreciated.
