julia算法案例 数据读取| 优化算法
当前位置:以往案例 > >julia算法案例 数据读取| 优化算法
2020-07-02

julia算法案例

这个一个数据读取和一个优化算法的julia code

using LinearAlgebra: norm
using Random

include("SPD1_loader.jl")

function proximal_mapping(x::Real, l1::Real, l2::Real, step_size::Real)
x /= (1 + l2 * step_size)
threshold = l1 * step_size / (1 + l2 * step_size)
if x > threshold
x -= threshold
elseif x < -threshold x += threshold else x = 0 end return x end @inline function dual_prox(u::Real, y::Real, step_size::Real)::Real # return (u - step_size * y) / (1.0 + step_size) v = (u - step_size * y) / (1.0 + step_size / 2) if v * y <= 0 return v else return 0.0 end end function SGD(data::Array{<:Real, 2}, label::Array{<:Real, 1}, l1::Real, l2::Real; max_epoch::Int=100, alpha::Real=5e-5, beta::Real=7e-2) n, d = size(data) x = zeros(d) ret = zeros(max_epoch) println("PSGD: alpha=$alpha, beta=$beta") for epoch = 1 : max_epoch func_val = norm(max.(1 .- (data * x) .* label, 0))^2 / n + l1 * norm(x, 1) + 0.5 * l2 * norm(x)^2 ret[epoch] = func_val sparsity = norm(x, 0) / d println("Epoch: $epoch, Func_Val: $func_val, Sparsity: $sparsity") step_size = alpha / (1.0 + epoch * beta) for _ = 1 : n idx = rand(1 : n) c = 2 * label[idx] * min(label[idx] * data[idx,:]' * x - 1, 0) x .= proximal_mapping.(x - step_size * c * data[idx,:], l1, l2, step_size) end end return ret end function SVRG(data::Array{<:Real, 2}, label::Array{<:Real, 1}, l1::Real, l2::Real; max_epoch::Int=100, step_size::Real=5e-5) n, d = size(data) x = zeros(d) ret = zeros(max_epoch) println("SVRG: step_size=$step_size") for epoch = 1 : max_epoch func_val = norm(max.(1 .- (data * x) .* label, 0))^2 / n + l1 * norm(x, 1) + 0.5 * l2 * norm(x)^2 ret[epoch] = func_val sparsity = norm(x, 0) / d println("Epoch: $epoch, Func_Val: $func_val, Sparsity: $sparsity") c_tilde = 2 * label .* min.((data * x) .* label .- 1, 0) grad = sum(data .* c_tilde, dims=1)[:] / n for _ = 1 : n idx = rand(1 : n) c = 2 * label[idx] * min(label[idx] * data[idx,:]' * x - 1, 0) x .= proximal_mapping.(x - step_size * (2 * (c - c_tilde[idx]) * data[idx,:] + grad), l1, l2, step_size) end end return ret end function SPD1(data::Array{<:Real, 2}, label::Array{<:Real, 1}, l1::Real, l2::Real; max_epoch::Int=100, eta_0::Real=1e-3, tau_0::Real=1e+1, alpha::Real=1e-1, beta::Real=2e+0) n, d = size(data) x, y = zeros(d), zeros(n) ret = zeros(max_epoch) println("SPD1: eta_0=$eta_0, tau_0=$tau_0, alpha=$alpha, beta=$beta") for epoch = 1 : max_epoch func_val = norm(max.(1 .- (data * x) .* label, 0))^2 / n + l1 * norm(x, 1) + 0.5 * l2 * norm(x)^2 ret[epoch] = func_val sparsity = norm(x, 0) / d println("Epoch: $epoch, Func_Val: $func_val, Sparsity: $sparsity") eta = eta_0 / (1.0 + epoch * alpha) tau = tau_0 / (1.0 + epoch * beta) for _ = 1 : n*d i, j = rand(1 : n), rand(1 : d) a, x_tmp = data[i,j], x[j] x[j] = proximal_mapping(x[j] - eta * a * y[i], l1, l2, eta) y[i] = dual_prox(y[i] + tau * a * x_tmp, label[i], tau / d) end end return ret end function SPD1_VR(data::Array{<:Real, 2}, label::Array{<:Real, 1}, l1::Real, l2::Real; max_epoch::Int=100, eta::Real=1e-3, tau::Real=5e-1) n, d = size(data) x, y = zeros(d), zeros(n) ret = zeros(max_epoch) println("SPD1-VR: eta=$eta, tau=$tau") for epoch = 1 : max_epoch func_val = norm(max.(1 .- (data * x) .* label, 0))^2 / n + l1 * norm(x, 1) + 0.5 * l2 * norm(x)^2 ret[epoch] = func_val sparsity = norm(x, 0) / d println("Epoch: $epoch, Func_Val: $func_val, Sparsity: $sparsity") grad_x, grad_y = data' * y / n, data * x / d x_tilde, y_tilde = copy(x), copy(y) for _ = 1 : n*d i, j = rand(1 : n), rand(1 : d) a, x_tmp = data[i,j], x[j] x[j] = proximal_mapping(x[j] - eta * (a * (y[i] - y_tilde[i]) + grad_x[j]), l1, l2, eta) y[i] = dual_prox(y[i] + tau * (a * (x[j] - x_tilde[j]) + grad_y[i]), label[i], tau / d) end end return ret end function SPD1_VR_AIS(data::Array{<:Real, 2}, label::Array{<:Real, 1}, l1::Real, l2::Real; max_epoch::Int=100, eta::Real=1e-3, tau::Real=5e-1) n, d = size(data) x, y = zeros(d), zeros(n) ret = zeros(max_epoch) G_x, bsvec_x, prob_x = ones(d), ones(d), ones(d) G_y, bsvec_y, prob_y = ones(n), ones(n), ones(n) size_sample_x, size_sample_y = 2*d, 2*n sample_x, sample_y = zeros(size_sample_x), zeros(size_sample_y) alpha0, alpha1 = 0.1, 0.8 println("SPD1-VR-AIS: eta=$eta, tau=$tau") for epoch = 1 : max_epoch func_val = norm(max.(1 .- (data * x) .* label, 0))^2 / n + l1 * norm(x, 1) + 0.5 * l2 * norm(x)^2 ret[epoch] = func_val sparsity = norm(x, 0) / d println("Epoch: $epoch, Func_Val: $func_val, Sparsity: $sparsity") grad_x, grad_y = data' * y / n, data * x / d x_tilde, y_tilde = copy(x), copy(y) if epoch > 1
for i = 1 : n
yi = dual_prox(y[i] + tau * grad_y[i], label[i], tau / d)
G_y[i] = norm(yi-y[i])
end
for j = 1 : d
xj = proximal_mapping(x[j] - eta * grad_x[j], l1, l2, eta)
G_x[j] = norm(xj-x[j])
end
end

alpha2 = alpha0 + alpha1 * epoch / max_epoch
prob_x = (alpha2 / sum(G_x)) .* G_x + ((1 - alpha2) / d) .* ones(d)
prob_y = (alpha2 / sum(G_y)) .* G_y + ((1 - alpha2) / n) .* ones(n)

temp = 0
for i = 1 : d
bsvec_x[i] = temp + prob_x[i]
temp = bsvec_x[i]
end
temp = 0
for i = 1 : n
bsvec_y[i] = temp + prob_y[i]
temp = bsvec_y[i]
end

for i = 1 : size_sample_x
randnum = rand()
left, right = 0, d
while left + 1 < right mid = convert(Int64,floor((left + right) / 2)) if randnum > bsvec_x[mid]
left = mid
else
right = mid
end
end
sample_x[i] = right
end

for i = 1 : size_sample_y
randnum = rand()
left, right = 0, n
while left + 1 < right mid = convert(Int64,floor((left + right) / 2)) if randnum > bsvec_y[mid]
left = mid
else
right = mid
end
end
sample_y[i] = right
end

for _ = 1 : n*d
index_x, index_y = rand(1 : size_sample_x), rand(1 : size_sample_y)
i, j = convert(Int64,sample_y[index_y]), convert(Int64,sample_x[index_x])
a, x_tmp = data[i,j], x[j]
eta_i = eta / (n * prob_y[i])
tau_j = tau / (d * prob_x[j])
x[j] = proximal_mapping(x[j] - eta_i * (a * (y[i] - y_tilde[i]) + grad_x[j]), l1, l2, eta_i)
y[i] = dual_prox(y[i] + tau_j * (a * (x[j] - x_tilde[j]) + grad_y[i]), label[i], tau_j / d)
end
end

return ret
end


function SPD1_AIS(data::Array{<:Real, 2}, label::Array{<:Real, 1}, l1::Real, l2::Real; max_epoch::Int=100, eta_0::Real=1e-3, tau_0::Real=1e+1, alpha::Real=1e-1, beta::Real=2e+0) n, d = size(data) x, y = zeros(d), zeros(n) G, bsvec, prob = ones(n*d), ones(n*d), ones(n*d) alpha0, alpha1 = 0, 0 ret = zeros(max_epoch) println("SPD_AIS: eta_0=$eta_0, tau_0=$tau_0, alpha=$alpha, beta=$beta") for epoch = 1 : max_epoch func_val = norm(max.(1 .- (data * x) .* label, 0))^2 / n + l1 * norm(x, 1) + 0.5 * l2 * norm(x)^2 ret[epoch] = func_val sparsity = norm(x, 0) / d println("Epoch: $epoch, Func_Val: $func_val, Sparsity: $sparsity") eta = eta_0 / (1.0 + epoch * alpha) tau = tau_0 / (1.0 + epoch * beta) alpha2 = alpha0 + alpha1 * epoch / max_epoch prob = (alpha2 / sum(G)) .* G + ((1 - alpha2) / (n*d)) .* ones(n*d) temp = 0 for i = 1 : n*d bsvec[i] = temp + prob[i] temp = bsvec[i] end for _ = 1 : n*d randnum = rand() left, right = 0, n while left + 1 < right mid = convert(Int64,floor((left + right) / 2)) if randnum > bsvec[mid]
left = mid
else
right = mid
end
end
randindx = right
i = convert(Int64,ceil(randindx / d))
j = randindx - (i-1)*d
a, x_tmp = data[i,j], x[j]
eta_j = eta / (n*d*prob[randindx])
tau_i = tau / (n*d*prob[randindx])
xj = proximal_mapping(x[j] - eta * a * y[i], l1, l2, eta)
yi = dual_prox(y[i] + tau * a * x_tmp, label[i], tau / d)
G[randindx] = norm(xj - x[j]) + norm(yi - y[i])
x[j] = proximal_mapping(x[j] - eta_j * a * y[i], l1, l2, eta_j)
y[i] = dual_prox(y[i] + tau_i * a * x_tmp, label[i], tau_i / d)
end
end

return ret
end


data, label = load_libsvm_data("/Users/jjli/desktop/ds/a2a")
l1, l2 = 0.0, 1.0

@time SPD1_VR(data, label, l1, l2, max_epoch=50)
@time SPD1_VR_AIS(data, label, l1, l2, max_epoch=50)


function load_libsvm_data(file_name::AbstractString)
max_idx::Int32 = 0
n::Int32 = 0
open(file_name) do f
for line in eachline(f)
for entry in split(line, " ")[2:end]
s = split(entry, ":")
if length(s) > 1
max_idx = max(max_idx, parse(Int32, s[1]))
end
end
n += 1
end
end

println("Problem size: $n×$max_idx")
data = zeros(Float64, n, max_idx)
label = zeros(Float64, n)
open(file_name) do f
for (idx, line) in enumerate(eachline(f))
strs = split(line, " ")
label[idx] = parse(Float64, strs[1])
if length(label) == 1
continue
end
for entry in strs[2:end]
if length(entry) > 1
pos, val = split(entry, ":")
data[idx, parse(Int32, pos)] = parse(Float32, val)
end
end
end
end

return data, label
end

在线提交订单