Group lasso with overlap

Comparison of solvers for a least squares with overlapping group lasso regularization.

References

This example is modeled after the experiments in Adaptive Three Operator Splitting, Appendix E.3.

../../_images/sphx_glr_plot_overlapping_group_lasso_001.png

Out:

beta = 0.0001
beta = 0.001
beta = 0.01
beta = 0.1

import copt as cp
import matplotlib.pyplot as plt
import numpy as np
from sklearn import preprocessing

import copt.loss
import copt.penalty

np.random.seed(0)

n_samples, n_features = 100, 1002

# .. generate some data ..
# .. the first set of blocks is
groups = [np.arange(8 * i, 8 * i + 10) for i in range(125)]
ground_truth = np.zeros(n_features)
g = np.random.randint(0, len(groups), 10)
for i in g:
    ground_truth[groups[i]] = np.random.randn()

A = np.random.randn(n_samples, n_features)
p = 0.95  # create a matrix with correlations between features
for i in range(1, n_features):
    A[:, i] = p * A[:, i] + (1 - p) * A[:, i-1]
A[:, 0] /= np.sqrt(1 - p ** 2)
A = preprocessing.StandardScaler().fit_transform(A)
b = A.dot(ground_truth) + np.random.randn(n_samples)

# make labels in {0, 1}
b = np.sign(b)
b = (b + 1) // 2


# .. compute the step-size ..
max_iter = 5000
f = copt.loss.LogLoss(A, b)
step_size = 1. / f.lipschitz

# .. run the solver for different values ..
# .. of the regularization parameter beta ..
all_betas = np.logspace(-4, -1, 4)
all_trace_ls, all_trace_nols, all_trace_pdhg_nols, all_trace_pdhg = [], [], [], []
all_trace_ls_time, all_trace_nols_time, all_trace_pdhg_nols_time, all_trace_pdhg_time = [], [], [], []
out_img = []
for i, beta in enumerate(all_betas):
    print('beta = %s' % beta)
    G1 = copt.penalty.GroupL1(beta, groups[::2])
    G2 = copt.penalty.GroupL1(beta, groups[1::2])

    def loss(x):
        return f(x) + G1(x) + G2(x)

    cb_tosls = cp.utils.Trace()
    x0 = np.zeros(n_features)
    tos_ls = cp.minimize_three_split(
        f.f_grad, x0, G1.prox, G2.prox, step_size=10 * step_size,
        max_iter=max_iter, tol=1e-14, verbose=1,
        callback=cb_tosls, h_Lipschitz=beta)
    trace_ls = np.array([loss(x) for x in cb_tosls.trace_x])
    all_trace_ls.append(trace_ls)
    all_trace_ls_time.append(cb_tosls.trace_time)

    cb_tos = cp.utils.Trace()
    x0 = np.zeros(n_features)
    tos = cp.minimize_three_split(
        f.f_grad, x0, G1.prox, G2.prox,
        step_size=step_size,
        max_iter=max_iter, tol=1e-14, verbose=1,
        line_search=True, callback=cb_tos)
    trace_nols = np.array([loss(x) for x in cb_tos.trace_x])
    all_trace_nols.append(trace_nols)
    all_trace_nols_time.append(cb_tos.trace_time)
    out_img.append(tos.x)

    cb_pdhg = cp.utils.Trace()
    x0 = np.zeros(n_features)
    pdhg = cp.minimize_primal_dual(
        f.f_grad, x0, G1.prox, G2.prox,
        callback=cb_pdhg, max_iter=max_iter,
        step_size=step_size,
        step_size2=(1. / step_size) / 2, tol=0, line_search=False)
    trace_pdhg = np.array([loss(x) for x in cb_pdhg.trace_x])
    all_trace_pdhg.append(trace_pdhg)
    all_trace_pdhg_time.append(cb_pdhg.trace_time)

    cb_pdhg_nols = cp.utils.Trace()
    x0 = np.zeros(n_features)
    pdhg_nols = cp.minimize_primal_dual(
        f.f_grad, x0, G1.prox, G2.prox,
        callback=cb_pdhg_nols, max_iter=max_iter,
        step_size=step_size,
        step_size2=(1. / step_size) / 2, tol=0, line_search=False)
    trace_pdhg_nols = np.array([loss(x) for x in cb_pdhg_nols.trace_x])
    all_trace_pdhg_nols.append(trace_pdhg_nols)
    all_trace_pdhg_nols_time.append(cb_pdhg_nols.trace_time)


# .. plot the results ..
fig, ax = plt.subplots(2, 4, sharey=False)
xlim = [2000, 2000, 1000, 2000]
markevery = [x//5 for x in xlim]
for i, beta in enumerate(all_betas):
    ax[0, i].set_title(r'$\lambda=%s$' % beta)
    ax[0, i].set_title(r'$\lambda=%s$' % beta)
    ax[0, i].plot(out_img[i] / np.max(out_img[i]))
    ax[0, i].plot(ground_truth / np.max(ground_truth))
    ax[0, i].set_xticks(())
    ax[0, i].set_yticks(())
    ax[0, i].set_ylim((-0.5, 1.5))

    fmin = min(np.min(all_trace_ls[i]), np.min(all_trace_nols[i]))
    scale = 1. # all_trace_ls[i][0] - fmin
    plot_tos, = ax[1, i].plot(
        (all_trace_ls[i] - fmin) / scale, '--',
        lw=2, marker='o', markevery=markevery[i],
        markersize=5)

    plot_nols, = ax[1, i].plot(
        (all_trace_nols[i] - fmin) / scale,
        lw=2, marker='h', markevery=markevery[i],
        markersize=5)

    plot_pdhg, = ax[1, i].plot(
        (all_trace_pdhg[i] - fmin) / scale,
        lw=2, marker='^', markevery=markevery[i],
        markersize=5)

    plot_pdhg_nols, = ax[1, i].plot(
        (all_trace_pdhg_nols[i] - fmin) / scale,
        lw=2, marker='d', markevery=markevery[i],
        markersize=5)

    ax[1, i].set_xlabel('Iterations')
    ax[1, i].set_yscale('log')
    ax[1, i].set_ylim((1e-10, None))
    ax[1, i].set_xlim((0, xlim[i]))
    ax[1, i].grid(True)


plt.gcf().subplots_adjust(bottom=0.25)
plt.figlegend(
    (plot_tos, plot_nols, plot_pdhg, plot_pdhg_nols),
    ('TOS with line search', 'TOS without line search', 'PDHG with line search', 'PDHG without line search'), 'lower center', ncol=2,
    scatterpoints=1, frameon=False,)

ax[1, 0].set_ylabel('Objective minus optimum')
plt.show()

Total running time of the script: ( 3 minutes 31.896 seconds)

Estimated memory usage: 8 MB

Gallery generated by Sphinx-Gallery