.. note::
    :class: sphx-glr-download-link-note

    Click :ref:`here <sphx_glr_download_auto_examples_proximal_splitting_plot_overlapping_group_lasso.py>` to download the full example code

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_auto_examples_proximal_splitting_plot_overlapping_group_lasso.py:


Group lasso with overlap
========================

Comparison of solvers for a logistic regression problem with
overlapping group lasso regularization.

References
----------
This example is modeled after the experiments in
*Adaptive Three Operator Splitting*, Appendix E.3.
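For reference, the objective minimized below can be written as follows; this
formula is inferred from the code, which combines the logistic loss
``copt.loss.LogLoss`` with two ``copt.penalty.GroupL1`` penalties:

.. math::

    \min_{x \in \mathbb{R}^p} \; f(x)
        + \beta \sum_{g \in \mathcal{G}_1} \|x_g\|_2
        + \beta \sum_{g \in \mathcal{G}_2} \|x_g\|_2 ,

where :math:`f` is the logistic loss of the data ``(A, b)`` and
:math:`\mathcal{G}_1`, :math:`\mathcal{G}_2` are the even- and odd-indexed
groups. The groups within each collection do not overlap, so each of the two
penalties has an easy-to-evaluate proximal operator, which is what the three
operator splitting and primal-dual solvers below rely on.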
.. image:: /auto_examples/proximal_splitting/images/sphx_glr_plot_overlapping_group_lasso_001.png
    :class: sphx-glr-single-img

.. rst-class:: sphx-glr-script-out

 Out:

 .. code-block:: none

    beta = 0.0001
    beta = 0.001
    beta = 0.01
    beta = 0.1


.. code-block:: default


    import copt as cp
    import matplotlib.pyplot as plt
    import numpy as np
    from sklearn import preprocessing

    import copt.loss
    import copt.penalty

    np.random.seed(0)

    n_samples, n_features = 100, 1002

    # .. generate some data ..
    # .. groups are blocks of 10 consecutive features, overlapping by 2 ..
    groups = [np.arange(8 * i, 8 * i + 10) for i in range(125)]
    ground_truth = np.zeros(n_features)
    g = np.random.randint(0, len(groups), 10)
    for i in g:
        ground_truth[groups[i]] = np.random.randn()

    A = np.random.randn(n_samples, n_features)
    p = 0.95  # create a matrix with correlations between features
    for i in range(1, n_features):
        A[:, i] = p * A[:, i] + (1 - p) * A[:, i - 1]
    A[:, 0] /= np.sqrt(1 - p ** 2)
    A = preprocessing.StandardScaler().fit_transform(A)
    b = A.dot(ground_truth) + np.random.randn(n_samples)

    # make labels in {0, 1}
    b = np.sign(b)
    b = (b + 1) // 2

    # .. compute the step-size ..
    max_iter = 5000
    f = copt.loss.LogLoss(A, b)
    step_size = 1. / f.lipschitz

    # .. run the solver for different values ..
    # .. of the regularization parameter beta ..
    all_betas = np.logspace(-4, -1, 4)
    all_trace_ls, all_trace_nols, all_trace_pdhg_nols, all_trace_pdhg = [], [], [], []
    all_trace_ls_time, all_trace_nols_time, all_trace_pdhg_nols_time, all_trace_pdhg_time = [], [], [], []
    out_img = []
    for i, beta in enumerate(all_betas):
        print('beta = %s' % beta)
        G1 = copt.penalty.GroupL1(beta, groups[::2])
        G2 = copt.penalty.GroupL1(beta, groups[1::2])

        def loss(x):
            return f(x) + G1(x) + G2(x)

        cb_tosls = cp.utils.Trace()
        x0 = np.zeros(n_features)
        tos_ls = cp.minimize_three_split(
            f.f_grad, x0, G1.prox, G2.prox, step_size=10 * step_size,
            max_iter=max_iter, tol=1e-14, verbose=1,
            callback=cb_tosls, h_Lipschitz=beta)
        trace_ls = np.array([loss(x) for x in cb_tosls.trace_x])
        all_trace_ls.append(trace_ls)
        all_trace_ls_time.append(cb_tosls.trace_time)

        cb_tos = cp.utils.Trace()
        x0 = np.zeros(n_features)
        tos = cp.minimize_three_split(
            f.f_grad, x0, G1.prox, G2.prox,
            step_size=step_size,
            max_iter=max_iter, tol=1e-14, verbose=1,
            line_search=False, callback=cb_tos)
        trace_nols = np.array([loss(x) for x in cb_tos.trace_x])
        all_trace_nols.append(trace_nols)
        all_trace_nols_time.append(cb_tos.trace_time)
        out_img.append(tos.x)

        cb_pdhg = cp.utils.Trace()
        x0 = np.zeros(n_features)
        pdhg = cp.minimize_primal_dual(
            f.f_grad, x0, G1.prox, G2.prox,
            callback=cb_pdhg, max_iter=max_iter,
            step_size=step_size,
            step_size2=(1. / step_size) / 2, tol=0, line_search=True)
        trace_pdhg = np.array([loss(x) for x in cb_pdhg.trace_x])
        all_trace_pdhg.append(trace_pdhg)
        all_trace_pdhg_time.append(cb_pdhg.trace_time)

        cb_pdhg_nols = cp.utils.Trace()
        x0 = np.zeros(n_features)
        pdhg_nols = cp.minimize_primal_dual(
            f.f_grad, x0, G1.prox, G2.prox,
            callback=cb_pdhg_nols, max_iter=max_iter,
            step_size=step_size,
            step_size2=(1. / step_size) / 2, tol=0, line_search=False)
        trace_pdhg_nols = np.array([loss(x) for x in cb_pdhg_nols.trace_x])
        all_trace_pdhg_nols.append(trace_pdhg_nols)
        all_trace_pdhg_nols_time.append(cb_pdhg_nols.trace_time)


    # .. plot the results ..
    fig, ax = plt.subplots(2, 4, sharey=False)
    xlim = [2000, 2000, 1000, 2000]
    markevery = [x // 5 for x in xlim]
    for i, beta in enumerate(all_betas):
        ax[0, i].set_title(r'$\lambda=%s$' % beta)
        ax[0, i].plot(out_img[i] / np.max(out_img[i]))
        ax[0, i].plot(ground_truth / np.max(ground_truth))
        ax[0, i].set_xticks(())
        ax[0, i].set_yticks(())
        ax[0, i].set_ylim((-0.5, 1.5))

        fmin = min(np.min(all_trace_ls[i]), np.min(all_trace_nols[i]))
        scale = 1.  # all_trace_ls[i][0] - fmin
        plot_tos, = ax[1, i].plot(
            (all_trace_ls[i] - fmin) / scale,
            '--', lw=2, marker='o', markevery=markevery[i],
            markersize=5)

        plot_nols, = ax[1, i].plot(
            (all_trace_nols[i] - fmin) / scale,
            lw=2, marker='h', markevery=markevery[i],
            markersize=5)

        plot_pdhg, = ax[1, i].plot(
            (all_trace_pdhg[i] - fmin) / scale,
            lw=2, marker='^', markevery=markevery[i],
            markersize=5)

        plot_pdhg_nols, = ax[1, i].plot(
            (all_trace_pdhg_nols[i] - fmin) / scale,
            lw=2, marker='d', markevery=markevery[i],
            markersize=5)

        ax[1, i].set_xlabel('Iterations')
        ax[1, i].set_yscale('log')
        ax[1, i].set_ylim((1e-10, None))
        ax[1, i].set_xlim((0, xlim[i]))
        ax[1, i].grid(True)


    plt.gcf().subplots_adjust(bottom=0.25)
    plt.figlegend(
        (plot_tos, plot_nols, plot_pdhg, plot_pdhg_nols),
        ('TOS with line search', 'TOS without line search',
         'PDHG with line search', 'PDHG without line search'),
        'lower center', ncol=2, scatterpoints=1, frameon=False,)

    ax[1, 0].set_ylabel('Objective minus optimum')
    plt.show()


.. rst-class:: sphx-glr-timing

   **Total running time of the script:** ( 3 minutes 31.896 seconds)

**Estimated memory usage:** 8 MB


.. _sphx_glr_download_auto_examples_proximal_splitting_plot_overlapping_group_lasso.py:


.. only:: html

 .. container:: sphx-glr-footer
    :class: sphx-glr-footer-example


  .. container:: sphx-glr-download

     :download:`Download Python source code: plot_overlapping_group_lasso.py <plot_overlapping_group_lasso.py>`


  .. container:: sphx-glr-download

     :download:`Download Jupyter notebook: plot_overlapping_group_lasso.ipynb <plot_overlapping_group_lasso.ipynb>`


.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_