From 1e25751ce99f7c746cab865ebf8dfd62c0732ac0 Mon Sep 17 00:00:00 2001 From: clane9 Date: Thu, 15 Dec 2016 12:19:03 -0500 Subject: [PATCH 1/3] Technical fixes to roi_extract. - betas are now computed in a more numerically stable way in the fit_glm function. - redundant spike regressors are handled better in roi_extract. --- CHANGELOG.md | 9 +++++++++ bin/roi_extract | 11 +++++++++++ lib/npdl_utils.py | 14 +++++++------- 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9a934af..59338b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,15 @@ neat. ### Changed - Changed takesnap inf view angle to fix shadow from @judyseinkim. - Changed scripts to record version number from @clane9. +- Changed method for computing beta values in roi_extract. Previously, betas were + computed the naive way, by explicitly inverting `X' * X`. Although correct, + this is slow and numerically unstable. It's better to use the SVD, as + discussed [on wikipedia](https://en.wikipedia.org/wiki/Linear_least_squares_(mathematics)#Orthogonal_decomposition_methods). + +### Fixed +- Redundant spike covariates are now removed before fitting the GLM. This + prevents the "Design matrix is rank-deficient" error from coming up in this + case. ## [1.0] - 2016-08-09 diff --git a/bin/roi_extract b/bin/roi_extract index 9b730da..aede272 100755 --- a/bin/roi_extract +++ b/bin/roi_extract @@ -632,7 +632,10 @@ def read_run_cfds(run_cfds, run_len): # Determine type of confound # Normalize continuous covariates (in particular, center) + # Also, remove any redundant spike covariates spike_mask = np.ones(run_cfd.shape[1], dtype=int) + keep_mask = np.ones(run_cfd.shape[1], dtype=int) + spike_inds = []; for i in range(run_cfd.shape[1]): if np.sum(run_cfd[:, i] == np.round(run_cfd[:, i])) < run_len: run_cfd[:, i] = normalize(run_cfd[:, i]) @@ -640,6 +643,14 @@ def read_run_cfds(run_cfds, run_len): elif np.sum(run_cfd[:, i]) != 1.: raise NPDLError(('Only continuous covariates and spike regressors' + 'accepted as confounds.')) + else: + # Check if spike covariate is redundant. + spike_ind = np.argmax(run_cfd[:, i]) + if spike_ind in spike_inds: + keep_mask[i] = 0 + else: + spike_inds.append(spike_ind) + run_cfd = run_cfd(:, keep_mask==1) return run_cfd, spike_mask class Events(object): diff --git a/lib/npdl_utils.py b/lib/npdl_utils.py index 93e94b1..edd3a30 100644 --- a/lib/npdl_utils.py +++ b/lib/npdl_utils.py @@ -413,7 +413,7 @@ def prep_fig(leadbuff, backbuff, bottombuff, topbuff, figw, figh): def fit_glm(TS, DM, cfd_mat=None, intercept=True, outdir=None, out_prefix=None, logger=None): - """Calculate GLM beta weights. Return baseline and PSC betas. + """Calculate GLM beta weights using SVD. Return baseline and PSC betas. """ # prepend intercept if intercept: @@ -426,7 +426,7 @@ def fit_glm(TS, DM, cfd_mat=None, intercept=True, outdir=None, np.savetxt('{}/{}_design.txt'.format(outdir, out_prefix), DM, delimiter=' ') # calculate singular values try: - S = np.linalg.svd(DM, compute_uv=False) + U, S, VT = np.linalg.svd(DM, full_matrices=False, compute_uv=True) except LinAlgError: raise NPDLError('Bad design matrix: SVD did not converge.') S_str = ' '.join(['{0:.0f}'.format(s) for s in S]) @@ -434,17 +434,17 @@ def fit_glm(TS, DM, cfd_mat=None, intercept=True, outdir=None, logger.log('Singular values for {} design mat: {}.'.format(out_prefix, S_str)) # determine if rank deficient as in matrix_rank function eps = np.finfo('f8').eps - thresh = S.max() * np.max(DM.shape) * eps + thresh = S[0] * np.max(DM.shape) * eps rank = np.sum(S > thresh) if logger is not None and out_prefix is not None: logger.log('Design matrix width for {}: {}; rank: {}.'.format(out_prefix, DM.shape[1], rank)) if rank < S.size: raise NPDLError('Design matrix is rank deficient.') - # calculate betas - # y = X*beta - # beta = (X'X)^-1 * X' * y + # calculate betas using numerically stable SVD method. + # See: https://en.wikipedia.org/wiki/Linear_least_squares_(mathematics)#Orthogonal_decomposition_methods TS = TS.reshape((-1, 1)) - beta = np.dot(np.linalg.inv(np.dot(DM.T, DM)), np.dot(DM.T, TS)) + Sinv = (S ** -1).reshape(-1, 1) + beta = np.dot(VT.T, Sinv * np.dot(U.T, TS)) if outdir is not None and out_prefix is not None: np.savetxt('{}/{}_beta.txt'.format(outdir, out_prefix), beta) beta = beta.reshape(-1) From a02fdb1dbc4412d82c6952982f78c8ba4c083e51 Mon Sep 17 00:00:00 2001 From: Connor Lane Date: Sun, 5 Feb 2017 17:07:14 -0500 Subject: [PATCH 2/3] updated Changelog. --- CHANGELOG.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 59338b0..0406146 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,12 +12,13 @@ neat. - Changed method for computing beta values in roi_extract. Previously, betas were computed the naive way, by explicitly inverting `X' * X`. Although correct, this is slow and numerically unstable. It's better to use the SVD, as - discussed [on wikipedia](https://en.wikipedia.org/wiki/Linear_least_squares_(mathematics)#Orthogonal_decomposition_methods). + discussed [on wikipedia](https://en.wikipedia.org/wiki/Linear_least_squares_(mathematics)#Orthogonal_decomposition_methods) + (from @clane9). ### Fixed - Redundant spike covariates are now removed before fitting the GLM. This prevents the "Design matrix is rank-deficient" error from coming up in this - case. + case (from @clane9). ## [1.0] - 2016-08-09 From 841243a64f846ed8f5a454fa86bc6efdc209169a Mon Sep 17 00:00:00 2001 From: clane9 Date: Tue, 17 Apr 2018 14:21:38 -0400 Subject: [PATCH 3/3] Fix typo in roi_extract --- bin/roi_extract | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/roi_extract b/bin/roi_extract index aede272..5ef37f9 100755 --- a/bin/roi_extract +++ b/bin/roi_extract @@ -650,7 +650,7 @@ def read_run_cfds(run_cfds, run_len): keep_mask[i] = 0 else: spike_inds.append(spike_ind) - run_cfd = run_cfd(:, keep_mask==1) + run_cfd = run_cfd[:, keep_mask==1] return run_cfd, spike_mask class Events(object):