-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathDAgostiniUnfolder.hh
418 lines (332 loc) · 16 KB
/
DAgostiniUnfolder.hh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
#pragma once
// Standard library includes
#include <cmath>
#include <iostream>
#include <stdexcept>
// ROOT includes
#include "TVectorD.h"
// STV analysis includes
#include "Unfolder.hh"
// Implementation of the iterative D'Agostini unfolding method
// G. D'Agostini, Nucl. Instrum. Methods Phys. Res. A 362, 487-498 (1995)
// https://hep.physics.utoronto.ca/~orr/wwwroot/Unfolding/d-agostini.pdf.
//
// Uncertainties on the measurement are propagated through the unfolding
// procedure following a corrected expression given in Eq. (20) from
// https://arxiv.org/abs/1806.03350. The original paper by D'Agostini
// propagates the measurement uncertainties as if the unfolding matrix is
// independent of the measured data points, but this is only true for the first
// iteration.
class DAgostiniUnfolder : public Unfolder {
public:
// The maximum number of iterations to perform (to avoid infinite loops)
static constexpr unsigned int DAGOSTINI_MAX_ITERATIONS = 99;
// Enumerated type describing the rule to use to
// stop the iterations
enum ConvergenceCriterion {
FixedIterations = 0, // Stop after a predefined iteration count
FigureOfMerit = 1, // Stop once a figure of merit falls below threshold
};
DAgostiniUnfolder( unsigned int default_iterations = 1u )
: Unfolder(), conv_criter_( ConvergenceCriterion::FixedIterations ),
num_iterations_( default_iterations ) {}
DAgostiniUnfolder( ConvergenceCriterion cc, double fig_merit_target )
: Unfolder(), conv_criter_( cc ),
num_iterations_( DAGOSTINI_MAX_ITERATIONS ),
fig_merit_target_( fig_merit_target ) {}
// Trick taken from https://stackoverflow.com/a/18100999
using Unfolder::unfold;
virtual UnfoldedMeasurement unfold( const TMatrixD& data_signal,
const TMatrixD& data_covmat, const TMatrixD& smearcept,
const TMatrixD& prior_true_signal ) const override;
inline unsigned int get_iterations() const { return num_iterations_; }
inline void set_iterations( unsigned int iters )
{ num_iterations_ = iters; }
// Sets the value of the flag which determines whether MC statistical
// uncertainties on the response (smearceptance) matrix elements should
// be propagated through the unfolding procedure. This is more correct,
// but it is often a small effect and relatively slow to compute.
inline void set_include_respmat_covariance( bool do_it )
{ include_respmat_covariance_ = do_it; }
protected:
// Calculates the "figure of merit" used to determine convergence of the
// iterations when using the ConvergenceCriterion::FigureOfMerit option
double calc_figure_of_merit( const TMatrixD& old_true_signal,
const TMatrixD& new_true_signal ) const;
ConvergenceCriterion conv_criter_;
unsigned int num_iterations_;
double fig_merit_target_;
bool include_respmat_covariance_ = false;
};
UnfoldedMeasurement DAgostiniUnfolder::unfold( const TMatrixD& data_signal,
const TMatrixD& data_covmat, const TMatrixD& smearcept,
const TMatrixD& prior_true_signal ) const
{
// Check input matrix dimensions for sanity
this->check_matrices( data_signal, data_covmat,
smearcept, prior_true_signal );
int num_ordinary_reco_bins = smearcept.GetNrows();
int num_true_signal_bins = smearcept.GetNcols();
// Start the iterations by copying the prior on the true signal
auto* true_signal = new TMatrixD( prior_true_signal );
// Precompute the efficiency in each true bin by summing over reco bins in
// the smearceptance matrix. Note that the smearceptance matrix remains
// constant across iterations, and thus the efficiency does as well. Note
// that the precomputed efficiencies are expressed as a column vector.
TMatrixD eff_vec( num_true_signal_bins, 1 );
for ( int t = 0; t < num_true_signal_bins; ++t ) {
double efficiency = 0.;
for ( int r = 0; r < num_ordinary_reco_bins; ++r ) {
efficiency += smearcept( r, t );
}
eff_vec( t, 0 ) = efficiency;
}
// Create the unfolding matrix (always applied to the original
// background-subtracted data). Note that its rows correspond to true signal
// bins and its columns correspond to ordinary reco bins. This matrix will be
// updated upon every iteration.
auto unfold_mat = std::make_unique< TMatrixD >( num_true_signal_bins,
num_ordinary_reco_bins );
// Also create a matrix that we'll need to propagate the measurement
// uncertainties through the unfolding procedure. The iterations introduce
// non-trivial correlations which cause this matrix to differ from the
// unfolding matrix itself.
auto* err_prop_mat = new TMatrixD( num_true_signal_bins,
num_ordinary_reco_bins );
err_prop_mat->Zero(); // Zero out the elements (just in case)
// We need a 3D tensor to do the propagation of MC uncertainties. It is
// convenient in this case to represent it as a vector of TMatrixD objects.
auto err_prop_mc_vec = std::vector< TMatrixD >();
// Only populate the 3D tensor if it is needed (i.e., because we're including
// the MC uncertainties on the final result)
if ( include_respmat_covariance_ ) {
for ( int s = 0; s < num_true_signal_bins; ++s ) {
// Using emplace_back() here returns a reference to the new vector
// element. We will use it below to explicitly zero out the matrix
// contents (just in case). Use the same matrix element indexing scheme
// as the smearceptance matrix (i.e., rows are ordinary reco bins,
// columns are true signal bins).
auto& mat_ref = err_prop_mc_vec.emplace_back( num_ordinary_reco_bins,
num_true_signal_bins );
mat_ref.Zero();
}
}
// Start the iterations for the D'Agostini method
int it = 0;
double fm = DBL_MAX;
while ( true ) {
// If we've hit the configured or global maximum number of iterations, then
// stop the loop
if ( conv_criter_ == ConvergenceCriterion::FixedIterations
&& it >= num_iterations_ ) break;
else if ( it >= DAGOSTINI_MAX_ITERATIONS ) break;
// If the figure of merit for convergence has gone below threshold, then
// stop the loop
if ( fm < fig_merit_target_ ) break;
// Compute the column vector of expected reco-space signal event counts
// given the (fixed) smearceptance matrix and the current estimate of the
// signal events in true-space
TMatrixD reco_expected( smearcept, TMatrixD::EMatrixCreatorsOp2::kMult,
*true_signal );
// Update the unfolding matrix for the current iteration
for ( int t = 0; t < num_true_signal_bins; ++t ) {
for ( int r = 0; r < num_ordinary_reco_bins; ++r ) {
double smearcept_element = smearcept( r, t );
double true_signal_element = true_signal->operator()( t, 0 );
double efficiency = eff_vec( t, 0 );
double reco_expected_element = reco_expected( r, 0 );
double unfold_mat_element = ( smearcept_element * true_signal_element )
/ ( efficiency * reco_expected_element );
unfold_mat->operator()( t, r ) = unfold_mat_element;
}
}
// Temporarily keep the true signal event counts from the previous
// iteration (needed to update the error propagation matrices below)
TMatrixD old_true_signal( *true_signal );
// Update the estimated true signal event counts by applying the current
// unfolding matrix to the background-subtracted data
delete true_signal;
true_signal = new TMatrixD( *unfold_mat,
TMatrixD::EMatrixCreatorsOp2::kMult, data_signal );
// Update the matrix used to propagate the uncertainties on the measured
// data points through the unfolding procedure. We used to do this
// element-by-element. The faster (and equivalent) procedure below was
// taken from the RooUnfold implementation (http://roounfold.web.cern.ch/)
// Initialize some vectors of factors that will be applied to rows and
// columns of matrices below
TVectorD minus_eff_over_old_iter( num_true_signal_bins );
TVectorD new_iter_over_old_iter( num_true_signal_bins );
for ( int t = 0; t < num_true_signal_bins; ++t ) {
double ots = old_true_signal( t, 0 );
if ( ots <= 0. ) {
minus_eff_over_old_iter( t ) = 0.;
new_iter_over_old_iter( t ) = 0.;
continue;
}
double ts = true_signal->operator()( t, 0 );
double eff = eff_vec( t, 0 );
minus_eff_over_old_iter( t ) = -eff / ots;
new_iter_over_old_iter( t ) = ts / ots;
}
// Duplicate the existing error propagation matrix so that we can
// update the existing version in place
TMatrixD temp_mat1( *err_prop_mat );
// If the second argument passed to TMatrixD::NormByColumn() or
// TMatrixD::NormByRow() is "D", then the matrix elements will be
// divided by the input TVectorD elements rather than multiplied
// by them. Following RooUnfold, I use "M" here to stress that
// multiplication is the intended behavior.
temp_mat1.NormByColumn( new_iter_over_old_iter, "M" );
TMatrixD temp_mat2(
TMatrixD::EMatrixCreatorsOp1::kTransposed, *unfold_mat );
// The measured data points already exist as a one-column TMatrixD
// rather than a TVectorD. To avoid an unnecessary copy, we scale
// the columns of temp_mat2 here manually rather than calling
// TMatrixD::NormByColumn()
for ( int r2 = 0; r2 < num_ordinary_reco_bins; ++r2 ) {
for ( int t2 = 0; t2 < num_true_signal_bins; ++t2 ) {
temp_mat2( r2, t2 ) = temp_mat2( r2, t2 ) * data_signal( r2, 0 );
}
}
temp_mat2.NormByRow( minus_eff_over_old_iter, "M" );
TMatrixD temp_mat3( temp_mat2,
TMatrixD::EMatrixCreatorsOp2::kMult, *err_prop_mat );
// We're ready. Update the measurement error propagation matrix.
err_prop_mat->Mult( *unfold_mat, temp_mat3 );
err_prop_mat->operator+=( *unfold_mat );
err_prop_mat->operator+=( temp_mat1 );
// Also update the 3D tensor needed to propagate the MC statistical
// uncertainties on the smearceptance matrix through the unfolding
// procedure. As was done for the other error propagation matrix above,
// first make a full copy of the tensor, then update the original.
auto old_err_prop_mc_vec = std::vector< TMatrixD >();
// Only update the 3D tensor if it is needed (i.e., because we're including
// the MC uncertainties on the final result)
if ( include_respmat_covariance_ ) {
for ( int s = 0; s < num_true_signal_bins; ++s ) {
old_err_prop_mc_vec.emplace_back( err_prop_mc_vec.at(s) );
}
// The outer loop over true signal bins runs over the bins used to report
// the unfolded measurement. The inner loops run over the elements of the
// smearceptance matrix.
for ( int t = 0; t < num_true_signal_bins; ++t ) {
for ( int r = 0; r < num_ordinary_reco_bins; ++r ) {
for ( int t2 = 0; t2 < num_true_signal_bins; ++t2 ) {
// Handle the Kronecker delta in the first term using an if
// statement
double temp_el = 0.;
if ( t == t2 ) {
double aux1 = ( data_signal(r, 0) * old_true_signal(t, 0)
/ reco_expected(r, 0) ) - true_signal->operator()( t, 0 );
temp_el += aux1 / eff_vec( t, 0 );
}
temp_el -= data_signal( r, 0 ) * old_true_signal( t2, 0 )
* unfold_mat->operator()( t, r ) / reco_expected( r, 0 );
temp_el += true_signal->operator()( t, 0 )
* old_err_prop_mc_vec.at( t )( r, t2 ) / old_true_signal( t, 0 );
// These for loops take care of the sum in the last term
double last_term = 0.;
for ( int t3 = 0; t3 < num_true_signal_bins; ++t3 ) {
for ( int r2 = 0; r2 < num_ordinary_reco_bins; ++r2 ) {
last_term += data_signal( r2, 0 ) * eff_vec( t3, 0 )
* unfold_mat->operator()( t3, r2 )
* unfold_mat->operator()( t, r2 )
* old_err_prop_mc_vec.at( t3 )( r, t2 )
/ old_true_signal( t3, 0 );
}
}
// We account for the overall minus sign on the last term by
// subtracting on the line below
temp_el -= last_term;
// We're ready. Update the MC error propagation matrix.
err_prop_mc_vec.at( t )( r, t2 ) = temp_el;
}
}
}
}
// If needed, update the figure of merit for this iteration
if ( conv_criter_ == ConvergenceCriterion::FigureOfMerit ) {
fm = this->calc_figure_of_merit( old_true_signal, *true_signal );
}
// Proceed to the next iteration
++it;
} // D'Agostini method iterations
std::cout << "D'Agostini unfolding stopped after " << it << " iterations.\n";
// Now that we're finished with the iterations, we can also transform the
// data covariance matrix to the unfolded true space using the error
// propagation matrix.
TMatrixD err_prop_mat_tr( TMatrixD::kTransposed, *err_prop_mat );
TMatrixD temp_mat( data_covmat, TMatrixD::EMatrixCreatorsOp2::kMult,
err_prop_mat_tr );
auto* true_signal_covmat = new TMatrixD( *err_prop_mat,
TMatrixD::EMatrixCreatorsOp2::kMult, temp_mat );
if ( include_respmat_covariance_ ) {
// Here we also calculate a contribution to the covariance matrix on the
// unfolded result that comes from the MC statistical uncertainty on the
// smearceptance matrix elements
TMatrixD mc_covmat( num_true_signal_bins, num_true_signal_bins );
for ( int t = 0; t < num_true_signal_bins; ++t ) {
const TMatrixD& prop_mat1 = err_prop_mc_vec.at( t );
for ( int t2 = 0; t2 < num_true_signal_bins; ++t2 ) {
const TMatrixD& prop_mat2 = err_prop_mc_vec.at( t2 );
double temp_elem = 0.;
for ( int t3 = 0; t3 < num_true_signal_bins; ++t3 ) {
double prior_sig = prior_true_signal( t3, 0 );
if ( prior_sig <= 0. ) continue;
for ( int r = 0; r < num_ordinary_reco_bins; ++r ) {
double smear1 = smearcept( r, t3 );
for ( int r2 = 0; r2 < num_ordinary_reco_bins; ++r2 ) {
double smear2 = smearcept( r2, t3 );
// Calculate the covariance matrix element for the smearceptance
// matrix elements. Assume independent multinomial distributions
// for each true bin (as D'Agostini does)
double covariance = 0.;
// TODO: Account for effective statistics when using
// weighted events
// TODO: Account for situations in which the prior
// differs from the true event counts used to compute
// the smearceptance matrix elements
if ( r == r2 ) {
covariance = smear1 * ( 1. - smear1 ) / prior_sig;
}
else {
covariance = -1. * smear1 * smear2 / prior_sig;
}
temp_elem += prop_mat1( r, t3 ) * covariance
* prop_mat2( r2, t3 );
}
}
}
mc_covmat( t, t2 ) = temp_elem;
}
}
// Add the MC statistical uncertainty to the other uncertainties to obtain
// the final covariance matrix on the unfolded measurement
true_signal_covmat->operator+=( mc_covmat );
}
// Compute the additional smearing matrix
auto* add_smear = new TMatrixD( *unfold_mat,
TMatrixD::EMatrixCreatorsOp2::kMult, smearcept );
// Copy the response matrix into the results
auto* resp_mat = dynamic_cast< TMatrixD* >( smearcept.Clone() );
UnfoldedMeasurement result( true_signal, true_signal_covmat,
unfold_mat.release(), err_prop_mat, add_smear, resp_mat );
return result;
}
double DAgostiniUnfolder::calc_figure_of_merit(
const TMatrixD& old_true_signal, const TMatrixD& new_true_signal ) const
{
int num_rows = old_true_signal.GetNrows();
if ( num_rows != new_true_signal.GetNrows() ) {
throw std::runtime_error( "Row mismatch in DAgostiniUnfolder::"
"calc_figure_of_merit" );
}
double fm = 0.;
for ( int r = 0; r < num_rows; ++r ) {
double old_val = old_true_signal( r, 0 );
double new_val = new_true_signal( r, 0 );
fm += std::abs( new_val - old_val ) / new_val;
}
fm /= num_rows;
return fm;
}