#include "TMVA/Timer.h"
#include "QFramework/TQGridScanner.h"
#include "QFramework/TQGridScanPoint.h"
#include "QFramework/TQGridScanObservable.h"
#include "QFramework/TQGridScanStyle.h"
#include "TLine.h"
#include <algorithm>
#include <iostream>
#include "TCanvas.h"
#include "QFramework/TQSignificanceEvaluator.h"
#include "QFramework/TQHistogramUtils.h"
#include "QFramework/TQTHnBaseUtils.h"
#include "QFramework/TQStringUtils.h"
#include "QFramework/TQFolder.h"
#include "QFramework/TQUtils.h"
#include "QFramework/TQIterator.h"
#include "QFramework/TQPlotter.h"
#include "QFramework/TQPathManager.h"
#include "TLatex.h"
#include "TList.h"
#include "TPaletteAxis.h"
#include "TLegend.h"
#include "TSystem.h"
#include "TStyle.h"
#include "TROOT.h"
#include "THnBase.h"
#include "QFramework/TQLibrary.h"
#include <math.h>
#include <limits>
#define _DEBUG_
#define PRECISION 0.000000001 // for double/float comparison
using Bound = TQGridScanBound;
using Bounds = TQGridScanBounds;
using BoundsType = TQGridScanBound::Type;
using BoundRange = TQGridScanBound::Range;
using BinExtrema = TQGridScanBound::BinExtrema;
using ObsVec = TQGridScanner::ObsVec;
using CutBounds = TQGridScanner::CutBounds;
using SplitCutVals = TQGridScanner::SplitCutVals;
using SplitCutBins = TQGridScanner::SplitCutBins;
using BoundDirection = TQGridScanner::BoundDirection;
using std::cout;
using std::endl;
using std::vector;
using std::pair;
using std::tie;
using std::unique_ptr;
ClassImp(TQGridScanner)
TQGridScanner::TQGridScanner(const TString& resultsName, TQSignificanceEvaluator* evaluator):
TNamed("TQGridScanner", "TQGridScanner"),
m_evaluator(evaluator),
m_results(resultsName),
m_runTimer("Scanning observables"),
m_nDimHistName(resultsName)
{
if (not evaluator) throw std::invalid_argument("Missing significance evaluator in TQGridScanner constructor");
init();
}
TQGridScanner::TQGridScanner(const TString& resultsName, TQSignificanceEvaluator* evaluator, TList* obsToScan):
TNamed("TQGridScanner", "TQGridScanner"),
m_evaluator(evaluator),
m_results(resultsName),
m_runTimer("Scanning observables"),
m_nDimHistName(resultsName)
{
if (not evaluator) throw std::invalid_argument("Missing significance evaluator in TQGridScanner constructor");
for(int i=0; i<=obsToScan->LastIndex(); i++) {
m_obsNamesToScan.push_back(obsToScan->At(i)->GetName());
}
init();
}
TQGridScanner::~TQGridScanner() {
for (auto obs : m_observables) {
delete obs.second;
obs.second = NULL;
}
}
void TQGridScanner::init() {
if (not m_evaluator->initialize(this)) {
BREAKclass("Significance evaluator failed to initialize in TQGridScanner constructor");
}
INFOclass("Initializing observables");
std::vector<TAxis*> axes;
std::vector<TString> obs_names;
if (m_evaluator->isSimple()) {
for (int i=0; i<m_evaluator->signalHists().at(0)->GetNdimensions(); ++i) {
axes.clear();
obs_names.clear();
for(size_t ireg=0; ireg<m_evaluator->regions().size(); ireg++) {
axes.push_back(m_evaluator->signalHists().at(ireg)->GetAxis(i));
axes.push_back(m_evaluator->bkgHists().at(ireg)->GetAxis(i));
obs_names.push_back(axes.back()->GetName());
obs_names.push_back(axes.back()->GetName());
}
m_observables.emplace(obs_names.back(), new TQGridScanObservable(obs_names, axes));
}
}
else {
for (int idim=0; idim<m_evaluator->multiDimHists().at(0)->GetNdimensions(); ++idim) {
axes.clear();
obs_names.clear();
for(size_t ichan=0; ichan<m_evaluator->multiDimHists().size(); ichan++) {
axes.push_back(m_evaluator->multiDimHists().at(ichan)->GetAxis(idim));
obs_names.push_back(axes.back()->GetName());
}
m_observables.emplace(obs_names.back(), new TQGridScanObservable(obs_names, axes));
}
}
}
TQGridScanObservable* TQGridScanner::getObs(const TString& obsName) {
if ( m_observables.find(obsName) == m_observables.end() ) {
BREAKclass("Observable with name %s not found in input histogram! Please check your input configuration and try again.", obsName.Data());
return nullptr;
} else {
return m_observables.at(obsName);
}
}
bool TQGridScanner::addSignalHist(THnBase* hist) {
m_signalHists.push_back(hist);
if (m_signalHists.size() > 1) {
return TQTHnBaseUtils::checkConsistency(hist, m_signalHists[0], true);
}
return true;
}
bool TQGridScanner::addBkgHist(THnBase* hist) {
m_bkgHists.push_back(hist);
if (m_bkgHists.size() > 1) {
return TQTHnBaseUtils::checkConsistency(hist, m_bkgHists[0], true);
}
return true;
}
void TQGridScanner::addFOMDefinitions(const std::vector <TString>& definitions) {
this->results()->FOMDefinitions = definitions;
}
void TQGridScanner::extractInputHistogramProjections() {
DEBUGclass("This function will save the input distributions in the TQGridScanResults for the first defined region only!");
gROOT->SetBatch(true);
int ireg = 0;
for (auto reg : m_evaluator->regions()) {
TQGridScanResults::InputHists newInputHists;
newInputHists.regionName = reg;
m_results.inputHistsSig()->push_back(newInputHists);
m_results.inputHistsBkg()->push_back(newInputHists);
for (int i=0; i<m_evaluator->signalHists().at(0)->GetNdimensions(); i++) {
auto hist_sig = unique_ptr<TH1F>();
auto hist_bkg = unique_ptr<TH1F>();
hist_sig = unique_ptr<TH1F>(reinterpret_cast<TH1F*>(m_evaluator->signalHists().at(ireg)->Projection(i)));
hist_sig->SetName(m_evaluator->signalHists().at(ireg)->GetAxis(i)->GetName());
m_results.inputHistsSig()->back().hists.push_back(*hist_sig.get());
hist_sig->SetName(TString::Format("%s_%d", hist_sig->GetName(), i));
hist_bkg = unique_ptr<TH1F>(reinterpret_cast<TH1F*>(m_evaluator->bkgHists().at(ireg)->Projection(i)));
hist_bkg->SetName(m_evaluator->bkgHists().at(ireg)->GetAxis(i)->GetName());
m_results.inputHistsBkg()->back().hists.push_back(*hist_bkg.get());
hist_bkg->SetName(TString::Format("%s_%d", hist_bkg->GetName(), i));
}
ireg++;
}
gROOT->SetBatch(false);
}
void TQGridScanner::dumpInputHistogramProjections(TQTaggable& tags) {
DEBUGclass("This function will plot the input distributions for the first defined region only!");
gROOT->SetBatch(true);
TStyle *style = TQHistogramUtils::ATLASstyle();
style->SetName("ATLAS");
gROOT->SetStyle("ATLAS");
gROOT->ForceStyle();
for (int i=0; i<m_signalHists[0]->GetNdimensions(); i++) {
auto hist_sig = unique_ptr<TH1F>();
auto hist_bkg = unique_ptr<TH1F>();
auto c = TQGridScanStyle::defaultCanvas();
c->cd();
hist_sig = unique_ptr<TH1F>(reinterpret_cast<TH1F*>(m_signalHists[0]->Projection(i)));
hist_bkg = unique_ptr<TH1F>(reinterpret_cast<TH1F*>(m_bkgHists[0]->Projection(i)));
TQGridScanStyle::setStyle(c.get(), hist_sig.get(), tags);
TQGridScanStyle::setStyleInputHists(hist_sig.get(), hist_bkg.get(), tags);
unique_ptr<TLegend> leg (new TLegend(0.65, 0.75, 0.9, 0.9));
leg->AddEntry(hist_sig.get(), "Signal", "F");
leg->AddEntry(hist_bkg.get(), "Total Bkg", "F");
hist_bkg->GetYaxis()->SetTitle("Events");
hist_bkg->Draw("hist");
hist_sig->Draw("histsame");
leg->Draw("same");
TString baseFilePath = tags.getTagStringDefault("plotDirectory", "plots/");
baseFilePath = TQFolder::concatPaths(baseFilePath, "input-observables");
TString obsName = TQFolder::makeValidIdentifier(m_signalHists[0]->GetAxis(i)->GetName());
TString extension = tags.getTagStringDefault("plotFormat","pdf");
TQUtils::ensureDirectory(TQPathManager::getPathManager()->getTargetPath(baseFilePath));
TString outputname = TQFolder::concatPaths(baseFilePath, "inputDistribution_"+obsName);
c->SaveAs( TQPathManager::getPathManager()->getTargetPath(outputname+"."+extension).c_str() );
c->Clear();
hist_sig->Scale(1./hist_sig->GetSumOfWeights());
hist_bkg->Scale(1./hist_bkg->GetSumOfWeights());
double ymax1 = TQHistogramUtils::getHistogramMaximum(2, hist_sig.get(), hist_bkg.get());
hist_sig->GetYaxis()->SetRangeUser(0, ymax1*3/2.);
hist_sig->GetYaxis()->SetTitle("Normalized Events");
hist_sig->Draw("hist");
hist_bkg->Draw("histsame");
leg->Draw("same");
c->SaveAs( TQPathManager::getPathManager()->getTargetPath(outputname+"_norm."+extension).c_str() );
}
gROOT->SetBatch(false);
}
int TQGridScanner::prepare() {
for (auto& obsPair : m_observables) {
auto& obs = obsPair.second;
const auto& bounds = *obs->bounds();
if (bounds) {
const auto boundsType = bounds->which();
if (boundsType == BoundsType::normal) {
const auto& normalBounds = *obs->normalBounds();
const auto& lowerBounds = normalBounds.lower();
const auto& upperBounds = normalBounds.upper();
if (lowerBounds.isFixed() and upperBounds.isFixed()) {
int lowerBin = lowerBounds.range()[0];
int upperBin = upperBounds.range()[0];
obs->setAxesRange(lowerBin, upperBin);
float lowerVal = obs->axis()->GetBinLowEdge(lowerBin);
float upperVal = obs->axis()->GetBinUpEdge(upperBin);
TString info = "Setting a fixed cut on variable '%f";
if (lowerBin == TQGridScanBound::BinExtrema::min) info += " (underflow bin)";
info += " < %s < %f";
if (upperBin == TQGridScanBound::BinExtrema::max) info += " (overflow bin)";
info += "'";
INFOclass(TString::Format(info, lowerVal, obs->name().Data(), upperVal));
} else {
m_nPoints *= lowerBounds.nPoints();
m_nPoints *= upperBounds.nPoints();
m_normalObs.push_back(obs);
m_normalBounds.push_back({0, 0});
m_results.normalObs()->push_back(new TQGridScanNormalObservable(obs->axes(),normalBounds));
}
} else {
const auto splitBounds = *obs->splitBounds();
if (splitBounds.isFixed()) {
m_splitObs.push_back(obs);
m_splitBins.push_back(-1);
} else {
m_nPoints *= splitBounds.nPoints();
m_splitScanObs.push_back(obs);
m_splitScanBins.push_back(-1);
m_results.splitObs()->push_back(new TQGridScanSplitObservable(obs->axes(),splitBounds));
}
}
}
}
return true;
}
void TQGridScanner::run(std::vector<int> chunk) {
DEBUGclass("Running gridscanner for points in range [%d, %d]", chunk[0], chunk[1]);
m_runTimer.Init(m_nPoints);
m_heartbeat = TQUtils::getCurrentTime();
m_nPointsProcessed = 0;
if (m_nPoints > 1) {
INFOclass("Scanning %d points", m_nPoints);
scan(m_normalBounds.begin(), m_normalObs.begin(), chunk);
} else if (m_nPoints == 1) {
WARNclass("No cuts are specified to be scanned in your configuration! However, if you use the cl evaluator you are lucky; the gridscanner will dump the statistics model corresponding to your specified cuts. Maybe you even intended this behavior? Then: well done, Sir!");
TString outFileName = this->getTagStringDefault("cl.workspaceOutFileName", "workspace.root");
m_evaluator->exportWorkspace(outFileName);
}
}
void TQGridScanner::scan(CutBounds::iterator obsValsIter, ObsVec::iterator obsIter, std::vector<int> chunk) {
auto obs = *obsIter;
const auto& bounds = *obs->normalBounds();
const auto lowerBounds = bounds.lower();
const auto lowerBoundsRange = lowerBounds.range();
const auto upperBounds = bounds.upper();
const auto upperBoundsRange = upperBounds.range();
for (auto lowerBin = lowerBoundsRange[0]; lowerBin <= lowerBoundsRange[1]; lowerBin += lowerBoundsRange[2]) {
for (auto upperBin = upperBoundsRange[0]; upperBin <= upperBoundsRange[1]; upperBin += upperBoundsRange[2]) {
if (upperBin == 0) {
obs->setAxesRange(-1, 0);
} else {
obs->setAxesRange(lowerBin, upperBin);
}
obsValsIter->first = lowerBin;
obsValsIter->second = upperBin;
updateHeartbeat();
auto isFinalLevel = (obsIter + 1 == m_normalObs.end());
bool isPointInChunk = false;
if (isFinalLevel) {
if (m_splitScanObs.empty()) {
isPointInChunk = evaluatePoint(chunk);
updateProgress();
}
if (not m_splitScanObs.empty()) {
scanSplit(m_splitScanBins.begin(), m_splitScanObs.begin(), chunk);
} else if (not m_splitObs.empty()) {
if (isPointInChunk) {
auto signif2 = splitSignif2(m_splitBins.begin(), m_splitObs.begin(), false);
auto signif = sqrt(signif2);
m_evaluator->info += TString::Format("FOM=%f", signif);
m_results.points()->emplace_back(m_normalBounds, signif, m_evaluator->info);
}
} else {
if (isPointInChunk) {
auto foms = m_evaluator->evaluateMultiple();
if (foms.empty()) {BREAKclass("No valid figure of merits could be retrieved from evaluator. Please check your configuration and try again!");}
m_results.points()->emplace_back(m_normalBounds, foms, m_evaluator->info);
}
}
} else {
scan(obsValsIter + 1, obsIter + 1, chunk);
}
}
}
}
double TQGridScanner::splitSignif2(SplitCutBins::iterator obsBinsIter, ObsVec::iterator obsIter, bool isScanSplit) {
double signifQuadrature = 0;
auto obs = *obsIter;
const auto bin = *obsBinsIter;
auto isFinalLevel = (obsIter + 1 == (isScanSplit ? m_splitScanObs.end() : m_splitObs.end()));
obs->setAxesRange(BinExtrema::min, bin - 1);
signifQuadrature += splitSignifHandleRecursion(isFinalLevel, isScanSplit, obsBinsIter, obsIter);
obs->setAxesRange(bin, BinExtrema::max);
signifQuadrature += splitSignifHandleRecursion(isFinalLevel, isScanSplit, obsBinsIter, obsIter);
return signifQuadrature;
}
double TQGridScanner::splitSignifHandleRecursion(
bool isFinalLevel,
bool isScanSplit,
SplitCutBins::iterator obsBinsIter,
ObsVec::iterator obsIter
) {
double signifQuadrature = 0;
if (isFinalLevel) {
if (isScanSplit and not m_splitObs.empty()) {
auto signif2 = splitSignif2(m_splitBins.begin(), m_splitObs.begin(), false);
signifQuadrature += signif2;
} else {
double signif = 0;
if (m_evaluator->m_multipleFOMsCompatible) {
auto foms = m_evaluator->evaluateMultiple();
if (foms.empty()) {BREAKclass("No valid figure of merits could be retrieved from evaluator. Please check your configuration and try again!");}
signif = foms[0];
} else {
signif = m_evaluator->evaluate();
}
signifQuadrature += signif*signif;
}
} else {
auto signif2 = splitSignif2(obsBinsIter + 1, obsIter + 1, isScanSplit);
signifQuadrature += signif2;
}
return signifQuadrature;
}
void TQGridScanner::scanSplit(SplitCutBins::iterator obsBinsIter, ObsVec::iterator obsIter, std::vector<int> chunk) {
auto obs = *obsIter;
const auto bound = *(obs->splitBounds());
const auto boundRange = bound.range();
for (auto bin = boundRange[0]; bin <= boundRange[1]; bin += boundRange[2]) {
*obsBinsIter = bin;
updateHeartbeat();
auto isFinalLevel = ((obsIter + 1) == m_splitScanObs.end());
bool isPointInChunk = true;
if (isFinalLevel) {
isPointInChunk = evaluatePoint(chunk);
updateProgress();
if (isPointInChunk) {
auto signif2 = splitSignif2(m_splitScanBins.begin(), m_splitScanObs.begin(), true);
auto signif = sqrt(signif2);
m_evaluator->info += TString::Format("FOM=%f", signif);
std::vector<double> foms = {signif};
m_results.points()->emplace_back(m_normalBounds, m_splitScanBins, foms, m_evaluator->info);
}
} else {
scanSplit(obsBinsIter + 1, obsIter + 1, chunk);
}
}
}
bool TQGridScanner::updateHeartbeat() {
if(m_heartBeatInterval == 0)
return false;
if(m_heartBeatCommand.IsNull())
return false;
unsigned long t = TQUtils::getCurrentTime();
if(m_heartbeat + m_heartBeatInterval > t)
return false;
m_heartbeat = t;
gSystem->Exec(m_heartBeatCommand.Data());
return true;
}
bool TQGridScanner::evaluatePoint(std::vector<int> chunk) {
if (chunk.empty()) {
DEBUGclass("No chunk specified, evaluting point!");
return true;
} else
if (chunk[0] >= m_nPointsProcessed && m_nPointsProcessed < chunk[1]) {
DEBUGclass("Current point within specified chunk, evaluate point!");
return true;
} else {
DEBUGclass("Current point not within specified chunk, don't evaluate point!");
return false;
}
}
void TQGridScanner::updateProgress() {
++m_nPointsProcessed;
m_runTimer.DrawProgressBar(m_nPointsProcessed);
if ( ( (m_nPoints < 10 )|| (m_nPointsProcessed % (m_nPoints / 10) == 0) ) && (m_nPoints > 0)) {
INFOclass("%d percent points processed!", (int)((float)(m_nPointsProcessed)/m_nPoints*100+1));
}
}