#include <iostream>
#include <fstream>
#include <sstream>
#include <cstdlib>
#include <vector>
#include <limits>
#include <iterator>
#include "SFramework/TSModelBuilder.h"
#include "RooStats/HistFactory/HistoToWorkspaceFactoryFast.h"
#include "QFramework/TQSampleFolder.h"
#include "QFramework/TQHistogramUtils.h"
#include "QFramework/TQSampleDataReader.h"
#include "QFramework/TQStringUtils.h"
#include "QFramework/TQTaggable.h"
#include "QFramework/TQIterator.h"
#include "QFramework/TQUtils.h"
#include "QFramework/TQPathManager.h"
#include "QFramework/TQLibrary.h"
#include "TSystem.h"
#include "TMath.h"
#include "RooWorkspace.h"
#include "RooMsgService.h"
#include "TStopwatch.h"
ClassImp(TSModelBuilder)
TSModelBuilder::TSModelBuilder() {
}
TSModelBuilder::~TSModelBuilder() {
}
void TSModelBuilder::info(TString message) {
std::cout << "SFramework/TSModelBuilder: " << message.Data() << std::endl;
}
void TSModelBuilder::error(TString message) {
errorCount++;
if(errorCount < maxErrorCount){
info(TQStringUtils::makeBoldRed(TString("ERROR: ") + message));
} else if(errorCount == maxErrorCount){
info(TQStringUtils::makeBoldRed(TString("ERROR: ") + message));
info(TQStringUtils::makeBoldRed("ERROR: too many error messages, suppressing further output"));
}
}
void TSModelBuilder::warn(TString message) {
warnCount++;
if(warnCount < maxErrorCount){
info(TQStringUtils::makeBoldYellow(TString("WARNING: ") + message));
} else if(warnCount == maxErrorCount){
info(TQStringUtils::makeBoldYellow(TString("WARNING: ") + message));
info(TQStringUtils::makeBoldRed("WARNING: too many warning messages, suppressing further output"));
}
}
Bool_t TSModelBuilder::createChannelDefinitions(TQFolder * config, TQFolder * model) {
Bool_t success = true;
TQIterator itr(config->getListOfFolders("Channels/?"), "!.*", true);
if(!itr.hasNext()){
success = false;
error("createChannelDefinitions(...): No channel definitions given!");
} else {
while (itr.hasNext()) {
TString name = itr.readNext()->GetName();
if (!createChannelDefinition(config, model, name)) {
success = false;
error(TString::Format("createChannelDefinitions(...): "
"Failed to create definition for channel '%s'", name.Data()));
}
}
}
return success;
}
Bool_t TSModelBuilder::createChannelDefinition(TQFolder * config, TQFolder * model, TString name) {
TQFolder * channelConfig = config->getFolder(TQFolder::concatPaths("Channels", name));
if (!channelConfig) {
error(TString::Format("createChannelDefinition(...): "
"Configuration for channel '%s' missing", name.Data()));
return false;
}
TQFolder * channelDef = model->getFolder(TString::Format("Channel.%s+", name.Data()));
channelDef->setTagString("Channel",name);
channelDef->importTags(channelConfig);
TQIterator itrSamples(config->getListOfFolders("Samples/?"), "!.*", true);
while (itrSamples.hasNext()) {
TQFolder * sampleConfig = (TQFolder*)itrSamples.readNext();
TString nameSample = sampleConfig->GetName();
TString sampleType = sampleConfig->getTagStringDefault("Type","B");
TString exclude;
if(sampleConfig->getTagString("Exclude",exclude) && TQStringUtils::matchesFilter(name,exclude,",")) continue;
TString include;
if(sampleConfig->getTagString("Include",include) && !TQStringUtils::matchesFilter(name,include,",")) continue;
TQFolder * sampleDef = channelDef->getFolder(TString::Format("Sample.%s+", nameSample.Data()));
sampleDef->setTagString("Sample",nameSample);
sampleDef->setTagString("Type", sampleType);
sampleDef->importTags(sampleConfig);
TQIterator itrElements(sampleConfig->getListOfFolders("?"), "!.*", true);
while (itrElements.hasNext()) {
TQFolder * element = (TQFolder*)itrElements.readNext();
TString nameElement = element->GetName();
TString excludeElement;
if(element->getTagString("Exclude",excludeElement) && TQStringUtils::matchesFilter(name,excludeElement,",")) continue;
TString includeElement;
if(element->getTagString("Include",includeElement) && !TQStringUtils::matchesFilter(name,includeElement,",")) continue;
sampleDef->addObject(element->copy());
}
}
return true;
}
TQFolder * TSModelBuilder::createDefinition(TQFolder * config) {
TString modelName = config->GetName();
TQFolder * model = TQFolder::newFolder("Model");
TQFolder * histos = model->getFolder(".Histograms+");
this->setRepository(histos);
TQFolder * parConfig = config->getFolder("Parameters");
if (parConfig) {
parConfig->getTagString("Name", modelName);
config->getTagString("Name", modelName);
parConfig->setTagString("Name", modelName);
parConfig->exportTags(model);
modelName = parConfig->replaceInText(modelName);
}
modelName = TQFolder::makeValidIdentifier(modelName);
model->SetName(modelName);
info(TString::Format("createDefinition(): Creating definition for model '%s' ...", modelName.Data()));
if (!createChannelDefinitions(config, model)) {
error("createDefinition(): Failed to create model definition");
return NULL;
}
TQFolder * paramSettings = config->getFolder("ParamSettings");
if (paramSettings) {
model->addObject(paramSettings->copy());
}
TQFolderIterator edits(config->getListOfFolders("Edits/?"),true);
while(edits.hasNext()){
TQFolder* edit = edits.readNext();
if(!edit) continue;
this->applyEdits(edit,model);
}
return model;
}
void TSModelBuilder::applyEdits(TQFolder* edit, TQFolder* model){
TString file;
if(edit->getTagString("file",file)){
TString msg;
INFOfunc(TString::Format("applying edits from file '%s'",file.Data()));
if(!model->importFromTextFile(TQPathManager::getPathManager()->findConfigPath(file).c_str(),msg)){
ERRORfunc(msg);
}
}
std::vector<TString> commands = edit->getTagVString("commands");
if (commands.empty() && edit->hasTagString("command")) commands.push_back(edit->getTagStringDefault("command",""));
for (auto com:commands) {
TString msg;
com.ReplaceAll("\n","");
com.ReplaceAll(" ","");
INFOfunc(TString::Format("applying edit command '%s'",com.Data()));
if(!model->importFromText(com,msg)){
ERRORfunc(msg);
}
}
}
Bool_t TSModelBuilder::finalizeModel(TQFolder * model, TQFolder * config) {
if (!model || !config) {
error("finalizeModel(...): Model and/or configuration missing. Stopping ...");
return false;
}
if(config->getTagBoolDefault("collectHistograms",true)){
if (!collectAllHistograms(config, model)) {
error("finalizeModel(...): Failed to collect histograms. Stopping ...");
return false;
}
} else {
info("finalizeModel(...): Skipping histogram collection");
}
if(config->getTagBoolDefault("includeSystematics",true)){
TQFolder* parameters = config->getFolder("Parameters");
TQFolder* systematics = config->getFolder("Systematics");
TList* allSamples = model->getListOfFolders("Channel.*/Sample.*");
std::shared_ptr<TQFolder> empty_parameters = std::make_shared<TQFolder>();
if (parameters == nullptr) {
parameters = empty_parameters.get();
}
bool ok = true;
if (ok && !includeAllSystematics(systematics, allSamples)) {
error("finalizeModel(...): Failed to include systematics. Stopping ...");
ok = false;
}
if(config->getTagBoolDefault("processSystematics",true)){
if (ok && !processAllSystematics(parameters, allSamples)) {
error("finalizeModel(...): Failed to process systematics. Stopping ...");
ok = false;
}
}
delete allSamples;
if(!ok) return false;
}
model->sortByNameRecursive();
return true;
}
TQFolder * TSModelBuilder::buildModel(TQFolder * config) {
TQFolder * model = createDefinition(config);
if (!model) {
error("buildModel(...): Failed to prepare model. Stopping ...");
return NULL;
}
if (!finalizeModel(model, config)) {
error("buildModel(...): Failed to finalize model. Stopping ...");
}
return model;
}
void TSModelBuilder::purgeVariation(TQFolder* model, const TString& name, Bool_t ){
warn(TString::Format("purgeVariation('%s') : removing variation... ",name.Data()));
model->removeObject(TQFolder::concatPaths("Variations",name));
TQIterator itr(model->getListOfFolders("Systematics/?"),true);
while(itr.hasNext()){
TQFolder* f = dynamic_cast<TQFolder*>(itr.readNext());
if(!f) continue;
if((f->getTagStringDefault("Up","") == name) || (f->getTagStringDefault("Down","") == name)){
warn(TString::Format("purgeVariation('%s') : removing systematic '%s'",name.Data(),f->GetName()));
model->deleteObject(TQFolder::concatPaths("Systematics",f->getName()));
}
}
}
Bool_t TSModelBuilder::collectAllHistograms(TQFolder * config, TQFolder * model) {
info("collectAllHistograms(): Start collecting histograms...");
TQFolder* variations = config->getFolder("Variations+");
bool ok = collectHistograms(config, model, variations->getFolder("Nominal+"));
if(!ok) return false;
TQFolderIterator itr(variations->getListOfFolders("?"), "!.*", true);
while (itr.hasNext()) {
TQFolder* variation = itr.readNext();
TString name = variation->GetName();
if (name.CompareTo("Nominal") != 0) {
bool success = collectHistograms(config, model, variation);
if(!success && config->getTagBoolDefault("skipMissingVariations",false)){
this->purgeVariation(config,name,true);
}
}
}
return true;
}
Bool_t TSModelBuilder::parseConversion(TString def, Bool_t &alongX,
Bool_t &includeUnderflowX, Bool_t &includeOverflowX,
Bool_t &includeUnderflowY, Bool_t &includeOverflowY) {
Bool_t uv1 = TQStringUtils::removeLeading(def, "^", 1);
TString axis1;
TQStringUtils::readToken(def, axis1, "XYxy");
Bool_t ov1 = TQStringUtils::removeLeading(def, "^", 1);
if (!TQStringUtils::removeLeading(def, ":", 1)) {
return false;
}
Bool_t uv2 = TQStringUtils::removeLeading(def, "^", 1);
TString axis2;
TQStringUtils::readToken(def, axis2, "XYxy");
Bool_t ov2 = TQStringUtils::removeLeading(def, "^", 1);
if (!def.IsNull()) {
return false;
}
if (axis1.CompareTo("x", TString::kIgnoreCase) == 0
&& axis2.CompareTo("y", TString::kIgnoreCase) == 0) {
alongX = true;
includeUnderflowX = uv1;
includeOverflowX = ov1;
includeUnderflowY = uv2;
includeOverflowY = ov2;
return true;
} else if (axis1.CompareTo("y", TString::kIgnoreCase) == 0
&& axis2.CompareTo("x", TString::kIgnoreCase) == 0) {
alongX = false;
includeUnderflowX = uv2;
includeOverflowX = ov2;
includeUnderflowY = uv1;
includeOverflowY = ov1;
return true;
} else {
return false;
}
}
std::vector<int> TSModelBuilder::getRemapping(TQFolder * channelConfig, TQSampleFolder * refSamples,
TString refPath, TString refHistogram, TQTaggable * refHistogramOptions,
Int_t remapXTo, Int_t remapYTo, Int_t &dim, Bool_t remapSlices) {
TString nameChannel = channelConfig->GetName();
std::vector<int> borders;
TQFolder * remappingConfig = channelConfig->getFolder("Remapping");
if (remappingConfig) {
borders = remappingConfig->getTagVInteger("Borders");
remappingConfig->getTagInteger("Dim", dim);
remappingConfig->getTagInteger("RemapXTo", remapXTo);
if (dim > 1) {
remappingConfig->getTagInteger("RemapYTo", remapYTo);
}
} else {
TQSampleDataReader rd(refSamples);
rd.setApplyStyles(false);
TH1 * h_ref = rd.getHistogram(refPath, refHistogram, refHistogramOptions);
if (h_ref) {
dim = TQHistogramUtils::getDimension(h_ref);
if (remapYTo > 0 && dim < 2) {
error(TString::Format(
"getRemapping(...): Cannot remap Y axis of one dimensional histogram in channel '%s'",
nameChannel.Data()));
delete h_ref;
return borders;
}
if (remapXTo > 0 && remapYTo > 0) {
error(TString::Format(
"getRemapping(...): Cannot remap more than one dimension in channel '%s'",
nameChannel.Data()));
delete h_ref;
return borders;
}
if (dim == 1) {
borders = TQHistogramUtils::getBinBordersFlat(h_ref, remapXTo, true);
} else if (dim == 2) {
if (remapYTo > 0) {
borders = TQHistogramUtils::getBinBordersFlat2D((TH2*)h_ref, remapYTo, false, true, remapSlices);
} else if (remapXTo > 0) {
borders = TQHistogramUtils::getBinBordersFlat2D((TH2*)h_ref, remapXTo, true, true, remapSlices);
}
} else {
error("getRemapping(...): Remapping only supported for one- or two-dimensional histograms");
delete h_ref;
return borders;
}
delete h_ref;
if (borders.size()==0) {
error("getRemapping(...): Somehow failed to determine histogram remapping");
return borders;
}
remappingConfig = channelConfig->getFolder("Remapping+");
for (UInt_t i = 0; i < borders.size(); i++) {
remappingConfig->setTagInteger(TString::Format("i%d", i), borders.at(i));
}
remappingConfig->setTagBool("RemapSlices", remapSlices);
remappingConfig->setTagInteger("Dim", dim);
remappingConfig->setTagInteger("RemapXTo", remapXTo);
if (dim > 1) {
remappingConfig->setTagInteger("RemapYTo", remapYTo);
}
} else {
error(TString::Format("getRemapping(...): Failed to load remapping reference histogram '%s' for channel '%s' from path '%s' with options '%s'",
refHistogram.Data(), nameChannel.Data(), refPath.Data(), refHistogramOptions->exportTagsAsString().Data()));
}
}
return borders;
}
std::vector<int> TSModelBuilder::getRemappingOptimizedSgnf(TQFolder * channelConfig, TQSampleFolder * refSamples,
TString sigPath, TString bkgPath, TString histname, TQTaggable * histogramOptions,
Int_t &dim) {
TString nameChannel = channelConfig->GetName();
if (channelConfig->getTagBoolDefault("verboseRebinningAlgorithm", false)) {
INFO("********************************************************");
INFO(TString::Format("==>> Running rebinning on Channel: %s", nameChannel.Data()));
INFO("*********************************************************");
}
std::vector<int> borders;
double minSignal;
double maxSignal;
double minBkg;
double maxBkgUnc;
double maxSigUnc;
double minBinWidth;
bool significanceAgnostic;
bool mergeBins;
double estimatedMaxSignInBin;
dim = 1;
TQFolder * remappingConfig = channelConfig->getFolder("Remapping");
if (remappingConfig) {
Int_t id = 0;
Int_t value = 0;
while (remappingConfig->getTagInteger(TString::Format("i%d", id++), value)) {
borders.push_back(value);
}
remappingConfig->getTagInteger("Dim", dim);
remappingConfig->getTagDouble("minSignal", minSignal);
remappingConfig->getTagDouble("maxSignal", maxSignal);
remappingConfig->getTagDouble("minBkg", minBkg);
remappingConfig->getTagDouble("maxBkgUnc", maxBkgUnc);
remappingConfig->getTagDouble("maxSigUnc", maxSigUnc);
remappingConfig->getTagBool("significanceAgnostic", significanceAgnostic);
remappingConfig->getTagDouble("estimatedMaxSignInBin", estimatedMaxSignInBin);
remappingConfig->getTagDouble("minBinWidth", minBinWidth);
remappingConfig->getTagBool("mergeBins", mergeBins);
} else {
TQSampleDataReader rd(refSamples);
rd.setApplyStyles(false);
TH1 * hsig = rd.getHistogram(sigPath, histname, histogramOptions);
TH1 * hbkg = rd.getHistogram(bkgPath, histname, histogramOptions);
if (hsig || hbkg) {
dim = TQHistogramUtils::getDimension(hsig);
if (dim > 1) {
error(TString::Format(
"getRemappingOptimizedSgnf(...): Remapping with significance optimization algorithm is not support "
"for >1D histograms!"));
delete hsig;
delete hbkg;
return borders;
}
dim = channelConfig->getTagIntegerDefault("Dim", 1);
minSignal = channelConfig->getTagDoubleDefault("rebinMinSignal", 10);
maxSignal = channelConfig->getTagDoubleDefault("rebinMaxSignal", 30);
minBkg = channelConfig->getTagDoubleDefault("rebinMinBkg", 10);
maxBkgUnc = channelConfig->getTagDoubleDefault("rebinMaxBkgUnc", 0.2);
maxSigUnc = channelConfig->getTagDoubleDefault("rebinMaxSigUnc", 0.8);
minBinWidth = channelConfig->getTagDoubleDefault("rebinMinBinWidth", -1);
mergeBins = channelConfig->getTagBoolDefault("rebinMergeBins", true);
significanceAgnostic = channelConfig->getTagBoolDefault("significanceAgnostic", false);
estimatedMaxSignInBin = channelConfig->getTagDoubleDefault("rebinEstimatedMaxSignInBin", -1);
borders = TQHistogramUtils::getOptimizedBinBorders(hsig, hbkg, minSignal, minBkg, maxBkgUnc,
significanceAgnostic, maxSignal, estimatedMaxSignInBin,
channelConfig->getTagBoolDefault("verboseRebinningAlgorithm", false), maxSigUnc, minBinWidth, mergeBins);
if (borders.size()==0) {
error("getRemappingOptimizedSgnf(...): Somehow failed to determine histogram remapping");
delete hsig;
delete hbkg;
return borders;
}
for (int i=1; i < hsig->GetNbinsX()+1; i++) {
INFO(TString::Format("%f", hsig->GetBinLowEdge(i)));
}
TString info_pre_remap = "{";
if (channelConfig->getTagBoolDefault("verboseRebinningAlgorithm", false)) {
INFO("\n*********************************************************");
INFO("==>> binborders to be used for <rebinXList={BEGIN_of_distribution,..,border1,border2,..,END_of_distribution}>");
for (size_t i=0; i < borders.size(); i++) {
info_pre_remap.Append(TString::Format("%f,", hsig->GetBinLowEdge(borders[i])));
INFO(TString::Format("%f", hsig->GetBinLowEdge(borders[i])));
}
info_pre_remap.Append("}");
INFO("*********************************************************");
}
INFO(info_pre_remap);
delete hsig;
delete hbkg;
remappingConfig = channelConfig->getFolder("Remapping+");
for (UInt_t i = 0; i < borders.size(); i++) {
remappingConfig->setTagInteger(TString::Format("i%d", i), borders.at(i));
}
remappingConfig->setTagInteger("Dim", 1);
remappingConfig->setTagDouble("minSignal", minSignal);
remappingConfig->setTagDouble("maxSignal", maxSignal);
remappingConfig->setTagDouble("minBkg", minBkg);
remappingConfig->setTagDouble("maxBkgUnc", maxBkgUnc);
remappingConfig->setTagBool("significanceAgnostic", significanceAgnostic);
remappingConfig->setTagDouble("estimatedMaxSignInBin", estimatedMaxSignInBin);
} else {
error(TString::Format("getRemappingOptimizedSgnf(...): Failed to load remapping "
"histograms for channel '%s'", nameChannel.Data()));
}
}
return borders;
}
std::vector<int> TSModelBuilder::getMergeBins(TQFolder * config, TQFolder * channelConfig, std::map<TQFolder*,TH1*> histograms, TString varname, Bool_t isNominal, Int_t &dim){
std::vector<int> borders;
TQFolder * mergeBinsConfig = channelConfig->getFolder("MergeBins");
if (mergeBinsConfig) {
Int_t id = 0;
Int_t value = 0;
while (mergeBinsConfig->getTagInteger(TString::Format("i%d", id++), value)) {
borders.push_back(value);
}
mergeBinsConfig->getTagInteger("Dim", dim);
} else {
Double_t minimalBinContent;
Double_t minimalSummedBinContent;
std::vector<int> hardBorders;
std::map< const TString, std::vector<Double_t> > totalBinContent;
std::vector<Double_t> sumBinContent;
minimalBinContent = channelConfig->getTagDoubleDefault("mergeBins.minimalBinContent",0.);
minimalSummedBinContent = channelConfig->getTagDoubleDefault("mergeBins.minimalSummedBinContent",1.);
hardBorders = channelConfig->getTagVInt("mergeBins.hardBorders");
for(auto it:histograms){
std::vector<Double_t> histogramBinContent;
TQFolder* sampleDef = it.first;
TString nameSample = sampleDef->GetName();
TString sampleType = sampleDef->getTagStringDefault("Type","B");
if (!TQStringUtils::removeLeadingText(nameSample, "Sample.")) {
continue;
}
if (!isNominal && sampleType=="D"){
continue;
}
TQFolder * sampleConfig = config->getFolder(TQFolder::concatPaths("Samples", nameSample));
if (!sampleConfig) {
error(TString::Format(
"collectHistograms('%s'): Failed to load configuration for sample '%s'. "
"Skipping this channel ...", varname.Data(), nameSample.Data()));
continue;
}
TH1* histo = it.second;
dim = TQHistogramUtils::getDimension(histo);
if(dim != 1){
throw std::runtime_error("dynamic bin merging currently only supported for one-dimensional histograms!");
}
if (histo->GetNbinsX() <= 1) {
continue;
}
if (sumBinContent.empty()) {
for (UInt_t i = 0; i < (unsigned) histo->GetNbinsX()+1; i++) {
histogramBinContent.push_back(histo->GetBinContent(i));
sumBinContent.push_back(histo->GetBinContent(i));
if (i != (unsigned) histo->GetNbinsX()){
borders.push_back(i);
}
}
totalBinContent[nameSample] = histogramBinContent;
} else {
for (UInt_t i = 0; i < (unsigned) histo->GetNbinsX()+1; i++) {
histogramBinContent.push_back(histo->GetBinContent(i));
sumBinContent[i]+=histo->GetBinContent(i);
}
totalBinContent[nameSample] = histogramBinContent;
}
}
if (!totalBinContent.empty()){
for (UInt_t iBin = totalBinContent.begin()->second.size()-1; iBin > 0; iBin--) {
bool merge = false;
for (auto& sample:totalBinContent){
if (sumBinContent[iBin] < minimalSummedBinContent || sample.second[iBin] < minimalBinContent) {
merge = true;
for (UInt_t iHardBorder = 0; iHardBorder < hardBorders.size(); iHardBorder++) {
if (borders[iBin-1] == hardBorders[iHardBorder]){
merge = false;
break;
}
}
break;
}
}
if (merge == true) {
borders[iBin-1]=-1;
sumBinContent[iBin-1]+=sumBinContent[iBin];
sumBinContent[iBin] = 0;
for (auto& sample:totalBinContent){
sample.second[iBin-1]+=sample.second[iBin];
sample.second[iBin]=0;
}
}
}
}
if (!totalBinContent.empty()){
for (UInt_t iBin = 0; iBin < totalBinContent.begin()->second.size()-1; iBin++) {
bool merge = false;
for (auto& sample:totalBinContent){
if ((sumBinContent[iBin] < minimalSummedBinContent || sample.second[iBin] < minimalBinContent) && sumBinContent[iBin] != 0.) {
merge = true;
for (UInt_t iHardBorder = 0; iHardBorder < hardBorders.size(); iHardBorder++) {
if (borders[iBin] == hardBorders[iHardBorder]){
merge = false;
warn(TString::Format("negative bins still present in sample '%s' in bin '%d', cannot merge!",sample.first.Data(),iBin).Data());
break;
}
}
break;
}
}
if (merge == true) {
borders[iBin]=-1;
sumBinContent[iBin+1]+=sumBinContent[iBin];
sumBinContent[iBin] = 0;
for (auto& sample:totalBinContent){
sample.second[iBin+1]+=sample.second[iBin];
sample.second[iBin]=0;
}
}
}
}
int erasedBins = 0;
unsigned bordersSize = borders.size();
for (UInt_t i = 0; i < bordersSize; i++){
if (borders[i-erasedBins] == -1){
borders.erase(borders.begin()+i-erasedBins);
erasedBins++;
}
}
mergeBinsConfig = channelConfig->getFolder("MergeBins+");
for (UInt_t i = 0; i < borders.size(); i++) {
mergeBinsConfig->setTagInteger(TString::Format("i%d", i), borders.at(i));
}
mergeBinsConfig->setTagInteger("Dim",dim);
}
return borders;
}
Bool_t TSModelBuilder::collectHistograms(TQFolder * config, TQFolder * model, TQFolder* variation) {
if(!variation->hasTag("Variation")) variation->setTagString("Variation",variation->GetName());
bool allowHistogramProcessing = config->getTagBoolDefault("processHistograms",true);
bool useHistoTags = config->getTagBoolDefault("useHistogramTags",true);
TQFolder* fallbackVariation = NULL;
TString fallbackVariationName;
if(variation->getTagString("Fallback",fallbackVariationName)){
fallbackVariation = variation->getBase()->getFolder(fallbackVariationName);
}
TString varname = variation->GetName();
Bool_t isNominal = TQStringUtils::equal("Nominal",variation->GetName());
bool applyStyles = config->getTagBoolDefault("applyStyles", isNominal);
TQFolder* parameters = config->getFolder("Parameters");
std::shared_ptr<TQFolder> empty_parameters = std::make_shared<TQFolder>();
if (parameters == nullptr) {
parameters = empty_parameters.get();
}
info(TString::Format("collectHistograms('%s'): Collecting histograms...", varname.Data()));
TString filterInclude = variation->getTagStringDefault("Include", "*");
TString filterExclude = variation->getTagStringDefault("Exclude", "");
int nHistos = 0;
TQFolderIterator itrChannels(model->getListOfFolders("?"), true);
while (itrChannels.hasNext()) {
TQFolder * channelDef = itrChannels.readNext();
TString nameChannel = channelDef->GetName();
if (!TQStringUtils::removeLeadingText(nameChannel, "Channel.")) {
continue;
}
TQFolder * channelConfig = config->getFolder(TQFolder::concatPaths("Channels", nameChannel));
if (!channelConfig) {
error(TString::Format("collectHistograms('%s'): Failed to load configuration for "
"channel '%s'. Skipping this channel ...", varname.Data(), nameChannel.Data()));
continue;
}
TString name;
bool useCounter = channelConfig->getTagString("Counter",name);
bool useHistogram = channelConfig->getTagString("Histogram",name);
if (useHistogram && useCounter){
error(TString::Format(
"collectHistograms('%s'): Cannot use both histogram and counter for channel '%s'. "
"Skipping this channel ...", varname.Data(), nameChannel.Data()));
continue;
}
channelConfig->getTagBool("useCounter",useCounter);
useHistogram = useHistogram && !useCounter;
if (!useHistogram && !useCounter){
error(TString::Format(
"collectHistograms('%s'): Neither histogram nor counter provided for channel '%s'. "
"Skipping this channel ...", varname.Data(), nameChannel.Data()));
continue;
}
Int_t dim = 0;
Int_t remapXTo = channelConfig->getTagIntegerDefault("RemapXTo", 0);
Int_t remapYTo = channelConfig->getTagIntegerDefault("RemapYTo", 0);
Bool_t remapSlices = channelConfig->getTagBoolDefault("RemapSlices", true);
Bool_t rebinToOptimalSgnf = channelConfig->getTagIntegerDefault("rebinToOptimalSgnf", false);
if (!useCounter && (remapXTo > 0 || remapYTo > 0 || rebinToOptimalSgnf) ) {
TQSampleFolder * samples = getSampleFolder(variation);
if (!samples) {
error(TString::Format("collectHistograms('%s'): Failed to get sample folder",variation->GetName()));
return false;
}
TQSampleDataReader rd(samples);
rd.setApplyStyles(applyStyles);
std::vector<int> borders;
if ( (remapXTo > 0 || remapYTo > 0) && rebinToOptimalSgnf) {
warn(TString::Format(
"collectHistograms('%s'): configured two types of rebinning mechanisms, "
"flat signal rebinning gets preference!", varname.Data()));
rebinToOptimalSgnf = false;
}
TString refPath;
if (channelConfig->getTagString("PathRemapFlat", refPath)) {
refPath = channelConfig->replaceInText(refPath);
refPath = parameters->replaceInText(refPath);
borders = getRemapping(channelConfig, samples, refPath, name,
channelConfig, remapXTo, remapYTo, dim, remapSlices);
} else {
if (!rebinToOptimalSgnf) {
error(TString::Format(
"collectHistograms('%s'): No reference path provided for remapping in channel "
"'%s'. Skipping remapping ...", varname.Data(), nameChannel.Data()));
}
}
if (rebinToOptimalSgnf) {
TString sigPath;
TString bkgPath;
if (!channelConfig->getTagString("rebinSigPath", sigPath) || !channelConfig->getTagString("rebinBkgPath", bkgPath)) {
error(TString::Format(
"collectHistograms('%s'): No valid reference paths for signal and background provided for significance "
"optimized rebinning in channel '%s'. Skipping remapping ...", varname.Data(), nameChannel.Data()));
}
sigPath = channelConfig->replaceInText(sigPath);
sigPath = parameters->replaceInText(sigPath);
bkgPath = channelConfig->replaceInText(bkgPath);
bkgPath = parameters->replaceInText(bkgPath);
borders = getRemappingOptimizedSgnf(channelConfig, samples, sigPath, bkgPath, name,channelConfig, dim);
}
if (borders.size()==0) {
error(TString::Format(
"collectHistograms('%s'): Failed to obtain remapping information for channel "
"'%s'. Skipping remapping ...", varname.Data(), nameChannel.Data()));
} else {
channelDef->setTagList(".Remapping.Bins",borders);
channelDef->setTagBool(".Remapped", true);
}
}
std::map<TQFolder*,TH1*> histograms;
std::map<TQFolder*,TString> finalHistoPaths;
TQFolderIterator itrSamples(channelDef->getListOfFolders("?"), true);
while (itrSamples.hasNext()) {
TQFolder * sampleDef = itrSamples.readNext();
TQTaggable sampleVarTags;
sampleVarTags.importTags(variation->getBase());
sampleVarTags.importTags(sampleDef);
sampleVarTags.importTags(variation);
TQSampleFolder * samples = getSampleFolder(&sampleVarTags);
if (!samples) {
error(TString::Format("collectHistograms('%s'): Failed to get sample folder",variation->GetName()));
return false;
}
TQSampleDataReader rd(samples);
rd.setApplyStyles(applyStyles);
TString nameSample = sampleDef->GetName();
TString sampleType = sampleDef->getTagStringDefault("Type","B");
if (!TQStringUtils::removeLeadingText(nameSample, "Sample.")) {
continue;
}
if (!isNominal && sampleType == "D"){
continue;
}
Bool_t passesInclude = TQStringUtils::matchesFilter(nameChannel + ":" + nameSample, filterInclude, ",", true);
Bool_t passesExclude = TQStringUtils::matchesFilter(nameChannel + ":" + nameSample, filterExclude, ",", true);
if (!passesInclude || passesExclude) {
continue;
}
std::map<TString,TString> paths;
TQTaggable tags;
tags.importTags(sampleDef,true,true);
tags.importTags(parameters,true,true);
TString defaultPath = sampleDef->getTagStringDefault("Path");
tags.importTags(variation,true,true);
paths["path"] = tags.replaceInTextRecursive(defaultPath);
if(fallbackVariation){
tags.importTags(fallbackVariation,true,true);
paths["fallbackPath"] = tags.replaceInTextRecursive(defaultPath);
}
bool hasAlternate = false;
for(auto expr:sampleDef->getTagVString("AlternatePath.selectChannels")){
if(TQStringUtils::matches(nameChannel,expr)){
hasAlternate=true;
}
if(!hasAlternate) break;
TString alternatePath;
hasAlternate = sampleDef->getTagString("AlternatePath",alternatePath);
tags.importTags(variation,true,true);
if(hasAlternate){
alternatePath = tags.replaceInTextRecursive(alternatePath);
paths["alternatePath"] = alternatePath;
if(fallbackVariation){
tags.importTags(fallbackVariation,true,true);
paths["fallbackAlternatePath"] = tags.replaceInTextRecursive(alternatePath);
}
}
}
TQTaggable finalHistoTags(channelConfig);
if(useHistoTags){
sampleDef->exportTags(&finalHistoTags);
TString prefixChannel = TString::Format("forChannel.%s.", nameChannel.Data());
sampleDef->exportTags(&finalHistoTags, "", prefixChannel + "*");
finalHistoTags.renameTags(prefixChannel, "");
TString prefixSample = TString::Format("forSample.%s.", nameSample.Data());
channelConfig->exportTags(&finalHistoTags, "", prefixSample + "*");
finalHistoTags.renameTags(prefixSample, "");
}
std::map<TString,TH1*> histos;
for(auto it:paths){
TString key(it.first);
TString path(it.second);
TString pathToTransferFrom;
TString channelToTransferFrom;
if(variation->getTagString("transferRelativeErrorsFromChannel", channelToTransferFrom)){
TQStringUtils::removeLeading(path, "/");
std::vector<TString> layers = TQStringUtils::tokenizeVector(path, "/");
if ((int)layers.size() > variation->getTagIntegerDefault("channelFolderLayer", 1)) {
layers[variation->getTagIntegerDefault("channelFolderLayer", 1)] = channelToTransferFrom;
} else {
error(TString::Format("Cannot construct path to transfer from for histogram with name '%s' and path '%s'", name.Data(), path.Data()));
}
pathToTransferFrom = TQStringUtils::concat(layers, "/");
}
if (!channelToTransferFrom.IsNull()) finalHistoTags.setTagString("transferRelativeErrorsFromPath", pathToTransferFrom);
TH1 * histo = NULL;
TList* sfList = new TList();
if (useHistogram) {
TString finalHistoName(tags.replaceInTextRecursive(name));
histo = rd.getHistogram(path, finalHistoName, &finalHistoTags,sfList);
} else {
TString finalHistoName(tags.replaceInTextRecursive(name));
TQCounter * cnt = rd.getCounter(path, finalHistoName, &finalHistoTags,sfList);
if (cnt) {
histo = TQHistogramUtils::counterToHistogram(cnt);
if (applyStyles) rd.applyStyleToElement(histo,sfList,&finalHistoTags);
delete cnt;
}
}
delete sfList;
histos[key] = histo;
}
int verbosity = parameters->getTagIntegerDefault("verbosity",0);
TH1* histo = histos["path"];
TH1* althisto = NULL;
if(hasAlternate) althisto = histos["alternatePath"];
TString origpath = paths["path"];
TString path = origpath;
if(!histo && fallbackVariation){
histo = histos["fallbackPath"];
if(hasAlternate) althisto = histos["fallbackAlternatePath"];
TString fallbackPath = paths["fallbackPath"];
if (histo && verbosity>4) warn(TString::Format("collectHistograms('%s'): Failed to load counter '%s' for sample '%s' in channel '%s' from path '%s', using fallback path '%s'", varname.Data(), histo->GetName(), nameSample.Data(), nameChannel.Data(), path.Data(),fallbackPath.Data()));
path = fallbackPath;
}
if(hasAlternate){
std::vector<int> altBins = sampleDef->getTagVInteger("AlternatePath.bins");
if(altBins.size() == 0){
warn(TString::Format("collectHistograms('%s'): Failed to merge alternate histograms for sample '%s' in channel '%s', no bins given!", varname.Data(), nameSample.Data(), nameChannel.Data()));
} else {
if(!TQHistogramUtils::replaceBins(histo,althisto,altBins)){
warn(TString::Format("collectHistograms('%s'): Failed to merge alternate histograms for sample '%s' in channel '%s', histograms incompatible!", varname.Data(), nameSample.Data(), nameChannel.Data()));
} else {
info(TString::Format("collectHistograms('%s'): Merged alternate histogram in bins %s for sample '%s' in channel '%s'!", varname.Data(), TQStringUtils::concat(altBins,",").Data(), nameSample.Data(), nameChannel.Data()));
}
}
delete althisto;
}
if (!histo) {
error(TString::Format("collectHistograms('%s'): Failed to load histogram '%s' for sample '%s' in channel '%s'. Path given was %s", varname.Data(), name.Data(), nameSample.Data(), nameChannel.Data(), (fallbackVariation ? origpath + " or " + path : origpath).Data()));
continue;
}
if (isNominal) {
sampleDef->setTagString(".HistoDetails.Original",
TQHistogramUtils::getDetailsAsString(histo));
sampleDef->setTagString(".HistoFromPath", path);
sampleDef->setTagDouble(".integral.nominal",TQHistogramUtils::getIntegral(histo,true) );
sampleDef->setTagDouble("Bins",histo->GetNbinsX()*histo->GetNbinsY()*histo->GetNbinsZ());
sampleDef->importTags(finalHistoTags);
}
TString histoSamplePath = TQFolder::concatPaths(varname, nameChannel);
histo->SetName(nameSample);
histograms[sampleDef]=histo;
finalHistoPaths[sampleDef]=path;
}
if (!useCounter) {
Bool_t mergeBinsManual = channelConfig->getTagBoolDefault("mergeBinsManual", false);
Bool_t mergeBins = channelConfig->getTagBoolDefault("mergeBins", false);
std::vector<int> borders;
if(mergeBinsManual && mergeBins){
error(TString::Format("Cannot both merge bins manually and automatically, refusing to do either!"));
} else if(mergeBins) {
if (remapXTo > 0 || remapYTo > 0){
error(TString::Format("Cannot both remap and merge bins! Remap will be skipped!"));
}
borders = getMergeBins(config, channelConfig, histograms, varname, isNominal, dim);
channelDef->setTagList(".Remapping.Bins",borders);
channelDef->setTagBool(".Remapped", true);
} else if(mergeBinsManual){
borders = channelConfig->getTagVInt("mergeBinsManual.borders");
channelDef->setTagList(".Remapping.Bins",borders);
channelDef->setTagBool(".Remapped", true);
}
}
TString histoSamplePath = TQFolder::concatPaths(varname, nameChannel);
int nDim = -1;
for(auto it:histograms){
TQFolder* sampleDef = it.first;
TString nameSample = sampleDef->GetName();
TString sampleType = sampleDef->getTagStringDefault("Type","B");
if (!TQStringUtils::removeLeadingText(nameSample, "Sample.")) {
continue;
}
if (!isNominal && sampleType == "D"){
continue;
}
TH1* histo = it.second;
if(allowHistogramProcessing){
TH1* newhisto = processHistogram(config,sampleDef,histo);
if(newhisto){
TQFolder* target = this->repository()->getFolder(histoSamplePath+"+");
TQFolder* before = target->getFolder(".BeforeProcessing+");
sampleDef->setTagString(".Histogram.BeforeProcessing",TQFolder::concatPaths(before->getPath(),histo->GetName()));
if(isNominal){
sampleDef->setTagDouble(".integral.nominal",TQHistogramUtils::getIntegral(newhisto,true) );
}
target->Remove(histo);
before->addObject(histo);
histo = newhisto;
}
}
TQFolder* histoFolder = this->repository()->getFolder(histoSamplePath+"+");
histoFolder->setTagString(TString::Format("Path.%s",histo->GetName()),finalHistoPaths[sampleDef].Data());
if (!histoFolder->addObject(histo)){
error(TString::Format("collectHistograms('%s'): Failed to store histogram", varname.Data()));
} else {
nHistos++;
}
if (isNominal) {
it.first->setTagString("Histo", TQFolder::concatPaths(histoSamplePath, nameSample));
}
if (applyStyles && !sampleDef->hasMatchingTag("style.*")) {
TQHistogramUtils::extractStyle(histo, sampleDef, "/");
}
int thisdim = TQHistogramUtils::getDimension(histo);
if(thisdim == 1 && histo->GetNbinsX() == 1) thisdim = 0;
if(nDim < 0){
nDim = thisdim;
} else if(nDim != thisdim){
error(TString::Format("collectHistograms('%s'): Inconsistent dimensions of histogram!", varname.Data()));
}
}
channelDef->setTagInteger("nDim",nDim);
}
return nHistos>0;
}
TH1* TSModelBuilder::processHistogram(TQFolder* config, TQFolder* sampleDef, const TH1* orighisto){
TQFolder* parameters = config->getFolder("Parameters");
std::shared_ptr<TQFolder> empty_parameters = std::make_shared<TQFolder>();
if (parameters == nullptr) {
parameters = empty_parameters.get();
}
TH1* histo = NULL;
TString nameSample = sampleDef->GetName();
Double_t minBinContent = parameters->getTagDoubleDefault("MinBinContent", -1.);
bool ignoreNegative = parameters->getTagBoolDefault("ignoreNegative", false);
bool flipToPositive = parameters->getTagBoolDefault("flipToPositive", false);
bool assignUncertainty = parameters->getTagBoolDefault("assignUncertainty", true);
bool remap = parameters->getTagBoolDefault("remap", true);
bool ensureAbsMinimumContent = sampleDef->getTagBoolDefault("ensureAbsMinimumContent", false);
std::vector<int> borders = sampleDef->getTagVInteger("~.Remapping.Bins");
int dim = sampleDef->getTagIntegerDefault("~dim",1);
Int_t remapXTo = sampleDef->getTagIntegerDefault("~RemapXTo", 0);
Int_t remapYTo = sampleDef->getTagIntegerDefault("~RemapYTo", 0);
if (borders.size()!=0) {
TH1* h_remapped = NULL;
if (dim == 1) {
h_remapped = TQHistogramUtils::getRebinned(orighisto, borders, remap);
} else if (dim == 2) {
if (remapYTo > 0) {
h_remapped = TQHistogramUtils::getRemapped2D((TH2*)orighisto, borders, false);
} else if (remapXTo > 0) {
h_remapped = TQHistogramUtils::getRemapped2D((TH2*)orighisto, borders, true);
}
}
if (h_remapped) {
histo = h_remapped;
} else {
warn(TString::Format("processHistograms(): Somehow failed to remap %d-dim histogram '%s' into %zu bins ", dim, orighisto->GetName(), (borders.size()-1)));
histo = (TH1*) orighisto->Clone();
histo->SetDirectory(orighisto->GetDirectory());
}
}
Int_t dimHisto = TQHistogramUtils::getDimension(histo);
TString convertTo1DDef = sampleDef->getTagStringDefault("~convertTo1D");
if (dimHisto == 2 && !convertTo1DDef.IsNull()) {
Bool_t alongX = false;
Bool_t inclUVX = false;
Bool_t inclOVX = false;
Bool_t inclUVY = false;
Bool_t inclOVY = false;
if (!parseConversion(convertTo1DDef, alongX, inclUVX, inclOVX, inclUVY, inclOVY)) {
warn(TString::Format(
"collectHistograms(): Invalid histogram conversion to 1D '%s'",
convertTo1DDef.Data()));
} else {
TH1 * h_converted = TQHistogramUtils::convertTo1D(
(TH2*)histo, alongX, inclUVX, inclOVX, inclUVY, inclOVY);
if (h_converted) {
sampleDef->setTagBool(".ConvertedTo1D", true);
if(histo) delete histo;
histo = h_converted;
} else {
warn("collectHistograms(): Somehow failed to convert histogram to 1D");
}
}
}
std::vector<int> onlyBins = sampleDef->getTagVInteger("onlyBins");
if(onlyBins.size() > 0){
if(!histo) histo = TQHistogramUtils::copyHistogram(orighisto);
for(int i=0; i<histo->GetNbinsX()+2; ++i){
bool keep = false;
for(size_t j=0; j<onlyBins.size(); ++j){
if(i == onlyBins[j]) keep = true;
}
if(!keep){
histo->SetBinContent(i,0);
histo->SetBinError(i,0);
}
}
}
std::vector<int> onlyBinsX = sampleDef->getTagVInteger("onlyBinsX");
std::vector<int> onlyBinsY = sampleDef->getTagVInteger("onlyBinsY");
if(onlyBinsX.size() > 0 || onlyBinsY.size() > 0){
if(!histo) histo = TQHistogramUtils::copyHistogram(orighisto);
for(int i=0; i<histo->GetXaxis()->GetNbins()+2; ++i){
for(int j=0; j<histo->GetYaxis()->GetNbins()+2; ++j){
bool keep = false;
for(size_t k=0; k<onlyBinsX.size(); ++k){
if(i == onlyBinsX[k]) keep = true;
}
for(size_t k=0; k<onlyBinsY.size(); ++k){
if(j == onlyBinsY[k]) keep = true;
}
if(!keep){
histo->SetBinContent(i,j,0);
histo->SetBinError(i,j,0);
}
}
}
}
if (minBinContent >= 0.){
if(!histo) histo = TQHistogramUtils::copyHistogram(orighisto);
if(ensureAbsMinimumContent==true){
TQHistogramUtils::ensureAbsMinimumBinContent(histo, minBinContent);
info(TString::Format("processHistogram(): ensureAbsMinimumContent on sample: '%s'", nameSample.Data()));
} else {
TQHistogramUtils::ensureMinimumBinContent(histo, minBinContent, ignoreNegative, flipToPositive, assignUncertainty);
}
}
double averageWeight = 0;
if(sampleDef->getTag("~inflateStatUncertaintyLowYield.averageWeight",averageWeight)) {
if(!histo) histo = TQHistogramUtils::copyHistogram(orighisto);
TQHistogramUtils::ensureMinimumBinError(histo,averageWeight);
}
if(config->getTagBoolDefault("warnings.dips",true) && TQHistogramUtils::getNDips(histo) > 0) {
warn(TString::Format("processHistogram(): Histogram for sample '%s' in channel"
" '%s' has dips. Please consider using less bins.", sampleDef->getTagStringDefault("~Sample","<unknown>").Data(),sampleDef->getTagStringDefault("~Channel","<unknown>").Data()));
}
return histo;
}
void TSModelBuilder::applyStyle(TQFolder* model, const TString& samplename, TH1* hist){
TQFolderIterator itr(model->getListOfFolders(TQFolder::concatPaths("?","Sample."+samplename)),true);
while(itr.hasNext()){
TQFolder* sample = itr.readNext();
if(sample->hasMatchingTag("style.*")){
TQHistogramUtils::applyStyle(hist,sample,"/");
return;
}
}
}