-
Notifications
You must be signed in to change notification settings - Fork 5
Fastpuppi v1 dask integration #8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: fastpuppi_v1_dask
Are you sure you want to change the base?
Changes from all commits
6c0076a
5779497
f59679c
4ebe318
3283616
f272de1
e8857db
b903552
86c7c54
6c88a4a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,7 +1,6 @@ | ||
| import awkward as ak | ||
| import hist.dask as dah | ||
| import hist | ||
| from hist import Hist | ||
|
|
||
| import awkward as ak | ||
|
|
||
| def TH1F(name, title, nbins, bin_low, bin_high): | ||
| b_axis_name = 'X' | ||
|
|
@@ -10,12 +9,13 @@ def TH1F(name, title, nbins, bin_low, bin_high): | |
| b_axis_name = title_split[1] | ||
| b_name = title_split[0] | ||
| b_label = name | ||
| return Hist( | ||
| hist.axis.Regular(bins=nbins, start=bin_low, stop=bin_high, name=b_axis_name), | ||
| label=b_label, | ||
| name=b_name, | ||
| storage=hist.storage.Weight() | ||
| ) | ||
|
|
||
| return hist.dask.Hist( | ||
| hist.axis.Regular(bins=nbins, start=bin_low, stop=bin_high, name=b_axis_name), | ||
| label=b_label, | ||
| name=b_name, | ||
| storage=hist.storage.Weight() | ||
| ) | ||
|
|
||
| def TH2F(name, title, x_nbins, x_bin_low, x_bin_high, y_nbins, y_bin_low, y_bin_high): | ||
| b_x_axis_name = 'X' | ||
|
|
@@ -27,51 +27,32 @@ def TH2F(name, title, x_nbins, x_bin_low, x_bin_high, y_nbins, y_bin_low, y_bin_ | |
| b_y_axis_name = title_split[2] | ||
| b_name = title_split[0] | ||
| b_label = name | ||
| return Hist( | ||
| hist.axis.Regular(bins=x_nbins, start=x_bin_low, stop=x_bin_high, name=b_x_axis_name), | ||
| hist.axis.Regular(bins=y_nbins, start=y_bin_low, stop=y_bin_high, name=b_y_axis_name), | ||
| label=b_label, | ||
| name=b_name, | ||
| storage=hist.storage.Weight() | ||
| ) | ||
|
|
||
|
|
||
| def TH2F_category(name, title, x_categories, y_nbins, y_bin_low, y_bin_high): | ||
| b_x_axis_name = 'X' | ||
| b_y_axis_name = 'Y' | ||
| title_split = title.split(';') | ||
| if len(title_split) > 1: | ||
| b_x_axis_name = title_split[1] | ||
| if len(title_split) > 2: | ||
| b_y_axis_name = title_split[2] | ||
| b_name = title_split[0] | ||
| b_label = name | ||
| return Hist( | ||
| hist.axis.StrCategory(x_categories, name=b_x_axis_name), | ||
| hist.axis.Regular(bins=y_nbins, start=y_bin_low, stop=y_bin_high, name=b_y_axis_name), | ||
| label=b_label, | ||
|
|
||
| return hist.dask.Hist( | ||
| hist.axis.Regular(bins=x_nbins, start=x_bin_low, stop=x_bin_high, name=b_x_axis_name), | ||
| hist.axis.Regular(bins=y_nbins, start=y_bin_low, stop=y_bin_high, name=b_y_axis_name), | ||
| label=b_label, | ||
| name=b_name, | ||
| storage=hist.storage.Weight() | ||
| ) | ||
|
|
||
|
|
||
| def fill_1Dhist(hist, array, weights=None): | ||
| flar = ak.drop_none(ak.flatten(array)) | ||
|
|
||
| if weights is None: | ||
| hist.fill(flar, threads=None) | ||
| # ROOT.fill_1Dhist(hist=hist, array=flar) | ||
| else: | ||
| hist.fill(flar, weights) | ||
| # ROOT.fill_1Dhist(hist=hist, array=flar, weights=weights) | ||
|
|
||
GintasS marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| def fill_2Dhist(hist, arrayX, arrayY, weights=None): | ||
| flar_x = ak.drop_none(ak.flatten(arrayX)) | ||
| flar_y = ak.drop_none(ak.flatten(arrayY)) | ||
|
|
||
| if weights is None: | ||
| # ROOT.fill_2Dhist(hist=hist, arrayX=flar_x, arrayY=flar_y) | ||
| hist.fill(flar_x, flar_y, threads=None) | ||
| else: | ||
| # ROOT.fill_2Dhist(hist=hist, arrayX=flar_x, arrayY=flar_y, weights=weights) | ||
| hist.fill(flar_x, flar_y, weights) | ||
|
|
||
| hist.fill(flar_x, flar_y, weights) | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is the newline at the end of the file missing?
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not sure what you mean. |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,6 +5,9 @@ | |
| import awkward as ak | ||
| import vector | ||
|
|
||
| import coffea | ||
| from coffea.nanoevents import NanoEventsFactory, NanoAODSchema, BaseSchema | ||
|
|
||
| vector.register_awkward() | ||
|
|
||
| class TreeReader: | ||
|
|
@@ -94,28 +97,40 @@ def getDataFrame(self, prefix, entry_block, fallback=None): | |
| if br.startswith(f'{prefix}_') and | ||
| br != f'{prefix}_n'] | ||
| names = ['_'.join(br.split('_')[1:]) for br in branches] | ||
| name_map = dict(zip(names, branches, strict=False)) | ||
| name_map = dict(zip(names, branches)) | ||
| if len(branches) == 0: | ||
| if fallback is not None: | ||
| return self.getDataFrame(prefix=fallback, entry_block=entry_block) | ||
| prefs = set([br.split('_')[0] for br in self._branches]) | ||
| print(f'stored branch prefixes are: {prefs}') | ||
| raise ValueError(f'[TreeReader::getDataFrame] No branches with prefix: {prefix}') | ||
|
|
||
| akarray = self.tree.arrays(names, | ||
| library='ak', | ||
| aliases=name_map, | ||
| entry_start=self.file_entry, | ||
| entry_stop=self.file_entry+entry_block) | ||
| dask_akarray = NanoEventsFactory.from_root( | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this still up to date? Did you try avoiding the loop over the files, opening all of them at once, moving the …
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Now it is up to date. |
||
| self.tree, | ||
| schemaclass=NanoAODSchema).events() | ||
|
|
||
| #akarray = self.tree.arrays(names, | ||
| #library='ak', | ||
| #aliases=name_map, | ||
| #entry_start=self.file_entry, | ||
| #entry_stop=self.file_entry+entry_block) | ||
| #print("[0] prefix to select: ", prefix) | ||
|
|
||
| dask_akarray = dask_akarray[prefix] | ||
| #print("[1] Selected fields from prefix", dask_akarray.fields) | ||
|
|
||
| dask_akarray = dask_akarray[names] | ||
|
|
||
| #print("[2] specific fields with names", dask_akarray.fields) | ||
| dask_akarray = dask_akarray[self.file_entry : self.file_entry + entry_block] | ||
|
|
||
| # print(akarray) | ||
| records = {} | ||
| for field in akarray.fields: | ||
| records[field] = akarray[field] | ||
| for field in dask_akarray.fields: | ||
| records[field] = dask_akarray[field] | ||
|
|
||
| if 'pt' in names and 'eta' in names and 'phi' in names: | ||
| if 'mass' not in names and 'energy' not in names: | ||
| records['mass'] = 0.*akarray['pt'] | ||
| records['mass'] = 0.*dask_akarray['pt'] | ||
| return vector.zip(records) | ||
|
|
||
| return ak.zip(records) | ||
|
|
@@ -124,5 +139,4 @@ def getDataFrame(self, prefix, entry_block, fallback=None): | |
| # ele_rec = ak.zip({'pt': tkele.pt, 'eta': tkele.eta, 'phi': tkele.phi}, with_name="pippo") | ||
| # this would allow to handle the records and assign behaviours.... | ||
|
|
||
| # return akarray | ||
|
|
||
| # return akarray | ||
Uh oh!
There was an error while loading. Please reload this page.