Skip to content

h5 does not contain key 'text/bert' #8

Description

@dafei-qin

Thanks for sharing the data! I found some annoying errors that might be caused by the data.

When I run this:

from data import Data
from tqdm import tqdm
common_kwargs = dict(path2data = 'data',
                     speaker = ['bee'],
                     modalities = ['pose/data', 'audio/log_mel_512', 'text/bert'],
                     fs_new = [15, 15, 15],
                     batch_size = 4,
                     window_hop = 5)
data = Data(**common_kwargs)

It raises the error below. It seems that the file data/processed/bee/cmu0000025765.h5 does not contain the text/bert part. On further investigation, I found that many of the h5 files for bee are missing their text/bert part.

G:\PATS\pats\data\dataUtils.py:134: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
  self.df = self.df.append(pd.read_csv((Path(self.path2data)/'cmu_intervals_df_transforms.csv').as_posix())) ## file with evil twins
  0%|                                                                                          | 0/576 [00:00<?, ?it/s]
data/processed/bee/cmu0000025765.h5 text/bert
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File G:\PATS\pats\data\dataUtils.py:485, in MiniData.__init__(self, path2h5, modalities, fs_new, time, modality_classes, window_hop, style, repeat_text, text_in_modalities, filler, **kwargs)
    484 try:
--> 485   data, h5 = self.load(self.path2h5, modality)
    486 except:

File G:\PATS\pats\data\common.py:39, in HDF5.load(filename, key)
     36 # if key not in h5:
     37 #   print(f"{key} not in P{filename}!")
     38 #   return [], h5
---> 39 data = h5[key]
     40 return data, h5

File h5py\_objects.pyx:54, in h5py._objects.with_phil.wrapper()

File h5py\_objects.pyx:55, in h5py._objects.with_phil.wrapper()

File ~\anaconda3\envs\pats\lib\site-packages\h5py\_hl\group.py:305, in Group.__getitem__(self, name)
    304 elif isinstance(name, (bytes, str)):
--> 305     oid = h5o.open(self.id, self._e(name), lapl=self._lapl)
    306 else:

File h5py\_objects.pyx:54, in h5py._objects.with_phil.wrapper()

File h5py\_objects.pyx:55, in h5py._objects.with_phil.wrapper()

File h5py\h5o.pyx:190, in h5py.h5o.open()

KeyError: 'Unable to open object (component not found)'

During handling of the above exception, another exception occurred:

SystemExit                                Traceback (most recent call last)
    [... skipping hidden 1 frame]

Input In [3], in <cell line: 1>()
----> 1 data = Data(**common_kwargs)



File G:\PATS\pats\data\dataUtils.py:155, in Data.__init__(self, path2data, speaker, modalities, fs_new, time, split, batch_size, shuffle, num_workers, window_hop, load_data, style_iters, num_training_sample, sample_all_styles, repeat_text, quantile_sample, quantile_num_training_sample, weighted, filler, num_training_iters)
    153 #if self.load_data:
    154 ## get train-dev-test split
--> 155 self.datasets = self.tdt_split()
    156 self.dataLoader_kwargs = {'batch_size':batch_size,
    157                           'shuffle':shuffle,
    158                           'num_workers':num_workers,
    159                           'pin_memory':False}

File G:\PATS\pats\data\dataUtils.py:292, in Data.tdt_split(self)
    290 self.test_intervals = test_intervals
--> 292 dataset_train = ConcatDatasetIndex(self.get_minidata_list(train_intervals))
    293 dataset_dev = ConcatDatasetIndex(self.get_minidata_list(dev_intervals))

File G:\PATS\pats\data\dataUtils.py:247, in Data.get_minidata_list(self, intervals)
    246 def get_minidata_list(self, intervals):
--> 247   return [MiniData(self.getPath2file(interval_id), style=self.getStyle(interval_id), **self.minidataKwargs)
    248                                  for interval_id in tqdm(intervals)]

File G:\PATS\pats\data\dataUtils.py:247, in <listcomp>(.0)
    246 def get_minidata_list(self, intervals):
--> 247   return [MiniData(self.getPath2file(interval_id), style=self.getStyle(interval_id), **self.minidataKwargs)
    248                                  for interval_id in tqdm(intervals)]

File G:\PATS\pats\data\dataUtils.py:491, in MiniData.__init__(self, path2h5, modalities, fs_new, time, modality_classes, window_hop, style, repeat_text, text_in_modalities, filler, **kwargs)
    488   # print(f"{modality} not in {self.path2h5}")
    489   # return
--> 491   sys.exit(1)
    493 self.shapes.append(data.shape)

SystemExit: 1

During handling of the above exception, another exception occurred:

AssertionError                            Traceback (most recent call last)
    [... skipping hidden 1 frame]

File ~\anaconda3\envs\pats\lib\site-packages\IPython\core\interactiveshell.py:1972, in InteractiveShell.showtraceback(self, exc_tuple, filename, tb_offset, exception_only, running_compiled_code)
   1969 if exception_only:
   1970     stb = ['An exception has occurred, use %tb to see '
   1971            'the full traceback.\n']
-> 1972     stb.extend(self.InteractiveTB.get_exception_only(etype,
   1973                                                      value))
   1974 else:
   1975     try:
   1976         # Exception classes can customise their traceback - we
   1977         # use this in IPython.parallel for exceptions occurring
   1978         # in the engines. This should return a list of strings.

File ~\anaconda3\envs\pats\lib\site-packages\IPython\core\ultratb.py:578, in ListTB.get_exception_only(self, etype, value)
    570 def get_exception_only(self, etype, value):
    571     """Only print the exception type and message, without a traceback.
    572 
    573     Parameters
   (...)
    576     value : exception value
    577     """
--> 578     return ListTB.structured_traceback(self, etype, value)

File ~\anaconda3\envs\pats\lib\site-packages\IPython\core\ultratb.py:436, in ListTB.structured_traceback(self, etype, evalue, etb, tb_offset, context)
    433     chained_exc_ids.add(id(exception[1]))
    434     chained_exceptions_tb_offset = 0
    435     out_list = (
--> 436         self.structured_traceback(
    437             etype, evalue, (etb, chained_exc_ids),
    438             chained_exceptions_tb_offset, context)
    439         + chained_exception_message
    440         + out_list)
    442 return out_list

File ~\anaconda3\envs\pats\lib\site-packages\IPython\core\ultratb.py:1105, in AutoFormattedTB.structured_traceback(self, etype, value, tb, tb_offset, number_of_lines_of_context)
   1103 else:
   1104     self.tb = tb
-> 1105 return FormattedTB.structured_traceback(
   1106     self, etype, value, tb, tb_offset, number_of_lines_of_context)

File ~\anaconda3\envs\pats\lib\site-packages\IPython\core\ultratb.py:999, in FormattedTB.structured_traceback(self, etype, value, tb, tb_offset, number_of_lines_of_context)
    996 mode = self.mode
    997 if mode in self.verbose_modes:
    998     # Verbose modes need a full traceback
--> 999     return VerboseTB.structured_traceback(
   1000         self, etype, value, tb, tb_offset, number_of_lines_of_context
   1001     )
   1002 elif mode == 'Minimal':
   1003     return ListTB.get_exception_only(self, etype, value)

File ~\anaconda3\envs\pats\lib\site-packages\IPython\core\ultratb.py:852, in VerboseTB.structured_traceback(self, etype, evalue, etb, tb_offset, number_of_lines_of_context)
    850 """Return a nice text document describing the traceback."""
    851 assert etb is not None
--> 852 formatted_exception = self.format_exception_as_a_whole(etype, evalue, etb, number_of_lines_of_context,
    853                                                        tb_offset)
    855 colors = self.Colors  # just a shorthand + quicker name lookup
    856 colorsnormal = colors.Normal  # used a lot

File ~\anaconda3\envs\pats\lib\site-packages\IPython\core\ultratb.py:786, in VerboseTB.format_exception_as_a_whole(self, etype, evalue, etb, number_of_lines_of_context, tb_offset)
    784 assert isinstance(tb_offset, int)
    785 head = self.prepare_header(etype, self.long_header)
--> 786 records = self.get_records(etb, number_of_lines_of_context, tb_offset)
    788 frames = []
    789 skipped = 0

File ~\anaconda3\envs\pats\lib\site-packages\IPython\core\ultratb.py:840, in VerboseTB.get_records(self, etb, number_of_lines_of_context, tb_offset)
    834 options = stack_data.Options(
    835     before=before,
    836     after=after,
    837     pygments_formatter=formatter,
    838 )
    839 assert etb is not None
--> 840 return list(stack_data.FrameInfo.stack_data(etb, options=options))[tb_offset:]

File ~\anaconda3\envs\pats\lib\site-packages\stack_data\core.py:546, in FrameInfo.stack_data(cls, frame_or_tb, options, collapse_repeated_frames)
    530 @classmethod
    531 def stack_data(
    532         cls,
   (...)
    536         collapse_repeated_frames: bool = True
    537 ) -> Iterator[Union['FrameInfo', RepeatedFrames]]:
    538     """
    539     An iterator of FrameInfo and RepeatedFrames objects representing
    540     a full traceback or stack. Similar consecutive frames are collapsed into RepeatedFrames
   (...)
    544     and optionally an Options object to configure.
    545     """
--> 546     stack = list(iter_stack(frame_or_tb))
    548     # Reverse the stack from a frame so that it's in the same order
    549     # as the order from a traceback, which is the order of a printed
    550     # traceback when read top to bottom (most recent call last)
    551     if is_frame(frame_or_tb):

File ~\anaconda3\envs\pats\lib\site-packages\stack_data\utils.py:98, in iter_stack(frame_or_tb)
     96 while frame_or_tb:
     97     yield frame_or_tb
---> 98     if is_frame(frame_or_tb):
     99         frame_or_tb = frame_or_tb.f_back
    100     else:

File ~\anaconda3\envs\pats\lib\site-packages\stack_data\utils.py:91, in is_frame(frame_or_tb)
     90 def is_frame(frame_or_tb: Union[FrameType, TracebackType]) -> bool:
---> 91     assert_(isinstance(frame_or_tb, (types.FrameType, types.TracebackType)))
     92     return isinstance(frame_or_tb, (types.FrameType,))

File ~\anaconda3\envs\pats\lib\site-packages\stack_data\utils.py:172, in assert_(condition, error)
    170 if isinstance(error, str):
    171     error = AssertionError(error)
--> 172 raise error

AssertionError:

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions