Hi @timy90022 ,
Thanks for sharing your excellent work.
While trying to understand the implementation of Eq. (6), I found a small difference between the code and Eq. (6) in the paper.
def exclude_func_and_ratio(self):
    """Compute the instance-level weight E(r) and the exclude ratio mu_{f_j}.

    Reads ``self.gt_classes``, ``self.freq_info``, ``self.lambda_``,
    ``self.n_i`` and ``self.n_c``. The background label is taken to be the
    index ``self.n_c``.

    Returns:
        tuple: ``(weight, exclude_ratio)`` where

        * ``weight`` is a float tensor expanded to ``(n_i, n_c)``; a row is
          all ones when its ground-truth class is not the background index
          and all zeros otherwise.
        * ``exclude_ratio`` is a scalar tensor: the mean, over the
          non-background rows, of the indicator
          ``freq_info[gt_class] < lambda_``.
    """
    # instance-level weight
    bg_ind = self.n_c
    weight = (self.gt_classes != bg_ind)
    gt_classes = self.gt_classes[weight]
    # exclude_ratio = \mu_{f_j}
    # NOTE: when no row is foreground this is the mean of an empty tensor,
    # which torch evaluates to NaN.
    exclude_ratio = torch.mean((self.freq_info[gt_classes] < self.lambda_).float())
    # weight = E(r)
    weight = weight.float().view(self.n_i, 1).expand(self.n_i, self.n_c)
    return weight, exclude_ratio
def threshold_func(self):
    """Build the per-element weight that multiplies ``(1 - y_j)`` in the loss.

    Reads ``self.pred_class_logits``, ``self.freq_info``, ``self.lambda_``,
    ``self.n_i`` and ``self.n_c``, and calls
    ``self.exclude_func_and_ratio()`` for E(r) and mu_{f_j}.

    Returns:
        An ``(n_i, n_c)`` tensor equal to
        ``(random + E(r)) * T_lambda(f_j)``, where ``random`` is 1 wherever
        ``(1 - E(r)) * rand > mu_{f_j}`` and 0 elsewhere. The result is
        stochastic because ``rand`` is drawn with ``torch.rand_like``.
    """
    # class-level weight
    weight = self.pred_class_logits.new_zeros(self.n_c)
    # weight = T_{lambda}(f_j)
    weight[self.freq_info < self.lambda_] = 1
    weight = weight.view(1, self.n_c).expand(self.n_i, self.n_c)
    # fg = E(r)
    # ratio = \mu_{f_j}
    fg, ratio = self.exclude_func_and_ratio()
    # bg = 1 - E(r)
    bg = 1 - fg
    # random = (1-E(r)) * rand
    # On rows with E(r) = 1 this product is exactly 0, so the comparison
    # below is False for any non-negative ratio and those rows keep
    # weight = T_{\lambda}(f_j). Only rows with E(r) = 0 are sampled.
    random = torch.rand_like(bg) * bg
    random = torch.where(random > ratio, torch.ones_like(random), torch.zeros_like(random))
    # weight = { [ (1-E(r)) * rand > \mu_{f_j} ? 1 : 0 ] + E(r) } * T_{\lambda}(f_j)
    weight = (random + fg) * weight
    return weight
def drop_loss(self):
    """Compute the classification loss with per-element drop weights.

    Reads ``self.pred_class_logits`` and ``self.gt_classes``; calls
    ``self.threshold_func()``. As side effects it stores ``self.n_i`` and
    ``self.n_c`` (the two sizes of ``pred_class_logits``), ``self.drop_w``
    and ``self.cls_loss``.

    Returns:
        A scalar tensor: the sum of the element-wise binary cross-entropy
        (with logits) multiplied by ``drop_w``, divided by ``n_i``.
    """
    self.n_i, self.n_c = self.pred_class_logits.size()

    def expand_label(pred, gt_classes):
        # One-hot over n_c + 1 columns so the background index n_c has a
        # slot, then drop that last column: background rows become all-zero.
        target = pred.new_zeros(self.n_i, self.n_c + 1)
        target[torch.arange(self.n_i), gt_classes] = 1
        return target[:, :self.n_c]

    target = expand_label(self.pred_class_logits, self.gt_classes)
    # drop_w = 1 - { [ (1-E(r)) * rand > \mu_{f_j} ? 1 : 0 ] + E(r) } * T_{\lambda}(f_j) * (1-y_j)
    # When E(r) = 1, drop_w = 1 - T_{\lambda}(f_j) * (1-y_j)
    # When E(r) = 0, drop_w = 1 - { [ (1-E(r)) * rand > \mu_{f_j} ? 1 : 0 ] } * T_{\lambda}(f_j) * (1-y_j) ????
    # when rand > \mu_{f_j}, drop_w = 1 - T_{\lambda}(f_j) * (1-y_j)
    # when rand <= \mu_{f_j}, drop_w = 1
    self.drop_w = 1 - self.threshold_func() * (1 - target)
    self.cls_loss = F.binary_cross_entropy_with_logits(
        self.pred_class_logits, target, reduction='none'
    )
    return torch.sum(self.cls_loss * self.drop_w) / self.n_i
# When E(r) = 0, drop_w = 1 - [ rand > \mu_{f_j} ? 1 : 0 ] * T_{\lambda}(f_j) * (1-y_j)
# when rand > \mu_{f_j}, drop_w = 1 - T_{\lambda}(f_j) * (1-y_j)???????
# when rand <= \mu_{f_j}, drop_w = 1
The implementation appears to be inconsistent with the "otherwise" case in Eq. (6):
drop_w = 1 - T_{\lambda}(f_j) * (1-y_j)
Could you please explain that?
Or am I misunderstanding the code?
Hi @timy90022 ,
Thanks for sharing your excellent work.
While trying to understand the implementation of Eq. (6), I found a small difference between the code and Eq. (6) in the paper.
The implementation appears to be inconsistent with the "otherwise" case in Eq. (6).
Could you please explain that?
Or am I misunderstanding the code?