diff --git a/demo/notebooks/inter_annotator_agreement.ipynb b/demo/notebooks/inter_annotator_agreement.ipynb index c8404e4..f4538f9 100644 --- a/demo/notebooks/inter_annotator_agreement.ipynb +++ b/demo/notebooks/inter_annotator_agreement.ipynb @@ -219,10 +219,8 @@ "metadata": {}, "cell_type": "markdown", "source": [ - "As the confusion matrix shows for the calculation of Scott's pi, only the annotations from annotation collection 1\n", - "with the tag 'process_event' have been taken into account.\n", - "From annotation collection 2 there are still two annotations considered, with the tags 'stative_event' and 'change_of_state' respectively.\n", - "But we can filter both annotation collections, too:" + "As the confusion matrix shows for the calculation of Scott's Pi, only the annotations with the tag 'process_event' have been taken into account.\n", + "If we want to filter only the first collection and compare it with a second, unfiltered collection, we can set the parameter `filter_both_ac` to `False`:" ] }, { @@ -233,7 +231,7 @@ " ac1_name_or_inst='ac_1',\n", " ac2_name_or_inst='ac_2',\n", " tag_filter=['process_event'],\n", - " filter_both_ac=True\n", + " filter_both_ac=False\n", ")" ], "outputs": [], diff --git a/gitma/_metrics.py b/gitma/_metrics.py index 037481e..646f0ec 100644 --- a/gitma/_metrics.py +++ b/gitma/_metrics.py @@ -22,7 +22,7 @@ def filter_ac_by_tag( ac1 (AnnotationCollection): First annotation collection. ac2 (AnnotationCollection): Second annotation collection. tag_filter (list, optional): The list of tags to be included. Defaults to None. - filter_both_ac (bool, optional): If `True` both collections get filtered . Defaults to True. + filter_both_ac (bool, optional): If `True` both collections get filtered. Defaults to True. Returns: Tuple[List[Annotation]]: Two filtered list of annotations. 
@@ -231,7 +231,7 @@ def get_annotation_pairs( ac1: AnnotationCollection, ac2: AnnotationCollection, tag_filter: list = None, - filter_both_ac: bool = False, + filter_both_ac: bool = True, property_filter: str = None, verbose: bool = True) -> List[Union[Tuple[Annotation, EmptyAnnotation], Tuple[Annotation, Annotation]]]: """ @@ -245,7 +245,8 @@ def get_annotation_pairs( ac1 (AnnotationCollection): First annotation collection. ac2 (AnnotationCollection): Second annotation collection. tag_filter (list, optional): The list of tags to be included. Defaults to `None` (no filter / all tags included). - filter_both_ac (bool, optional): If `True` the `tag_filter` is applied to both collections. Defaults to `False`. + filter_both_ac (bool, optional): If `True`, the `tag_filter` is applied to both collections; if `False`, it is applied\ only to the first collection. Defaults to `True`. property_filter (str, optional): If not `None`, only annotations with this property are included. Defaults to\ `None` (no filter / all annotations included). verbose (bool, optional): Whether to print results to stdout. Defaults to `True`. diff --git a/gitma/project.py b/gitma/project.py index 92f6e66..f12f177 100644 --- a/gitma/project.py +++ b/gitma/project.py @@ -726,7 +726,7 @@ def get_iaa( ac1_name_or_inst: Union[str, AnnotationCollection], ac2_name_or_inst: Union[str, AnnotationCollection], tag_filter: list = None, - filter_both_ac: bool = False, + filter_both_ac: bool = True, level: str = 'tag', include_empty_annotations: bool = True, distance: str = 'binary', @@ -744,8 +744,8 @@ def get_iaa( ac2_name_or_inst (str): The name or instance of the second annotation collection, whose annotations will be searched for\ matches to those in the first. tag_filter (list, optional): Which tags should be included. Defaults to `None` (all tags). - filter_both_ac (bool, optional): Whether the tag filter should be applied to both annotation collections. 
Defaults to `False`\ - (only applied to the first collection). + filter_both_ac (bool, optional): Whether the tag filter should be applied to both annotation collections. Defaults to `True`\ + (applies to both collections). level (str, optional): Whether the annotations' tags or a specified property (prefixed with 'prop:') should be compared. Defaults\ to 'tag'. include_empty_annotations (bool, optional): If `False`, only annotations with a matching annotation in the second collection are\ @@ -894,7 +894,7 @@ def calculate_scotts_pi( ac1_name_or_inst: Union[str, AnnotationCollection], ac2_name_or_inst: Union[str, AnnotationCollection], tag_filter: list = None, - filter_both_ac: bool = False, + filter_both_ac: bool = True, level: str = 'tag', include_empty_annotations: bool = True, distance: str = 'binary', @@ -910,8 +910,8 @@ def calculate_scotts_pi( ac2_name_or_inst (str): The name or instance of the second annotation collection, whose annotations will be searched for\ matches to those in the first. tag_filter (list, optional): Which tags should be included. Defaults to `None` (all tags). - filter_both_ac (bool, optional): Whether the tag filter should be applied to both annotation collections. Defaults to `False`\ - (only applied to the first collection). + filter_both_ac (bool, optional): Whether the tag filter should be applied to both annotation collections. Defaults to `True`\ + (applies to both collections). level (str, optional): Whether the annotations' tags or a specified property (prefixed with 'prop:') should be compared. Defaults\ to 'tag'. 
include_empty_annotations (bool, optional): If `False`, only annotations with a matching annotation in the second collection are\ @@ -936,7 +936,7 @@ def calculate_cohens_kappa( ac1_name_or_inst: Union[str, AnnotationCollection], ac2_name_or_inst: Union[str, AnnotationCollection], tag_filter: list = None, - filter_both_ac: bool = False, + filter_both_ac: bool = True, level: str = 'tag', include_empty_annotations: bool = True, distance: str = 'binary', @@ -952,8 +952,8 @@ def calculate_cohens_kappa( ac2_name_or_inst (str): The name or instance of the second annotation collection, whose annotations will be searched for\ matches to those in the first. tag_filter (list, optional): Which tags should be included. Defaults to `None` (all tags). - filter_both_ac (bool, optional): Whether the tag filter should be applied to both annotation collections. Defaults to `False`\ - (only applied to the first collection). + filter_both_ac (bool, optional): Whether the tag filter should be applied to both annotation collections. Defaults to `True`\ + (applies to both collections). level (str, optional): Whether the annotations' tags or a specified property (prefixed with 'prop:') should be compared. Defaults\ to 'tag'. include_empty_annotations (bool, optional): If `False`, only annotations with a matching annotation in the second collection are\