From e21123df90aded96e128ef674e28575d47271665 Mon Sep 17 00:00:00 2001 From: ypislon Date: Wed, 10 Jun 2026 09:39:13 +0200 Subject: [PATCH 1/3] update default for the filter_both_ac parameter to be True in IAA score calculations --- gitma/project.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/gitma/project.py b/gitma/project.py index 4a584f9..a5f091e 100644 --- a/gitma/project.py +++ b/gitma/project.py @@ -726,7 +726,7 @@ def get_iaa( ac1_name_or_inst: Union[str, AnnotationCollection], ac2_name_or_inst: Union[str, AnnotationCollection], tag_filter: list = None, - filter_both_ac: bool = False, + filter_both_ac: bool = True, level: str = 'tag', include_empty_annotations: bool = True, distance: str = 'binary', @@ -744,8 +744,8 @@ def get_iaa( ac2_name_or_inst (str): The name or instance of the second annotation collection, whose annotations will be searched for\ matches to those in the first. tag_filter (list, optional): Which tags should be included. Defaults to `None` (all tags). - filter_both_ac (bool, optional): Whether the tag filter should be applied to both annotation collections. Defaults to `False`\ - (only applied to the first collection). + filter_both_ac (bool, optional): Whether the tag filter should be applied to both annotation collections. Defaults to `True`\ + (applies to both collections). level (str, optional): Whether the annotations' tags or a specified property (prefixed with 'prop:') should be compared. Defaults\ to 'tag'. 
include_empty_annotations (bool, optional): If `False`, only annotations with a matching annotation in the second collection are\ @@ -900,7 +900,7 @@ def calculate_scotts_pi( ac1_name_or_inst: Union[str, AnnotationCollection], ac2_name_or_inst: Union[str, AnnotationCollection], tag_filter: list = None, - filter_both_ac: bool = False, + filter_both_ac: bool = True, level: str = 'tag', include_empty_annotations: bool = True, distance: str = 'binary', @@ -916,8 +916,8 @@ def calculate_scotts_pi( ac2_name_or_inst (str): The name or instance of the second annotation collection, whose annotations will be searched for\ matches to those in the first. tag_filter (list, optional): Which tags should be included. Defaults to `None` (all tags). - filter_both_ac (bool, optional): Whether the tag filter should be applied to both annotation collections. Defaults to `False`\ - (only applied to the first collection). + filter_both_ac (bool, optional): Whether the tag filter should be applied to both annotation collections. Defaults to `True`\ + (applies to both collections). level (str, optional): Whether the annotations' tags or a specified property (prefixed with 'prop:') should be compared. Defaults\ to 'tag'. include_empty_annotations (bool, optional): If `False`, only annotations with a matching annotation in the second collection are\ @@ -942,7 +942,7 @@ def calculate_cohens_kappa( ac1_name_or_inst: Union[str, AnnotationCollection], ac2_name_or_inst: Union[str, AnnotationCollection], tag_filter: list = None, - filter_both_ac: bool = False, + filter_both_ac: bool = True, level: str = 'tag', include_empty_annotations: bool = True, distance: str = 'binary', @@ -958,8 +958,8 @@ def calculate_cohens_kappa( ac2_name_or_inst (str): The name or instance of the second annotation collection, whose annotations will be searched for\ matches to those in the first. tag_filter (list, optional): Which tags should be included. Defaults to `None` (all tags). 
- filter_both_ac (bool, optional): Whether the tag filter should be applied to both annotation collections. Defaults to `False`\ - (only applied to the first collection). + filter_both_ac (bool, optional): Whether the tag filter should be applied to both annotation collections. Defaults to `True`\ + (applies to both collections). level (str, optional): Whether the annotations' tags or a specified property (prefixed with 'prop:') should be compared. Defaults\ to 'tag'. include_empty_annotations (bool, optional): If `False`, only annotations with a matching annotation in the second collection are\ From 0a11de34478544c9f15c25ff467bc96427b09a57 Mon Sep 17 00:00:00 2001 From: ypislon Date: Thu, 18 Jun 2026 15:18:58 +0200 Subject: [PATCH 2/3] update demo notebook to show default usage of filter_both_ac parameter, change default in get_annotation_pairs --- demo/notebooks/inter_annotator_agreement.ipynb | 8 +++----- gitma/_metrics.py | 11 ++++++----- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/demo/notebooks/inter_annotator_agreement.ipynb b/demo/notebooks/inter_annotator_agreement.ipynb index dd91d15..4b01b4c 100644 --- a/demo/notebooks/inter_annotator_agreement.ipynb +++ b/demo/notebooks/inter_annotator_agreement.ipynb @@ -219,10 +219,8 @@ "metadata": {}, "cell_type": "markdown", "source": [ - "As the confusion matrix shows for the calculation of Scott's Pi, only the annotations from annotation collection 1\n", - "with the tag 'process_event' have been taken into account.\n", - "From annotation collection 2 there are still two annotations considered, with the tags 'stative_event' and 'change_of_state' respectively.\n", - "But we can filter both annotation collections, too:" + "As the confusion matrix shows for the calculation of Scott's Pi, only the annotations with the tag 'process_event' have been taken into account.\n", + "If we want to filter only the first collection and compare it with a second, unfiltered collection, we can use the 
parameter `filter_both_ac`:" ] }, { @@ -233,7 +231,7 @@ " ac1_name_or_inst='ac_1',\n", " ac2_name_or_inst='ac_2',\n", " tag_filter=['process_event'],\n", - " filter_both_ac=True\n", + " filter_both_ac=False\n", ")" ], "outputs": [], diff --git a/gitma/_metrics.py b/gitma/_metrics.py index 026d81d..0b9f7cf 100644 --- a/gitma/_metrics.py +++ b/gitma/_metrics.py @@ -19,7 +19,7 @@ def filter_ac_by_tag( ac1 (AnnotationCollection): First annotation collection. ac2 (AnnotationCollection): Second annotation collection. tag_filter (list, optional): The list of tags to be included. Defaults to None. - filter_both_ac (bool, optional): If `True` both collections get filtered . Defaults to True. + filter_both_ac (bool, optional): If `True` both collections get filtered. Defaults to True. Returns: Tuple[List[Annotation]]: Two filtered list of annotations. @@ -195,9 +195,9 @@ def get_annotation_pairs( ac1: AnnotationCollection, ac2: AnnotationCollection, tag_filter: list = None, - filter_both_ac: bool = False, + filter_both_ac: bool = True, property_filter: str = None, - verbose: bool = True) -> List[Union[Tuple[Annotation, EmptyAnnotation], Tuple[Annotation, Annotation]]]: + verbose: bool = False) -> List[Union[Tuple[Annotation, EmptyAnnotation], Tuple[Annotation, Annotation]]]: """ For each annotation in `ac1`, finds the best matching annotation (maximum overlap) in `ac2`. Where there is no matching annotation in `ac2`, an `EmptyAnnotation` is substituted. Returns a list of tuples of the matched pairs. @@ -209,10 +209,11 @@ def get_annotation_pairs( ac1 (AnnotationCollection): First annotation collection. ac2 (AnnotationCollection): Second annotation collection. tag_filter (list, optional): The list of tags to be included. Defaults to `None` (no filter / all tags included). - filter_both_ac (bool, optional): If `True` the `tag_filter` is applied to both collections. Defaults to `False`. 
+ filter_both_ac (bool, optional): If `True`, the `tag_filter` is applied to both collections; if `False`, the `tag_filter` is applied\ + only to the first collection. Defaults to `True`. property_filter (str, optional): If not `None`, only annotations with this property are included. Defaults to\ `None` (no filter / all annotations included). - verbose (bool, optional): Whether to print results to stdout. Defaults to `True`. + verbose (bool, optional): Whether to print results to stdout. Defaults to `False`. Returns: List[Union[Tuple[Annotation, EmptyAnnotation], Tuple[Annotation, Annotation]]]: List of paired annotation tuples. From 5e3a889aaa7d0041144b484f46e7078fafd919fc Mon Sep 17 00:00:00 2001 From: ypislon Date: Thu, 18 Jun 2026 15:20:46 +0200 Subject: [PATCH 3/3] fix wrong change in verbose default parameter --- gitma/_metrics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gitma/_metrics.py b/gitma/_metrics.py index 0b9f7cf..72ac950 100644 --- a/gitma/_metrics.py +++ b/gitma/_metrics.py @@ -197,7 +197,7 @@ def get_annotation_pairs( tag_filter: list = None, filter_both_ac: bool = True, property_filter: str = None, - verbose: bool = False) -> List[Union[Tuple[Annotation, EmptyAnnotation], Tuple[Annotation, Annotation]]]: + verbose: bool = True) -> List[Union[Tuple[Annotation, EmptyAnnotation], Tuple[Annotation, Annotation]]]: """ For each annotation in `ac1`, finds the best matching annotation (maximum overlap) in `ac2`. Where there is no matching annotation in `ac2`, an `EmptyAnnotation` is substituted. Returns a list of tuples of the matched pairs. @@ -213,7 +213,7 @@ def get_annotation_pairs( only to the first collection. Defaults to `True`. property_filter (str, optional): If not `None`, only annotations with this property are included. Defaults to\ `None` (no filter / all annotations included). - verbose (bool, optional): Whether to print results to stdout. Defaults to `False`. 
+ verbose (bool, optional): Whether to print results to stdout. Defaults to `True`. Returns: List[Union[Tuple[Annotation, EmptyAnnotation], Tuple[Annotation, Annotation]]]: List of paired annotation tuples.