# GIF block markers per GIF89a specification.
GIF_TRAILER = 0x3B
GIF_EXTENSION_INTRODUCER = 0x21
GIF_IMAGE_DESCRIPTOR = 0x2C
GIF_EXT_GRAPHIC_CONTROL = 0xF9  # per-frame timing (kept)
GIF_EXT_APPLICATION = 0xFF
GIF_APP_EXT_NETSCAPE_IDENTIFIER = b"NETSCAPE2.0"  # animation loop (kept)


def _skip_gif_sub_blocks(data, start):
    """
    Return the offset just past a chain of GIF data sub-blocks.

    Each sub-block is a size byte followed by that many payload bytes; a size
    byte of zero terminates the chain. The result is clamped to ``len(data)``
    so truncated input can never yield an out-of-range offset.

    :param data: complete GIF byte stream
    :type data: bytes
    :param start: offset of the first sub-block size byte
    :type start: int
    :return: offset of the first byte after the block terminator
    """
    end = len(data)
    pos = start
    while pos < end:
        size = data[pos]
        pos += 1
        if size == 0:
            break
        pos += size
    return min(pos, end)


def _strip_gif_metadata(data):
    """
    Return a copy of a GIF byte stream without metadata-bearing blocks.

    Image data, Graphic Control Extensions and the NETSCAPE2.0 looping
    extension are copied verbatim; every other extension and anything that
    follows the trailer is dropped. Parsing stops at the first byte that is
    not a recognised block introducer, and the result always ends with
    exactly one trailer byte.

    :param data: GIF byte stream, at least 13 bytes long
    :type data: bytes
    :return: cleaned GIF byte stream
    """
    end = len(data)

    # 6-byte signature + 7-byte Logical Screen Descriptor.
    pos = 13
    out = bytearray(data[:pos])
    screen_flags = data[10]
    if screen_flags & 0x80:
        # Global Color Table: 3 bytes per entry, 2^(N+1) entries.
        table_size = 3 * (1 << ((screen_flags & 0x07) + 1))
        out += data[pos:pos + table_size]
        pos += table_size

    while pos < end:
        introducer = data[pos]
        if introducer == GIF_TRAILER:
            break
        if introducer == GIF_EXTENSION_INTRODUCER and pos + 1 < end:
            label = data[pos + 1]
            if label == GIF_EXT_GRAPHIC_CONTROL:
                # Walk the sub-block chain instead of assuming a fixed 8-byte
                # layout, exactly as for every other extension type.
                block_end = _skip_gif_sub_blocks(data, pos + 2)
                out += data[pos:block_end]
            elif label == GIF_EXT_APPLICATION and pos + 2 < end:
                ident_end = pos + 3 + data[pos + 2]
                block_end = _skip_gif_sub_blocks(data, ident_end)
                if data[pos + 3:ident_end] == GIF_APP_EXT_NETSCAPE_IDENTIFIER:
                    out += data[pos:block_end]
            else:
                # Comment, Plain Text, and any other extension carry metadata: drop.
                block_end = _skip_gif_sub_blocks(data, pos + 2)
            pos = block_end
        elif introducer == GIF_IMAGE_DESCRIPTOR and pos + 10 <= end:
            image_flags = data[pos + 9]
            block_end = pos + 10
            if image_flags & 0x80:
                # Local Color Table uses the same size encoding as the global one.
                block_end += 3 * (1 << ((image_flags & 0x07) + 1))
            block_end += 1  # LZW minimum code size
            block_end = _skip_gif_sub_blocks(data, block_end)
            out += data[pos:block_end]
            pos = block_end
        else:
            break

    # Always terminate the stream, even if the input lacked a trailer or
    # parsing stopped early on an unrecognised byte.
    out.append(GIF_TRAILER)
    return bytes(out)


def clear_gif_metadata(gif_file_name, in_place=False, verbose=False):
    """
    Remove all metadata from a GIF file without re-encoding pixel data.

    Preserves per-frame Graphic Control and NETSCAPE2.0 loop blocks; removes
    Comment, Plain Text, and all other Application Extensions, as well as any
    bytes that follow the GIF trailer.

    :param gif_file_name: path to original GIF file
    :type gif_file_name: str
    :param in_place: if True, overwrite the original file with cleaned version
    :type in_place: bool
    :param verbose: if True, print detailed output
    :type verbose: bool
    :return: path to cleaned GIF file, or None if the path does not exist,
        does not end with ".gif", or does not hold a complete GIF header
    """
    if not os.path.exists(gif_file_name) or not gif_file_name.lower().endswith(".gif"):
        return None

    with open(gif_file_name, "rb") as f:
        data = f.read()
    # A valid stream needs the signature plus the full Logical Screen
    # Descriptor; anything shorter cannot be parsed safely.
    if len(data) < 13 or not data.startswith((b"GIF87a", b"GIF89a")):
        return None

    cleaned = _strip_gif_metadata(data)

    if in_place:
        output_path = gif_file_name
    else:
        base, ext = os.path.splitext(gif_file_name)
        output_path = base + "_cleaned" + ext

    with open(output_path, "wb") as f:
        f.write(cleaned)

    if verbose:
        action = "overwritten" if in_place else f"saved to {output_path}"
        print(f"Metadata cleared for: {gif_file_name} ({action})")

    return output_path
def _copy_gif_fixture(target_dir):
    """
    Copy the committed GIF fixture into ``target_dir``.

    Working on a copy keeps ``tests/test.gif`` untouched, so the fixture
    still carries its metadata on every subsequent test run.

    :param target_dir: existing directory that receives the copy
    :type target_dir: str
    :return: path to the copied GIF file
    """
    import shutil  # local import: only the GIF tests need it

    return shutil.copy(os.path.join(TESTS_DIR_PATH, "test.gif"), target_dir)


def test13():
    # clear the metadata of the .gif file [not inplace]
    import tempfile

    with tempfile.TemporaryDirectory() as tmp_dir:
        gif_file = _copy_gif_fixture(tmp_dir)
        output_path = clear_gif_metadata(gif_file, in_place=False, verbose=False)
        # A separate cleaned file must be produced next to the input.
        assert output_path is not None
        assert output_path != gif_file
        with Image.open(output_path) as img:
            assert "comment" not in img.info


def test14():
    # clear the metadata of the .gif file [inplace]
    import tempfile

    with tempfile.TemporaryDirectory() as tmp_dir:
        gif_file = _copy_gif_fixture(tmp_dir)
        output_path = clear_gif_metadata(gif_file, in_place=True, verbose=False)
        # In-place mode reports the path it overwrote.
        assert output_path == gif_file
        with Image.open(gif_file) as img:
            assert "comment" not in img.info