Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

## [Unreleased]
### Added
- GIF params in `params.py`
- `clear_gif_metadata` function in `functions.py`
- JPEG params in `params.py`
- `clear_jpeg_metadata` function in `functions.py`
- `clear_png_metadata` function in `functions.py`
Expand Down
98 changes: 97 additions & 1 deletion dmeta/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@
from .params import CORE_XML_MAP, APP_XML_MAP, OVERVIEW, DMETA_VERSION, \
UPDATE_COMMAND_WITH_NO_CONFIG_FILE_ERROR, SUPPORTED_MICROSOFT_FORMATS, \
JPEG_MARKER_PREFIX, JPEG_SOI, JPEG_EOI, JPEG_SOS, JPEG_COM, \
JPEG_APP_FIRST, JPEG_APP_LAST, JPEG_STANDALONE_MARKERS
JPEG_APP_FIRST, JPEG_APP_LAST, JPEG_STANDALONE_MARKERS, \
GIF_TRAILER, GIF_EXTENSION_INTRODUCER, GIF_IMAGE_DESCRIPTOR, \
GIF_EXT_GRAPHIC_CONTROL, GIF_EXT_APPLICATION, \
GIF_APP_EXT_NETSCAPE_IDENTIFIER


def overwrite_metadata(
Expand Down Expand Up @@ -334,6 +337,99 @@ def clear_jpeg_metadata(jpeg_file_name, in_place=False, verbose=False):
return output_path


def _skip_gif_sub_blocks(data, start):
    """
    Return the offset just past a chain of GIF data sub-blocks.

    Each sub-block is one size byte followed by that many data bytes; a size
    byte of zero terminates the chain.

    :param data: raw GIF bytes
    :type data: bytes
    :param start: offset of the first sub-block size byte
    :type start: int
    :return: offset following the terminator (may exceed len(data) if the chain is truncated)
    """
    pos = start
    end = len(data)
    while pos < end:
        size = data[pos]
        pos += 1
        if size == 0:
            break
        pos += size
    return pos


def _strip_gif_metadata(data):
    """
    Build a copy of a GIF byte stream without metadata-bearing extensions.

    Image data, Graphic Control extensions and the NETSCAPE2.0 application
    extension are copied verbatim; every other extension is skipped. Copying
    stops at the trailer; if the stream ends or an unrecognised block byte is
    met first, the output simply ends at that point.

    :param data: raw GIF bytes, already known to start with a GIF signature
    :type data: bytes
    :return: cleaned GIF bytes, or None if data is too short to hold the header
    """
    # 6-byte signature + 7-byte Logical Screen Descriptor are mandatory; the
    # packed flags byte read below lives at offset 10.
    if len(data) < 13:
        return None

    n = len(data)
    out = bytearray(data[:13])
    packed = data[10]
    i = 13
    if packed & 0x80:
        # Global Color Table present: 3 bytes per entry, 2^(size+1) entries.
        gct = 3 * (1 << ((packed & 0x07) + 1))
        out += data[i:i + gct]
        i += gct

    # Walk data stream per GIF89a; drop metadata-bearing extensions only.
    while i < n:
        b = data[i]
        if b == GIF_TRAILER:
            out.append(GIF_TRAILER)
            break
        if b == GIF_EXTENSION_INTRODUCER and i + 1 < n:
            label = data[i + 1]
            if label == GIF_EXT_GRAPHIC_CONTROL:
                # Follow the declared sub-block sizes instead of assuming the
                # usual 8-byte layout, so an unusual block size cannot
                # desynchronise the parser.
                j = _skip_gif_sub_blocks(data, i + 2)
                out += data[i:j]
            elif label == GIF_EXT_APPLICATION and i + 2 < n:
                ident_len = data[i + 2]
                ident = data[i + 3:i + 3 + ident_len]
                j = _skip_gif_sub_blocks(data, i + 3 + ident_len)
                if ident == GIF_APP_EXT_NETSCAPE_IDENTIFIER:
                    out += data[i:j]
            else:
                # Comment, Plain Text, and any other extension carry metadata: drop.
                j = _skip_gif_sub_blocks(data, i + 2)
            i = j
        elif b == GIF_IMAGE_DESCRIPTOR and i + 10 <= n:
            packed2 = data[i + 9]
            j = i + 10
            if packed2 & 0x80:
                # Local Color Table follows the 10-byte descriptor.
                j += 3 * (1 << ((packed2 & 0x07) + 1))
            j += 1  # LZW minimum code size
            j = _skip_gif_sub_blocks(data, j)
            out += data[i:j]
            i = j
        else:
            # Unknown block type or truncated descriptor: stop copying.
            break

    return bytes(out)


def clear_gif_metadata(gif_file_name, in_place=False, verbose=False):
    """
    Remove all metadata from a GIF file without re-encoding pixel data.

    Preserves per-frame Graphic Control and NETSCAPE2.0 loop blocks; removes
    Comment, Plain Text, and all other Application Extensions.

    Nothing is written and None is returned when the path does not exist, does
    not end with ".gif", does not start with a GIF signature, or is too short
    to contain the header and Logical Screen Descriptor.

    :param gif_file_name: path to original GIF file
    :type gif_file_name: str
    :param in_place: if True, overwrite the original file with cleaned version
    :type in_place: bool
    :param verbose: if True, print detailed output
    :type verbose: bool
    :return: path to cleaned GIF file
    """
    if not os.path.exists(gif_file_name) or not gif_file_name.lower().endswith(".gif"):
        return None

    with open(gif_file_name, "rb") as f:
        data = f.read()
    if not data.startswith((b"GIF87a", b"GIF89a")):
        return None

    cleaned = _strip_gif_metadata(data)
    if cleaned is None:
        return None

    if in_place:
        output_path = gif_file_name
    else:
        base, ext = os.path.splitext(gif_file_name)
        output_path = base + "_cleaned" + ext

    with open(output_path, "wb") as f:
        f.write(cleaned)

    if verbose:
        action = "overwritten" if in_place else f"saved to {output_path}"
        print(f"Metadata cleared for: {gif_file_name} ({action})")

    return output_path


def extract_metadata(microsoft_file_name):
"""
Extract all the editable metadata from the given Microsoft file.
Expand Down
8 changes: 8 additions & 0 deletions dmeta/params.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,14 @@
JPEG_COM = 0xFE # Comment
JPEG_APP_FIRST, JPEG_APP_LAST = 0xE0, 0xEF # APP0..APP15
JPEG_STANDALONE_MARKERS = frozenset({0x00, 0x01, JPEG_SOI, JPEG_EOI} | set(range(0xD0, 0xD8)))

# GIF block markers per GIF89a specification.
# Used by clear_gif_metadata to walk the data stream block by block.
GIF_TRAILER = 0x3B  # ends the data stream; copying stops here
GIF_EXTENSION_INTRODUCER = 0x21  # first byte of every extension block
GIF_IMAGE_DESCRIPTOR = 0x2C  # first byte of an image (frame) descriptor
# Extension labels (the byte following GIF_EXTENSION_INTRODUCER).
GIF_EXT_GRAPHIC_CONTROL = 0xF9 # per-frame timing (kept)
GIF_EXT_APPLICATION = 0xFF  # application extension; kept only for the identifier below
GIF_APP_EXT_NETSCAPE_IDENTIFIER = b"NETSCAPE2.0" # animation loop (kept)
INVALID_CONFIG_FILE_NAME_ERROR = "Config file name is not a string."
CONFIG_FILE_DOES_NOT_EXIST_ERROR = "Given config file doesn't exist."
UPDATE_COMMAND_WITH_NO_CONFIG_FILE_ERROR = "No config file provided. Set the .json config file with --config command."
Expand Down
Binary file added tests/test.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
17 changes: 17 additions & 0 deletions tests/test_dmeta.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from dmeta.functions import update, update_all, clear, clear_all
from dmeta.functions import clear_jpeg_metadata
from dmeta.functions import clear_png_metadata
from dmeta.functions import clear_gif_metadata
from dmeta.functions import extract_metadata


Expand Down Expand Up @@ -93,3 +94,19 @@ def test12():
clear_jpeg_metadata(jpeg_file, in_place=True, verbose=False)
with Image.open(jpeg_file) as img:
assert img.info == {}


def test13():
    # GIF cleaning into a separate output file must leave no comment metadata
    source_gif = os.path.join(TESTS_DIR_PATH, "test.gif")
    cleaned_gif = clear_gif_metadata(source_gif, in_place=False, verbose=False)
    with Image.open(cleaned_gif) as cleaned_image:
        image_info = cleaned_image.info
        assert "comment" not in image_info


def test14():
    # GIF cleaning that overwrites the original must leave no comment metadata
    target_gif = os.path.join(TESTS_DIR_PATH, "test.gif")
    clear_gif_metadata(target_gif, in_place=True, verbose=False)
    with Image.open(target_gif) as reopened_image:
        image_info = reopened_image.info
        assert "comment" not in image_info
Loading