Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*.pyc
symhash.egg-info/
71 changes: 71 additions & 0 deletions bin/symfuzzy
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#!/usr/bin/env python

#
# Fuzzy hashing for Mach-O symbol table
# This program is inspired by impfuzzy (https://github.com/JPCERTCC/impfuzzy)
#
# Copyright (C) 2022 Minoru Kobayashi <unknownbit@gmail.com> (@unkn0wnbit)
#
# This software is released under the MIT License.
# https://opensource.org/licenses/MIT
#

import argparse
import sys

import ssdeep
from symhash import HashMode, create_sym_fuzzyhash


def main():
    """Command-line entry point for SymFuzzy.

    Computes the symbol-table fuzzy hash(es) of the Mach-O file given with
    ``--file``.  When ``--file2`` is also given, the hashes of both files are
    compared per architecture label with ``ssdeep.compare`` and the match
    values are printed; otherwise the hashes of ``--file`` are printed.

    Returns:
        None in every case, so ``sys.exit(main())`` exits with status 0
        unless ``sys.exit`` was called with an error message first.
    """
    parser = argparse.ArgumentParser(description='SymFuzzy: a program to calculate Fuzzy Hash from symbol table of Mach-O files.')
    parser.add_argument('-f', '--file', action='store', type=str,
                        help='Specify a Mach-O file to calculate Fuzzy Hash.', required=True)
    parser.add_argument('-f2', '--file2', action='store', type=str,
                        help='Specify a Mach-O file to be compared with Fuzzy Hash of "--file"')
    parser.add_argument('-m', '--mode', action='store', type=str, default='ALL',
                        help='Order of APIs for calculating the hash, which can be specified as '
                             '"SYMTAB", "SORT" or "ALL" (default: "ALL", i.e. both).')
    args = parser.parse_args()

    # The mode is matched case-insensitively against the supported names.
    modes = {
        'ALL': HashMode.ALL,
        'SYMTAB': HashMode.SYMTAB,
        'SORT': HashMode.SORT,
    }
    args.mode = args.mode.upper()
    if args.mode not in modes:
        sys.exit("Error: Unsupported mode = {}".format(args.mode))
    hash_mode = modes[args.mode]

    sym_fuzzyhash = create_sym_fuzzyhash(args.file, hash_mode=hash_mode)
    if not sym_fuzzyhash:
        return

    if not args.file2:
        # Single-file mode: just list every hash that was computed.
        for arch, fuzzyhash in sym_fuzzyhash.items():
            print("{}: {}".format(arch, fuzzyhash))
        return

    sym_fuzzyhash2 = create_sym_fuzzyhash(args.file2, hash_mode=hash_mode)
    if not sym_fuzzyhash2:
        return

    for arch, fuzzyhash in sym_fuzzyhash.items():
        # Only the lookup can raise KeyError; keep the try body that narrow.
        # pop() removes matched labels so that whatever is left afterwards
        # is exactly the set of labels present only in --file2.
        try:
            fuzzyhash2 = sym_fuzzyhash2.pop(arch)
        except KeyError:
            print("{} does not have an architecture binary for \"{}\"".format(args.file2, arch))
            continue

        match_value = ssdeep.compare(fuzzyhash, fuzzyhash2)
        print("Binary architecture: {}".format(arch))
        print("{}: {}".format(args.file, fuzzyhash))
        print("{}: {}".format(args.file2, fuzzyhash2))
        print("Match value: {}".format(match_value))
        print("-" * 50)

    # Labels that exist in --file2 but had no counterpart in --file.
    for arch in sym_fuzzyhash2:
        print("{} does not have an architecture binary for \"{}\"".format(args.file, arch))


if __name__ == "__main__":
    sys.exit(main())
18 changes: 16 additions & 2 deletions bin/symhash
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@
# creates a list and hashes those

import argparse
import sys

from future.utils import iteritems
from symhash import create_sym_hash
from symhash import HashMode, create_sym_hash


def main():
Expand All @@ -24,15 +25,28 @@ def main():
opt.add_argument(
'-f', '--file', help='The file to create a SymHash from', required=True
)
opt.add_argument('-m', '--mode', action='store', type=str, default='ALL',
help='Order of APIs for calculation hash, which can be specified "SYMTAB" or "SORT".')
opt.add_argument(
'-v', '--verbose', help='Verbose output', required=False,
action='store_true'
)

options = opt.parse_args()

options.mode = options.mode.upper()
if options.mode == 'ALL':
hash_mode = HashMode.ALL
elif options.mode == 'SYMTAB':
hash_mode = HashMode.SYMTAB
elif options.mode == 'SORT':
hash_mode = HashMode.SORT
else:
sys.exit("Error: Unsupported mode = {}".format(options.mode))

f_name = options.file

s = create_sym_hash(f_name)
s = create_sym_hash(f_name, hash_mode=hash_mode)

if not s:
return
Expand Down
6 changes: 4 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
filemagic==1.6
future==0.15.2
python-magic>=0.4.27
python-magic-bin>=0.4.14
future>=0.18.2
ssdeep>=3.4
12 changes: 6 additions & 6 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

setup(
name='symhash',
version='0.0.2',
version='0.0.3',
url='https://github.com/threatstream/symhash',
author='Aaron Shelmire',
author_email='aaron.shelmire@anomali.com',
Expand All @@ -16,16 +16,16 @@
'bin/symhash'
],
install_requires=[
'filemagic==1.6',
'future==0.15.2',
'python-magic>=0.4.27',
'python-magic-bin>=0.4.14',
'future>=0.18.2',
'ssdeep>=3.4',
],
description='Anomali Symhash',
classifiers=[
'License :: OSI Approved :: GNU General Public License v2 (GPLv2)',
'Operating System :: POSIX',
'Programming Language :: Other Scripting Engines',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3.6',
]
)
120 changes: 99 additions & 21 deletions symhash/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,35 @@
# symhash walks the symbol table (read: loaded API calls)
# creates a list and hashes those

import magic
import os
import sys
from enum import Flag, auto
from hashlib import md5

import magic
import ssdeep

from symhash.machoinfo import MachOEntity, MachOParser, MachOParserError


def create_sym_hash(filename=None, data=None):
# create the sym hash
class HashMode(Flag):
    """Bit flags selecting which symbol orderings are hashed.

    The members can be combined with ``|`` and tested with ``&``.
    """

    # Hash the import symbols in the order they were collected.
    SYMTAB = 1 << 0
    # Hash the import symbols after sorting them.
    SORT = 1 << 1
    # Produce both of the above.
    ALL = SYMTAB | SORT


def parse_macho(filename=None, data=None):
if filename:
with open(filename, 'rb') as f:
data = f.read()
if os.path.isfile(filename):
with open(filename, 'rb') as f:
data = f.read()
else:
sys.exit("Error: {} is not a file.".format(filename))

if not data:
return

with magic.Magic() as m:
filetype = m.id_buffer(data[0:1000])
filetype = magic.from_buffer(data[0:1024])

if 'Mach-O' not in filetype:
print("Data provided is not a valid Mach-O filetype")
Expand All @@ -42,26 +54,92 @@ def create_sym_hash(filename=None, data=None):
print("Error {}".format(e))
return

return macho_parser


def get_dylib_name_by_ordinal(dylib_list, library_ordinal, basename_only=False):
    """Return the dylib name that a 1-based library ordinal refers to.

    Args:
        dylib_list: Sequence of dylib names; ordinal ``n`` maps to
            ``dylib_list[n - 1]``.
        library_ordinal: Library ordinal taken from a symbol's ``n_desc``.
        basename_only: When true, return only the final path component.

    Returns:
        The dylib name (or its basename), or None when the ordinal does not
        identify an entry of ``dylib_list``: 0, the special values 254
        (DYNAMIC_LOOKUP_ORDINAL) and 255 (EXECUTABLE_ORDINAL), any other
        value outside 1..253, or an ordinal larger than the list.
    """
    if not 1 <= library_ordinal <= 253:
        return None

    index = library_ordinal - 1
    # A malformed or truncated binary can carry an ordinal that points past
    # the dylibs actually found; treat it as unresolvable instead of letting
    # an IndexError escape.
    if index >= len(dylib_list):
        return None

    dylib_name = dylib_list[index]
    if basename_only:
        return os.path.basename(dylib_name)
    return dylib_name


def get_dylib_list(entity):
    """Collect the dylib names of an entity's LC_LOAD_DYLIB load commands.

    Args:
        entity: Parsed Mach-O entity exposing ``cmdlist``, an iterable of
            load-command dicts with a ``'cmd'`` key and, for LC_LOAD_DYLIB
            commands, a bytes ``'dylib'`` value.

    Returns:
        List of decoded dylib names, in load-command order.
    """
    # NOTE(review): only LC_LOAD_DYLIB commands are collected here. If other
    # dylib-loading commands also consume library ordinals, the positions in
    # this list may not line up with the ordinals — confirm against the parser.
    return [
        load_cmd['dylib'].decode()
        for load_cmd in entity.cmdlist
        if load_cmd['cmd'] == MachOEntity.LC_LOAD_DYLIB
    ]


def get_import_symbol_list(entity, dylib_list):
    """Build the list of imported symbols, each qualified by its dylib name.

    Walks the symbols of every LC_SYMTAB load command of ``entity`` and keeps
    those that are non-stab, external and of type N_UNDF, and whose library
    ordinal (high byte of ``n_desc``) resolves to an entry of ``dylib_list``.

    Args:
        entity: Parsed Mach-O entity exposing ``cmdlist``.
        dylib_list: Dylib names as returned by ``get_dylib_list(entity)``.

    Returns:
        List of ``"<dylib name>.<symbol name>"`` strings in the order the
        symbols were encountered.
    """
    sym_list = []
    for cmd in entity.cmdlist:
        if cmd['cmd'] != MachOEntity.LC_SYMTAB:
            continue

        for sym in cmd['symbols']:
            if sym['is_stab']:
                continue
            if sym['external'] is not True:
                continue
            if sym['n_type'] != '0x00':  # 0x00 = N_UNDF
                continue

            # The library ordinal lives in the high byte of n_desc.
            library_ordinal = (sym['n_desc'] >> 8) & 0xff
            if library_ordinal == 0:
                continue

            dylib_name = get_dylib_name_by_ordinal(dylib_list, library_ordinal)
            if not dylib_name:
                continue

            # The fallback must be bytes: a str default has no .decode() on
            # Python 3 and would raise AttributeError for a nameless symbol.
            sym_name = sym.get('string', b'').decode()
            sym_list.append("{}.{}".format(dylib_name, sym_name))

    return sym_list


def create_sym_hash(filename=None, data=None, hash_mode=HashMode.ALL):
macho_parser = parse_macho(filename, data)
sym_dict = {}

for entity in macho_parser.entities:
if entity.magic_str != 'Universal':
dylib_list = get_dylib_list(entity)
sym_list = get_import_symbol_list(entity, dylib_list)

entity_string = "{} {} {}".format(entity.cpu_type_str,
entity.filetype_str,
entity.magic_str)
entity_string = "{} {} {}".format(entity.cpu_type_str, entity.filetype_str, entity.magic_str)
# Order of APIs in symbol table
if hash_mode & HashMode.SYMTAB:
symhash = md5(','.join(sym_list).encode()).hexdigest()
sym_dict[entity_string] = symhash

sym_list = []
# Sort APIs into alphabetical order
if hash_mode & HashMode.SORT:
symhash_sorted = md5(','.join(sorted(sym_list)).encode()).hexdigest()
entity_string += " (Sorted APIs)"
sym_dict[entity_string] = symhash_sorted

for cmd in entity.cmdlist:
if cmd['cmd'] == MachOEntity.LC_SYMTAB:
for sym in cmd['symbols']:
if not sym['is_stab']:
if sym['external'] is True:
if sym['n_type'] == '0x00':
sym_list.append(sym.get('string', '').decode())
return sym_dict

symhash = md5(','.join(sorted(sym_list)).encode()).hexdigest()
sym_dict[entity_string] = symhash

return sym_dict
def create_sym_fuzzyhash(filename=None, data=None, hash_mode=HashMode.ALL):
    """Compute ssdeep fuzzy hashes over the import symbols of a Mach-O file.

    Args:
        filename: Path of the file to parse; passed through to
            ``parse_macho``.
        data: Raw file contents, used by ``parse_macho`` when no filename is
            given.
        hash_mode: ``HashMode`` flags selecting which orderings to hash.

    Returns:
        Dict mapping ``"<cpu type> <file type> <magic>"`` labels to the fuzzy
        hash of the comma-joined symbol list.  With ``HashMode.SYMTAB`` the
        symbols are hashed in collected order under the plain label; with
        ``HashMode.SORT`` they are hashed sorted under the label suffixed
        with ``" (Sorted APIs)"``.  Returns None when ``parse_macho`` yields
        no parser.
    """
    macho_parser = parse_macho(filename, data)
    if macho_parser is None:
        # parse_macho() returns None when it cannot produce a parser (for
        # example on empty data or a parser error). Callers test the result
        # for falsiness, so report failure that way instead of failing on
        # the attribute access below.
        return None

    sym_fuzzy_dict = {}

    for entity in macho_parser.entities:
        # 'Universal' entities are skipped; only the other entities are hashed.
        if entity.magic_str == 'Universal':
            continue

        dylib_list = get_dylib_list(entity)
        sym_list = get_import_symbol_list(entity, dylib_list)

        entity_string = "{} {} {}".format(entity.cpu_type_str, entity.filetype_str, entity.magic_str)

        # Order of APIs in symbol table
        if hash_mode & HashMode.SYMTAB:
            sym_fuzzy_dict[entity_string] = ssdeep.hash(','.join(sym_list).encode())

        # Sort APIs into alphabetical order
        if hash_mode & HashMode.SORT:
            sorted_key = entity_string + " (Sorted APIs)"
            sym_fuzzy_dict[sorted_key] = ssdeep.hash(','.join(sorted(sym_list)).encode())

    return sym_fuzzy_dict
23 changes: 18 additions & 5 deletions symhash/machoinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,12 @@
# XXX: There are a lot of comments indicating we should check we aren't
# parsing past the end of a slice. These should all be fixed. ;)

import struct
import binascii

from hashlib import md5
import struct
from builtins import range
from datetime import datetime
from hashlib import md5

from future.utils import iteritems


Expand All @@ -63,11 +63,14 @@ class MachOEntity(object):

# CPU Types (not complete)
CPU_ARCH_ABI64 = 0x01000000
CPU_ARCH_ABI64_32 = 0x02000000
CPU_TYPE_POWERPC = 0x00000012
CPU_TYPE_X86 = 0x00000007
CPU_TYPE_ARM = 0x0000000C
CPU_TYPE_POWERPC64 = CPU_TYPE_POWERPC | CPU_ARCH_ABI64
CPU_TYPE_X86_64 = CPU_TYPE_X86 | CPU_ARCH_ABI64
CPU_TYPE_ARM64 = CPU_TYPE_ARM | CPU_ARCH_ABI64
CPU_TYPE_ARM64_32 = CPU_TYPE_ARM | CPU_ARCH_ABI64_32

# CPU Subtypes (not complete)
CPU_SUBTYPE_MASK = 0xFF000000
Expand All @@ -83,6 +86,11 @@ class MachOEntity(object):
CPU_SUBTYPE_ARM_V7 = 0x00000009
CPU_SUBTYPE_ARM_V7F = 0x0000000A
CPU_SUBTYPE_ARM_V7K = 0x0000000C
CPU_SUBTYPE_ARM64_ALL = 0x00000000
CPU_SUBTYPE_ARM64_V8 = 0x00000001
CPU_SUBTYPE_ARM64E = 0x00000002
CPU_SUBTYPE_ARM64_32_ALL = 0x00000000
CPU_SUBTYPE_ARM64_32_V8 = 0x00000001

# Filetype
MH_OBJECT = 0x00000001
Expand Down Expand Up @@ -312,7 +320,8 @@ def __init__(self):
self.CPU_TYPE_X86: 'Intel',
self.CPU_TYPE_POWERPC64: 'PPC64',
self.CPU_TYPE_X86_64: 'Intel (64-bit)',
self.CPU_TYPE_ARM: 'ARM'
self.CPU_TYPE_ARM: 'ARM',
self.CPU_TYPE_ARM64: 'ARM64'
}

# CPU subtype mapping
Expand Down Expand Up @@ -614,7 +623,7 @@ def cpu_subtype_str(self):
return self.cpu_ppc_subtypes.get(self.cpu_subtype & ~self.CPU_SUBTYPE_MASK, "0x%08x" % self.cpu_subtype)
elif self.cpu_type in [self.CPU_TYPE_X86, self.CPU_TYPE_X86_64]:
return self.cpu_x86_subtypes.get(self.cpu_subtype & ~self.CPU_SUBTYPE_MASK, "0x%08x" % self.cpu_subtype)
elif self.cpu_type in [self.CPU_TYPE_ARM]:
elif self.cpu_type in [self.CPU_TYPE_ARM, self.CPU_TYPE_ARM64]:
return self.cpu_arm_subtypes.get(self.cpu_subtype & ~self.CPU_SUBTYPE_MASK, "0x%08x" % self.cpu_subtype)
else:
return "0x%08x" % self.cpu_subtype
Expand Down Expand Up @@ -1017,6 +1026,10 @@ def parse_lc_symtab_sub(self, cmd_dict, data):
else:
sym['external'] = False

sym['n_sect'] = n_sect
sym['n_desc'] = n_desc
sym['n_value'] = n_value

symbols.append(sym)
ptr = ptr[nlist_size:]

Expand Down