From 41c91993cf5a0877ab8516cc442fea334a6d6825 Mon Sep 17 00:00:00 2001 From: vvadimov Date: Sun, 18 Jan 2015 19:59:12 +0300 Subject: [PATCH 1/2] Update TLSH.java totalDiff method which compares two strings with hashes is implemented --- Utils/src/utils/hashing/TLSH.java | 48 +++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/Utils/src/utils/hashing/TLSH.java b/Utils/src/utils/hashing/TLSH.java index a9e5e93..3ea4d17 100755 --- a/Utils/src/utils/hashing/TLSH.java +++ b/Utils/src/utils/hashing/TLSH.java @@ -12,6 +12,8 @@ * FileComment: Java version of the TLSH similarity hashing algorithm */ +import java.util.Arrays; + public class TLSH { final private int BUCKETS = 256; final private int EFF_BUCKETS = 128; @@ -94,6 +96,14 @@ private String toHex(int [] buf) { return ret.toUpperCase(); } + private int [] fromHex(String s) { + int [] ret = new int [s.length() / 2]; + for (int i = 0; i < s.length(); i += 2) { + ret[i / 2] = Integer.parseInt(s.substring(i, i + 2), 16); + } + return ret; + } + private int bMapping(int salt, int i, int j, int k) { int h = 0; h = vTable[h ^ salt]; @@ -482,6 +492,44 @@ else if (ldiff == 1) return diff; } + final public int totalDiff(String hash1, String hash2, boolean lenDiff) { + int diff = 0; + int [] iHash1 = fromHex(hash1); + int [] iHash2 = fromHex(hash2); + + if (lenDiff) { + int ldiff = modDiff(iHash1[TLSH_CHECKSUM_LEN], iHash2[TLSH_CHECKSUM_LEN], RANGE_LVALUE); + if (ldiff == 0) + diff = 0; + else if (ldiff == 1) + diff = 1; + else + diff += ldiff * 12; + } + + int q1diff = modDiff(iHash1[TLSH_CHECKSUM_LEN + 1] & 0xf, iHash2[TLSH_CHECKSUM_LEN + 1] & 0xf, RANGE_QRATIO); + if (q1diff <= 1) + diff += q1diff; + else + diff += (q1diff - 1) * 12; + + int q2diff = modDiff(iHash1[TLSH_CHECKSUM_LEN + 1] >> 4, iHash2[TLSH_CHECKSUM_LEN + 1] >> 4, RANGE_QRATIO); + if (q2diff <= 1) + diff += q2diff; + else + diff += (q2diff - 1) * 12; + + for (int k = 0; k < TLSH_CHECKSUM_LEN; k++) { + if (iHash1[k] != iHash2[k]) { + diff++; + break; + } + } + + diff += hDistance(Arrays.copyOfRange(iHash1, TLSH_CHECKSUM_LEN + 2, iHash1.length), Arrays.copyOfRange(iHash2, TLSH_CHECKSUM_LEN + 2, iHash2.length)); + + return diff; + } /* this method for testing only */ /* coincides with simple_unit */ public static void main(String [] args) { From 75b2981c831a72cc4f0880239ce242ecdffa291c Mon Sep 17 00:00:00 2001 From: vvadimov Date: Sun, 18 Jan 2015 20:05:48 +0300 Subject: [PATCH 2/2] Update TLSH.java totalDiff(String, String, bool) made static --- Utils/src/utils/hashing/TLSH.java | 39 +++++++++++++++---------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/Utils/src/utils/hashing/TLSH.java b/Utils/src/utils/hashing/TLSH.java index 3ea4d17..a387931 100755 --- a/Utils/src/utils/hashing/TLSH.java +++ b/Utils/src/utils/hashing/TLSH.java @@ -15,19 +15,19 @@ import java.util.Arrays; public class TLSH { - final private int BUCKETS = 256; - final private int EFF_BUCKETS = 128; - final private int CODE_SIZE = 32; - final private int TLSH_CHECKSUM_LEN = 1; - final private int TLSH_STRING_LEN = 70; - final private int SLIDING_WND_SIZE = 5; - final private int RANGE_LVALUE = 256; - final private int RANGE_QRATIO = 16; - final private float LOG_1_5 = 0.4054651f; - final private float LOG_1_3 = 0.26236426f; - final private float LOG_1_1 = 0.095310180f; - - final private int [] vTable = { + static final private int BUCKETS = 256; + static final private int EFF_BUCKETS = 128; + static final private int CODE_SIZE = 32; + static final private int TLSH_CHECKSUM_LEN = 1; + static final private int TLSH_STRING_LEN = 70; + static final private int SLIDING_WND_SIZE = 5; + static final private int RANGE_LVALUE = 256; + static final private int RANGE_QRATIO = 16; + static final private float LOG_1_5 = 0.4054651f; + static final private float LOG_1_3 = 0.26236426f; + static final private float LOG_1_1 = 0.095310180f; + + static final private int [] vTable = { (int)1, (int)87, (int)49, (int)12, (int)176, (int)178, (int)102, (int)166, (int)121, (int)193, (int)6, (int)84, (int)249, (int)230, (int)44, (int)163, (int)14, (int)197, (int)213, (int)181, (int)161, (int)85, (int)218, (int)80, (int)64, (int)239, (int)24, (int)226, (int)236, (int)142, (int)38, (int)200, (int)110, (int)177, (int)104, (int)103, (int)141, (int)253, (int)255, (int)50, (int)77, (int)101, (int)81, (int)18, (int)45, (int)96, (int)31, (int)222, @@ -72,7 +72,7 @@ public void setQHi(int x) { } private int [] aBucket = null; - private final int [] slideWindow = new int [SLIDING_WND_SIZE]; + private int [] slideWindow = new int [SLIDING_WND_SIZE]; private int dataLen = 0; private boolean lshCodeValid = false; LshBinStruct lshBin = new LshBinStruct(); @@ -96,7 +96,7 @@ private String toHex(int [] buf) { return ret.toUpperCase(); } - private int [] fromHex(String s) { + static private int [] fromHex(String s) { int [] ret = new int [s.length() / 2]; for (int i = 0; i < s.length(); i += 2) { ret[i / 2] = Integer.parseInt(s.substring(i, i + 2), 16); @@ -241,7 +241,7 @@ private int lCapturing(int len) { return (int) (i & 0xFF); } - private int modDiff(int x, int y, int R){ + static private int modDiff(int x, int y, int R){ int dl; int dr; if (y > x){ @@ -256,7 +256,7 @@ private int modDiff(int x, int y, int R){ return dl; } - private int hDistance(int [] x, int [] y) + static private int hDistance(int [] x, int [] y) { int diff = 0; for (int i = 0; i < x.length; i++) { @@ -492,7 +492,7 @@ else if (ldiff == 1) return diff; } - final public int totalDiff(String hash1, String hash2, boolean lenDiff) { + static final public int totalDiff(String hash1, String hash2, boolean lenDiff) { int diff = 0; int [] iHash1 = fromHex(hash1); int [] iHash2 = fromHex(hash2); @@ -530,8 +530,7 @@ else if (ldiff == 1) return diff; } - /* this method for testing only */ - /* coincides with simple_unit */ + public static void main(String [] args) { try { TLSH ti1 = new TLSH();