diff --git a/Utils/src/utils/hashing/TLSH.java b/Utils/src/utils/hashing/TLSH.java index a9e5e93..a387931 100755 --- a/Utils/src/utils/hashing/TLSH.java +++ b/Utils/src/utils/hashing/TLSH.java @@ -12,20 +12,22 @@ * FileComment: Java version of the TLSH similarity hashing algorithm */ +import java.util.Arrays; + public class TLSH { - final private int BUCKETS = 256; - final private int EFF_BUCKETS = 128; - final private int CODE_SIZE = 32; - final private int TLSH_CHECKSUM_LEN = 1; - final private int TLSH_STRING_LEN = 70; - final private int SLIDING_WND_SIZE = 5; - final private int RANGE_LVALUE = 256; - final private int RANGE_QRATIO = 16; - final private float LOG_1_5 = 0.4054651f; - final private float LOG_1_3 = 0.26236426f; - final private float LOG_1_1 = 0.095310180f; - - final private int [] vTable = { + static final private int BUCKETS = 256; + static final private int EFF_BUCKETS = 128; + static final private int CODE_SIZE = 32; + static final private int TLSH_CHECKSUM_LEN = 1; + static final private int TLSH_STRING_LEN = 70; + static final private int SLIDING_WND_SIZE = 5; + static final private int RANGE_LVALUE = 256; + static final private int RANGE_QRATIO = 16; + static final private float LOG_1_5 = 0.4054651f; + static final private float LOG_1_3 = 0.26236426f; + static final private float LOG_1_1 = 0.095310180f; + + static final private int [] vTable = { (int)1, (int)87, (int)49, (int)12, (int)176, (int)178, (int)102, (int)166, (int)121, (int)193, (int)6, (int)84, (int)249, (int)230, (int)44, (int)163, (int)14, (int)197, (int)213, (int)181, (int)161, (int)85, (int)218, (int)80, (int)64, (int)239, (int)24, (int)226, (int)236, (int)142, (int)38, (int)200, (int)110, (int)177, (int)104, (int)103, (int)141, (int)253, (int)255, (int)50, (int)77, (int)101, (int)81, (int)18, (int)45, (int)96, (int)31, (int)222, @@ -70,7 +72,7 @@ public void setQHi(int x) { } private int [] aBucket = null; - private final int [] slideWindow = new int [SLIDING_WND_SIZE]; + 
private int [] slideWindow = new int [SLIDING_WND_SIZE]; private int dataLen = 0; private boolean lshCodeValid = false; LshBinStruct lshBin = new LshBinStruct(); @@ -94,6 +96,14 @@ private String toHex(int [] buf) { return ret.toUpperCase(); } + /** Decodes a hex string into an int array, one element per two hex digits (parsed in base 16). An odd-length string makes substring throw; non-hex characters make Integer.parseInt throw. */ static private int [] fromHex(String s) { + int [] ret = new int [s.length() / 2]; + for (int i = 0; i < s.length(); i += 2) { + ret[i / 2] = Integer.parseInt(s.substring(i, i + 2), 16); + } + return ret; + } + private int bMapping(int salt, int i, int j, int k) { int h = 0; h = vTable[h ^ salt]; @@ -231,7 +241,7 @@ private int lCapturing(int len) { return (int) (i & 0xFF); } - private int modDiff(int x, int y, int R){ + static private int modDiff(int x, int y, int R){ int dl; int dr; if (y > x){ @@ -246,7 +256,7 @@ private int modDiff(int x, int y, int R){ return dl; } - private int hDistance(int [] x, int [] y) + static private int hDistance(int [] x, int [] y) { int diff = 0; for (int i = 0; i < x.length; i++) { @@ -482,8 +492,45 @@ else if (ldiff == 1) return diff; } - /* this method for testing only */ - /* coincides with simple_unit */ + /** Returns a distance score between two hex-encoded hashes, both decoded with fromHex. Byte layout as used here: indices below TLSH_CHECKSUM_LEN are compared for equality, index TLSH_CHECKSUM_LEN feeds the term gated by lenDiff, index TLSH_CHECKSUM_LEN + 1 carries two 4-bit values, and every later byte is handed to hDistance. When lenDiff is false the term for index TLSH_CHECKSUM_LEN is skipped. NOTE(review): decoded array lengths are never checked, so an input shorter than TLSH_CHECKSUM_LEN + 2 bytes indexes out of range - confirm callers always pass full-length hashes. */ static final public int totalDiff(String hash1, String hash2, boolean lenDiff) { + int diff = 0; + int [] iHash1 = fromHex(hash1); + int [] iHash2 = fromHex(hash2); + + if (lenDiff) { + int ldiff = modDiff(iHash1[TLSH_CHECKSUM_LEN], iHash2[TLSH_CHECKSUM_LEN], RANGE_LVALUE); + /* diff is still 0 at this point, so the plain assignments below behave like additions */ if (ldiff == 0) + diff = 0; + else if (ldiff == 1) + diff = 1; + else + diff += ldiff * 12; + } + + /* low nibble of the byte at TLSH_CHECKSUM_LEN + 1: a difference of 0 or 1 is added as-is, anything larger costs (difference - 1) * 12 */ int q1diff = modDiff(iHash1[TLSH_CHECKSUM_LEN + 1] & 0xf, iHash2[TLSH_CHECKSUM_LEN + 1] & 0xf, RANGE_QRATIO); + if (q1diff <= 1) + diff += q1diff; + else + diff += (q1diff - 1) * 12; + + /* same scoring rule applied to the high nibble of that byte */ int q2diff = modDiff(iHash1[TLSH_CHECKSUM_LEN + 1] >> 4, iHash2[TLSH_CHECKSUM_LEN + 1] >> 4, RANGE_QRATIO); + if (q2diff <= 1) + diff += q2diff; + else + diff += (q2diff - 1) * 12; + + /* a mismatch anywhere in the leading TLSH_CHECKSUM_LEN bytes adds exactly 1, hence the break */ for (int k = 0; k < TLSH_CHECKSUM_LEN; k++) { + if (iHash1[k] != iHash2[k]) { + diff++; + break; + } + } + + /* the bytes from index TLSH_CHECKSUM_LEN + 2 onward are copied out and compared by hDistance */ diff += 
hDistance(Arrays.copyOfRange(iHash1, TLSH_CHECKSUM_LEN + 2, iHash1.length), Arrays.copyOfRange(iHash2, TLSH_CHECKSUM_LEN + 2, iHash2.length)); + + return diff; + } + public static void main(String [] args) { try { TLSH ti1 = new TLSH();