Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 65 additions & 18 deletions Utils/src/utils/hashing/TLSH.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,22 @@
* FileComment: <text> Java version of the TLSH similarity hashing algorithm </text>
*/

import java.util.Arrays;

public class TLSH {
final private int BUCKETS = 256;
final private int EFF_BUCKETS = 128;
final private int CODE_SIZE = 32;
final private int TLSH_CHECKSUM_LEN = 1;
final private int TLSH_STRING_LEN = 70;
final private int SLIDING_WND_SIZE = 5;
final private int RANGE_LVALUE = 256;
final private int RANGE_QRATIO = 16;
final private float LOG_1_5 = 0.4054651f;
final private float LOG_1_3 = 0.26236426f;
final private float LOG_1_1 = 0.095310180f;

final private int [] vTable = {
// Algorithm sizing parameters (exact roles of the bucket/code sizes are defined by the TLSH scheme).
private static final int BUCKETS = 256;
private static final int EFF_BUCKETS = 128;
private static final int CODE_SIZE = 32;
// Index of the length byte in a decoded hash; also the number of leading checksum bytes.
private static final int TLSH_CHECKSUM_LEN = 1;
private static final int TLSH_STRING_LEN = 70;
// Number of entries kept in the sliding window array.
private static final int SLIDING_WND_SIZE = 5;
// Moduli used when comparing the length value and the quartile-ratio nibbles.
private static final int RANGE_LVALUE = 256;
private static final int RANGE_QRATIO = 16;
// Natural logarithms of 1.5, 1.3 and 1.1 respectively.
private static final float LOG_1_5 = 0.4054651f;
private static final float LOG_1_3 = 0.26236426f;
private static final float LOG_1_1 = 0.095310180f;

static final private int [] vTable = {
(int)1, (int)87, (int)49, (int)12, (int)176, (int)178, (int)102, (int)166, (int)121, (int)193, (int)6, (int)84, (int)249, (int)230, (int)44, (int)163,
(int)14, (int)197, (int)213, (int)181, (int)161, (int)85, (int)218, (int)80, (int)64, (int)239, (int)24, (int)226, (int)236, (int)142, (int)38, (int)200,
(int)110, (int)177, (int)104, (int)103, (int)141, (int)253, (int)255, (int)50, (int)77, (int)101, (int)81, (int)18, (int)45, (int)96, (int)31, (int)222,
Expand Down Expand Up @@ -70,7 +72,7 @@ public void setQHi(int x) {
}

private int [] aBucket = null;
private final int [] slideWindow = new int [SLIDING_WND_SIZE];
private int [] slideWindow = new int [SLIDING_WND_SIZE];
private int dataLen = 0;
private boolean lshCodeValid = false;
LshBinStruct lshBin = new LshBinStruct();
Expand All @@ -94,6 +96,14 @@ private String toHex(int [] buf) {
return ret.toUpperCase();
}

/**
 * Decodes a string of hexadecimal digit pairs into their numeric values.
 *
 * Each consecutive pair of characters becomes one element in the range 0..255,
 * with the first character of the pair as the high nibble. Upper- and
 * lower-case digits are both accepted.
 *
 * @param s hex string with an even number of characters
 * @return one int per character pair, in input order; empty for an empty string
 * @throws NullPointerException if {@code s} is null
 * @throws IllegalArgumentException if {@code s} has an odd length
 * @throws NumberFormatException if any character is not a hexadecimal digit
 */
static private int [] fromHex(String s) {
    if (s == null) {
        throw new NullPointerException("hex string must not be null");
    }
    final int len = s.length();
    if ((len & 1) != 0) {
        // A dangling nibble cannot form a byte; reject it up front rather than
        // failing midway with an index error.
        throw new IllegalArgumentException("hex string must have an even length, got " + len);
    }
    int [] ret = new int [len / 2];
    for (int i = 0; i < len; i += 2) {
        // Decode digit by digit: Integer.parseInt would also accept a leading
        // '+' or '-', which is not valid hex-encoded data.
        int hi = Character.digit(s.charAt(i), 16);
        int lo = Character.digit(s.charAt(i + 1), 16);
        if (hi < 0 || lo < 0) {
            throw new NumberFormatException(
                "invalid hex digits \"" + s.substring(i, i + 2) + "\" at index " + i);
        }
        ret[i / 2] = (hi << 4) | lo;
    }
    return ret;
}

private int bMapping(int salt, int i, int j, int k) {
int h = 0;
h = vTable[h ^ salt];
Expand Down Expand Up @@ -231,7 +241,7 @@ private int lCapturing(int len) {
return (int) (i & 0xFF);
}

private int modDiff(int x, int y, int R){
static private int modDiff(int x, int y, int R){
int dl;
int dr;
if (y > x){
Expand All @@ -246,7 +256,7 @@ private int modDiff(int x, int y, int R){
return dl;
}

private int hDistance(int [] x, int [] y)
static private int hDistance(int [] x, int [] y)
{
int diff = 0;
for (int i = 0; i < x.length; i++) {
Expand Down Expand Up @@ -482,8 +492,45 @@ else if (ldiff == 1)
return diff;
}

/**
 * Computes the distance score between two hex-encoded hashes.
 *
 * This method is for testing only and coincides with simple_unit.
 *
 * After hex decoding, the bytes are interpreted as follows: the first
 * {@code TLSH_CHECKSUM_LEN} bytes are the checksum, the next byte is the
 * length value, the byte after that holds two 4-bit ratio values (low and
 * high nibble), and all remaining bytes are passed to {@code hDistance}.
 *
 * @param hash1 first hash as a hex string
 * @param hash2 second hash as a hex string
 * @param lenDiff whether the length-value difference contributes to the score
 * @return the accumulated difference score; 0 when no component differs
 * @throws IllegalArgumentException if either hash is null, too short to
 *         contain the header bytes, or the two hashes differ in length
 */
static final public int totalDiff(String hash1, String hash2, boolean lenDiff) {
    if (hash1 == null || hash2 == null) {
        throw new IllegalArgumentException("hashes must not be null");
    }

    int [] iHash1 = fromHex(hash1);
    int [] iHash2 = fromHex(hash2);

    // Checksum bytes + length byte + ratio byte must all be present.
    final int headerLen = TLSH_CHECKSUM_LEN + 2;
    if (iHash1.length < headerLen || iHash2.length < headerLen) {
        throw new IllegalArgumentException(
            "hash too short: need at least " + (headerLen * 2) + " hex characters");
    }
    // The body comparison walks both arrays in lockstep, so unequal lengths
    // would either overrun one array or silently ignore part of the other.
    if (iHash1.length != iHash2.length) {
        throw new IllegalArgumentException(
            "hash lengths differ: " + hash1.length() + " vs " + hash2.length());
    }

    int diff = 0;

    if (lenDiff) {
        int ldiff = modDiff(iHash1[TLSH_CHECKSUM_LEN], iHash2[TLSH_CHECKSUM_LEN], RANGE_LVALUE);
        if (ldiff == 0)
            diff = 0;
        else if (ldiff == 1)
            diff = 1;
        else
            diff += ldiff * 12;
    }

    final int ratioByte1 = iHash1[TLSH_CHECKSUM_LEN + 1];
    final int ratioByte2 = iHash2[TLSH_CHECKSUM_LEN + 1];

    // Low nibble of the ratio byte.
    int q1diff = modDiff(ratioByte1 & 0xf, ratioByte2 & 0xf, RANGE_QRATIO);
    if (q1diff <= 1)
        diff += q1diff;
    else
        diff += (q1diff - 1) * 12;

    // High nibble of the ratio byte.
    int q2diff = modDiff(ratioByte1 >> 4, ratioByte2 >> 4, RANGE_QRATIO);
    if (q2diff <= 1)
        diff += q2diff;
    else
        diff += (q2diff - 1) * 12;

    // Any checksum mismatch costs exactly one point, however many bytes differ.
    for (int k = 0; k < TLSH_CHECKSUM_LEN; k++) {
        if (iHash1[k] != iHash2[k]) {
            diff++;
            break;
        }
    }

    diff += hDistance(
        Arrays.copyOfRange(iHash1, headerLen, iHash1.length),
        Arrays.copyOfRange(iHash2, headerLen, iHash2.length));

    return diff;
}

public static void main(String [] args) {
try {
TLSH ti1 = new TLSH();
Expand Down