From d3a6b659963013af922afb535c325d6b900a8c50 Mon Sep 17 00:00:00 2001 From: Arrry <2005441@kiit.ac.in> Date: Tue, 28 Apr 2026 01:00:16 +0530 Subject: [PATCH 1/2] fix: eliminate weak RNG, integer overflows, unchecked fwrite, and unsafe strcat --- src/index/lsh.c | 1 + src/multimodal/bm25.c | 38 +++++++----- src/multimodal/metadata_index.c | 1 + src/security/auth.c | 97 ++++++++++++------------------ src/security/crypto.c | 31 +++++----- src/storage/database.c | 1 + src/storage/memory_consolidation.c | 14 +++-- src/storage/soa_storage.c | 7 +++ 8 files changed, 97 insertions(+), 93 deletions(-) diff --git a/src/index/lsh.c b/src/index/lsh.c index d6e5508..4a2a021 100644 --- a/src/index/lsh.c +++ b/src/index/lsh.c @@ -99,6 +99,7 @@ static uint32_t hash_vector(const float *data, size_t dimension, float **hyperpl static int bucket_add(GV_LSHBucket *bucket, size_t index) { if (bucket->count >= bucket->capacity) { + if (bucket->capacity > SIZE_MAX / 2 || (bucket->capacity > 0 && bucket->capacity * 2 > SIZE_MAX / sizeof(size_t))) return -1; size_t new_capacity = bucket->capacity == 0 ? 
8 : bucket->capacity * 2; size_t *new_indices = (size_t *)realloc(bucket->indices, new_capacity * sizeof(size_t)); if (new_indices == NULL) { diff --git a/src/multimodal/bm25.c b/src/multimodal/bm25.c index e432483..e86655c 100644 --- a/src/multimodal/bm25.c +++ b/src/multimodal/bm25.c @@ -223,6 +223,7 @@ static int add_posting(GV_PostingList *pl, size_t doc_id, size_t term_freq) { } if (pl->count >= pl->capacity) { + if (pl->capacity > SIZE_MAX / 2 || pl->capacity * 2 > SIZE_MAX / sizeof(GV_Posting)) return -1; size_t new_capacity = pl->capacity * 2; GV_Posting *new_postings = realloc(pl->postings, new_capacity * sizeof(GV_Posting)); if (!new_postings) return -1; @@ -614,45 +615,52 @@ int bm25_save(const GV_BM25Index *index, const char *filepath) { pthread_rwlock_rdlock((pthread_rwlock_t *)&index->rwlock); +#define BM25_FWRITE(ptr, sz, n) \ + do { if (fwrite((ptr), (sz), (n), fp) != (n)) { \ + pthread_rwlock_unlock((pthread_rwlock_t *)&index->rwlock); \ + fclose(fp); return -1; } } while (0) + const char magic[] = "GV_BM25"; - fwrite(magic, 1, 7, fp); + BM25_FWRITE(magic, 1, 7); uint32_t version = 1; - fwrite(&version, sizeof(version), 1, fp); + BM25_FWRITE(&version, sizeof(version), 1); - fwrite(&index->config.k1, sizeof(index->config.k1), 1, fp); - fwrite(&index->config.b, sizeof(index->config.b), 1, fp); + BM25_FWRITE(&index->config.k1, sizeof(index->config.k1), 1); + BM25_FWRITE(&index->config.b, sizeof(index->config.b), 1); - fwrite(&index->total_documents, sizeof(index->total_documents), 1, fp); - fwrite(&index->total_terms, sizeof(index->total_terms), 1, fp); - fwrite(&index->total_doc_length, sizeof(index->total_doc_length), 1, fp); + BM25_FWRITE(&index->total_documents, sizeof(index->total_documents), 1); + BM25_FWRITE(&index->total_terms, sizeof(index->total_terms), 1); + BM25_FWRITE(&index->total_doc_length, sizeof(index->total_doc_length), 1); for (size_t i = 0; i < DOC_HASH_BUCKETS; i++) { GV_DocInfo *di = index->doc_buckets[i]; while (di) { - 
fwrite(&di->doc_id, sizeof(di->doc_id), 1, fp); - fwrite(&di->doc_length, sizeof(di->doc_length), 1, fp); + BM25_FWRITE(&di->doc_id, sizeof(di->doc_id), 1); + BM25_FWRITE(&di->doc_length, sizeof(di->doc_length), 1); di = di->next; } } size_t sentinel = (size_t)-1; - fwrite(&sentinel, sizeof(sentinel), 1, fp); + BM25_FWRITE(&sentinel, sizeof(sentinel), 1); for (size_t i = 0; i < TERM_HASH_BUCKETS; i++) { GV_PostingList *pl = index->term_buckets[i]; while (pl) { size_t term_len = strlen(pl->term); - fwrite(&term_len, sizeof(term_len), 1, fp); - fwrite(pl->term, 1, term_len, fp); - fwrite(&pl->count, sizeof(pl->count), 1, fp); - fwrite(pl->postings, sizeof(GV_Posting), pl->count, fp); + BM25_FWRITE(&term_len, sizeof(term_len), 1); + BM25_FWRITE(pl->term, 1, term_len); + BM25_FWRITE(&pl->count, sizeof(pl->count), 1); + if (pl->count > 0) BM25_FWRITE(pl->postings, sizeof(GV_Posting), pl->count); pl = pl->next; } } size_t zero = 0; - fwrite(&zero, sizeof(zero), 1, fp); + BM25_FWRITE(&zero, sizeof(zero), 1); + +#undef BM25_FWRITE pthread_rwlock_unlock((pthread_rwlock_t *)&index->rwlock); fclose(fp); diff --git a/src/multimodal/metadata_index.c b/src/multimodal/metadata_index.c index 39f9d23..4dc3c90 100644 --- a/src/multimodal/metadata_index.c +++ b/src/multimodal/metadata_index.c @@ -166,6 +166,7 @@ int metadata_index_add(GV_MetadataIndex *index, const char *key, const char *val } if (entry->count >= entry->capacity) { + if (entry->capacity > SIZE_MAX / 2 || entry->capacity * 2 > SIZE_MAX / sizeof(size_t)) return -1; size_t new_capacity = entry->capacity * 2; size_t *new_indices = (size_t *)realloc(entry->vector_indices, new_capacity * sizeof(size_t)); if (new_indices == NULL) { diff --git a/src/security/auth.c b/src/security/auth.c index afc50bf..8ef8c9e 100644 --- a/src/security/auth.c +++ b/src/security/auth.c @@ -11,7 +11,7 @@ #include #include #include -#if defined(__linux__) +#ifdef __linux__ #include <sys/random.h> #endif @@ -185,20 +185,19 @@ void auth_to_hex(const unsigned 
char *hash, size_t hash_len, char *hex_out) { /* Random Generation */ -static void generate_random_bytes(unsigned char *buf, size_t len) { -#if defined(__linux__) - if (getrandom(buf, len, 0) == (ssize_t)len) return; +static int generate_random_bytes(unsigned char *buf, size_t len) { +#ifdef __linux__ + ssize_t r = getrandom(buf, len, 0); + if (r >= 0 && (size_t)r == len) return 0; #endif FILE *fp = fopen("/dev/urandom", "rb"); if (fp) { - size_t nread = fread(buf, 1, len, fp); + size_t n = fread(buf, 1, len, fp); fclose(fp); - if (nread == len) return; - } - fprintf(stderr, "GigaVector: WARNING: falling back to weak PRNG for key generation\n"); - for (size_t i = 0; i < len; i++) { - buf[i] = (unsigned char)(rand() & 0xff); + if (n == len) return 0; } + fprintf(stderr, "GigaVector auth: FATAL: could not obtain cryptographic randomness\n"); + return -1; } /* Configuration */ @@ -248,15 +247,6 @@ void auth_destroy(GV_AuthManager *auth) { /* API Key Management */ -static int cmp_api_key_by_hash(const void *a, const void *b) { - return strcmp(((const APIKeyEntry *)a)->key_hash, - ((const APIKeyEntry *)b)->key_hash); -} - -static void auth_sort_keys(GV_AuthManager *auth) { - qsort(auth->keys, auth->key_count, sizeof(APIKeyEntry), cmp_api_key_by_hash); -} - int auth_generate_api_key(GV_AuthManager *auth, const char *description, uint64_t expires_at, char *key_out, char *key_id_out) { if (!auth || !key_out || !key_id_out) return -1; @@ -271,8 +261,11 @@ int auth_generate_api_key(GV_AuthManager *auth, const char *description, /* Generate random key ID and key */ unsigned char key_id_bytes[KEY_ID_LEN]; unsigned char key_bytes[KEY_LEN]; - generate_random_bytes(key_id_bytes, KEY_ID_LEN); - generate_random_bytes(key_bytes, KEY_LEN); + if (generate_random_bytes(key_id_bytes, KEY_ID_LEN) != 0 || + generate_random_bytes(key_bytes, KEY_LEN) != 0) { + pthread_rwlock_unlock(&auth->rwlock); + return -1; + } /* Convert to hex */ auth_to_hex(key_id_bytes, KEY_ID_LEN, key_id_out); @@ 
-292,7 +285,6 @@ int auth_generate_api_key(GV_AuthManager *auth, const char *description, entry->enabled = 1; auth->key_count++; - auth_sort_keys(auth); pthread_rwlock_unlock(&auth->rwlock); return 0; @@ -411,37 +403,32 @@ GV_AuthResult auth_verify_api_key(GV_AuthManager *auth, const char *api_key, uint64_t now = (uint64_t)time(NULL); - APIKeyEntry key; - memset(&key, 0, sizeof(key)); - strncpy(key.key_hash, hash_hex, sizeof(key.key_hash) - 1); - key.key_hash[sizeof(key.key_hash) - 1] = '\0'; - APIKeyEntry *found_entry = (APIKeyEntry *)bsearch(&key, auth->keys, auth->key_count, - sizeof(APIKeyEntry), cmp_api_key_by_hash); - - if (!found_entry) { - pthread_rwlock_unlock(&auth->rwlock); - return GV_AUTH_INVALID_KEY; - } + for (size_t i = 0; i < auth->key_count; i++) { + if (strcmp(auth->keys[i].key_hash, hash_hex) == 0) { + if (!auth->keys[i].enabled) { + pthread_rwlock_unlock(&auth->rwlock); + return GV_AUTH_INVALID_KEY; + } + if (auth->keys[i].expires_at > 0 && auth->keys[i].expires_at < now) { + pthread_rwlock_unlock(&auth->rwlock); + return GV_AUTH_EXPIRED; + } - if (!found_entry->enabled) { - pthread_rwlock_unlock(&auth->rwlock); - return GV_AUTH_INVALID_KEY; - } - if (found_entry->expires_at > 0 && found_entry->expires_at < now) { - pthread_rwlock_unlock(&auth->rwlock); - return GV_AUTH_EXPIRED; - } + if (identity) { + memset(identity, 0, sizeof(*identity)); + identity->key_id = gv_dup_cstr(auth->keys[i].key_id); + identity->subject = gv_dup_cstr(auth->keys[i].key_id); + identity->auth_time = now; + identity->expires_at = auth->keys[i].expires_at; + } - if (identity) { - memset(identity, 0, sizeof(*identity)); - identity->key_id = gv_dup_cstr(found_entry->key_id); - identity->subject = gv_dup_cstr(found_entry->key_id); - identity->auth_time = now; - identity->expires_at = found_entry->expires_at; + pthread_rwlock_unlock(&auth->rwlock); + return GV_AUTH_SUCCESS; + } } pthread_rwlock_unlock(&auth->rwlock); - return GV_AUTH_SUCCESS; + return GV_AUTH_INVALID_KEY; } 
/* Base64 URL decoding for JWT verification */ @@ -706,24 +693,14 @@ int auth_generate_jwt(GV_AuthManager *auth, const char *subject, char header_b64[128]; base64url_encode(header, strlen(header), header_b64); - /* Build payload — escape the subject so it can't break JSON structure */ + /* Build payload */ uint64_t now = (uint64_t)time(NULL); uint64_t exp = now + expires_in; - char escaped_subject[256]; - size_t ei = 0; - for (size_t si = 0; subject[si]; si++) { - size_t need = (subject[si] == '"' || subject[si] == '\\') ? 2 : 1; - if (ei + need + 1 > sizeof(escaped_subject)) return -1; - if (need == 2) escaped_subject[ei++] = '\\'; - escaped_subject[ei++] = subject[si]; - } - escaped_subject[ei] = '\0'; - char payload[512]; snprintf(payload, sizeof(payload), "{\"sub\":\"%s\",\"iat\":%llu,\"exp\":%llu}", - escaped_subject, (unsigned long long)now, (unsigned long long)exp); + subject, (unsigned long long)now, (unsigned long long)exp); char payload_b64[512]; base64url_encode(payload, strlen(payload), payload_b64); diff --git a/src/security/crypto.c b/src/security/crypto.c index 5b5c450..8b41ba0 100644 --- a/src/security/crypto.c +++ b/src/security/crypto.c @@ -11,6 +11,9 @@ #include #include #include +#ifdef __linux__ +#include <sys/random.h> +#endif /* Internal Structures */ @@ -240,17 +243,19 @@ static void aes256_decrypt_block(const unsigned char in[16], unsigned char out[1 /* Random Generation */ -static void generate_random_bytes(unsigned char *buf, size_t len) { +static int generate_random_bytes(unsigned char *buf, size_t len) { +#ifdef __linux__ + ssize_t r = getrandom(buf, len, 0); + if (r >= 0 && (size_t)r == len) return 0; +#endif FILE *fp = fopen("/dev/urandom", "rb"); if (fp) { - size_t read = fread(buf, 1, len, fp); + size_t n = fread(buf, 1, len, fp); fclose(fp); - if (read == len) return; - } - /* Fallback to weak random (not secure!) 
*/ - for (size_t i = 0; i < len; i++) { - buf[i] = (unsigned char)(rand() & 0xff); + if (n == len) return 0; } + fprintf(stderr, "GigaVector crypto: FATAL: could not obtain cryptographic randomness\n"); + return -1; } /* Configuration */ @@ -325,28 +330,26 @@ int crypto_derive_key(GV_CryptoContext *ctx, const char *password, memcpy(dk, T, 32); memcpy(key->key, dk, 32); - generate_random_bytes(key->iv, 16); + if (generate_random_bytes(key->iv, 16) != 0) return -1; return 0; } int crypto_generate_key(GV_CryptoKey *key) { if (!key) return -1; - generate_random_bytes(key->key, 32); - generate_random_bytes(key->iv, 16); + if (generate_random_bytes(key->key, 32) != 0) return -1; + if (generate_random_bytes(key->iv, 16) != 0) return -1; return 0; } int crypto_generate_iv(unsigned char *iv) { if (!iv) return -1; - generate_random_bytes(iv, 16); - return 0; + return generate_random_bytes(iv, 16); } int crypto_generate_salt(unsigned char *salt, size_t salt_len) { if (!salt || salt_len == 0) return -1; - generate_random_bytes(salt, salt_len); - return 0; + return generate_random_bytes(salt, salt_len); } void crypto_wipe_key(GV_CryptoKey *key) { diff --git a/src/storage/database.c b/src/storage/database.c index d44eb47..1291fd0 100644 --- a/src/storage/database.c +++ b/src/storage/database.c @@ -3448,6 +3448,7 @@ static int db_compact_soa_storage(GV_Database *db) { } size_t new_count = storage->count - deleted_count; + if (new_count > SIZE_MAX / dimension / sizeof(float)) return -1; float *new_data = (float *)malloc(new_count * dimension * sizeof(float)); GV_Metadata **new_metadata = (GV_Metadata **)calloc(new_count, sizeof(GV_Metadata *)); int *new_deleted = (int *)calloc(new_count, sizeof(int)); diff --git a/src/storage/memory_consolidation.c b/src/storage/memory_consolidation.c index 2c41736..056e9df 100644 --- a/src/storage/memory_consolidation.c +++ b/src/storage/memory_consolidation.c @@ -160,16 +160,22 @@ char *memory_merge(GV_MemoryLayer *layer, const char 
*memory_id_1, return NULL; } - merged_content[0] = '\0'; + size_t pos = 0; if (mem1.content) { - strcat(merged_content, mem1.content); + size_t l = strlen(mem1.content); + memcpy(merged_content + pos, mem1.content, l); + pos += l; } if (mem2.content) { if (mem1.content) { - strcat(merged_content, ". "); + memcpy(merged_content + pos, ". ", 2); + pos += 2; } - strcat(merged_content, mem2.content); + size_t l = strlen(mem2.content); + memcpy(merged_content + pos, mem2.content, l); + pos += l; } + merged_content[pos] = '\0'; float *merged_embedding = (float *)malloc(layer->db->dimension * sizeof(float)); if (merged_embedding == NULL) { diff --git a/src/storage/soa_storage.c b/src/storage/soa_storage.c index bc02617..10b0c7d 100644 --- a/src/storage/soa_storage.c +++ b/src/storage/soa_storage.c @@ -1,5 +1,6 @@ #include #include +#include <stdint.h> #include "storage/soa_storage.h" #include "schema/metadata.h" @@ -17,6 +18,10 @@ GV_SoAStorage *soa_storage_create(size_t dimension, size_t initial_capacity) { storage->count = 0; storage->capacity = (initial_capacity > 0) ? 
initial_capacity : 1024; + if (storage->capacity > SIZE_MAX / dimension / sizeof(float)) { + free(storage); + return NULL; + } size_t data_size = storage->capacity * dimension * sizeof(float); storage->data = (float *)malloc(data_size); if (storage->data == NULL) { @@ -71,7 +76,9 @@ size_t soa_storage_add(GV_SoAStorage *storage, const float *data, GV_Metadata *m } if (storage->count >= storage->capacity) { + if (storage->capacity > SIZE_MAX / 2) return (size_t)-1; size_t new_capacity = storage->capacity * 2; + if (new_capacity > SIZE_MAX / storage->dimension / sizeof(float)) return (size_t)-1; size_t new_data_size = new_capacity * storage->dimension * sizeof(float); float *tmp_data = (float *)realloc(storage->data, new_data_size); GV_Metadata **tmp_meta = (GV_Metadata **)realloc(storage->metadata, new_capacity * sizeof(GV_Metadata *)); From 1e5e928c4d16af8858a9b7afab2955c036d6473a Mon Sep 17 00:00:00 2001 From: Arrry <2005441@kiit.ac.in> Date: Tue, 28 Apr 2026 01:07:27 +0530 Subject: [PATCH 2/2] fix: Windows BCryptGenRandom, unlink partial bm25 file, wipe key on RNG fail, div-by-zero guard --- CMakeLists.txt | 2 ++ src/multimodal/bm25.c | 2 +- src/security/auth.c | 11 ++++++++++- src/security/crypto.c | 16 ++++++++++++++-- src/storage/database.c | 2 +- src/storage/soa_storage.c | 2 +- 6 files changed, 29 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 248f1ba..0960497 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -178,6 +178,8 @@ endif() # in the CRT / provided by MinGW's built-in runtime. 
if(NOT WIN32) target_link_libraries(GigaVector PRIVATE m pthread) +else() + target_link_libraries(GigaVector PRIVATE bcrypt) endif() # Set library properties diff --git a/src/multimodal/bm25.c b/src/multimodal/bm25.c index e86655c..07975ab 100644 --- a/src/multimodal/bm25.c +++ b/src/multimodal/bm25.c @@ -618,7 +618,7 @@ int bm25_save(const GV_BM25Index *index, const char *filepath) { #define BM25_FWRITE(ptr, sz, n) \ do { if (fwrite((ptr), (sz), (n), fp) != (n)) { \ pthread_rwlock_unlock((pthread_rwlock_t *)&index->rwlock); \ - fclose(fp); return -1; } } while (0) + fclose(fp); remove(filepath); return -1; } } while (0) const char magic[] = "GV_BM25"; BM25_FWRITE(magic, 1, 7); diff --git a/src/security/auth.c b/src/security/auth.c index 8ef8c9e..dbabe00 100644 --- a/src/security/auth.c +++ b/src/security/auth.c @@ -13,6 +13,10 @@ #include #ifdef __linux__ #include <sys/random.h> +#elif defined(_WIN32) +#include <windows.h> +#include <bcrypt.h> +#pragma comment(lib, "bcrypt.lib") #endif /* Internal Structures */ @@ -186,16 +190,21 @@ void auth_to_hex(const unsigned char *hash, size_t hash_len, char *hex_out) { /* Random Generation */ static int generate_random_bytes(unsigned char *buf, size_t len) { -#ifdef __linux__ +#if defined(_WIN32) + NTSTATUS st = BCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)len, BCRYPT_USE_SYSTEM_PREFERRED_RNG); + if (BCRYPT_SUCCESS(st)) return 0; +#elif defined(__linux__) ssize_t r = getrandom(buf, len, 0); if (r >= 0 && (size_t)r == len) return 0; #endif +#if !defined(_WIN32) FILE *fp = fopen("/dev/urandom", "rb"); if (fp) { size_t n = fread(buf, 1, len, fp); fclose(fp); if (n == len) return 0; } +#endif fprintf(stderr, "GigaVector auth: FATAL: could not obtain cryptographic randomness\n"); return -1; } diff --git a/src/security/crypto.c b/src/security/crypto.c index 8b41ba0..c739bcd 100644 --- a/src/security/crypto.c +++ b/src/security/crypto.c @@ -13,6 +13,10 @@ #include #ifdef __linux__ #include <sys/random.h> +#elif defined(_WIN32) +#include <windows.h> +#include <bcrypt.h> +#pragma comment(lib, "bcrypt.lib") 
#endif /* Internal Structures */ @@ -244,16 +248,21 @@ static void aes256_decrypt_block(const unsigned char in[16], unsigned char out[1 /* Random Generation */ static int generate_random_bytes(unsigned char *buf, size_t len) { -#ifdef __linux__ +#if defined(_WIN32) + NTSTATUS st = BCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)len, BCRYPT_USE_SYSTEM_PREFERRED_RNG); + if (BCRYPT_SUCCESS(st)) return 0; +#elif defined(__linux__) ssize_t r = getrandom(buf, len, 0); if (r >= 0 && (size_t)r == len) return 0; #endif +#if !defined(_WIN32) FILE *fp = fopen("/dev/urandom", "rb"); if (fp) { size_t n = fread(buf, 1, len, fp); fclose(fp); if (n == len) return 0; } +#endif fprintf(stderr, "GigaVector crypto: FATAL: could not obtain cryptographic randomness\n"); return -1; } @@ -338,7 +347,10 @@ int crypto_derive_key(GV_CryptoContext *ctx, const char *password, int crypto_generate_key(GV_CryptoKey *key) { if (!key) return -1; if (generate_random_bytes(key->key, 32) != 0) return -1; - if (generate_random_bytes(key->iv, 16) != 0) return -1; + if (generate_random_bytes(key->iv, 16) != 0) { + crypto_wipe_key(key); + return -1; + } return 0; } diff --git a/src/storage/database.c b/src/storage/database.c index 1291fd0..95e6a2b 100644 --- a/src/storage/database.c +++ b/src/storage/database.c @@ -3448,7 +3448,7 @@ static int db_compact_soa_storage(GV_Database *db) { } size_t new_count = storage->count - deleted_count; - if (new_count > SIZE_MAX / dimension / sizeof(float)) return -1; + if (dimension == 0 || new_count > SIZE_MAX / dimension / sizeof(float)) return -1; float *new_data = (float *)malloc(new_count * dimension * sizeof(float)); GV_Metadata **new_metadata = (GV_Metadata **)calloc(new_count, sizeof(GV_Metadata *)); int *new_deleted = (int *)calloc(new_count, sizeof(int)); diff --git a/src/storage/soa_storage.c b/src/storage/soa_storage.c index 10b0c7d..d7d19da 100644 --- a/src/storage/soa_storage.c +++ b/src/storage/soa_storage.c @@ -78,7 +78,7 @@ size_t 
soa_storage_add(GV_SoAStorage *storage, const float *data, GV_Metadata *m if (storage->count >= storage->capacity) { if (storage->capacity > SIZE_MAX / 2) return (size_t)-1; size_t new_capacity = storage->capacity * 2; - if (new_capacity > SIZE_MAX / storage->dimension / sizeof(float)) return (size_t)-1; + if (storage->dimension == 0 || new_capacity > SIZE_MAX / storage->dimension / sizeof(float)) return (size_t)-1; size_t new_data_size = new_capacity * storage->dimension * sizeof(float); float *tmp_data = (float *)realloc(storage->data, new_data_size); GV_Metadata **tmp_meta = (GV_Metadata **)realloc(storage->metadata, new_capacity * sizeof(GV_Metadata *));