From a9174ad87cf9c51223b979a7ced8584a55cfc86c Mon Sep 17 00:00:00 2001 From: mirzabob Date: Fri, 1 May 2026 14:10:07 +0530 Subject: [PATCH] Add doc and workflow fixes --- .editorconfig | 2 + .github/workflows/maven-central-publish.yml | 3 +- README.md | 532 +------------------- docs/docs/deduplication.md | 20 +- docs/docs/getting-started.md | 15 +- docs/docs/usage.md | 114 ++--- 6 files changed, 91 insertions(+), 595 deletions(-) diff --git a/.editorconfig b/.editorconfig index 57a2ace..860ce8d 100644 --- a/.editorconfig +++ b/.editorconfig @@ -638,6 +638,8 @@ ij_kotlin_wrap_expression_body_functions = 0 ij_kotlin_wrap_first_method_in_call_chain = false [{*.markdown,*.md}] +ij_formatter_enabled = false +max_line_length = off ij_markdown_force_one_space_after_blockquote_symbol = true ij_markdown_force_one_space_after_header_symbol = true ij_markdown_force_one_space_after_list_bullet = true diff --git a/.github/workflows/maven-central-publish.yml b/.github/workflows/maven-central-publish.yml index 37e20e8..d7b3953 100644 --- a/.github/workflows/maven-central-publish.yml +++ b/.github/workflows/maven-central-publish.yml @@ -24,4 +24,5 @@ jobs: env: MAVEN_USERNAME: ${{ secrets.CENTRAL_TOKEN_USERNAME }} MAVEN_PASSWORD: ${{ secrets.CENTRAL_TOKEN_PASSWORD }} - MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_SIGNING_KEY_PASSWORD }} \ No newline at end of file + MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_SIGNING_KEY_PASSWORD }} + SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} \ No newline at end of file diff --git a/README.md b/README.md index 048c230..7d4b450 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ # Solus -([![Quality Gate Status](https://sonarcloud.io/api/project_badges/measure?project=PhonePe_Solus&metric=alert_status)](https://sonarcloud.io/summary/new_code?id=PhonePe_Solus) +> A distributed de-duper with TTL capability based on Bloom Filters + +[![Quality Gate 
Status](https://sonarcloud.io/api/project_badges/measure?project=PhonePe_Solus&metric=alert_status)](https://sonarcloud.io/summary/new_code?id=PhonePe_Solus) [![Coverage](https://sonarcloud.io/api/project_badges/measure?project=PhonePe_Solus&metric=coverage)](https://sonarcloud.io/summary/new_code?id=PhonePe_Solus) [![Bugs](https://sonarcloud.io/api/project_badges/measure?project=PhonePe_Solus&metric=bugs)](https://sonarcloud.io/summary/new_code?id=PhonePe_Solus) [![Vulnerabilities](https://sonarcloud.io/api/project_badges/measure?project=PhonePe_Solus&metric=vulnerabilities)](https://sonarcloud.io/summary/new_code?id=PhonePe_Solus) @@ -9,51 +11,37 @@ [![Duplicated Lines (%)](https://sonarcloud.io/api/project_badges/measure?project=PhonePe_Solus&metric=duplicated_lines_density)](https://sonarcloud.io/summary/new_code?id=PhonePe_Solus) [![Reliability Rating](https://sonarcloud.io/api/project_badges/measure?project=PhonePe_Solus&metric=reliability_rating)](https://sonarcloud.io/summary/new_code?id=PhonePe_Solus) [![Security Rating](https://sonarcloud.io/api/project_badges/measure?project=PhonePe_Solus&metric=security_rating)](https://sonarcloud.io/summary/new_code?id=PhonePe_Solus) -[![Maintainability Rating](https://sonarcloud.io/api/project_badges/measure?project=PhonePe_Solus&metric=sqale_rating)](https://sonarcloud.io/summary/new_code?id=PhonePe_Solus)) +[![Maintainability Rating](https://sonarcloud.io/api/project_badges/measure?project=PhonePe_Solus&metric=sqale_rating)](https://sonarcloud.io/summary/new_code?id=PhonePe_Solus) + +## Overview **Solus** is a high-performance, TTL-based deduplication library designed for (hundred+) billion scale operations. It uses probabilistic data structures (Bloom filters) with sharding to provide memory-efficient duplicate detection with configurable accuracy. 
-## Table of Contents - -- [Features](#features) -- [Use Cases](#use-cases) -- [Installation](#installation) -- [Quick Start](#quick-start) -- [Configuration](#configuration) -- [Storage Backends](#storage-backends) -- [API Reference](#api-reference) -- [Architecture](#architecture) -- [Performance Tuning](#performance-tuning) -- [Error Handling](#error-handling) -- [Contributing](#contributing) -- [License](#license) - ## Features -- **Massive Scale**: Handle billions of unique entities with minimal memory footprint -- **TTL Support**: Automatic expiration of entries based on time-to-live -- **Multiple Storage Backends**: Support for Aerospike and HBase -- **Multi-Datacenter Support**: Built-in support for cross-datacenter (XDC) and datacenter-local (DC) deduplication -- **Configurable Accuracy**: Tune false positive rates based on your requirements -- **Batch Operations**: Efficient bulk add and check operations -- **Thread-Safe**: Safe for concurrent use in multi-threaded applications -- **Atomic Operations**: `addIfAbsent` for atomic check-and-add patterns +- **Massive Scale** — Handle billions of unique entities using sharded Bloom filters +- **TTL Support** — Automatic expiration of entries based on time-to-live +- **Pluggable Storage Backends** — Aerospike and HBase out of the box +- **Multi-Datacenter Support** — `DC` (datacenter-local) and `XDC` (cross-datacenter) deduplication levels +- **Configurable Accuracy** — Tune false positive rates by adjusting hash functions, shards, and bits per shard +- **Batch Operations** — Efficient bulk add and check operations +- **Atomic Operations** — `addIfAbsent` for atomic check-and-add patterns -## Use Cases +## Documentation -- **Coupon/Voucher Redemption**: Prevent duplicate redemption of one-time use codes -- **Event Deduplication**: Filter duplicate events in streaming pipelines -- **Request Idempotency**: Ensure API requests are processed only once -- **Notification Throttling**: Prevent sending duplicate 
notifications to users -- **Click Fraud Detection**: Identify and filter duplicate ad clicks +Detailed documentation is available at **[https://phonepe.github.io/solus](https://phonepe.github.io/solus)** -## Installation +- [Getting Started](docs/docs/getting-started.md) +- [Usage Guide](docs/docs/usage.md) +- [Deduplication Semantics](docs/docs/deduplication.md) +- [Aerospike Backend](docs/docs/storages/aerospike.md) +- [HBase Backend](docs/docs/storages/hbase.md) -### Maven +## Getting Started -Add the following dependency to your `pom.xml`: +### Maven ```xml @@ -64,12 +52,10 @@ Add the following dependency to your `pom.xml`: ``` -> **Note:** Find the latest version on [Maven Central](https://search.maven.org/artifact/com.phonepe/solus). - ### Gradle ```groovy -implementation 'com.phonepe:solus:1.0.0' +implementation 'com.phonepe:solus:${solus.version}' ``` ### Requirements @@ -79,456 +65,10 @@ implementation 'com.phonepe:solus:1.0.0' - Aerospike 6.x+ - HBase 2.x+ -## Quick Start - -### Using Aerospike - -```java -import com.aerospike.client.AerospikeClient; -import com.phonepe.solus.SolusEngine; -import com.phonepe.solus.config.DeDuperConfig; -import com.phonepe.solus.store.context.impl.AerospikeStorageContext; - -// 1. Create Aerospike client -AerospikeClient aerospikeClient = new AerospikeClient("localhost", 3000); - - // 2. Create storage context - AerospikeStorageContext storageContext = AerospikeStorageContext.builder() - .aerospikeClient(aerospikeClient) - .namespace("your-namespace") - .setName("deduper-set") - .farm("dc1") // Optional: datacenter identifier - .build(); - - // 3. Create Solus Engine - SolusEngine solusEngine = SolusEngine.builder() - .clientId("my-service") - .storageContext(storageContext) - .build(); - -// 4. Register a DeDuper (with default config) -solusEngine. - - register("coupons"); - -// 5. Add an entity with TTL (in milliseconds) -solusEngine. - - add("coupons","COUPON-ABC-123",86400000L); // 24 hours TTL - - // 6. 
Check if entity exists - boolean isAbsent = solusEngine.checkAbsence("coupons", "COUPON-ABC-123"); -// isAbsent = false (entity exists) - - // 7. Atomic add-if-absent pattern - boolean wasAdded = solusEngine.addIfAbsent("coupons", "COUPON-XYZ-789", 86400000L); -// wasAdded = true (entity was added because it didn't exist) -``` - -### Using HBase - -```java -import org.apache.hadoop.hbase.client.Connection; -import org.apache.hadoop.hbase.client.ConnectionFactory; -import com.phonepe.solus.SolusEngine; -import com.phonepe.solus.hbase.HBaseTableConnection; -import com.phonepe.solus.store.context.impl.HBaseStorageContext; - -// 1. Create HBase connection -Configuration config = HBaseConfiguration.create(); -config. - - set("hbase.zookeeper.quorum","zk1.example.com,zk2.example.com"); - - Connection hbaseConnection = ConnectionFactory.createConnection(config); - - // 2. Create HBase table connection - HBaseTableConnection tableConnection = new HBaseTableConnection(false, hbaseConnection); - - // 3. Create storage context (table will be created automatically if it doesn't exist) - HBaseStorageContext storageContext = HBaseStorageContext.builder() - .connection(tableConnection) - .tableName("solus_deduper") - .farm("dc1") - .build(); - - // 4. Create Solus Engine - SolusEngine solusEngine = SolusEngine.builder() - .clientId("my-service") - .storageContext(storageContext) - .build(); - -// Rest of the usage is identical to Aerospike -solusEngine. - - register("events"); -solusEngine. 
- - add("events","event-12345",3600000L); // 1 hour TTL -``` - -## Configuration - -### DeDuperConfig - -Each DeDuper can be configured with the following parameters: - -| Parameter | Default | Min | Max | Description | -|---------------------|------------|------------|-------------|-----------------------------------------------------| -| `noOfHashFunctions` | 7 | 7 | 13 | Number of hash functions for the Bloom filter | -| `noOfShards` | 10,000,000 | 10,000,000 | 150,000,000 | Number of shards for distributing data | -| `bitsPerShard` | 1,000 | 1,000 | 30,000 | Number of bits in each shard's Bloom filter | -| `deDuperLevel` | XDC | - | - | `DC` (datacenter-local) or `XDC` (cross-datacenter) | - -```java -// Custom configuration for high-scale use case -DeDuperConfig config = DeDuperConfig.builder() - .noOfHashFunctions(10) - .noOfShards(50_000_000L) - .bitsPerShard(20_000) - .deDuperLevel(DeDuperLevel.XDC) - .build(); - -solusEngine. - -register("high-volume-deduper",config); -``` - -### Capacity Planning - -The maximum key space is calculated as: `noOfShards × bitsPerShard` - -With maximum configuration, Solus can handle up to **4.5 trillion** unique keys. - -**Formula for optimal configuration:** - -The false positive probability is approximately: $(1-e^{-kn/m})^k$ - -Where: - -- `k` = number of hash functions -- `n` = number of elements inserted -- `m` = total number of bits (noOfShards × bitsPerShard) - -The optimal number of hash functions to minimize false positives: $k = (m/n) \times \ln(2)$ - -## Storage Backends - -### Aerospike - -Best for: - -- Low-latency requirements (sub-millisecond reads) -- High throughput scenarios -- Cloud-native deployments - -```java -AerospikeStorageContext.builder() - . - -aerospikeClient(aerospikeClient) // Required: Aerospike client instance - . - -namespace("namespace") // Required: Aerospike namespace - . - -setName("set-name") // Required: Aerospike set name - . 
- -farm("datacenter-id") // Optional: Datacenter identifier - . - -build(); -``` - -### HBase - -Best for: - -- Integration with Hadoop ecosystem -- Very large-scale deployments -- When data locality is important - -```java -HBaseStorageContext.builder() - . - -connection(hbaseTableConnection) // Required: HBase connection wrapper - . - -tableName("table-name") // Required: HBase table name - . - -farm("datacenter-id") // Optional: Datacenter identifier - . - -build(); -``` - -The HBase table is automatically created with: - -- GZ compression enabled -- Single column family -- Pre-split regions for optimal distribution - -## API Reference - -### SolusEngine - -The main entry point for all deduplication operations. - -#### Registration Methods - -```java -// Register with default configuration -void register(String name) - -// Register with custom configuration -void register(String name, DeDuperConfig config) - -// Unregister a DeDuper -void unregister(String name) - -// Get DeDuper details -DeDuper getDeDuper(String name) - -// Get cached DeDuper (faster, may be slightly stale) -DeDuper getCachedDeDuper(String name) - -// Get all active DeDupers -Map getActiveDeDupers() -``` - -#### Data Operations - -```java -// Check if single entity is absent -boolean checkAbsence(String deDuperName, T entity) - -// Check absence for multiple entities (batch) -Map checkAbsence(String deDuperName, Set entities) - -// Add single entity with TTL -void add(String deDuperName, T entity, long ttlInMs) - -// Add multiple entities with TTL (batch) -void add(String deDuperName, Set entities, long ttlInMs) - -// Atomic add-if-absent for single entity -boolean addIfAbsent(String deDuperName, T entity, long ttlInMs) - -// Atomic add-if-absent for multiple entities (batch) -Map addIfAbsent(String deDuperName, Set entities, long ttlInMs) -``` - -### Return Values - -| Method | Returns `true` when | -|------------------|-------------------------------------------| -| `checkAbsence()` | Entity 
is **not** in the DeDuper (absent) | -| `addIfAbsent()` | Entity was **added** (was absent before) | - -**Note**: Due to the probabilistic nature of Bloom filters, `checkAbsence()` returning `false` guarantees the entity was -seen before, but returning `true` has a small probability of being a false positive. - -## Architecture - -``` -┌─────────────────────────────────────────────────────────────────┐ -│ SolusEngine │ -├─────────────────────────────────────────────────────────────────┤ -│ ┌─────────────────────┐ ┌─────────────────────┐ │ -│ │ DeDuperCrudCommands │ │ DeDuperDataCommands │ │ -│ └──────────┬──────────┘ └──────────┬──────────┘ │ -│ │ │ │ -│ ▼ ▼ │ -│ ┌─────────────────────────────────────────────────────────┐ │ -│ │ StorageContext │ │ -│ │ ┌─────────────────────┐ ┌─────────────────────────┐ │ │ -│ │ │ AerospikeStorage │ │ HBaseStorage │ │ │ -│ │ │ Context │ │ Context │ │ │ -│ │ └─────────┬───────────┘ └───────────┬─────────────┘ │ │ -│ └────────────┼──────────────────────────┼─────────────────┘ │ -└───────────────┼──────────────────────────┼─────────────────────┘ - │ │ - ▼ ▼ - ┌───────────────┐ ┌───────────────┐ - │ Aerospike │ │ HBase │ - │ Cluster │ │ Cluster │ - └───────────────┘ └───────────────┘ -``` - -### How It Works - -1. **Sharding**: Entities are hashed and distributed across millions of shards -2. **Bloom Filters**: Each shard maintains a space-efficient Bloom filter -3. **TTL Management**: Storage backend handles automatic expiration -4. **Hash Functions**: Multiple hash functions reduce false positive rates - -## Performance Tuning - -### Recommended Configurations - -#### Small Scale (up to 100M keys) - -```java -DeDuperConfig.builder() - . - -noOfHashFunctions(7) - . - -noOfShards(10_000_000L) - . - -bitsPerShard(1_000) - . - -build(); -// False positive rate: ~0.8% -``` - -#### Medium Scale (up to 1B keys) - -```java -DeDuperConfig.builder() - . - -noOfHashFunctions(7) - . - -noOfShards(50_000_000L) - . - -bitsPerShard(5_000) - . 
- -build(); -// False positive rate: ~0.6% -``` - -#### Large Scale (up to 10B keys) - -```java -DeDuperConfig.builder() - . - -noOfHashFunctions(10) - . - -noOfShards(100_000_000L) - . - -bitsPerShard(20_000) - . - -build(); -// False positive rate: ~0.1% -``` - -### Performance Test Results - -| Configuration | Keys | False Positive Rate | -|-----------------------------------------|------|---------------------| -| 7 hash functions, 5k bits, 200k shards | 100M | ~0.8% | -| 7 hash functions, 20k bits, 500k shards | 1B | ~0.6% | - -## Error Handling - -Solus uses `SolusException` for all error conditions: - -```java -try{ - solusEngine.getDeDuper("non-existent"); -}catch( -SolusException e){ - switch(e. - -getErrorCode()){ - case DEDUPER_NOT_FOUND: - // Handle missing DeDuper - break; - case INVALID_CONFIG: - // Configuration validation failed - break; - case DEDUPER_CONFIG_MISMATCH: - // Trying to register existing DeDuper with different config - break; - case AEROSPIKE_ERROR: - case HBASE_ERROR: - // Storage backend error - break; - case TABLE_CREATION_ERROR: - // HBase table creation failed - break; - case CACHE_ERROR: - // Internal cache error - break; - case INTERNAL_ERROR: - // Unexpected internal error - break; - } - } -``` - -## Multi-Datacenter Support - -Solus supports two deduplication levels: - -### XDC (Cross-Datacenter) - Default - -Deduplication is shared across all datacenters. An entity added in DC1 will be detected as duplicate in DC2. - -```java -DeDuperConfig.builder() - . - -deDuperLevel(DeDuperLevel.XDC) - . - -build(); -``` - -### DC (Datacenter-Local) - -Deduplication is isolated to each datacenter. Useful when you need independent deduplication per region. - -```java -DeDuperConfig.builder() - . - -deDuperLevel(DeDuperLevel.DC) - . - -build(); -``` - ## Contributing We welcome contributions! Please see our [Contributing Guidelines](CONTRIBUTING.md) for details. -### Development Setup - -1. 
Clone the repository: - ```bash - git clone https://github.com/PhonePe/Solus.git - cd solus - ``` - -2. Build the project: - ```bash - mvn clean install - ``` - -3. Run tests: - ```bash - mvn test - ``` - -### Reporting Issues - -Please report issues on [GitHub Issues](https://github.com/PhonePe/Solus/issues). - ## License Solus is licensed under the [Apache License 2.0](LICENSE). @@ -548,29 +88,3 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ``` - -## Documentation Site (Zensical) - -This repository now includes Zensical-based docs under `docs/`. - -- Config: `docs/zensical.toml` -- Content: `docs/docs/` -- Python dependencies: `docs/requirements.txt` - -Build docs locally: - -```bash -cd docs -pip install -r requirements.txt -zensical build --clean -``` - -Generated site output is available at `docs/site`. - -## Acknowledgments - -Built with love by the PhonePe Engineering Team. - ---- - -**Need help?** Open an issue or reach out to the maintainers. diff --git a/docs/docs/deduplication.md b/docs/docs/deduplication.md index 060b198..c33ddbb 100644 --- a/docs/docs/deduplication.md +++ b/docs/docs/deduplication.md @@ -21,6 +21,7 @@ DeDuperConfig config = DeDuperConfig.builder() ``` !!! warning "Immutable after registration" + Once a deduper is registered, its configuration cannot be changed. Attempting to re-register with a different config throws `SolusException` with `ErrorCode.DEDUPER_CONFIG_MISMATCH`. ## Deduplication levels @@ -49,13 +50,16 @@ Deduplication is isolated to each datacenter. Each farm maintains independent Bl Use `DC` when you need independent deduplication per region or when cross-DC replication introduces unacceptable latency. !!! warning "DC consistency" + With `DC` level, the same entity can be added independently in different datacenters without conflict. 
This is by design — each DC maintains its own Bloom filter state. ## Capacity planning The maximum key space is calculated as: -$$\text{maxKeys} = \text{noOfShards} \times \text{bitsPerShard}$$ +``` +maxKeys = noOfShards × bitsPerShard +``` With maximum configuration (150M shards × 30K bits), Solus can handle up to **4.5 trillion** unique keys. @@ -63,17 +67,21 @@ With maximum configuration (150M shards × 30K bits), Solus can handle up to **4 The false positive probability for a Bloom filter is approximately: -$$(1 - e^{-kn/m})^k$$ +``` +P(false positive) ≈ (1 - e^(-kn/m))^k +``` Where: -- $k$ = number of hash functions (`noOfHashFunctions`) -- $n$ = number of elements inserted -- $m$ = total number of bits (`noOfShards × bitsPerShard`) +- `k` = number of hash functions (`noOfHashFunctions`) +- `n` = number of elements inserted +- `m` = total number of bits (`noOfShards × bitsPerShard`) The optimal number of hash functions to minimize false positives: -$$k_{opt} = \frac{m}{n} \times \ln(2)$$ +``` +k_opt = (m / n) × ln(2) +``` ### Recommended configurations diff --git a/docs/docs/getting-started.md b/docs/docs/getting-started.md index b881135..91c4bcb 100644 --- a/docs/docs/getting-started.md +++ b/docs/docs/getting-started.md @@ -30,7 +30,7 @@ cd solus mvn clean install ``` -To run the tests: +To run the tests (Docker must be running): ```bash mvn clean test @@ -57,14 +57,10 @@ SolusEngine solusEngine = SolusEngine.builder() .build(); // 4. Register a deduper (creates metadata in the store) -solusEngine. - -register("coupons"); +solusEngine.register("coupons"); // 5. Add an entity with TTL -solusEngine. - -add("coupons","COUPON-ABC-123",86400000L); // 24-hour TTL +solusEngine.add("coupons","COUPON-ABC-123",86400000L); // 24-hour TTL // 6. Check if entity exists boolean isAbsent = solusEngine.checkAbsence("coupons", "COUPON-ABC-123"); @@ -76,8 +72,9 @@ boolean wasAdded = solusEngine.addIfAbsent("coupons", "COUPON-XYZ-789", 86400000 ``` !!! 
tip -The example above uses all default configuration values (7 hash functions, 10M shards, 1000 bits per shard, XDC level). -To customise these, pass a `DeDuperConfig` to the `register()` call. + + The example above uses all default configuration values (7 hash functions, 10M shards, 1000 bits per shard, XDC level). + To customise these, pass a `DeDuperConfig` to the `register()` call. See [Deduplication Semantics](deduplication.md#configuration) for details. ## What's next diff --git a/docs/docs/usage.md b/docs/docs/usage.md index bc3058c..11ff6bf 100644 --- a/docs/docs/usage.md +++ b/docs/docs/usage.md @@ -12,40 +12,41 @@ === "Aerospike" - ```java - AerospikeStorageContext storageContext = AerospikeStorageContext.builder() +```java +AerospikeStorageContext storageContext = AerospikeStorageContext.builder() .aerospikeClient(aerospikeClient) .namespace("your-namespace") .setName("deduper-set") .farm("dc1") .build(); - SolusEngine solusEngine = SolusEngine.builder() +SolusEngine solusEngine = SolusEngine.builder() .clientId("my-service") .storageContext(storageContext) .build(); - ``` +``` === "HBase" - ```java - HBaseTableConnection tableConnection = new HBaseTableConnection(false, hbaseConnection); +```java +HBaseTableConnection tableConnection = new HBaseTableConnection(false, hbaseConnection); - HBaseStorageContext storageContext = HBaseStorageContext.builder() +HBaseStorageContext storageContext = HBaseStorageContext.builder() .connection(tableConnection) .tableName("solus_deduper") .farm("dc1") .build(); - SolusEngine solusEngine = SolusEngine.builder() +SolusEngine solusEngine = SolusEngine.builder() .clientId("my-service") .storageContext(storageContext) .build(); - ``` +``` !!! warning -For HBase, the `HBaseStorageContext` constructor automatically creates the table if it does not exist, with GZ -compression, a single column family, and pre-split regions for optimal distribution. 
+ + For HBase, the `HBaseStorageContext` constructor automatically creates the table if it does not exist, with GZ + compression, a single column family, and pre-split regions for optimal distribution. ## Register a deduper @@ -63,14 +64,13 @@ DeDuperConfig config = DeDuperConfig.builder() .deDuperLevel(DeDuperLevel.XDC) .build(); -solusEngine. - -register("high-volume-deduper",config); +solusEngine.register("high-volume-deduper", config); ``` !!! info -If a deduper with the same name already exists and its configuration matches, registration succeeds silently. If the -configuration differs, a `SolusException` with `ErrorCode.DEDUPER_CONFIG_MISMATCH` is thrown. + + If a deduper with the same name already exists and its configuration matches, registration succeeds silently. If the + configuration differs, a `SolusException` with `ErrorCode.DEDUPER_CONFIG_MISMATCH` is thrown. ### Unregister @@ -109,10 +109,12 @@ Set entities = Set.of("COUPON-1", "COUPON-2", "COUPON-3"); Map results = solusEngine.checkAbsence("coupons", entities); ``` -!!! note "Probabilistic guarantees" -Due to the nature of Bloom filters, `checkAbsence()` returning `false` **guarantees** the entity was seen before. -Returning `true` has a small probability of being a false positive (the entity was actually seen, but the filter missed -it). See [False Positive Rates](deduplication.md#false-positive-rates) for details. +!!! note "Probabilistic guarantees" + + Due to the nature of Bloom filters, `checkAbsence()` returning `false` **guarantees** the entity was seen before. + Returning `true` has a small probability of being a false positive (the entity was actually seen, but the filter missed it). + +See [False Positive Rates](deduplication.md#false-positive-rates) for details. ### Add entities @@ -121,13 +123,11 @@ backend. 
```java // Single entity — TTL in milliseconds -solusEngine.add("coupons","COUPON-ABC-123",86400000L); // 24-hour TTL +solusEngine.add("coupons", "COUPON-ABC-123", 86400000L); // 24-hour TTL // Batch Set entities = Set.of("COUPON-1", "COUPON-2", "COUPON-3"); -solusEngine. - -add("coupons",entities, 86400000L); +solusEngine.add("coupons", entities, 86400000L); ``` ### Atomic add-if-absent @@ -148,38 +148,19 @@ All operations throw `SolusException`. Use `getErrorCode()` to distinguish failu ```java try{ - solusEngine.getDeDuper("non-existent"); -}catch( -SolusException e){ - switch(e. - -getErrorCode()){ - case DEDUPER_NOT_FOUND ->log. - -warn("Deduper does not exist"); - case INVALID_CONFIG ->log. - -error("Configuration validation failed",e); - case DEDUPER_CONFIG_MISMATCH ->log. - -error("Config mismatch on re-registration",e); - case AEROSPIKE_ERROR ->log. - -error("Aerospike backend error",e); - case HBASE_ERROR ->log. - -error("HBase backend error",e); - case TABLE_CREATION_ERROR ->log. - -error("HBase table creation failed",e); - case CACHE_ERROR ->log. - -error("Internal cache error",e); - case INTERNAL_ERROR ->log. - -error("Unexpected error",e); + solusEngine.getDeDuper("non-existent"); +} catch(SolusException e){ + switch(e.getErrorCode()){ + case DEDUPER_NOT_FOUND -> log.warn("Deduper does not exist"); + case INVALID_CONFIG -> log.error("Configuration validation failed", e); + case DEDUPER_CONFIG_MISMATCH -> log.error("Config mismatch on re-registration", e); + case AEROSPIKE_ERROR -> log.error("Aerospike backend error", e); + case HBASE_ERROR -> log.error("HBase backend error", e); + case TABLE_CREATION_ERROR -> log.error("HBase table creation failed", e); + case CACHE_ERROR -> log.error("Internal cache error", e); + case INTERNAL_ERROR -> log.error("Unexpected error", e); } - } +} ``` See [Error Codes](deduplication.md#error-codes) for the full list. 
@@ -210,27 +191,20 @@ DeDuperConfig config = DeDuperConfig.builder() .deDuperLevel(DeDuperLevel.XDC) .build(); -solusEngine. - -register("coupon-deduper",config); +solusEngine.register("coupon-deduper",config); // ── Use (request handling) ── String couponCode = "SUMMER-SALE-2026"; boolean wasAdded = solusEngine.addIfAbsent("coupon-deduper", couponCode, 86400000L); -if(wasAdded){ - -// First time — process the coupon -processCoupon(couponCode); -}else{ - // Duplicate — reject - throw new - -IllegalStateException("Coupon already redeemed"); +if(wasAdded) { + // First time — process the coupon + processCoupon(couponCode); +} else { + // Duplicate — reject + throw new IllegalStateException("Coupon already redeemed"); } // ── Teardown (application shutdown) ── // Unregister if no longer needed - solusEngine. - -unregister("coupon-deduper"); +solusEngine.unregister("coupon-deduper"); ```