38 changes: 38 additions & 0 deletions README.md
@@ -69,6 +69,44 @@ python -m neurons/validators/validator.py

---


# Running

These validators are designed to run and update themselves automatically. To run a validator, follow these steps:

1. Install this repository by following the steps outlined in [the installation section](#installation).
2. Install [Weights and Biases](https://docs.wandb.ai/quickstart) and run `wandb login` within this repository. This initializes Weights and Biases, enabling you to view KPIs and metrics for your validator. (Strongly recommended, as the shared data helps the network improve.)
3. Install [PM2](https://pm2.io/docs/runtime/guide/installation/) and the [`jq` package](https://jqlang.github.io/jq/) on your system.
**On Linux**:
```bash
sudo apt update && sudo apt install jq npm && sudo npm install pm2 -g && pm2 update
```
**On macOS**:
```bash
brew update && brew install jq node && npm install pm2 -g && pm2 update
```
4. Run the `run.sh` script, which will run your validator and pull the latest updates as they are released.
```bash
pm2 start run.sh --name text_prompt_validators_autoupdate -- --wallet.name <your-wallet-name> --wallet.hotkey <your-wallet-hot-key>
```

This will run **two** PM2 processes: one for the validator, called `text_prompt_validators_main_process` by default (you can change this in `run.sh`), and one for the `run.sh` script itself (named `text_prompt_validators_autoupdate` in step 4). The script checks for updates every 30 minutes; if an update is found, it pulls it, installs it, restarts `text_prompt_validators_main_process`, and then restarts itself.
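
Once the script is running, a quick way to confirm both processes are healthy is PM2's standard tooling (a minimal sketch; the process names assume the defaults above):
```bash
# List all PM2 processes with their status and uptime.
pm2 status

# Tail logs from the validator process.
pm2 logs text_prompt_validators_main_process

# Tail logs from the auto-update wrapper.
pm2 logs text_prompt_validators_autoupdate
```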


# Real-time monitoring with wandb integration
By default, the text prompting validator sends data to wandb, allowing users to monitor running validators and access key metrics in real time, such as:
- Gating model loss
- Hardware usage
- Forward pass time
- Block duration

All the data sent to wandb is publicly available to the community at the following [link](https://wandb.ai/opentensor-dev/openvalidators).

You don't need a wandb account to access the data or to generate a new run, but bear in mind that, per wandb's default policy, [data generated by anonymous users is deleted after 7 days](https://docs.wandb.ai/guides/app/features/anon#:~:text=If%20there's%20no%20account%2C%20we,be%20available%20for%207%20days).
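
If you want to pull these metrics programmatically rather than through the web UI, a minimal sketch using wandb's public API might look like this (the project path matches the link above; the `gating_loss` summary key is an assumption based on the metrics listed):
```python
import itertools

import wandb

# Query the public openvalidators project (an API key may be required).
api = wandb.Api()
runs = api.runs("opentensor-dev/openvalidators")

# Print a one-line summary for the first few runs.
for run in itertools.islice(runs, 5):
    # run.summary holds the last value logged for each metric in the run.
    print(run.id, run.state, run.summary.get("gating_loss"))
```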

## License
This repository is licensed under the MIT License.
18 changes: 18 additions & 0 deletions neurons/__init__.py
@@ -0,0 +1,18 @@
# The MIT License (MIT)
# Copyright © 2023 Yuma Rao
# Copyright © 2023 Opentensor Foundation

# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
# the Software.

# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
from . import validators
18 changes: 18 additions & 0 deletions neurons/validators/__init__.py
@@ -0,0 +1,18 @@
# The MIT License (MIT)
# Copyright © 2023 Yuma Rao
# Copyright © 2023 Opentensor Foundation

# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
# the Software.

# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
from . import validator
15 changes: 10 additions & 5 deletions neurons/validators/validator.py
@@ -188,8 +188,9 @@ def __init__(self):
        if self.config.neuron.mock_reward_models:
            self.reward_functions = []
            self.reward_weights = []
+            self.blacklist = MockRewardModel(RewardModelType.blacklist.value)
            self.masking_functions = [
-                MockRewardModel(RewardModelType.blacklist.value),
+                self.blacklist,
                MockRewardModel(RewardModelType.nsfw.value),
            ]
            bt.logging.debug(str(self.reward_functions))
@@ -301,6 +302,11 @@ def run(self):
        checkpoint(self)
        try:
            while True:
+                if not self.wallet.hotkey.ss58_address in self.metagraph.hotkeys:
+                    raise Exception(
+                        f"Validator is not registered - hotkey {self.wallet.hotkey.ss58_address} not in metagraph"
+                    )
+
                bt.logging.info(f"step({self.step}) block({ttl_get_block( self )})")

                # Run multiple forwards.
@@ -328,10 +334,9 @@ async def run_forward():

                self.prev_block = ttl_get_block(self)
                self.step += 1
-
-        except Exception as e:
-            bt.logging.error("Error in training loop", str(e))
-            bt.logging.debug(print_exception(value=e))
+        except Exception as err:
+            bt.logging.error("Error in training loop", str(err))
+            bt.logging.debug(print_exception(type(err), err, err.__traceback__))


def main():
8 changes: 8 additions & 0 deletions prompting/validators/event.py
@@ -27,6 +27,12 @@
class EventSchema:
    completions: List[str]  # List of completions received for a given prompt
    completion_times: List[float]  # List of completion times for a given prompt
    completion_status_messages: List[
        str
    ]  # List of completion status messages for a given prompt
    completion_status_codes: List[
        str
    ]  # List of completion status codes for a given prompt
    name: str  # Prompt type, e.g. 'followup', 'answer'
    block: float  # Current block at given step
    gating_loss: float  # Gating model loss for given step
@@ -143,6 +149,8 @@ def from_dict(event_dict: dict, disable_log_rewards: bool) -> "EventSchema":
        return EventSchema(
            completions=event_dict["completions"],
            completion_times=event_dict["completion_times"],
            completion_status_messages=event_dict["completion_status_messages"],
            completion_status_codes=event_dict["completion_status_codes"],
            name=event_dict["name"],
            block=event_dict["block"],
            gating_loss=event_dict["gating_loss"],
13 changes: 12 additions & 1 deletion prompting/validators/forward.py
@@ -126,6 +126,13 @@ async def run_step(

    # Find the best completion given the rewards vector.
    completions: List[str] = [comp.completion for comp in responses]
    completion_status_messages: List[str] = [
        str(comp.dendrite.status_message) for comp in responses
    ]
    completion_status_codes: List[str] = [
        str(comp.dendrite.status_code) for comp in responses
    ]

    best: str = completions[rewards.argmax(dim=0)].strip()

    # Get completion times
@@ -156,6 +163,8 @@
"uids": uids.tolist(),
"completions": completions,
"completion_times": completion_times,
"completion_status_messages": completion_status_message,
"completion_status_codes": completion_status_codes,
"rewards": rewards.tolist(),
"gating_loss": gating_loss.item(),
"best": best,
@@ -167,8 +176,10 @@ async def run_step(
    logger.log("EVENTS", "events", **event)

    # Log the event to wandb.
-    wandb_event = EventSchema.from_dict(event, self.config.neuron.disable_log_rewards)
    if not self.config.wandb.off:
+        wandb_event = EventSchema.from_dict(
+            event, self.config.neuron.disable_log_rewards
+        )
        self.wandb.log(asdict(wandb_event))

    # Return the event.
2 changes: 2 additions & 0 deletions prompting/validators/mock.py
@@ -61,6 +61,8 @@ class mock_status:
status_code = 200

completion = ""
status_message = "Success"
status_code = "1"
elapsed_time = 0
is_success = True
firewall_prompt = FirewallPrompt()
6 changes: 3 additions & 3 deletions prompting/validators/reward/config.py
@@ -38,8 +38,8 @@ class DefaultRewardFrameworkConfig:
Note: All the weights should add up to 1.0.
"""

-    dpo_model_weight: float = 0
-    rlhf_model_weight: float = 1
-    reciprocate_model_weight: float = 0
+    dpo_model_weight: float = 0.3
+    rlhf_model_weight: float = 0.4
+    reciprocate_model_weight: float = 0.3
    dahoas_model_weight: float = 0
    prompt_model_weight: float = 0
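
Since the class docstring requires the weights to add up to 1.0, a quick standalone check of the new defaults (a hypothetical snippet, not part of the repo):
```python
weights = {
    "dpo_model_weight": 0.3,
    "rlhf_model_weight": 0.4,
    "reciprocate_model_weight": 0.3,
    "dahoas_model_weight": 0.0,
    "prompt_model_weight": 0.0,
}
# 0.3 + 0.4 + 0.3 = 1.0, so the docstring's constraint still holds.
assert abs(sum(weights.values()) - 1.0) < 1e-9
```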
38 changes: 15 additions & 23 deletions prompting/validators/reward/dpo.py
@@ -21,7 +21,11 @@
from typing import List
from .config import RewardModelType
from .reward import BaseRewardModel
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import (
+    AutoTokenizer,
+    AutoModelForCausalLM,
+    NoRepeatNGramLogitsProcessor,
+)


class DirectPreferenceRewardModel(BaseRewardModel):
@@ -43,6 +47,7 @@ def __init__(self, device: str):
            trust_remote_code=True,
            torch_dtype=torch.float16,
        ).to(self.device)
+        self.ngram_logit_processor = NoRepeatNGramLogitsProcessor(ngram_size=5)

    def reward_single(
        self, prompt: str, completion: str, name: str, with_penalty=True
@@ -94,11 +99,15 @@ def reward_single(
            logits = logits[:, :-1, :]  # [batch_size=1, seq_len-1, vocab_len]

            if with_penalty:
-                # Apply penalty for repeated generation
-                for i in range(len(prompt_part) + 1, len(combined) - 1):
-                    logit = logits[:, i, :].clone()
-                    inputs = combined[len(prompt_part) : i].clone()
-                    logits[:, i, :] = self.logit_penalty(input_ids=inputs, logit=logit)
+                org_logit = logits.clone()
+                logits = self.ngram_logit_processor(
+                    combined[len(prompt_part) :].reshape(1, -1).clone(),
+                    logits.permute(0, 2, 1),
+                ).permute(0, 2, 1)
+                # ngram_logit_processor sets punished tokens to -inf; reset them to 10 std below the mean instead
+                logits[logits == -float("Inf")] = (
+                    org_logit.mean() - org_logit.std() * 10
+                )

            # Rescale via log(softmax(logits)).
            logits = logits.log_softmax(-1)
@@ -129,20 +138,3 @@ def get_rewards(
        ).to(self.device)
        bt.logging.trace(f"DirectPreferenceRewardModel | rewards: {rewards.tolist()}")
        return rewards
-
-    def logit_penalty(
-        self, input_ids: torch.LongTensor, logit: torch.FloatTensor
-    ) -> torch.FloatTensor:
-        # Counts the unique tokens within each generation
-        uniques, counts = input_ids.unique(return_counts=True)
-        score = torch.gather(logit, 1, uniques.unsqueeze(0))
-
-        # if score < 0 then repetition penalty has to be multiplied to reduce the previous token probability
-        score = torch.where(
-            score < 0,
-            score * (self.penalty**counts),
-            score / (self.penalty**counts),
-        )
-
-        logit.scatter_(1, uniques.unsqueeze(0), score.to(logit.dtype))
-        return logit
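
For context on the replacement above, here is a minimal standalone sketch of what `NoRepeatNGramLogitsProcessor` does (assumes `torch` and `transformers` are installed; the toy vocabulary and token ids are made up):
```python
import torch
from transformers import NoRepeatNGramLogitsProcessor

# Ban any token that would complete an already-seen bigram.
processor = NoRepeatNGramLogitsProcessor(ngram_size=2)

# The sequence [1, 2, 1] already contains the bigram (1, 2), so after the
# trailing 1, generating token 2 would repeat it.
input_ids = torch.tensor([[1, 2, 1]])
scores = torch.zeros(1, 10)  # flat logits over a toy 10-token vocabulary

out = processor(input_ids, scores)
print(out[0, 2])  # tensor(-inf): banned; dpo.py then rescales these -inf entries
```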
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,7 +1,7 @@
git+https://github.com/opentensor/bittensor.git@revolution
torch
transformers==4.30.0
-wandb==0.15.3
+wandb==0.15.10
datasets==2.14.0
plotly==5.14.1
networkx==3.1