38 changes: 38 additions & 0 deletions README.md
@@ -69,6 +69,44 @@ python -m neurons/validators/validator.py

---


# Running

These validators are designed to run and update themselves automatically. To run a validator, follow these steps:

1. Install this repository by following the steps outlined in [the installation section](#installation).
2. Install [Weights and Biases](https://docs.wandb.ai/quickstart) and run `wandb login` within this repository. This initializes Weights and Biases, enabling you to view KPIs and metrics for your validator. (Strongly recommended, as the shared data helps the network improve.)
3. Install [PM2](https://pm2.io/docs/runtime/guide/installation/) and the [`jq` package](https://jqlang.github.io/jq/) on your system.
**On Linux**:
```bash
sudo apt update && sudo apt install jq npm && sudo npm install pm2 -g && pm2 update
```
**On macOS**:
```bash
brew update && brew install jq node && npm install pm2 -g && pm2 update
```
4. Run the `run.sh` script, which will run your validator and pull the latest updates as they are released.
```bash
pm2 start run.sh --name text_prompt_validators_autoupdate -- --wallet.name <your-wallet-name> --wallet.hotkey <your-wallet-hot-key>
```

This will run **two** PM2 processes: one for the validator, called `text_prompt_validators_main_process` by default (you can change this in `run.sh`), and one for the `run.sh` script itself (named `text_prompt_validators_autoupdate` in step 4). The script checks for updates every 30 minutes; if an update is found, it pulls it, installs it, restarts `text_prompt_validators_main_process`, and then restarts itself.
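
Once the script is running, a quick way to confirm both processes are healthy is PM2's standard tooling (a minimal sketch; the process names assume the defaults above):
```bash
# List all PM2 processes with their status and uptime.
pm2 status

# Tail logs from the validator process.
pm2 logs text_prompt_validators_main_process

# Tail logs from the auto-update wrapper.
pm2 logs text_prompt_validators_autoupdate
```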


# Real-time monitoring with wandb integration
By default, the text prompting validator sends data to wandb, allowing users to monitor running validators and access key metrics in real time, such as:
- Gating model loss
- Hardware usage
- Forward pass time
- Block duration

All the data sent to wandb is publicly available to the community at the following [link](https://wandb.ai/opentensor-dev/openvalidators).

You don't need a wandb account to access the data or to generate a new run, but bear in mind that, per wandb's default policy, [data generated by anonymous users is deleted after 7 days](https://docs.wandb.ai/guides/app/features/anon#:~:text=If%20there's%20no%20account%2C%20we,be%20available%20for%207%20days).
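
If you want to pull these metrics programmatically rather than through the web UI, a minimal sketch using wandb's public API might look like this (the project path matches the link above; the `gating_loss` summary key is an assumption based on the metrics listed):
```python
import itertools

import wandb

# Query the public openvalidators project (an API key may be required).
api = wandb.Api()
runs = api.runs("opentensor-dev/openvalidators")

# Print a one-line summary for the first few runs.
for run in itertools.islice(runs, 5):
    # run.summary holds the last value logged for each metric in the run.
    print(run.id, run.state, run.summary.get("gating_loss"))
```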

## License
This repository is licensed under the MIT License.
18 changes: 18 additions & 0 deletions neurons/__init__.py
@@ -0,0 +1,18 @@
# The MIT License (MIT)
# Copyright © 2023 Yuma Rao
# Copyright © 2023 Opentensor Foundation

# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
# the Software.

# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
from . import validators
18 changes: 18 additions & 0 deletions neurons/validators/__init__.py
@@ -0,0 +1,18 @@
# The MIT License (MIT)
# Copyright © 2023 Yuma Rao
# Copyright © 2023 Opentensor Foundation

# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
# the Software.

# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
from . import validator
15 changes: 10 additions & 5 deletions neurons/validators/validator.py
@@ -188,8 +188,9 @@ def __init__(self):
        if self.config.neuron.mock_reward_models:
            self.reward_functions = []
            self.reward_weights = []
+            self.blacklist = MockRewardModel(RewardModelType.blacklist.value)
            self.masking_functions = [
-                MockRewardModel(RewardModelType.blacklist.value),
+                self.blacklist,
                MockRewardModel(RewardModelType.nsfw.value),
            ]
            bt.logging.debug(str(self.reward_functions))
@@ -301,6 +302,11 @@ def run(self):
        checkpoint(self)
        try:
            while True:
+                if not self.wallet.hotkey.ss58_address in self.metagraph.hotkeys:
+                    raise Exception(
+                        f"Validator is not registered - hotkey {self.wallet.hotkey.ss58_address} not in metagraph"
+                    )
+
                bt.logging.info(f"step({self.step}) block({ttl_get_block( self )})")

                # Run multiple forwards.
@@ -328,10 +334,9 @@ async def run_forward():

                self.prev_block = ttl_get_block(self)
                self.step += 1
-
-        except Exception as e:
-            bt.logging.error("Error in training loop", str(e))
-            bt.logging.debug(print_exception(value=e))
+        except Exception as err:
+            bt.logging.error("Error in training loop", str(err))
+            bt.logging.debug(print_exception(type(err), err, err.__traceback__))


def main():
8 changes: 8 additions & 0 deletions prompting/validators/event.py
@@ -27,6 +27,12 @@
class EventSchema:
    completions: List[str]  # List of completions received for a given prompt
    completion_times: List[float]  # List of completion times for a given prompt
    completion_status_messages: List[
        str
    ]  # List of completion status messages for a given prompt
    completion_status_codes: List[
        str
    ]  # List of completion status codes for a given prompt
    name: str  # Prompt type, e.g. 'followup', 'answer'
    block: float  # Current block at given step
    gating_loss: float  # Gating model loss for given step
@@ -143,6 +149,8 @@ def from_dict(event_dict: dict, disable_log_rewards: bool) -> "EventSchema":
        return EventSchema(
            completions=event_dict["completions"],
            completion_times=event_dict["completion_times"],
            completion_status_messages=event_dict["completion_status_messages"],
            completion_status_codes=event_dict["completion_status_codes"],
            name=event_dict["name"],
            block=event_dict["block"],
            gating_loss=event_dict["gating_loss"],
13 changes: 12 additions & 1 deletion prompting/validators/forward.py
@@ -126,6 +126,13 @@ async def run_step(

    # Find the best completion given the rewards vector.
    completions: List[str] = [comp.completion for comp in responses]
    completion_status_messages: List[str] = [
        str(comp.dendrite.status_message) for comp in responses
    ]
    completion_status_codes: List[str] = [
        str(comp.dendrite.status_code) for comp in responses
    ]

    best: str = completions[rewards.argmax(dim=0)].strip()

    # Get completion times
@@ -156,6 +163,8 @@
"uids": uids.tolist(),
"completions": completions,
"completion_times": completion_times,
"completion_status_messages": completion_status_message,
"completion_status_codes": completion_status_codes,
"rewards": rewards.tolist(),
"gating_loss": gating_loss.item(),
"best": best,
@@ -167,8 +176,10 @@ async def run_step(
    logger.log("EVENTS", "events", **event)

    # Log the event to wandb.
-    wandb_event = EventSchema.from_dict(event, self.config.neuron.disable_log_rewards)
    if not self.config.wandb.off:
+        wandb_event = EventSchema.from_dict(
+            event, self.config.neuron.disable_log_rewards
+        )
        self.wandb.log(asdict(wandb_event))

    # Return the event.
2 changes: 2 additions & 0 deletions prompting/validators/mock.py
@@ -61,6 +61,8 @@ class mock_status:
status_code = 200

completion = ""
status_message = "Success"
status_code = "1"
elapsed_time = 0
is_success = True
firewall_prompt = FirewallPrompt()
6 changes: 3 additions & 3 deletions prompting/validators/reward/config.py
@@ -38,8 +38,8 @@ class DefaultRewardFrameworkConfig:
Note: All the weights should add up to 1.0.
"""

-    dpo_model_weight: float = 0
-    rlhf_model_weight: float = 1
-    reciprocate_model_weight: float = 0
+    dpo_model_weight: float = 0.3
+    rlhf_model_weight: float = 0.4
+    reciprocate_model_weight: float = 0.3
    dahoas_model_weight: float = 0
    prompt_model_weight: float = 0
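
Since the class docstring requires the weights to add up to 1.0, a quick standalone check of the new defaults (a hypothetical snippet, not part of the repo):
```python
weights = {
    "dpo_model_weight": 0.3,
    "rlhf_model_weight": 0.4,
    "reciprocate_model_weight": 0.3,
    "dahoas_model_weight": 0.0,
    "prompt_model_weight": 0.0,
}
# 0.3 + 0.4 + 0.3 = 1.0, so the docstring's constraint still holds.
assert abs(sum(weights.values()) - 1.0) < 1e-9
```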
38 changes: 15 additions & 23 deletions prompting/validators/reward/dpo.py
@@ -21,7 +21,11 @@
from typing import List
from .config import RewardModelType
from .reward import BaseRewardModel
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import (
+    AutoTokenizer,
+    AutoModelForCausalLM,
+    NoRepeatNGramLogitsProcessor,
+)


class DirectPreferenceRewardModel(BaseRewardModel):
@@ -43,6 +47,7 @@ def __init__(self, device: str):
            trust_remote_code=True,
            torch_dtype=torch.float16,
        ).to(self.device)
+        self.ngram_logit_processor = NoRepeatNGramLogitsProcessor(ngram_size=5)

    def reward_single(
        self, prompt: str, completion: str, name: str, with_penalty=True
@@ -94,11 +99,15 @@ def reward_single(
            logits = logits[:, :-1, :]  # [batch_size=1, seq_len-1, vocab_len]

            if with_penalty:
-                # Apply penalty for repeated generation
-                for i in range(len(prompt_part) + 1, len(combined) - 1):
-                    logit = logits[:, i, :].clone()
-                    inputs = combined[len(prompt_part) : i].clone()
-                    logits[:, i, :] = self.logit_penalty(input_ids=inputs, logit=logit)
+                org_logit = logits.clone()
+                logits = self.ngram_logit_processor(
+                    combined[len(prompt_part) :].reshape(1, -1).clone(),
+                    logits.permute(0, 2, 1),
+                ).permute(0, 2, 1)
+                # ngram_logit_processor sets punished tokens to -inf; reset them to 10 std below the mean instead
+                logits[logits == -float("Inf")] = (
+                    org_logit.mean() - org_logit.std() * 10
+                )

            # Rescale via log(softmax(logits)).
            logits = logits.log_softmax(-1)
@@ -129,20 +138,3 @@ def get_rewards(
        ).to(self.device)
        bt.logging.trace(f"DirectPreferenceRewardModel | rewards: {rewards.tolist()}")
        return rewards
-
-    def logit_penalty(
-        self, input_ids: torch.LongTensor, logit: torch.FloatTensor
-    ) -> torch.FloatTensor:
-        # Counts the unique tokens within each generation
-        uniques, counts = input_ids.unique(return_counts=True)
-        score = torch.gather(logit, 1, uniques.unsqueeze(0))
-
-        # if score < 0 then repetition penalty has to be multiplied to reduce the previous token probability
-        score = torch.where(
-            score < 0,
-            score * (self.penalty**counts),
-            score / (self.penalty**counts),
-        )
-
-        logit.scatter_(1, uniques.unsqueeze(0), score.to(logit.dtype))
-        return logit
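
For context on the replacement above, here is a minimal standalone sketch of what `NoRepeatNGramLogitsProcessor` does (assumes `torch` and `transformers` are installed; the toy vocabulary and token ids are made up):
```python
import torch
from transformers import NoRepeatNGramLogitsProcessor

# Ban any token that would complete an already-seen bigram.
processor = NoRepeatNGramLogitsProcessor(ngram_size=2)

# The sequence [1, 2, 1] already contains the bigram (1, 2), so after the
# trailing 1, generating token 2 would repeat it.
input_ids = torch.tensor([[1, 2, 1]])
scores = torch.zeros(1, 10)  # flat logits over a toy 10-token vocabulary

out = processor(input_ids, scores)
print(out[0, 2])  # tensor(-inf): banned; dpo.py then rescales these -inf entries
```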
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,7 +1,7 @@
git+https://github.com/opentensor/bittensor.git@revolution
torch
transformers==4.30.0
-wandb==0.15.3
+wandb==0.15.10
datasets==2.14.0
plotly==5.14.1
networkx==3.1