diff --git a/README.md b/README.md
index c138f8a..bd988be 100644
--- a/README.md
+++ b/README.md
@@ -69,6 +69,44 @@ python -m neurons/validators/validator.py
 ---
+
+# Running
+
+These validators are designed to run and update themselves automatically. To run a validator, follow these steps:
+
+1. Install this repository by following the steps outlined in [the installation section](#installation).
+2. Install [Weights and Biases](https://docs.wandb.ai/quickstart) and run `wandb login` within this repository. This will initialize Weights and Biases, enabling you to view KPIs and metrics for your validator. (Strongly recommended, as data sharing helps the network improve.)
+3. Install [PM2](https://pm2.io/docs/runtime/guide/installation/) and the [`jq` package](https://jqlang.github.io/jq/) on your system.
+   **On Linux**:
+   ```bash
+   sudo apt update && sudo apt install jq && sudo apt install npm && sudo npm install pm2 -g && pm2 update
+   ```
+   **On macOS**:
+   ```bash
+   brew update && brew install jq && brew install npm && sudo npm install pm2 -g && pm2 update
+   ```
+4. Run the `run.sh` script, which will handle running your validator and pulling the latest updates as they are issued.
+   ```bash
+   pm2 start run.sh --name text_prompt_validators_autoupdate -- --wallet.name <your_wallet_name> --wallet.hotkey <your_wallet_hotkey>
+   ```
+
+This will run **two** PM2 processes: one for the validator, called `text_prompt_validators_main_process` by default (you can change this in `run.sh`), and one for the `run.sh` script itself (in step 4, we named it `text_prompt_validators_autoupdate`). The script checks for updates every 30 minutes; if an update is available, it pulls it, installs it, restarts `text_prompt_validators_main_process`, and then restarts itself.
+
+
+# Real-time monitoring with wandb integration
+By default, the text prompting validator sends data to wandb, allowing users to monitor running validators and access key metrics in real time, such as:
+- Gating model loss
+- Hardware usage
+- Forward pass time
+- Block duration
+
+All the data sent to wandb is publicly available to the community at the following [link](https://wandb.ai/opentensor-dev/openvalidators).
+
+You don't need to have a wandb account to access the data or to generate a new run,
+but bear in mind that
+[data generated by anonymous users will be deleted after 7 days](https://docs.wandb.ai/guides/app/features/anon#:~:text=If%20there's%20no%20account%2C%20we,be%20available%20for%207%20days)
+per wandb's default policy.
+
 ## License
 This repository is licensed under the MIT License.
 ```text
diff --git a/neurons/__init__.py b/neurons/__init__.py
new file mode 100644
index 0000000..4ab3c3a
--- /dev/null
+++ b/neurons/__init__.py
@@ -0,0 +1,18 @@
+# The MIT License (MIT)
+# Copyright © 2023 Yuma Rao
+# Copyright © 2023 Opentensor Foundation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
+# the Software.
+ +# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. +from . import validators diff --git a/neurons/validators/__init__.py b/neurons/validators/__init__.py new file mode 100644 index 0000000..2ba7c11 --- /dev/null +++ b/neurons/validators/__init__.py @@ -0,0 +1,18 @@ +# The MIT License (MIT) +# Copyright © 2023 Yuma Rao +# Copyright © 2023 Opentensor Foundation + +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the “Software”), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of +# the Software. + +# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. +from . import validator diff --git a/neurons/validators/validator.py b/neurons/validators/validator.py index 03c2ea8..3ed59ee 100644 --- a/neurons/validators/validator.py +++ b/neurons/validators/validator.py @@ -188,8 +188,9 @@ def __init__(self): if self.config.neuron.mock_reward_models: self.reward_functions = [] self.reward_weights = [] + self.blacklist = MockRewardModel(RewardModelType.blacklist.value) self.masking_functions = [ - MockRewardModel(RewardModelType.blacklist.value), + self.blacklist, MockRewardModel(RewardModelType.nsfw.value), ] bt.logging.debug(str(self.reward_functions)) @@ -301,6 +302,11 @@ def run(self): checkpoint(self) try: while True: + if not self.wallet.hotkey.ss58_address in self.metagraph.hotkeys: + raise Exception( + f"Validator is not registered - hotkey {self.wallet.hotkey.ss58_address} not in metagraph" + ) + bt.logging.info(f"step({self.step}) block({ttl_get_block( self )})") # Run multiple forwards. 
@@ -328,10 +334,9 @@ async def run_forward():
                 self.prev_block = ttl_get_block(self)
                 self.step += 1
-
-        except Exception as e:
-            bt.logging.error("Error in training loop", str(e))
-            bt.logging.debug(print_exception(value=e))
+        except Exception as err:
+            bt.logging.error("Error in training loop", str(err))
+            bt.logging.debug(print_exception(type(err), err, err.__traceback__))
 
 
 def main():
diff --git a/prompting/validators/event.py b/prompting/validators/event.py
index 69768d7..6e51584 100644
--- a/prompting/validators/event.py
+++ b/prompting/validators/event.py
@@ -27,6 +27,12 @@ class EventSchema:
     completions: List[str]  # List of completions received for a given prompt
     completion_times: List[float]  # List of completion times for a given prompt
+    completion_status_messages: List[
+        str
+    ]  # List of completion status messages for a given prompt
+    completion_status_codes: List[
+        str
+    ]  # List of completion status codes for a given prompt
     name: str  # Prompt type, e.g. 'followup', 'answer'
     block: float  # Current block at given step
     gating_loss: float  # Gating model loss for given step
@@ -143,6 +149,8 @@ def from_dict(event_dict: dict, disable_log_rewards: bool) -> "EventSchema":
         return EventSchema(
             completions=event_dict["completions"],
             completion_times=event_dict["completion_times"],
+            completion_status_messages=event_dict["completion_status_messages"],
+            completion_status_codes=event_dict["completion_status_codes"],
             name=event_dict["name"],
             block=event_dict["block"],
             gating_loss=event_dict["gating_loss"],
diff --git a/prompting/validators/forward.py b/prompting/validators/forward.py
index 0fa9c7c..9b51c3b 100644
--- a/prompting/validators/forward.py
+++ b/prompting/validators/forward.py
@@ -126,6 +126,13 @@ async def run_step(
     # Find the best completion given the rewards vector.
     completions: List[str] = [comp.completion for comp in responses]
+    completion_status_messages: List[str] = [
+        str(comp.dendrite.status_message) for comp in responses
+    ]
+    completion_status_codes: List[str] = [
+        str(comp.dendrite.status_code) for comp in responses
+    ]
+
     best: str = completions[rewards.argmax(dim=0)].strip()
 
     # Get completion times
@@ -156,6 +163,8 @@ async def run_step(
         "uids": uids.tolist(),
         "completions": completions,
         "completion_times": completion_times,
+        "completion_status_messages": completion_status_messages,
+        "completion_status_codes": completion_status_codes,
         "rewards": rewards.tolist(),
         "gating_loss": gating_loss.item(),
         "best": best,
@@ -167,8 +176,10 @@ async def run_step(
     logger.log("EVENTS", "events", **event)
 
     # Log the event to wandb.
-    wandb_event = EventSchema.from_dict(event, self.config.neuron.disable_log_rewards)
     if not self.config.wandb.off:
+        wandb_event = EventSchema.from_dict(
+            event, self.config.neuron.disable_log_rewards
+        )
         self.wandb.log(asdict(wandb_event))
 
     # Return the event.
diff --git a/prompting/validators/mock.py b/prompting/validators/mock.py
index 30bf733..5c1056c 100644
--- a/prompting/validators/mock.py
+++ b/prompting/validators/mock.py
@@ -61,6 +61,8 @@ class mock_status:
         status_code = 200
         completion = ""
+        status_message = "Success"
+        status_code = "1"
         elapsed_time = 0
         is_success = True
     firewall_prompt = FirewallPrompt()
diff --git a/prompting/validators/reward/config.py b/prompting/validators/reward/config.py
index b826fb2..c7bc0dd 100644
--- a/prompting/validators/reward/config.py
+++ b/prompting/validators/reward/config.py
@@ -38,8 +38,8 @@ class DefaultRewardFrameworkConfig:
     Note: All the weights should add up to 1.0.
""" - dpo_model_weight: float = 0 - rlhf_model_weight: float = 1 - reciprocate_model_weight: float = 0 + dpo_model_weight: float = 0.3 + rlhf_model_weight: float = 0.4 + reciprocate_model_weight: float = 0.3 dahoas_model_weight: float = 0 prompt_model_weight: float = 0 diff --git a/prompting/validators/reward/dpo.py b/prompting/validators/reward/dpo.py index 961a0da..a987f69 100644 --- a/prompting/validators/reward/dpo.py +++ b/prompting/validators/reward/dpo.py @@ -21,7 +21,11 @@ from typing import List from .config import RewardModelType from .reward import BaseRewardModel -from transformers import AutoTokenizer, AutoModelForCausalLM +from transformers import ( + AutoTokenizer, + AutoModelForCausalLM, + NoRepeatNGramLogitsProcessor, +) class DirectPreferenceRewardModel(BaseRewardModel): @@ -43,6 +47,7 @@ def __init__(self, device: str): trust_remote_code=True, torch_dtype=torch.float16, ).to(self.device) + self.ngram_logit_processor = NoRepeatNGramLogitsProcessor(ngram_size=5) def reward_single( self, prompt: str, completion: str, name: str, with_penalty=True @@ -94,11 +99,15 @@ def reward_single( logits = logits[:, :-1, :] # [batch_size=1, seq_len-1, vocab_len] if with_penalty: - # Apply penalty for repeated generation - for i in range(len(prompt_part) + 1, len(combined) - 1): - logit = logits[:, i, :].clone() - inputs = combined[len(prompt_part) : i].clone() - logits[:, i, :] = self.logit_penalty(input_ids=inputs, logit=logit) + org_logit = logits.clone() + logits = self.ngram_logit_processor( + combined[len(prompt_part) :].reshape(1, -1).clone(), + logits.permute(0, 2, 1), + ).permute(0, 2, 1) + # ngram_logit_processor set punished tokens to -inf, resetting them to 10 std below instead + logits[logits == -float("Inf")] = ( + org_logit.mean() - org_logit.std() * 10 + ) # Rescale via log(softmax(logits)). logits = logits.log_softmax(-1) @@ -129,20 +138,3 @@ def get_rewards( ).to(self.device) bt.logging.trace(f"DirectPreferenceRewardModel | rewards: {rewards.tolist()}") return rewards - - def logit_penalty( - self, input_ids: torch.LongTensor, logit: torch.FloatTensor - ) -> torch.FloatTensor: - # Counts the unique tokens within each generation - uniques, counts = input_ids.unique(return_counts=True) - score = torch.gather(logit, 1, uniques.unsqueeze(0)) - - # if score < 0 then repetition penalty has to be multiplied to reduce the previous token probability - score = torch.where( - score < 0, - score * (self.penalty**counts), - score / (self.penalty**counts), - ) - - logit.scatter_(1, uniques.unsqueeze(0), score.to(logit.dtype)) - return logit diff --git a/requirements.txt b/requirements.txt index 1da9f3d..ee0ec07 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ git+https://github.com/opentensor/bittensor.git@revolution torch transformers==4.30.0 -wandb==0.15.3 +wandb==0.15.10 datasets==2.14.0 plotly==5.14.1 networkx==3.1 diff --git a/run.sh b/run.sh new file mode 100755 index 0000000..6519cc1 --- /dev/null +++ b/run.sh @@ -0,0 +1,289 @@ +#!/bin/bash + +# Initialize variables +script="neurons/validators/validator.py" +autoRunLoc=$(readlink -f "$0") +proc_name="text_prompt_validators_main_process" +args=() +version_location="./prompting/__init__.py" +version="__version__" + +old_args=$@ + +# Check if pm2 is installed +if ! command -v pm2 &> /dev/null +then + echo "pm2 could not be found. To install see: https://pm2.keymetrics.io/docs/usage/quick-start/" + exit 1 +fi + +# Checks if $1 is smaller than $2 +# If $1 is smaller than or equal to $2, then true. +# else false. 
+version_less_than_or_equal() { + [ "$1" = "`echo -e "$1\n$2" | sort -V | head -n1`" ] +} + +# Checks if $1 is smaller than $2 +# If $1 is smaller than $2, then true. +# else false. +version_less_than() { + [ "$1" = "$2" ] && return 1 || version_less_than_or_equal $1 $2 +} + +# Returns the difference between +# two versions as a numerical value. +get_version_difference() { + local tag1="$1" + local tag2="$2" + + # Extract the version numbers from the tags + local version1=$(echo "$tag1" | sed 's/v//') + local version2=$(echo "$tag2" | sed 's/v//') + + # Split the version numbers into an array + IFS='.' read -ra version1_arr <<< "$version1" + IFS='.' read -ra version2_arr <<< "$version2" + + # Calculate the numerical difference + local diff=0 + for i in "${!version1_arr[@]}"; do + local num1=${version1_arr[$i]} + local num2=${version2_arr[$i]} + + # Compare the numbers and update the difference + if (( num1 > num2 )); then + diff=$((diff + num1 - num2)) + elif (( num1 < num2 )); then + diff=$((diff + num2 - num1)) + fi + done + + strip_quotes $diff +} + +read_version_value() { + # Read each line in the file + while IFS= read -r line; do + # Check if the line contains the variable name + if [[ "$line" == *"$version"* ]]; then + # Extract the value of the variable + local value=$(echo "$line" | awk -F '=' '{print $2}' | tr -d ' ') + strip_quotes $value + return 0 + fi + done < "$version_location" + + echo "" +} + +check_package_installed() { + local package_name="$1" + os_name=$(uname -s) + + if [[ "$os_name" == "Linux" ]]; then + # Use dpkg-query to check if the package is installed + if dpkg-query -W -f='${Status}' "$package_name" 2>/dev/null | grep -q "installed"; then + return 1 + else + return 0 + fi + elif [[ "$os_name" == "Darwin" ]]; then + if brew list --formula | grep -q "^$package_name$"; then + return 1 + else + return 0 + fi + else + echo "Unknown operating system" + return 0 + fi +} + +check_variable_value_on_github() { + local repo="$1" + local file_path="$2" + local variable_name="$3" + + local url="https://api.github.com/repos/$repo/contents/$file_path" + local response=$(curl -s "$url") + + # Check if the response contains an error message + if [[ $response =~ "message" ]]; then + echo "Error: Failed to retrieve file contents from GitHub." + return 1 + fi + + # Extract the content from the response + local content=$(echo "$response" | tr -d '\n' | jq -r '.content') + + if [[ "$content" == "null" ]]; then + echo "File '$file_path' not found in the repository." + return 1 + fi + + # Decode the Base64-encoded content + local decoded_content=$(echo "$content" | base64 --decode) + + # Extract the variable value from the content + local variable_value=$(echo "$decoded_content" | grep "$variable_name" | awk -F '=' '{print $2}' | tr -d ' ') + + if [[ -z "$variable_value" ]]; then + echo "Variable '$variable_name' not found in the file '$file_path'." 
+ return 1 + fi + + strip_quotes $variable_value +} + +strip_quotes() { + local input="$1" + + # Remove leading and trailing quotes using parameter expansion + local stripped="${input#\"}" + stripped="${stripped%\"}" + + echo "$stripped" +} + +# Loop through all command line arguments +while [[ $# -gt 0 ]]; do + arg="$1" + + # Check if the argument starts with a hyphen (flag) + if [[ "$arg" == -* ]]; then + # Check if the argument has a value + if [[ $# -gt 1 && "$2" != -* ]]; then + if [[ "$arg" == "--script" ]]; then + script="$2"; + shift 2 + else + # Add '=' sign between flag and value + args+=("'$arg'"); + args+=("'$2'"); + shift 2 + fi + else + # Add '=True' for flags with no value + args+=("'$arg'"); + shift + fi + else + # Argument is not a flag, add it as it is + args+=("'$arg '"); + shift + fi +done + +# Check if script argument was provided +if [[ -z "$script" ]]; then + echo "The --script argument is required." + exit 1 +fi + +branch=$(git branch --show-current) # get current branch. +echo watching branch: $branch +echo pm2 process name: $proc_name + +# Get the current version locally. +current_version=$(read_version_value) + +# Check if script is already running with pm2 +if pm2 status | grep -q $proc_name; then + echo "The script is already running with pm2. Stopping and restarting..." + pm2 delete $proc_name +fi + +# Run the Python script with the arguments using pm2 +echo "Running $script with the following pm2 config:" + +# Join the arguments with commas using printf +joined_args=$(printf "%s," "${args[@]}") + +# Remove the trailing comma +joined_args=${joined_args%,} + +# Create the pm2 config file +echo "module.exports = { + apps : [{ + name : '$proc_name', + script : '$script', + interpreter: 'python3', + min_uptime: '5m', + max_restarts: '5', + args: [$joined_args] + }] +}" > app.config.js + +# Print configuration to be used +cat app.config.js + +pm2 start app.config.js + +# Check if packages are installed. +check_package_installed "jq" +if [ "$?" -eq 1 ]; then + while true; do + + # First ensure that this is a git installation + if [ -d "./.git" ]; then + + # check value on github remotely + latest_version=$(check_variable_value_on_github "opentensor/validators" "openvalidators/__init__.py" "__version__ ") + + # If the file has been updated + if version_less_than $current_version $latest_version; then + echo "latest version $latest_version" + echo "current version $current_version" + diff=$(get_version_difference $latest_version $current_version) + if [ "$diff" -eq 1 ]; then + echo "current validator version:" "$current_version" + echo "latest validator version:" "$latest_version" + + # Pull latest changes + # Failed git pull will return a non-zero output + if git pull origin $branch; then + # latest_version is newer than current_version, should download and reinstall. + echo "New version published. Updating the local copy." + + # Install latest changes just in case. + pip install -e . + + # # Run the Python script with the arguments using pm2 + # TODO (shib): Remove this pm2 del in the next spec version update. + pm2 del auto_run_validator + echo "Restarting PM2 process" + pm2 restart $proc_name + + # Update current version: + current_version=$(read_version_value) + echo "" + + # Restart autorun script + echo "Restarting script..." + ./$(basename $0) $old_args && exit + else + echo "**Will not update**" + echo "It appears you have made changes on your local copy. Please stash your changes using git stash." 
+ fi + else + # current version is newer than the latest on git. This is likely a local copy, so do nothing. + echo "**Will not update**" + echo "The local version is $diff versions behind. Please manually update to the latest version and re-run this script." + fi + else + echo "**Skipping update **" + echo "$current_version is the same as or more than $latest_version. You are likely running locally." + fi + else + echo "The installation does not appear to be done through Git. Please install from source at https://github.com/opentensor/validators and rerun this script." + fi + + # Wait about 30 minutes + # This should be plenty of time for validators to catch up + # and should prevent any rate limitations by GitHub. + sleep 1800 + done +else + echo "Missing package 'jq'. Please install it for your system first." +fi diff --git a/tests/validators/__init__.py b/tests/validators/__init__.py new file mode 100644 index 0000000..c81585d --- /dev/null +++ b/tests/validators/__init__.py @@ -0,0 +1,17 @@ +# The MIT License (MIT) +# Copyright © 2023 Yuma Rao +# Copyright © 2023 Opentensor Foundation + +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the “Software”), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of +# the Software. + +# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. diff --git a/tests/validators/reward/__init__.py b/tests/validators/reward/__init__.py new file mode 100644 index 0000000..c81585d --- /dev/null +++ b/tests/validators/reward/__init__.py @@ -0,0 +1,17 @@ +# The MIT License (MIT) +# Copyright © 2023 Yuma Rao +# Copyright © 2023 Opentensor Foundation + +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the “Software”), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of +# the Software. + +# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
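A note on the `prompting/validators/reward/dpo.py` hunk earlier in this diff: the custom `logit_penalty` helper is replaced by Hugging Face's `NoRepeatNGramLogitsProcessor`, which sets the logits of any token that would complete an already-seen n-gram to `-inf` (the hunk then rescales those entries to roughly ten standard deviations below the mean logit). As a reading aid, here is a minimal, standalone illustration of the processor's standard two-dimensional usage; the vocabulary size and token ids are made up for the example and this is not the exact call pattern used in the hunk:

```python
import torch
from transformers import NoRepeatNGramLogitsProcessor

# Ban any next token that would repeat a trigram already present in the sequence.
processor = NoRepeatNGramLogitsProcessor(ngram_size=3)

input_ids = torch.tensor([[5, 6, 7, 5, 6]])  # one sequence; the trigram (5, 6, 7) already occurred
scores = torch.zeros(1, 50)                  # fake next-token logits over a 50-token vocabulary

filtered = processor(input_ids, scores)
print(filtered[0, 7])  # tensor(-inf): token 7 would recreate the trigram (5, 6, 7)
```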
diff --git a/tests/validators/reward/test_task_validator.py b/tests/validators/reward/test_task_validator.py new file mode 100644 index 0000000..44a92c5 --- /dev/null +++ b/tests/validators/reward/test_task_validator.py @@ -0,0 +1,143 @@ +# The MIT License (MIT) +# Copyright © 2023 Yuma Rao +# Copyright © 2023 Opentensor Foundation + +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the “Software”), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of +# the Software. + +# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +import unittest +from prompting.validators.reward.task_validator import TaskValidator + + +class TaskValidatorTestCase(unittest.TestCase): + """ + This class contains unit tests for the TaskValidator class. + + The tests cover different scenarios for the `reward` method of the TaskValidator class. + The `reward` method is expected to return a reward based on the task name and the completion text. + """ + + def setUp(self): + self.validator = TaskValidator() + + def test_augment_with_answer_keyword(self): + """ + Test if the reward method returns 0 when the task "name" starts with 'augment' (summarization) + and the completion contains the 'Answer:' keyword. + """ + name = f"augment" + completion = "Summary: test summary\nAnswer: Test answer" + self.assertEqual(self.validator.reward("", completion, name), 0.0) + + def test_followup_with_answer_keyword(self): + """ + Test if the reward method returns 0 when the task "name" starts with 'followup' (question generation) + and the completion contains the 'Answer:' keyword. + """ + for i in range(0, 4): + name = f"followup{i}" + completion = ( + "Question: This is a test question?\nAnswer: This is a test answer." + ) + self.assertEqual(self.validator.reward("", completion, name), 0.0) + + def test_augment_with_question_keyword(self): + """ + Test if the reward method returns 0 when the task "name" starts with 'augment' (summarization) + and the completion contains the 'Question:' keyword. + """ + name = f"augment" + completion = "Summary: test summary\nQuestion: This is a test question?" + self.assertEqual(self.validator.reward("", completion, name), 0.0) + + def test_answer_with_question_keyword(self): + """ + Test if the reward method returns 0 when the task "name" is 'answer' (answer generation) + and the completion contains the 'Question:' keyword. + """ + for i in range(0, 4): + name = f"answer{i}" + completion = ( + "Question: This is a test question?\nAnswer: This is a test answer." 
+ ) + self.assertEqual(self.validator.reward("", completion, name), 0.0) + + def test_followup_and_answer_with_summary_keyword(self): + """ + Test if the reward method returns 0 when the task "name" is different from "augment" (summarization) + and the completion contains the 'Summary:' keyword. + """ + for name in [ + "followup0", + "followup1", + "followup2", + "followup3", + "answer0", + "answer1", + "answer2", + "answer3", + ]: + completion = "Summary: This is a test summary." + self.assertEqual(self.validator.reward("", completion, name), 0.0) + + def test_reward_valid_followup(self): + """ + Test if the reward method returns 1 when the task "name" starts with 'followup' (question generation) + and the completion contains a question + """ + for i in range(0, 4): + name = f"followup{i}" + completion = "Question: This is a test question?" + self.assertEqual(self.validator.reward("", completion, name), 1.0) + + def test_reward_valid_answer(self): + """ + Test if the reward method returns 1 when the task "name" is 'answer' (answer generation) + and the completion contains an answer + """ + for i in range(0, 4): + name = f"answer{i}" + completion = "Answer: This is a test answer." + self.assertEqual(self.validator.reward("", completion, name), 1.0) + + def test_reward_valid_augment(self): + """ + Test if the reward method returns 1 when the task "name" is 'augment' (summarization) + and the completion contains the a summary. + """ + name = "augment" + completion = "Summary: This is a test summary." + self.assertEqual(self.validator.reward("", completion, name), 1.0) + + def test_reward_valid_other(self): + """ + Test if the reward method returns 1 when the task "name" is different from "augment", "followup", and "answer" + and the completion does not contain the 'Summary:', 'Answer:', and 'Question:' keywords. + """ + for name in [ + "followup0", + "followup1", + "followup2", + "followup3", + "answer0", + "answer1", + "answer2", + "answer3", + ]: + completion = "This is a test completion." + self.assertEqual(self.validator.reward("", completion, name), 1.0) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/validators/test_dataset.py b/tests/validators/test_dataset.py new file mode 100644 index 0000000..2d4ff73 --- /dev/null +++ b/tests/validators/test_dataset.py @@ -0,0 +1,44 @@ +# The MIT License (MIT) +# Copyright © 2023 Yuma Rao +# Copyright © 2023 Opentensor Foundation + +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the “Software”), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of +# the Software. + +# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
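Taken together, the test cases above pin down a simple keyword-exclusion rule for `TaskValidator.reward`. For readers skimming the tests, a minimal sketch of the behaviour they expect follows; the actual class lives in `prompting/validators/reward/task_validator.py` and may be organised differently:

```python
class TaskValidatorSketch:
    """Illustrative only: reproduces the reward rules exercised by the tests above."""

    def reward(self, prompt: str, completion: str, name: str) -> float:
        summary_task = name.startswith("augment")    # summarization
        question_task = name.startswith("followup")  # question generation
        answer_task = name.startswith("answer")      # answer generation

        has_summary = "Summary:" in completion
        has_question = "Question:" in completion
        has_answer = "Answer:" in completion

        # A summary must not contain questions or answers.
        if summary_task and (has_question or has_answer):
            return 0.0
        # A generated question must not contain answers or summaries.
        if question_task and (has_answer or has_summary):
            return 0.0
        # A generated answer must not contain questions or summaries.
        if answer_task and (has_question or has_summary):
            return 0.0
        # Everything else passes the filter.
        return 1.0
```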
+ +import unittest +from prompting.validators.dataset import Dataset + + +class DatasetTestCase(unittest.TestCase): + def test_next_skips_empty_and_newline_only_strings(self): + mock_data = iter([{"text": ""}, {"text": "\n\n"}, {"text": "Non-empty text"}]) + dataset = Dataset() + dataset.openwebtext = mock_data + dataset.red_pajama = mock_data + + # Test that __next__ skips empty texts and texts that consist only of newline characters + self.assertEqual(dataset.__next__(), {"text": "Non-empty text"}) + + def test_next_returns_regular_strings(self): + mock_data = iter([{"text": "Non-empty text"}]) + dataset = Dataset() + dataset.openwebtext = mock_data + dataset.red_pajama = mock_data + + # Test that __next__ returns a non-empty text + self.assertEqual(dataset.__next__(), {"text": "Non-empty text"}) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/validators/test_event.py b/tests/validators/test_event.py new file mode 100644 index 0000000..59f1a76 --- /dev/null +++ b/tests/validators/test_event.py @@ -0,0 +1,189 @@ +# The MIT License (MIT) +# Copyright © 2023 Yuma Rao +# Copyright © 2023 Opentensor Foundation + +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the “Software”), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of +# the Software. + +# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
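The two dataset tests above rely on `Dataset.__next__` skipping samples whose `"text"` is empty or consists only of newlines. A rough sketch of that behaviour, assuming the class exposes `openwebtext` and `red_pajama` iterators as the tests do (the real class in `prompting/validators/dataset.py` streams Hugging Face datasets and may pick between sources differently):

```python
import random


class DatasetSketch:
    """Illustrative only: shows the skip-empty-text behaviour checked by the tests above."""

    def __init__(self):
        # In the real class these are streaming dataset iterators; the tests replace them with plain iterators.
        self.openwebtext = iter([])
        self.red_pajama = iter([])

    def __next__(self):
        while True:
            # Source selection here is a guess; the tests assign the same iterator to both attributes.
            sample = next(random.choice([self.openwebtext, self.red_pajama]))
            if sample["text"].strip():
                return sample
```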
+import unittest +from dataclasses import fields +from unittest.mock import patch +from prompting.validators.event import EventSchema +from prompting.validators.reward import RewardModelType + + +class EventTestCase(unittest.TestCase): + def test_event_from_dict_all_forward_columns_match(self): + """Test that all default columns logged on the forward pass are correctly converted""" + # Arrange: Create a dictionary with all columns + event_dict = { + "completions": ["test"], + "completion_times": [0.123], + "completion_status_messages": ["Success"], + "completion_status_codes": ["1"], + "name": "test-name", + "block": 1.0, + "gating_loss": 1.0, + "uids": [1], + "prompt": "test-prompt", + "step_length": 1.0, + "best": "test-best", + "rewards": [1.0], + RewardModelType.dahoas.value: [1.0], + RewardModelType.blacklist.value: [1.0], + RewardModelType.nsfw.value: [1.0], + RewardModelType.reciprocate.value: [1.0], + RewardModelType.diversity.value: [1.0], + RewardModelType.dpo.value: [1.0], + RewardModelType.rlhf.value: [1.0], + RewardModelType.prompt.value: [1.0], + RewardModelType.relevance.value: [1.0], + RewardModelType.task_validator.value: [1.0], + RewardModelType.dahoas.value + "_normalized": [1.0], + RewardModelType.blacklist.value + "_normalized": [1.0], + RewardModelType.nsfw.value + "_normalized": [1.0], + RewardModelType.reciprocate.value + "_normalized": [1.0], + RewardModelType.diversity.value + "_normalized": [1.0], + RewardModelType.dpo.value + "_normalized": [1.0], + RewardModelType.rlhf.value + "_normalized": [1.0], + RewardModelType.prompt.value + "_normalized": [1.0], + RewardModelType.relevance.value + "_normalized": [1.0], + RewardModelType.task_validator.value + "_normalized": [1.0], + } + + # Act + with patch("bittensor.logging.warning") as mock_warning: + event = EventSchema.from_dict(event_dict, disable_log_rewards=False) + mock_warning.assert_not_called() + + # Assert + for field in fields(EventSchema): + field_name = field.name + field_value = getattr(event, field_name) + + # Note: Does not include 'set_weights' column as it is not logged on the forward pass + if field_name == "set_weights": + assert field_value is None + continue + + print(field_name, field_value) + assert field_name in event_dict and event_dict[field_name] == field_value + + def test_event_from_dict_forward_no_reward_logging(self): + """Test that all default columns (not including reward columns) logged on the forward pass are + correctly converted""" + # Assert: create a dictionary with all non-related reward columns + event_dict = { + "completions": ["test"], + "completion_times": [0.123], + "completion_status_messages": ["Success"], + "completion_status_codes": ["1"], + "name": "test-name", + "block": 1.0, + "gating_loss": 1.0, + "uids": [1], + "prompt": "test-prompt", + "step_length": 1.0, + "best": "test-best", + "rewards": [1.0], + } + + # Act + with patch("bittensor.logging.warning") as mock_warning: + event = EventSchema.from_dict(event_dict, disable_log_rewards=True) + mock_warning.assert_not_called() + + # Assert: Check that all columns that were logged are correctly converted + for key, value in event_dict.items(): + assert getattr(event, key) == value + + # Assert: Check that all reward columns that are not logged are set to None + assert event.set_weights is None + assert event.dahoas_reward_model is None + assert event.blacklist_filter is None + assert event.nsfw_filter is None + assert event.reciprocate_reward_model is None + assert event.diversity_reward_model is None + assert 
event.dpo_reward_model is None + assert event.rlhf_reward_model is None + assert event.prompt_reward_model is None + assert event.relevance_filter is None + assert event.task_validator_filter is None + + assert event.dahoas_reward_model_normalized is None + assert event.nsfw_filter_normalized is None + assert event.reciprocate_reward_model_normalized is None + assert event.diversity_reward_model_normalized is None + assert event.dpo_reward_model_normalized is None + assert event.rlhf_reward_model_normalized is None + assert event.prompt_reward_model_normalized is None + assert event.relevance_filter_normalized is None + assert event.task_validator_filter_normalized is None + + def test_event_from_dict_forward_reward_logging_mismatch(self): + """Test that all default columns logged on the forward pass are correctly converted and that + that reward columns that should be logged are logged as warnings""" + # Assert: create a dictionary with all non-related reward columns + event_dict = { + "completions": ["test"], + "completion_times": [0.123], + "completion_status_messages": ["Success"], + "completion_status_codes": ["1"], + "name": "test-name", + "block": 1.0, + "gating_loss": 1.0, + "uids": [1], + "prompt": "test-prompt", + "step_length": 1.0, + "best": "test-best", + "rewards": [1.0], + } + + not_logged_columns = [] + for field in RewardModelType: + not_logged_columns.append(field.value) + if field.value != "blacklist_filter": + not_logged_columns.append(field.value + "_normalized") + + # Act + with patch("bittensor.logging.warning") as mock_warning: + event = EventSchema.from_dict(event_dict, disable_log_rewards=False) + # Assert: Check that all columns that are not logged in the dict are logged as warnings + self.assertEqual(mock_warning.call_count, len(not_logged_columns)) + + # Assert: Check that all columns that were logged are correctly converted + for key, value in event_dict.items(): + assert getattr(event, key) == value + + # Assert: Check that all reward columns that are not logged are set to None + assert event.set_weights is None + assert event.dahoas_reward_model is None + assert event.blacklist_filter is None + assert event.nsfw_filter is None + assert event.reciprocate_reward_model is None + assert event.diversity_reward_model is None + assert event.dpo_reward_model is None + assert event.rlhf_reward_model is None + assert event.prompt_reward_model is None + assert event.relevance_filter is None + assert event.task_validator_filter is None + + assert event.dahoas_reward_model_normalized is None + assert event.nsfw_filter_normalized is None + assert event.reciprocate_reward_model_normalized is None + assert event.diversity_reward_model_normalized is None + assert event.dpo_reward_model_normalized is None + assert event.rlhf_reward_model_normalized is None + assert event.prompt_reward_model_normalized is None + assert event.relevance_filter_normalized is None + assert event.task_validator_filter_normalized is None diff --git a/tests/validators/test_utils.py b/tests/validators/test_utils.py new file mode 100644 index 0000000..6c73ef2 --- /dev/null +++ b/tests/validators/test_utils.py @@ -0,0 +1,156 @@ +# The MIT License (MIT) +# Copyright © 2023 Yuma Rao +# Copyright © 2023 Opentensor Foundation + +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the “Software”), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, 
distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of +# the Software. + +# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. +import torch +import bittensor as bt +import copy +import unittest +from unittest.mock import MagicMock +from prompting.validators.utils import resync_linear_layer, check_uid_availability + + +class UtilsTestCase(unittest.TestCase): + def setUp(self): + """ + Creates a mock metagraph with 1024 mock axons before each test. + """ + mock_metagraph = MagicMock(spec=bt.metagraph) + mock_metagraph.uids = torch.tensor(range(0, 1024)) + mock_metagraph.S = torch.zeros(1024) + mock_metagraph.hotkeys = list(map(str, range(0, 1024))) + mock_metagraph.validator_permit = [False] * 1024 + mock_metagraph.axons = [ + MagicMock( + spec=bt.chain_data.AxonInfo, hotkey=str(num), ip="0.0.0.0/0", port=12345 + ) + for num in range(0, 1024) + ] + + self.metagraph = mock_metagraph + self.keypair = "test" + + def test_resync_linear_layer_multiple_updates(self): + # Arrange: Create necessary inputs for the test + # Create a linear layer of 768 x uids full of ones + linear_output_size = len(self.metagraph.uids) + linear_layer = torch.nn.Linear(768, linear_output_size) + torch.nn.init.ones_(linear_layer.weight) + torch.nn.init.ones_(linear_layer.bias) + + # Create a new metagraph state with updated hotkeys + updated_uids_indices = [0, 10, 20, 30] + modified_metagraph = copy.deepcopy(self.metagraph) + + for modified_index in updated_uids_indices: + modified_metagraph.hotkeys[modified_index] = "test" + + # Act: Call the utils function to be tested + resync_linear_layer(linear_layer, self.metagraph, modified_metagraph) + + # Assert: Ensure that the bias of the updated indices have been reinitialized as expected + for index in range(0, linear_output_size): + # If the index has been updated, assert that bias is zero and weights are not ones + if index in updated_uids_indices: + self.assertEqual(linear_layer.bias[index].item(), 0) + self.assertFalse( + torch.all( + linear_layer.weight[index] + == torch.ones(linear_layer.weight[index].shape) + ) + ) + # If the index has not been updated, assert that bias is one and weights are ones + else: + self.assertEqual(linear_layer.bias[index].item(), 1) + self.assertTrue( + torch.all( + linear_layer.weight[index] + == torch.ones(linear_layer.weight[index].shape) + ) + ) + + def test_check_uid_availability_not_serving_axon(self): + # Arrange: Create a non serving axon + uid = 1 + self.metagraph.axons[uid] = MagicMock( + spec=bt.chain_data.AxonInfo, is_serving=False + ) + + # Act: Call the function to check if uid is available + result = check_uid_availability(self.metagraph, uid, vpermit_tao_limit=0) + + # Assert: Ensure that the result is False (uid is available) when node doesn't have a serving axon + self.assertFalse(result) + + def test_check_uid_availability_node_without_validator_permit(self): + # Arrange: Create a 
serving axon without validator permit + uid = 1 + self.metagraph.axons[uid] = MagicMock( + spec=bt.chain_data.AxonInfo, is_serving=True + ) + self.metagraph.validator_permit[uid] = False + + # Act: Call the function to check if uid is available + result = check_uid_availability(self.metagraph, uid, vpermit_tao_limit=0) + + # Assert: Ensure that the result is True (uid is available) when node does not have a validator permit + self.assertTrue(result) + + def test_check_uid_availability_validator_with_stake_less_than_vpermit_tao_limit( + self, + ): + # Arrange: Create a serving axon with validator permit and stake less than vpermit_tao_limit + uid = 1 + self.metagraph.axons[uid] = MagicMock( + spec=bt.chain_data.AxonInfo, is_serving=True + ) + self.metagraph.validator_permit[uid] = True + self.metagraph.S[uid] = 1 + v_permit_tao_limit = 2 + + # Act: Call the function to check if uid is available + result = check_uid_availability( + self.metagraph, uid, vpermit_tao_limit=v_permit_tao_limit + ) + + # Assert: Ensure that the result is True (uid is available) when node validator + # has stake less than vpermit_tao_limit + self.assertTrue(result) + + def test_check_uid_availability_validator_with_stake_greater_than_vpermit_tao_limit( + self, + ): + # Arrange: Create a serving axon with validator permit and stake greater than vpermit_tao_limit + uid = 1 + self.metagraph.axons[uid] = MagicMock( + spec=bt.chain_data.AxonInfo, is_serving=True + ) + self.metagraph.validator_permit[uid] = True + self.metagraph.S[uid] = 2 + v_permit_tao_limit = 1 + + # Act: Call the function to check if uid is available + result = check_uid_availability( + self.metagraph, uid, vpermit_tao_limit=v_permit_tao_limit + ) + + # Assert: Ensure that the result is False (uid is available) when validator node + # has stake greater than vpermit_tao_limit + self.assertFalse(result) + + +if __name__ == "__main__": + unittest.main()
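The four `check_uid_availability` tests above spell out the full decision table for whether a UID should be queried. A sketch consistent with those tests is given below; the names mirror the test file, while the real implementation sits in `prompting/validators/utils.py` and may differ in detail:

```python
import bittensor as bt


def check_uid_availability_sketch(metagraph: "bt.metagraph", uid: int, vpermit_tao_limit: int) -> bool:
    """Illustrative only: the availability rules implied by the tests above."""
    # A UID whose axon is not serving can never be queried.
    if not metagraph.axons[uid].is_serving:
        return False
    # Serving UIDs holding a validator permit are skipped once their stake exceeds the TAO limit.
    if metagraph.validator_permit[uid] and metagraph.S[uid] > vpermit_tao_limit:
        return False
    # Everything else (serving, and either unpermitted or below the stake limit) is available.
    return True
```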