
ValueError: The model did not return a loss from the inputs, only the following keys: logits. For reference, the inputs it received are input_ids,attention_mask. #62

@yaolu-zjut

Description

Hi, I'm running into the error "The model did not return a loss from the inputs". Can you help me solve it? Here is my code:
```python

# args and set_random_seed come from the surrounding script
import os
import json
import random

import torch
import transformers
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_int8_training
from transformers import AutoTokenizer, AutoModelForCausalLM

set_random_seed(args.seed)
gradient_accumulation_steps = args.batch_size // args.micro_batch_size

device_map = "auto"
world_size = int(os.environ.get("WORLD_SIZE", 1))
ddp = world_size != 1
if ddp:
    print('using ddp...')
    device_map = {"": int(os.environ.get("LOCAL_RANK") or 0)}
    gradient_accumulation_steps = gradient_accumulation_steps // world_size

tokenizer = AutoTokenizer.from_pretrained(args.prune_model_path,
    use_fast=False, trust_remote_code=True
)
model = AutoModelForCausalLM.from_pretrained(args.prune_model_path,
    trust_remote_code=True, device_map=device_map
)

tokenizer.pad_token_id = 0
tokenizer.padding_side = "left"
print(model)

CUTOFF_LEN = 256
VAL_SET_SIZE = 2000
DATA_PATH = "/public/MountData/dataset/LLM_dataset/baize/data_tmp.json"

data = []
for x in 'alpaca,medical,quora,stackoverflow'.split(","):
    data += json.load(open("/public/MountData/dataset/LLM_dataset/baize/{}_chat_data.json".format(x)))
random.shuffle(data)
json.dump(data, open(DATA_PATH, "w"))
data = load_dataset("json", data_files=DATA_PATH)

# Data Preprocess
def generate_prompt(data_point):
    return data_point["input"]

def tokenize(prompt):
    result = tokenizer(
        prompt,
        truncation=True,
        max_length=CUTOFF_LEN + 1,
        padding="max_length",
    )
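    # note: only "input_ids" and "attention_mask" are returned below; no
    # "labels" key is ever produced, which is what the ValueError reports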
    return {
        "input_ids": result["input_ids"][:-1],
        "attention_mask": result["attention_mask"][:-1],
    }

def generate_and_tokenize_prompt(data_point):
    prompt = generate_prompt(data_point)
    return tokenize(prompt)

if VAL_SET_SIZE > 0:
    train_val = data["train"].train_test_split(
        test_size=VAL_SET_SIZE, shuffle=True, seed=42
    )
    train_data = train_val["train"].shuffle().map(generate_and_tokenize_prompt)
    val_data = train_val["test"].shuffle().map(generate_and_tokenize_prompt)
else:
    train_data = data["train"].shuffle().map(generate_and_tokenize_prompt)
    val_data = None

# Prepare For LoRA
model = prepare_model_for_int8_training(model)
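# note: prepare_model_for_int8_training is normally paired with
# load_in_8bit=True in from_pretrained; the model above is loaded in full precision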
print('model is ready...')
config = LoraConfig(
    r=args.lora_r,
    lora_alpha=args.lora_alpha,
    target_modules=args.lora_target_modules.split(","),
    lora_dropout=args.lora_dropout,
    bias="none",
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, config)
model.print_trainable_parameters()

if not ddp and torch.cuda.device_count() > 1:
    # keeps Trainer from trying its own DataParallelism when more than 1 gpu is available
    model.is_parallelizable = True
    model.model_parallel = True

trainer = transformers.Trainer(
    model=model,
    train_dataset=train_data,
    eval_dataset=val_data,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=args.micro_batch_size,
        gradient_accumulation_steps=gradient_accumulation_steps,
        warmup_steps=100,  # 100 ori
        num_train_epochs=args.num_epochs,
        learning_rate=args.learning_rate,
        fp16=True,  # not torch.cuda.is_bf16_supported()
        bf16=False,  # torch.cuda.is_bf16_supported()
        logging_steps=10,
        logging_first_step=True,
        optim="adamw_torch",
        evaluation_strategy="steps",
        save_strategy="steps",
        eval_steps=100,
        save_steps=200,
        output_dir=args.output_dir,
        save_total_limit=20,
        max_grad_norm=1.0,
        load_best_model_at_end=True,
        # lr_scheduler_type="linear",
        ddp_find_unused_parameters=False if ddp else None,
        group_by_length=args.group_by_length,
        report_to="none",
        run_name=args.output_dir.split('/')[-1],
        metric_for_best_model="{}_loss".format('/public/MountData/dataset/LLM_dataset/baize/'),
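        # note: with load_best_model_at_end=True this must name a logged eval
        # metric such as "eval_loss"; the formatted path above does not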
    ),
    data_collator=transformers.DataCollatorForSeq2Seq(
        tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True
    ),
)
model.config.use_cache = False

trainer.train()
# model = model.merge_and_unload()

if args.save_model:
    output_lora_dir = '/public/MountData/yaolu/LLM_pretrained/pruned_model/finetuned_lora_baize_{}_{}{}/'.format(args.base_model, args.pr_method, args.remove_layer)
    if not os.path.exists(output_lora_dir):
        os.mkdir(output_lora_dir)
    model.save_pretrained(output_lora_dir)

```
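
For context on the error itself: the Trainer only gets a loss back when the batches carry a `labels` key, and the `tokenize` function above returns only `input_ids` and `attention_mask`, so the forward pass can only produce `logits`. Below is a minimal sketch of the usual fix, assuming standard causal-LM fine-tuning where the labels are simply a copy of the input ids (this revised `tokenize` is illustrative, not code from the repo):

```python
def tokenize(prompt):
    result = tokenizer(
        prompt,
        truncation=True,
        max_length=CUTOFF_LEN,
        padding=False,  # let DataCollatorForSeq2Seq pad each batch instead
    )
    # causal-LM models in transformers shift the labels internally, and
    # DataCollatorForSeq2Seq pads "labels" with -100 so padded positions
    # are ignored by the loss
    result["labels"] = result["input_ids"].copy()
    return result
```

With a `labels` field present in the mapped dataset, the model's forward pass returns a loss and the `ValueError` should go away.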
