Hi, I'm running into the error "The model did not return a loss from the inputs" when fine-tuning with the Trainer. Can you help me solve it? Here is my code:
'''
import os
import json
import random

import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_int8_training

# (set_random_seed and args are defined elsewhere in the script)
set_random_seed(args.seed)
gradient_accumulation_steps = args.batch_size // args.micro_batch_size
device_map = "auto"
world_size = int(os.environ.get("WORLD_SIZE", 1))
ddp = world_size != 1
if ddp:
    print('using ddp...')
    device_map = {"": int(os.environ.get("LOCAL_RANK") or 0)}
    gradient_accumulation_steps = gradient_accumulation_steps // world_size
tokenizer = AutoTokenizer.from_pretrained(
    args.prune_model_path, use_fast=False, trust_remote_code=True
)
model = AutoModelForCausalLM.from_pretrained(
    args.prune_model_path, trust_remote_code=True, device_map=device_map
)
tokenizer.pad_token_id = 0
tokenizer.padding_side = "left"
print(model)
CUTOFF_LEN = 256
VAL_SET_SIZE = 2000
DATA_PATH = "/public/MountData/dataset/LLM_dataset/baize/data_tmp.json"
data = []
for x in 'alpaca,medical,quora,stackoverflow'.split(","):
    data += json.load(open("/public/MountData/dataset/LLM_dataset/baize/{}_chat_data.json".format(x)))
random.shuffle(data)
json.dump(data, open(DATA_PATH, "w"))
data = load_dataset("json", data_files=DATA_PATH)
# Data Preprocess
def generate_prompt(data_point):
    return data_point["input"]
def tokenize(prompt):
    result = tokenizer(
        prompt,
        truncation=True,
        max_length=CUTOFF_LEN + 1,
        padding="max_length",
    )
    # only input_ids and attention_mask are returned here, no "labels" field
    return {
        "input_ids": result["input_ids"][:-1],
        "attention_mask": result["attention_mask"][:-1],
    }
def generate_and_tokenize_prompt(data_point):
    prompt = generate_prompt(data_point)
    return tokenize(prompt)
if VAL_SET_SIZE > 0:
    train_val = data["train"].train_test_split(
        test_size=VAL_SET_SIZE, shuffle=True, seed=42
    )
    train_data = train_val["train"].shuffle().map(generate_and_tokenize_prompt)
    val_data = train_val["test"].shuffle().map(generate_and_tokenize_prompt)
else:
    train_data = data["train"].shuffle().map(generate_and_tokenize_prompt)
    val_data = None
# Prepare For LoRA
model = prepare_model_for_int8_training(model)
print('model is ready...')
config = LoraConfig(
r=args.lora_r,
lora_alpha=args.lora_alpha,
target_modules=args.lora_target_modules.split(","),
lora_dropout=args.lora_dropout,
bias="none",
task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)
model.print_trainable_parameters()
if not ddp and torch.cuda.device_count() > 1:
    # keeps Trainer from trying its own DataParallelism when more than 1 gpu is available
    model.is_parallelizable = True
    model.model_parallel = True
trainer = transformers.Trainer(
    model=model,
    train_dataset=train_data,
    eval_dataset=val_data,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=args.micro_batch_size,
        gradient_accumulation_steps=gradient_accumulation_steps,
        warmup_steps=100,  # 100 ori
        num_train_epochs=args.num_epochs,
        learning_rate=args.learning_rate,
        fp16=True,  # not torch.cuda.is_bf16_supported()
        bf16=False,  # torch.cuda.is_bf16_supported()
        logging_steps=10,
        logging_first_step=True,
        optim="adamw_torch",
        evaluation_strategy="steps",
        save_strategy="steps",
        eval_steps=100,
        save_steps=200,
        output_dir=args.output_dir,
        save_total_limit=20,
        max_grad_norm=1.0,
        load_best_model_at_end=True,
        # lr_scheduler_type="linear",
        ddp_find_unused_parameters=False if ddp else None,
        group_by_length=args.group_by_length,
        report_to="none",
        run_name=args.output_dir.split('/')[-1],
        metric_for_best_model="{}_loss".format('/public/MountData/dataset/LLM_dataset/baize/'),
    ),
    data_collator=transformers.DataCollatorForSeq2Seq(
        tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True
    ),
)
model.config.use_cache = False
trainer.train()
# model = model.merge_and_unload()
if args.save_model:
    output_lora_dir = '/public/MountData/yaolu/LLM_pretrained/pruned_model/finetuned_lora_baize_{}_{}{}/'.format(args.base_model, args.pr_method, args.remove_layer)
    if not os.path.exists(output_lora_dir):
        os.mkdir(output_lora_dir)
    model.save_pretrained(output_lora_dir)
'''
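From what I can tell, the Trainer only gets a loss back when the batch contains a "labels" key (AutoModelForCausalLM computes the loss internally from labels), and my tokenize() above only returns input_ids and attention_mask. Below is a minimal sketch of the change I'm considering, assuming labels should simply mirror input_ids with the padded positions set to -100 so they are ignored by the loss. Is this the right direction, or is something else missing?

'''
def tokenize(prompt):
    result = tokenizer(
        prompt,
        truncation=True,
        max_length=CUTOFF_LEN + 1,
        padding="max_length",
    )
    input_ids = result["input_ids"][:-1]
    attention_mask = result["attention_mask"][:-1]
    # labels mirror input_ids; padded positions are set to -100 so the
    # cross-entropy loss ignores them (the model shifts labels internally)
    labels = [tok if mask == 1 else -100 for tok, mask in zip(input_ids, attention_mask)]
    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels,
    }
'''

I'm not sure whether the DataCollatorForSeq2Seq then needs any change, since as far as I can tell it already pads the labels field with -100 by default.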