Hi, I'm running into the error "The model did not return a loss from the inputs" when fine-tuning with the Trainer. Can you help me solve it? Here is my code:
'''
import os
import json
import random

import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_int8_training

# (set_random_seed and args are defined elsewhere in the script)
set_random_seed(args.seed)
gradient_accumulation_steps = args.batch_size // args.micro_batch_size
device_map = "auto"
world_size = int(os.environ.get("WORLD_SIZE", 1))
ddp = world_size != 1
if ddp:
    print('using ddp...')
    device_map = {"": int(os.environ.get("LOCAL_RANK") or 0)}
    gradient_accumulation_steps = gradient_accumulation_steps // world_size
tokenizer = AutoTokenizer.from_pretrained(
    args.prune_model_path, use_fast=False, trust_remote_code=True
)
model = AutoModelForCausalLM.from_pretrained(
    args.prune_model_path, trust_remote_code=True, device_map=device_map
)
tokenizer.pad_token_id = 0
tokenizer.padding_side = "left"
print(model)
CUTOFF_LEN = 256
VAL_SET_SIZE = 2000
DATA_PATH = "/public/MountData/dataset/LLM_dataset/baize/data_tmp.json"
data = []
for x in 'alpaca,medical,quora,stackoverflow'.split(","):
    data += json.load(open("/public/MountData/dataset/LLM_dataset/baize/{}_chat_data.json".format(x)))
random.shuffle(data)
json.dump(data, open(DATA_PATH, "w"))
data = load_dataset("json", data_files=DATA_PATH)
# Data Preprocess
def generate_prompt(data_point):
    return data_point["input"]
def tokenize(prompt):
    result = tokenizer(
        prompt,
        truncation=True,
        max_length=CUTOFF_LEN + 1,
        padding="max_length",
    )
    # only input_ids and attention_mask are returned here, no "labels" field
    return {
        "input_ids": result["input_ids"][:-1],
        "attention_mask": result["attention_mask"][:-1],
    }
def generate_and_tokenize_prompt(data_point):
    prompt = generate_prompt(data_point)
    return tokenize(prompt)
if VAL_SET_SIZE > 0:
    train_val = data["train"].train_test_split(
        test_size=VAL_SET_SIZE, shuffle=True, seed=42
    )
    train_data = train_val["train"].shuffle().map(generate_and_tokenize_prompt)
    val_data = train_val["test"].shuffle().map(generate_and_tokenize_prompt)
else:
    train_data = data["train"].shuffle().map(generate_and_tokenize_prompt)
    val_data = None
# Prepare For LoRA
model = prepare_model_for_int8_training(model)
print('model is ready...')
config = LoraConfig(
r=args.lora_r,
lora_alpha=args.lora_alpha,
target_modules=args.lora_target_modules.split(","),
lora_dropout=args.lora_dropout,
bias="none",
task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)
model.print_trainable_parameters()
if not ddp and torch.cuda.device_count() > 1:
    # keeps Trainer from trying its own DataParallelism when more than 1 gpu is available
    model.is_parallelizable = True
    model.model_parallel = True
trainer = transformers.Trainer(
    model=model,
    train_dataset=train_data,
    eval_dataset=val_data,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=args.micro_batch_size,
        gradient_accumulation_steps=gradient_accumulation_steps,
        warmup_steps=100,  # 100 ori
        num_train_epochs=args.num_epochs,
        learning_rate=args.learning_rate,
        fp16=True,  # not torch.cuda.is_bf16_supported()
        bf16=False,  # torch.cuda.is_bf16_supported()
        logging_steps=10,
        logging_first_step=True,
        optim="adamw_torch",
        evaluation_strategy="steps",
        save_strategy="steps",
        eval_steps=100,
        save_steps=200,
        output_dir=args.output_dir,
        save_total_limit=20,
        max_grad_norm=1.0,
        load_best_model_at_end=True,
        # lr_scheduler_type="linear",
        ddp_find_unused_parameters=False if ddp else None,
        group_by_length=args.group_by_length,
        report_to="none",
        run_name=args.output_dir.split('/')[-1],
        metric_for_best_model="{}_loss".format('/public/MountData/dataset/LLM_dataset/baize/'),
    ),
    data_collator=transformers.DataCollatorForSeq2Seq(
        tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True
    ),
)
model.config.use_cache = False
trainer.train()
# model = model.merge_and_unload()
if args.save_model:
    output_lora_dir = '/public/MountData/yaolu/LLM_pretrained/pruned_model/finetuned_lora_baize_{}_{}{}/'.format(args.base_model, args.pr_method, args.remove_layer)
    if not os.path.exists(output_lora_dir):
        os.mkdir(output_lora_dir)
    model.save_pretrained(output_lora_dir)
'''
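From what I can tell, the Trainer only gets a loss back when the batch contains a "labels" key (AutoModelForCausalLM computes the loss internally from labels), and my tokenize() above only returns input_ids and attention_mask. Below is a minimal sketch of the change I'm considering, assuming labels should simply mirror input_ids with the padded positions set to -100 so they are ignored by the loss. Is this the right direction, or is something else missing?

'''
def tokenize(prompt):
    result = tokenizer(
        prompt,
        truncation=True,
        max_length=CUTOFF_LEN + 1,
        padding="max_length",
    )
    input_ids = result["input_ids"][:-1]
    attention_mask = result["attention_mask"][:-1]
    # labels mirror input_ids; padded positions are set to -100 so the
    # cross-entropy loss ignores them (the model shifts labels internally)
    labels = [tok if mask == 1 else -100 for tok, mask in zip(input_ids, attention_mask)]
    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels,
    }
'''

I'm not sure whether the DataCollatorForSeq2Seq then needs any change, since as far as I can tell it already pads the labels field with -100 by default.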