Mirror of https://github.com/hiyouga/LLaMA-Factory.git (synced 2025-08-23 06:12:50 +08:00)

Commit c5ec4eaef5: tiny fix
Parent: e877b8d55b
Former-commit-id: 450910c1db969533c5268022cb064cbc2c9cb7e6
@@ -208,6 +208,7 @@ accelerate launch src/train_XX.py # arguments (same as above)
 compute_environment: LOCAL_MACHINE
 deepspeed_config:
   gradient_accumulation_steps: 4
+  gradient_clipping: 0.5
   offload_optimizer_device: none
   offload_param_device: none
   zero3_init_flag: false
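For context, `gradient_clipping: 0.5` caps the global gradient norm before each optimizer step. A minimal sketch of the equivalent operation in plain PyTorch (the toy model and optimizer below are placeholders, not part of this change):

import torch

# Toy model/optimizer, only to illustrate what gradient_clipping: 0.5 corresponds to.
model = torch.nn.Linear(16, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

loss = model(torch.randn(4, 16)).pow(2).mean()
loss.backward()

# DeepSpeed's gradient_clipping clips the global gradient norm, analogous to:
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=0.5)
optimizer.step()
optimizer.zero_grad()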
@@ -13,6 +13,7 @@ def main():
     model.save_pretrained(training_args.output_dir, max_shard_size="10GB")
     tokenizer.save_pretrained(training_args.output_dir)
     print("model and tokenizer have been saved at:", training_args.output_dir)
+    print("Remember to copy the *.py files from the original directory.")


 if __name__ == "__main__":
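The added print is a reminder for checkpoints that ship custom code (modeling/tokenization *.py files loaded with trust_remote_code), since save_pretrained does not always write those files out. A minimal sketch of the manual copy step, using hypothetical paths:

import glob
import shutil

original_dir = "path/to/original_model"  # hypothetical source checkpoint
output_dir = "path/to/exported_model"    # hypothetical export target

# Copy the custom *.py files so the exported model can still be loaded
# with trust_remote_code=True.
for py_file in glob.glob(f"{original_dir}/*.py"):
    shutil.copy(py_file, output_dir)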
@@ -26,8 +26,10 @@ class DynamicDataCollatorWithPadding(DataCollatorWithPadding):
         """
         batch_size, seq_length = input_ids.size()
         attention_mask = torch.ones((batch_size, seq_length), device=device)
+
         for i, seq in enumerate(input_ids):
             attention_mask[i, :(seq != self.tokenizer.pad_token_id).nonzero()[0].item()] = 0 # padding
+
         attention_mask = attention_mask.bool()
         return attention_mask

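The loop above assumes left-padded batches: the index of the first non-pad token marks where real content starts, and every position before it is masked out. A standalone sketch of the same idea, with a hypothetical pad id of 0 and made-up token ids:

import torch

pad_token_id = 0  # hypothetical pad id for illustration
input_ids = torch.tensor([
    [0, 0, 5, 6, 7],    # two pad tokens on the left
    [0, 8, 9, 10, 11],  # one pad token on the left
])

attention_mask = torch.ones_like(input_ids)
for i, seq in enumerate(input_ids):
    # index of the first non-pad token; everything before it is padding
    first_real = (seq != pad_token_id).nonzero()[0].item()
    attention_mask[i, :first_real] = 0
attention_mask = attention_mask.bool()
print(attention_mask)
# tensor([[False, False,  True,  True,  True],
#         [False,  True,  True,  True,  True]])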
@@ -49,7 +51,11 @@ class DynamicDataCollatorWithPadding(DataCollatorWithPadding):
         labels = [torch.tensor(feature["labels"]).flip(0) for feature in features]
         input_ids = input_ids + labels # pad them to the same length

-        input_ids = torch.nn.utils.rnn.pad_sequence(input_ids, batch_first=True, padding_value=self.tokenizer.pad_token_id).flip(-1)
+        input_ids = torch.nn.utils.rnn.pad_sequence(
+            input_ids,
+            batch_first=True,
+            padding_value=self.tokenizer.pad_token_id
+        ).flip(-1)

         batch = {}

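The reformatted call is the usual trick for left padding with pad_sequence, which only pads on the right: reverse each sequence, pad, then reverse the padded batch. A small sketch of the pattern on toy tensors (the pad id of 0 is a placeholder):

import torch

pad_token_id = 0  # hypothetical pad id for illustration
sequences = [torch.tensor([5, 6, 7]), torch.tensor([8, 9])]

# Reverse each sequence, right-pad, then reverse again to obtain left padding.
flipped = [seq.flip(0) for seq in sequences]
padded = torch.nn.utils.rnn.pad_sequence(
    flipped,
    batch_first=True,
    padding_value=pad_token_id
).flip(-1)
print(padded)
# tensor([[5, 6, 7],
#         [0, 8, 9]])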
@@ -35,8 +35,9 @@ class ComputeMetrics:
         score_dict = {"rouge-1": [], "rouge-2": [], "rouge-l": [], "bleu-4": []}

         for pred, label in zip(preds, labels):
-            pred = pred[len(label) - np.sum(label == IGNORE_INDEX) : len(pred) - np.sum(pred == IGNORE_INDEX)] # remove prompts
-            label = label[:len(label) - np.sum(label == IGNORE_INDEX)]
+            pred_pad_len, label_pad_len = np.sum(pred == IGNORE_INDEX), np.sum(label == IGNORE_INDEX)
+            pred = pred[len(label) - label_pad_len : len(pred) - pred_pad_len] # remove prompts
+            label = label[:len(label) - label_pad_len]

             hypothesis = list(jieba.cut(self.tokenizer.decode(pred, skip_special_tokens=True)))
             reference = list(jieba.cut(self.tokenizer.decode(label, skip_special_tokens=True)))
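The rewritten lines cache the two IGNORE_INDEX counts in pred_pad_len and label_pad_len instead of recomputing them inside each slice; the slicing itself is unchanged. A toy numpy example of the same bookkeeping, with made-up token ids chosen only to show the slicing:

import numpy as np

IGNORE_INDEX = -100  # same sentinel used by the codebase

# Toy rows for illustration only; real rows come from the model outputs.
pred = np.array([11, 12, 13, 21, 22, IGNORE_INDEX, IGNORE_INDEX])
label = np.array([31, 32, IGNORE_INDEX, IGNORE_INDEX, IGNORE_INDEX])

pred_pad_len, label_pad_len = np.sum(pred == IGNORE_INDEX), np.sum(label == IGNORE_INDEX)
pred = pred[len(label) - label_pad_len : len(pred) - pred_pad_len]  # remove prompts
label = label[:len(label) - label_pad_len]
print(pred)   # [13 21 22]
print(label)  # [31 32]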
@@ -79,8 +80,9 @@ class Seq2SeqPeftTrainer(PeftTrainer):
         with open(output_prediction_file, "w", encoding="utf-8") as writer:
             res: List[str] = []
             for pred, label in zip(predict_results.predictions, predict_results.label_ids):
-                pred = pred[len(label) - np.sum(label == IGNORE_INDEX) : len(pred) - np.sum(pred == IGNORE_INDEX)] # remove prompts
-                label = label[:len(label) - np.sum(label == IGNORE_INDEX)]
+                pred_pad_len, label_pad_len = np.sum(pred == IGNORE_INDEX), np.sum(label == IGNORE_INDEX)
+                pred = pred[len(label) - label_pad_len : len(pred) - pred_pad_len] # remove prompts
+                label = label[:len(label) - label_pad_len]

                 pred = self.tokenizer.decode(pred, skip_special_tokens=True)
                 label = self.tokenizer.decode(label, skip_special_tokens=True)
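After decoding, the surrounding loop collects one labeled pair per example into res and writes the result through writer. A minimal sketch of that collect-and-save pattern with hypothetical decoded strings and a hypothetical output path (the exact line format used by the trainer may differ):

from typing import List

# Hypothetical decoded outputs, only to illustrate saving predictions alongside labels.
decoded_pairs = [
    ("predicted answer 1", "reference answer 1"),
    ("predicted answer 2", "reference answer 2"),
]

res: List[str] = []
for pred, label in decoded_pairs:
    res.append(f"label: {label}\npredict: {pred}")

with open("generated_predictions.txt", "w", encoding="utf-8") as writer:
    writer.write("\n".join(res))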