mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-12-23 23:30:36 +08:00
[misc] lint (#9636)
This commit is contained in:
@@ -15,32 +15,24 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from typing import TYPE_CHECKING, Optional
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from transformers import Trainer
|
||||
from trl import DPOTrainer
|
||||
from trl.trainer import disable_dropout_in_model
|
||||
from ktransformers.sft.lora import KTrainer # type: ignore
|
||||
from typing_extensions import override
|
||||
|
||||
from ...extras.constants import IGNORE_INDEX
|
||||
from ...extras.packages import is_transformers_version_greater_than
|
||||
from ..callbacks import SaveProcessorCallback
|
||||
from ..trainer_utils import create_custom_optimizer, create_custom_scheduler, get_batch_logps, nested_detach
|
||||
from .trainer import CustomDPOTrainer as BaseDPOTrainer
|
||||
from ktransformers.sft.lora import KTrainer
|
||||
from ..trainer_utils import get_batch_logps, nested_detach
|
||||
from .trainer import CustomDPOTrainer
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from transformers import PreTrainedModel, ProcessorMixin
|
||||
|
||||
from ...hparams import FinetuningArguments
|
||||
from transformers import PreTrainedModel
|
||||
|
||||
|
||||
class CustomDPOTrainer(KTrainer, BaseDPOTrainer):
|
||||
class KDPOTrainer(KTrainer, CustomDPOTrainer):
|
||||
@override
|
||||
def concatenated_forward(
|
||||
self, model: "PreTrainedModel", batch: dict[str, "torch.Tensor"], is_ref_model: bool = False
|
||||
self, model: "PreTrainedModel", batch: dict[str, "torch.Tensor"], is_ref_model: bool = False
|
||||
) -> tuple["torch.Tensor", "torch.Tensor", "torch.Tensor", "torch.Tensor", "torch.Tensor"]:
|
||||
r"""Compute the sum log probabilities of the labels under given logits if loss_type is not IPO, ORPO or SimPO.
|
||||
|
||||
@@ -48,9 +40,8 @@ class CustomDPOTrainer(KTrainer, BaseDPOTrainer):
|
||||
"""
|
||||
if self.finetuning_args.use_ref_model:
|
||||
batch = nested_detach(batch, clone=True) # avoid error
|
||||
labels = batch["labels"]
|
||||
# dpo not need compute loss in forward, waste mem
|
||||
del batch["labels"]
|
||||
|
||||
labels = batch.pop("labels") # dpo do not need compute loss in forward
|
||||
all_logits: torch.Tensor = model(**batch, return_dict=True, use_cache=False).logits.to(torch.float32)
|
||||
all_logits = all_logits.to("cpu")
|
||||
labels = labels.to(all_logits.device)
|
||||
@@ -68,4 +59,4 @@ class CustomDPOTrainer(KTrainer, BaseDPOTrainer):
|
||||
if self.loss_type in ["ipo", "orpo", "simpo"]:
|
||||
return chosen_logps, rejected_logps, chosen_logits, rejected_logits, chosen_logps
|
||||
else:
|
||||
return chosen_logps, rejected_logps, chosen_logits, rejected_logits, chosen_logps / chosen_length
|
||||
return chosen_logps, rejected_logps, chosen_logits, rejected_logits, chosen_logps / chosen_length
|
||||
|
||||
Reference in New Issue
Block a user