Dft #3960 (Closed)

Commits (69)
41f67a2  add entropy loss (1485840691, Jun 21, 2025)
619b598  add entropy loss to metrics (1485840691, Jun 22, 2025)
bfad506  Merge branch 'main' of https://github.com/1485840691/trl (1485840691, Jun 22, 2025)
07a59c9  re-format (1485840691, Jun 22, 2025)
c4b2eee  use F (1485840691, Jun 22, 2025)
93e049d  Output alignment (1485840691, Jun 23, 2025)
af13140  merge commits (1485840691, Jun 27, 2025)
42d030a  ent coef not equal 0 (1485840691, Jun 27, 2025)
d45f0fa  fix format (1485840691, Jun 27, 2025)
2302f48  Merge branch 'main' into main (1485840691, Jun 27, 2025)
c729673  fix ent loss log (1485840691, Jun 29, 2025)
03ad8da  fix mode (1485840691, Jun 29, 2025)
7b3c95c  Merge branch 'main' of https://github.com/1485840691/trl (1485840691, Jun 29, 2025)
91da19e  Merge branch 'main' into main (1485840691, Jul 1, 2025)
1b30552  update based on review (1485840691, Jul 3, 2025)
f7b4f3c  Merge branch 'main' of https://github.com/1485840691/trl (1485840691, Jul 3, 2025)
e73f820  Merge branch 'main' into main (1485840691, Jul 4, 2025)
b022c79  adaptive entropy control (1485840691, Jul 4, 2025)
97de806  adaptive entropy control update (1485840691, Jul 4, 2025)
a99244c  adaptive entropy control update fmt (1485840691, Jul 4, 2025)
7641827  Merge pull request #1 from 1485840691/tgt_ent (1485840691, Jul 5, 2025)
32d5c7c  refactor loss in grpo (1485840691, Jul 7, 2025)
872e2a4  merge master (1485840691, Jul 7, 2025)
0ef57fc  Merge branch 'main' into main (1485840691, Jul 9, 2025)
70bf9d1  Update comment on dynamic ent control (1485840691, Jul 9, 2025)
6a531ba  Merge branch 'main' of https://github.com/1485840691/trl (1485840691, Jul 9, 2025)
2fbf4da  Merge branch 'main' into main (1485840691, Jul 15, 2025)
44c342b  Merge branch 'main' into main (1485840691, Jul 16, 2025)
5b70fee  Merge branch 'main' into main (1485840691, Jul 24, 2025)
7c05fb6  Merge branch 'main' into main (1485840691, Jul 25, 2025)
72fa76b  update based on feedback (1485840691, Jul 25, 2025)
b3acd9b  Merge branch 'main' into main (1485840691, Jul 25, 2025)
88fa118  Update trl/trainer/grpo_config.py (1485840691, Jul 28, 2025)
776cdd2  Update trl/trainer/grpo_config.py (1485840691, Jul 28, 2025)
7e97263  Update trl/trainer/grpo_config.py (1485840691, Jul 28, 2025)
8c9cd01  Update trl/trainer/grpo_config.py (1485840691, Jul 28, 2025)
f1e1da6  Update trl/trainer/grpo_config.py (1485840691, Jul 28, 2025)
340f711  Update trl/trainer/grpo_config.py (1485840691, Jul 28, 2025)
4c85a76  Update trl/trainer/grpo_config.py (1485840691, Jul 28, 2025)
ade3a3e  Update trl/trainer/grpo_trainer.py (1485840691, Jul 28, 2025)
51f8ca9  Merge branch 'main' into main (1485840691, Jul 28, 2025)
eded222  add test and update based on feedback (1485840691, Jul 28, 2025)
e4ace46  Merge branch 'main' into main (1485840691, Jul 29, 2025)
79c4ee1  update based on feedback (1485840691, Jul 29, 2025)
d77795f  Merge branch 'main' into main (1485840691, Jul 31, 2025)
8c08682  sync update entropy coef (1485840691, Jul 31, 2025)
ab6ede4  Merge branch 'main' of https://github.com/1485840691/trl (1485840691, Jul 31, 2025)
dde326f  Merge branch 'main' into main (1485840691, Aug 1, 2025)
ce8bf67  change coef collective (LeonEricsson, Aug 2, 2025)
22f2fa9  nits (LeonEricsson, Aug 2, 2025)
df32688  separete entropy coefficient from the coefficient that is applied in … (LeonEricsson, Aug 5, 2025)
4d28df5  update tests (LeonEricsson, Aug 5, 2025)
8fe1d94  Merge branch 'main' into main (LeonEricsson, Aug 5, 2025)
adc5bca  Merge branch 'main' into main (1485840691, Aug 5, 2025)
55b2e83  Merge branch 'main' into main (1485840691, Aug 6, 2025)
3d94cd7  Merge branch 'main' into main (1485840691, Aug 7, 2025)
736bb60  Merge branch 'main' into main (1485840691, Aug 12, 2025)
0278e69  Merge branch 'main' into main (LeonEricsson, Aug 12, 2025)
94afea9  Merge branch 'main' into main (1485840691, Aug 13, 2025)
cb1cb85  Merge branch 'huggingface:main' into main (1485840691, Aug 14, 2025)
6f8b6c4  add dynamic ft example (1485840691, Aug 21, 2025)
96c89bd  align with main (1485840691, Aug 21, 2025)
fb0cbc3  dft update code (1485840691, Aug 27, 2025)
aba3d94  align with main (1485840691, Aug 27, 2025)
94c3c78  Merge branch 'main' into dft (1485840691, Aug 27, 2025)
dc1fd82  new line (1485840691, Aug 27, 2025)
949d095  Merge branch 'dft' of https://github.com/1485840691/trl into dft (1485840691, Aug 27, 2025)
d3cf4fc  fix format (1485840691, Aug 27, 2025)
38dfafe  Merge branch 'main' into dft (1485840691, Aug 28, 2025)

trl/scripts/dft.py (208 additions, 0 deletions)
@@ -0,0 +1,208 @@
# Copyright 2020-2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "peft",
# ]
# ///

"""
# Full training
```
python trl/scripts/dft.py \
--model_name_or_path Qwen/Qwen2-0.5B \
--dataset_name trl-lib/Capybara \
--learning_rate 2.0e-5 \
--num_train_epochs 1 \
--packing \
--per_device_train_batch_size 2 \
--gradient_accumulation_steps 8 \
--gradient_checkpointing \
--eos_token '<|im_end|>' \
--eval_strategy steps \
--eval_steps 100 \
--output_dir Qwen2-0.5B-DFT \
--push_to_hub
```

# LoRA
```
python trl/scripts/dft.py \
--model_name_or_path Qwen/Qwen2-0.5B \
--dataset_name trl-lib/Capybara \
--learning_rate 2.0e-4 \
--num_train_epochs 1 \
--packing \
--per_device_train_batch_size 2 \
--gradient_accumulation_steps 8 \
--gradient_checkpointing \
--eos_token '<|im_end|>' \
--eval_strategy steps \
--eval_steps 100 \
--use_peft \
--lora_r 32 \
--lora_alpha 16 \
--output_dir Qwen2-0.5B-DFT \
--push_to_hub
```
"""

import argparse
import warnings
from typing import Optional

import torch
from datasets import load_dataset
from torch import nn
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
from transformers.models.auto.modeling_auto import MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES

from trl import (
    DatasetMixtureConfig,
    ModelConfig,
    ScriptArguments,
    SFTConfig,
    SFTTrainer,
    TrlParser,
    clone_chat_template,
    get_dataset,
    get_kbit_device_map,
    get_peft_config,
    get_quantization_config,
)


def compute_loss_fn(outputs, labels, num_items_in_batch: Optional[int] = None, ignore_index: int = -100, **kwargs):
    logits = outputs["logits"]
    # Upcast to float before computing the loss to avoid potential precision issues
    logits = logits.float()
    vocab_size = logits.shape[-1]

    # Shift so that tokens < n predict token n
    shift_labels = labels[..., 1:].contiguous()
    shift_logits = logits[..., :-1, :].contiguous()

    # Flatten the tokens
    shift_logits = shift_logits.view(-1, vocab_size)
    shift_labels = shift_labels.view(-1)
    # Enable model parallelism
    shift_labels = shift_labels.to(shift_logits.device)

    # Per-token cross-entropy, left unreduced so it can be reweighted below
    loss = nn.functional.cross_entropy(shift_logits, shift_labels, ignore_index=ignore_index, reduction="none")

    # DFT weighting: scale each token's loss by the model's detached probability of the target token.
    # Labels equal to `ignore_index` are clamped to 0 only to keep `gather` in range; their loss terms
    # are already zeroed by the `ignore_index` handling in `cross_entropy`.
    probs = torch.softmax(shift_logits, dim=-1)
    prob_labels = torch.clamp(shift_labels, min=0)
    prob_coefs = probs.gather(1, prob_labels.unsqueeze(-1)).squeeze(-1).detach()

    loss = loss * prob_coefs

    if num_items_in_batch is not None:
        loss = loss.sum() / num_items_in_batch
    else:
        loss = loss.mean()

    return loss


def main(script_args, training_args, model_args, dataset_args):
    ################
    # Model init kwargs & Tokenizer
    ################
    quantization_config = get_quantization_config(model_args)
    model_kwargs = dict(
        revision=model_args.model_revision,
        trust_remote_code=model_args.trust_remote_code,
        attn_implementation=model_args.attn_implementation,
        torch_dtype=model_args.torch_dtype,
        use_cache=False if training_args.gradient_checkpointing else True,
        device_map=get_kbit_device_map() if quantization_config is not None else None,
        quantization_config=quantization_config,
    )

    # Create model
    config = AutoConfig.from_pretrained(model_args.model_name_or_path)
    valid_image_text_architectures = MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES.values()

    if config.architectures and any(arch in valid_image_text_architectures for arch in config.architectures):
        from transformers import AutoModelForImageTextToText

        model_kwargs.pop("use_cache", None)  # Image models do not support cache
        model = AutoModelForImageTextToText.from_pretrained(model_args.model_name_or_path, **model_kwargs)
    else:
        model = AutoModelForCausalLM.from_pretrained(model_args.model_name_or_path, **model_kwargs)

    # Create tokenizer
    tokenizer = AutoTokenizer.from_pretrained(
        model_args.model_name_or_path, trust_remote_code=model_args.trust_remote_code, use_fast=True
    )

    # Set default chat template if needed
    if tokenizer.chat_template is None:
        # TODO: source should be passed as an argument
        model, tokenizer = clone_chat_template(model, tokenizer, "Qwen/Qwen3-0.6B")

    # Load the dataset
    if dataset_args.datasets and script_args.dataset_name:
        warnings.warn(
            "Both `datasets` and `dataset_name` are provided. The `datasets` argument will be used to load the "
            "dataset and `dataset_name` will be ignored."
        )
        dataset = get_dataset(dataset_args)
    elif dataset_args.datasets and not script_args.dataset_name:
        dataset = get_dataset(dataset_args)
    elif not dataset_args.datasets and script_args.dataset_name:
        dataset = load_dataset(
            script_args.dataset_name, name=script_args.dataset_config, streaming=script_args.dataset_streaming
        )
    else:
        raise ValueError("Either `datasets` or `dataset_name` must be provided.")

    # Initialize the SFT trainer
    trainer = SFTTrainer(
        model=model,
        args=training_args,
        compute_loss_func=compute_loss_fn,
        train_dataset=dataset[script_args.dataset_train_split],
        eval_dataset=dataset[script_args.dataset_test_split] if training_args.eval_strategy != "no" else None,
        processing_class=tokenizer,
        peft_config=get_peft_config(model_args),
    )

    # Train the model
    trainer.train()

    # Save and push to Hub
    trainer.save_model(training_args.output_dir)
    if training_args.push_to_hub:
        trainer.push_to_hub(dataset_name=script_args.dataset_name)


def make_parser(subparsers: argparse._SubParsersAction = None):
    dataclass_types = (ScriptArguments, SFTConfig, ModelConfig, DatasetMixtureConfig)
    if subparsers is not None:
        parser = subparsers.add_parser("sft", help="Run the SFT training script", dataclass_types=dataclass_types)
    else:
        parser = TrlParser(dataclass_types)
    return parser


if __name__ == "__main__":
    parser = make_parser()
    # When using the trl cli, this script may be run with additional arguments, corresponding to accelerate
    # arguments. To ensure that their parsing does not interfere with the script arguments, parse the arguments
    # with `return_remaining_strings=True`, then ignore the remaining strings.
    script_args, training_args, model_args, dataset_args, _ = parser.parse_args_and_config(
        return_remaining_strings=True
    )
    main(script_args, training_args, model_args, dataset_args)
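
For reviewers who want to sanity-check the weighting outside the trainer, here is a minimal sketch (not part of the diff) that exercises `compute_loss_fn` on toy tensors. The shapes and values are made up for illustration, and the `from trl.scripts.dft import compute_loss_fn` import path is an assumption based on where this PR adds the file.

```python
# Minimal sanity check of the DFT weighting on toy tensors (illustrative values only).
import torch
from torch import nn

from trl.scripts.dft import compute_loss_fn  # hypothetical import path, based on the file added in this PR

torch.manual_seed(0)
batch, seq_len, vocab = 2, 5, 11
logits = torch.randn(batch, seq_len, vocab)
labels = torch.randint(0, vocab, (batch, seq_len))
labels[0, -2:] = -100  # treat the last two positions of the first sequence as padding

loss = compute_loss_fn({"logits": logits}, labels)

# Manual reference: shift, per-token CE, weight by the detached target-token probability, then mean.
shift_logits = logits[..., :-1, :].reshape(-1, vocab).float()
shift_labels = labels[..., 1:].reshape(-1)
ce = nn.functional.cross_entropy(shift_logits, shift_labels, ignore_index=-100, reduction="none")
probs = torch.softmax(shift_logits, dim=-1)
target_p = probs.gather(1, shift_labels.clamp(min=0).unsqueeze(-1)).squeeze(-1)
expected = (ce * target_p.detach()).mean()

assert torch.allclose(loss, expected)
```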