mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-08-04 04:32:50 +08:00
parent
7dbc670902
commit
9a88387b91
@ -38,8 +38,8 @@ def export_model(args: Optional[Dict[str, Any]] = None):
|
||||
model_args, _, finetuning_args, _ = get_infer_args(args)
|
||||
model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args)
|
||||
|
||||
if getattr(model, "quantization_method", None):
|
||||
raise ValueError("Cannot export a quantized model.")
|
||||
if getattr(model, "quantization_method", None) and model_args.adapter_name_or_path is not None:
|
||||
logger.warning("Cannot merge adapters to a quantized model.")
|
||||
|
||||
model.config.use_cache = True
|
||||
model = model.to("cpu")
|
||||
|
@ -75,4 +75,4 @@ def create_web_demo() -> gr.Blocks:
|
||||
if __name__ == "__main__":
|
||||
demo = create_ui()
|
||||
demo.queue()
|
||||
demo.launch(server_name="0.0.0.0", server_port=7860, share=False, inbrowser=True)
|
||||
demo.launch(server_name="0.0.0.0", share=False, inbrowser=True)
|
||||
|
@ -4,7 +4,7 @@ from llmtuner import create_ui
|
||||
def main():
|
||||
demo = create_ui()
|
||||
demo.queue()
|
||||
demo.launch(server_name="0.0.0.0", server_port=7860, share=False, inbrowser=True)
|
||||
demo.launch(server_name="0.0.0.0", share=False, inbrowser=True)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
@ -4,7 +4,7 @@ from llmtuner import create_web_demo
|
||||
def main():
|
||||
demo = create_web_demo()
|
||||
demo.queue()
|
||||
demo.launch(server_name="0.0.0.0", server_port=7860, share=False, inbrowser=True)
|
||||
demo.launch(server_name="0.0.0.0", share=False, inbrowser=True)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
Loading…
x
Reference in New Issue
Block a user