(lf) root@autodl-container-c9174bac52-926fa9cb:~/LLaMA-Factory# export LD_LIBRARY_PATH=/root/miniconda3/envs/lf/lib/python3.11/site-packages/nvidia/cuda_nvrtc/lib:$LD_LIBRARY_PATH
(lf) root@autodl-container-c9174bac52-926fa9cb:~/LLaMA-Factory# echo $LD_LIBRARY_PATH
/root/miniconda3/envs/lf/lib/python3.11/site-packages/nvidia/cuda_nvrtc/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
(lf) root@autodl-container-c9174bac52-926fa9cb:~/LLaMA-Factory# llamafactory-cli webui
/root/miniconda3/envs/lf/lib/python3.11/site-packages/gradio/components/chatbot.py:223: UserWarning: You have not specified a value for the `type` parameter. Defaulting to the 'tuples' format for chatbot messages, but this is deprecated and will be removed in a future version of Gradio. Please set type='messages' instead, which uses openai-style 'role' and 'content' keys.
  warnings.warn(
* Running on local URL: http://0.0.0.0:7860

To create a public link, set `share=True` in `launch()`.
10/23/2024 22:34:20 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: torch.bfloat16
2024-10-23 22:34:20,822 - modelscope - WARNING - Using branch: master as version is unstable, use with caution
[INFO|configuration_utils.py:670] 2024-10-23 22:34:21,168 >> loading configuration file /root/autodl-tmp/modelscope/hub/ZhipuAI/glm-4-9b-chat/config.json
[INFO|configuration_utils.py:670] 2024-10-23 22:34:21,172 >> loading configuration file /root/autodl-tmp/modelscope/hub/ZhipuAI/glm-4-9b-chat/config.json
[INFO|configuration_utils.py:739] 2024-10-23 22:34:21,175 >> Model config ChatGLMConfig {
  "_name_or_path": "/root/autodl-tmp/modelscope/hub/ZhipuAI/glm-4-9b-chat",
  "add_bias_linear": false,
  "add_qkv_bias": true,
  "apply_query_key_layer_scaling": true,
  "apply_residual_connection_post_layernorm": false,
  "architectures": [
    "ChatGLMModel"
  ],
  "attention_dropout": 0.0,
  "attention_softmax_in_fp32": true,
  "auto_map": {
    "AutoConfig": "configuration_chatglm.ChatGLMConfig",
    "AutoModel": "modeling_chatglm.ChatGLMForConditionalGeneration",
    "AutoModelForCausalLM": "modeling_chatglm.ChatGLMForConditionalGeneration",
    "AutoModelForSeq2SeqLM": "modeling_chatglm.ChatGLMForConditionalGeneration",
    "AutoModelForSequenceClassification": "modeling_chatglm.ChatGLMForSequenceClassification"
  },
  "bias_dropout_fusion": true,
  "classifier_dropout": null,
  "eos_token_id": [
    151329,
    151336,
    151338
  ],
  "ffn_hidden_size": 13696,
  "fp32_residual_connection": false,
  "hidden_dropout": 0.0,
  "hidden_size": 4096,
  "kv_channels": 128,
  "layernorm_epsilon": 1.5625e-07,
  "model_type": "chatglm",
  "multi_query_attention": true,
  "multi_query_group_num": 2,
  "num_attention_heads": 32,
  "num_hidden_layers": 40,
  "num_layers": 40,
  "original_rope": true,
  "pad_token_id": 151329,
  "padded_vocab_size": 151552,
  "post_layer_norm": true,
  "rmsnorm": true,
  "rope_ratio": 500,
  "seq_length": 131072,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.45.0",
  "use_cache": true,
  "vocab_size": 151552
}

[INFO|tokenization_utils_base.py:2212] 2024-10-23 22:34:21,185 >> loading file tokenizer.model
[INFO|tokenization_utils_base.py:2212] 2024-10-23 22:34:21,186 >> loading file added_tokens.json
[INFO|tokenization_utils_base.py:2212] 2024-10-23 22:34:21,186 >> loading file special_tokens_map.json
[INFO|tokenization_utils_base.py:2212] 2024-10-23 22:34:21,186 >> loading file tokenizer_config.json
[INFO|tokenization_utils_base.py:2212] 2024-10-23 22:34:21,186 >> loading file tokenizer.json
[INFO|tokenization_utils_base.py:2478] 2024-10-23 22:34:21,579 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
[INFO|configuration_utils.py:670] 2024-10-23 22:34:21,581 >> loading configuration file /root/autodl-tmp/modelscope/hub/ZhipuAI/glm-4-9b-chat/config.json
[INFO|configuration_utils.py:670] 2024-10-23 22:34:21,582 >> loading configuration file /root/autodl-tmp/modelscope/hub/ZhipuAI/glm-4-9b-chat/config.json
[INFO|configuration_utils.py:739] 2024-10-23 22:34:21,582 >> Model config ChatGLMConfig { ... }
[INFO|tokenization_utils_base.py:2212] 2024-10-23 22:34:21,584 >> loading file tokenizer.model
[INFO|tokenization_utils_base.py:2212] 2024-10-23 22:34:21,584 >> loading file added_tokens.json
[INFO|tokenization_utils_base.py:2212] 2024-10-23 22:34:21,584 >> loading file special_tokens_map.json
[INFO|tokenization_utils_base.py:2212] 2024-10-23 22:34:21,584 >> loading file tokenizer_config.json
[INFO|tokenization_utils_base.py:2212] 2024-10-23 22:34:21,584 >> loading file tokenizer.json
[INFO|tokenization_utils_base.py:2478] 2024-10-23 22:34:21,894 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
10/23/2024 22:34:21 - INFO - llamafactory.data.template - Add <|user|>,<|observation|> to stop words.
10/23/2024 22:34:21 - INFO - llamafactory.data.loader - Loading dataset demo.json...
training example:
input_ids:
[151331, 151333, 151336, 198, 109377, 151337, 198, 121658, 3837, 101328, 117148, 99134, 15223, 54235, 102, 98520, 3837, 98444, 98582, 117148, 99134, 15223, 10231, 254, 242, 98345, 99134, 103065, 103192, 15223, 54235, 102, 98520, 3837, 118295, 100119, 99526, 1773, 98964, 106546, 98342, 107410, 98540, 110000, 11314, 151329]
inputs:
[gMASK] <|user|> 你好 <|assistant|> 您好,我是 信息中心 AI 助手,一个由 信息中心 AI 研发中心 开发的 AI 助手,很高兴认识您。请问我能为您做些什么? <|endoftext|>
label_ids:
[151329, -100, -100, -100, -100, -100, 198, 121658, 3837, 101328, 117148, 99134, 15223, 54235, 102, 98520, 3837, 98444, 98582, 117148, 99134, 15223, 10231, 254, 242, 98345, 99134, 103065, 103192, 15223, 54235, 102, 98520, 3837, 118295, 100119, 99526, 1773, 98964, 106546, 98342, 107410, 98540, 110000, 11314, 151329]
labels:
<|endoftext|> 您好,我是 信息中心 AI 助手,一个由 信息中心 AI 研发中心 开发的 AI 助手,很高兴认识您。请问我能为您做些什么? <|endoftext|>
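The printed example shows how the supervised targets are built: in label_ids, the template and user-turn positions (the query "你好", i.e. "Hello") are masked with -100, while the assistant reply ("Hello, I am the Information Center AI assistant, an AI assistant developed by the Information Center AI R&D Center. Nice to meet you. What can I do for you?") keeps its token ids, so only the reply and the end-of-text token contribute to the loss; -100 is the index PyTorch's cross-entropy ignores. Below is a minimal Python sketch of that masking idea, with token ids borrowed from the example above; it is an illustration, not LLaMA-Factory's actual code.

# Sketch of SFT label masking (illustrative, not LLaMA-Factory's implementation).
IGNORE_INDEX = -100  # positions with this label are skipped by torch.nn.CrossEntropyLoss

def build_example(prompt_ids, response_ids):
    """Concatenate prompt and response; mask the prompt so only the response is learned."""
    input_ids = prompt_ids + response_ids
    labels = [IGNORE_INDEX] * len(prompt_ids) + response_ids
    return input_ids, labels

input_ids, labels = build_example(prompt_ids=[151331, 151336, 109377], response_ids=[121658, 151329])
print(input_ids)  # [151331, 151336, 109377, 121658, 151329]
print(labels)     # [-100, -100, -100, 121658, 151329]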
[INFO|configuration_utils.py:670] 2024-10-23 22:34:24,112 >> loading configuration file /root/autodl-tmp/modelscope/hub/ZhipuAI/glm-4-9b-chat/config.json
[INFO|configuration_utils.py:670] 2024-10-23 22:34:24,113 >> loading configuration file /root/autodl-tmp/modelscope/hub/ZhipuAI/glm-4-9b-chat/config.json
[INFO|configuration_utils.py:739] 2024-10-23 22:34:24,114 >> Model config ChatGLMConfig { ... }
[INFO|modeling_utils.py:3723] 2024-10-23 22:34:24,152 >> loading weights file /root/autodl-tmp/modelscope/hub/ZhipuAI/glm-4-9b-chat/model.safetensors.index.json
[INFO|modeling_utils.py:1622] 2024-10-23 22:34:24,152 >> Instantiating ChatGLMForConditionalGeneration model under default dtype torch.bfloat16.
[INFO|configuration_utils.py:1099] 2024-10-23 22:34:24,153 >> Generate config GenerationConfig {
  "eos_token_id": [
    151329,
    151336,
    151338
  ],
  "pad_token_id": 151329
}

Loading checkpoint shards: 100%|███████████████████| 10/10 [00:06<00:00, 1.60it/s]
[INFO|modeling_utils.py:4568] 2024-10-23 22:34:30,490 >> All model checkpoint weights were used when initializing ChatGLMForConditionalGeneration.
[INFO|modeling_utils.py:4576] 2024-10-23 22:34:30,490 >> All the weights of ChatGLMForConditionalGeneration were initialized from the model checkpoint at /root/autodl-tmp/modelscope/hub/ZhipuAI/glm-4-9b-chat.
If your task is similar to the task the model of the checkpoint was trained on, you can already use ChatGLMForConditionalGeneration for predictions without further training.
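The auto_map entries in the config point at modeling code bundled with the checkpoint (modeling_chatglm.py), which is why the model loads as ChatGLMForConditionalGeneration; loading the same checkpoint outside LLaMA-Factory therefore needs trust_remote_code=True. A minimal sketch of a standalone load and generation check, using the local ModelScope path from the log (the chat-template call and generation settings are illustrative assumptions):

# Sketch: load the same base checkpoint directly with transformers.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

path = "/root/autodl-tmp/modelscope/hub/ZhipuAI/glm-4-9b-chat"
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    path,
    torch_dtype=torch.bfloat16,  # matches the compute dtype reported above
    trust_remote_code=True,
    device_map="auto",
)

input_ids = tokenizer.apply_chat_template(
    [{"role": "user", "content": "你好"}],
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
output_ids = model.generate(input_ids, max_new_tokens=64)
print(tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True))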
[INFO|configuration_utils.py:1052] 2024-10-23 22:34:30,493 >> loading configuration file /root/autodl-tmp/modelscope/hub/ZhipuAI/glm-4-9b-chat/generation_config.json
[INFO|configuration_utils.py:1099] 2024-10-23 22:34:30,494 >> Generate config GenerationConfig {
  "do_sample": true,
  "eos_token_id": [
    151329,
    151336,
    151338
  ],
  "max_length": 128000,
  "pad_token_id": 151329,
  "temperature": 0.8,
  "top_p": 0.8
}

10/23/2024 22:34:30 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.
10/23/2024 22:34:30 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
10/23/2024 22:34:30 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.
10/23/2024 22:34:30 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA
10/23/2024 22:34:30 - INFO - llamafactory.model.model_utils.misc - Found linear modules: query_key_value,dense_h_to_4h,dense,dense_4h_to_h
10/23/2024 22:34:30 - INFO - llamafactory.model.loader - trainable params: 21,176,320 || all params: 9,421,127,680 || trainable%: 0.2248
[INFO|trainer.py:667] 2024-10-23 22:34:30,834 >> Using auto half precision backend
[INFO|trainer.py:2243] 2024-10-23 22:34:31,512 >> ***** Running training *****
[INFO|trainer.py:2244] 2024-10-23 22:34:31,513 >> Num examples = 80
[INFO|trainer.py:2245] 2024-10-23 22:34:31,513 >> Num Epochs = 5
[INFO|trainer.py:2246] 2024-10-23 22:34:31,513 >> Instantaneous batch size per device = 2
[INFO|trainer.py:2249] 2024-10-23 22:34:31,513 >> Total train batch size (w. parallel, distributed & accumulation) = 16
[INFO|trainer.py:2250] 2024-10-23 22:34:31,513 >> Gradient Accumulation steps = 8
[INFO|trainer.py:2251] 2024-10-23 22:34:31,513 >> Total optimization steps = 25
[INFO|trainer.py:2252] 2024-10-23 22:34:31,516 >> Number of trainable parameters = 21,176,320
  0%|                                                       | 0/25 [00:00<?, ?it/s]
[INFO|trainer.py:3705] >> Saving model checkpoint to saves/GLM-4-9B-Chat/lora/train_2024-10-23-22-33-53/checkpoint-25
[INFO|configuration_utils.py:670] 2024-10-23 22:35:44,953 >> loading configuration file /root/autodl-tmp/modelscope/hub/ZhipuAI/glm-4-9b-chat/config.json
[INFO|configuration_utils.py:739] 2024-10-23 22:35:44,954 >> Model config ChatGLMConfig { "_name_or_path": "THUDM/glm-4-9b-chat", ... }
[INFO|tokenization_utils_base.py:2649] 2024-10-23 22:35:45,108 >> tokenizer config file saved in saves/GLM-4-9B-Chat/lora/train_2024-10-23-22-33-53/checkpoint-25/tokenizer_config.json
[INFO|tokenization_utils_base.py:2658] 2024-10-23 22:35:45,108 >> Special tokens file saved in saves/GLM-4-9B-Chat/lora/train_2024-10-23-22-33-53/checkpoint-25/special_tokens_map.json
[INFO|tokenization_utils_base.py:2709] 2024-10-23 22:35:45,108 >> added tokens file saved in saves/GLM-4-9B-Chat/lora/train_2024-10-23-22-33-53/checkpoint-25/added_tokens.json
[INFO|trainer.py:2505] 2024-10-23 22:35:45,372 >> Training completed. Do not forget to share your model on huggingface.co/models =)

{'train_runtime': 73.8562, 'train_samples_per_second': 5.416, 'train_steps_per_second': 0.338, 'train_loss': 1104.093203125, 'epoch': 5.0, 'num_input_tokens_seen': 21552}
100%|██████████████████████████████████████████████| 25/25 [01:13<00:00, 2.95s/it]
[INFO|trainer.py:3705] 2024-10-23 22:35:45,374 >> Saving model checkpoint to saves/GLM-4-9B-Chat/lora/train_2024-10-23-22-33-53
[INFO|configuration_utils.py:670] 2024-10-23 22:35:45,400 >> loading configuration file /root/autodl-tmp/modelscope/hub/ZhipuAI/glm-4-9b-chat/config.json
[INFO|configuration_utils.py:739] 2024-10-23 22:35:45,401 >> Model config ChatGLMConfig { "_name_or_path": "THUDM/glm-4-9b-chat", ... }
[INFO|tokenization_utils_base.py:2649] 2024-10-23 22:35:45,552 >> tokenizer config file saved in saves/GLM-4-9B-Chat/lora/train_2024-10-23-22-33-53/tokenizer_config.json
[INFO|tokenization_utils_base.py:2658] 2024-10-23 22:35:45,552 >> Special tokens file saved in saves/GLM-4-9B-Chat/lora/train_2024-10-23-22-33-53/special_tokens_map.json
[INFO|tokenization_utils_base.py:2709] 2024-10-23 22:35:45,552 >> added tokens file saved in saves/GLM-4-9B-Chat/lora/train_2024-10-23-22-33-53/added_tokens.json
***** train metrics *****
  epoch                    =        5.0
  num_input_tokens_seen    =      21552
  total_flos               =  1059839GF
  train_loss               =  1104.0932
  train_runtime            = 0:01:13.85
  train_samples_per_second =      5.416
  train_steps_per_second   =      0.338
Figure saved at: saves/GLM-4-9B-Chat/lora/train_2024-10-23-22-33-53/training_loss.png
10/23/2024 22:35:45 - WARNING - llamafactory.extras.ploting - No metric eval_loss to plot.
10/23/2024 22:35:45 - WARNING - llamafactory.extras.ploting - No metric eval_accuracy to plot.
[INFO|modelcard.py:449] 2024-10-23 22:35:45,696 >> Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}
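Training finishes in roughly 74 seconds, and the LoRA adapter, tokenizer files and loss curve are all written to saves/GLM-4-9B-Chat/lora/train_2024-10-23-22-33-53. One way to try the result outside the WebUI is to attach that adapter to the base model with peft (installed as a LLaMA-Factory dependency); the sketch below uses the paths from the log and is only one option, since the WebUI's Chat and Export tabs cover the same ground.

# Sketch: attach the trained LoRA adapter to the base GLM-4 checkpoint with peft.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base_path = "/root/autodl-tmp/modelscope/hub/ZhipuAI/glm-4-9b-chat"
adapter_path = "saves/GLM-4-9B-Chat/lora/train_2024-10-23-22-33-53"  # output dir of the run above

tokenizer = AutoTokenizer.from_pretrained(base_path, trust_remote_code=True)
base = AutoModelForCausalLM.from_pretrained(
    base_path, torch_dtype=torch.bfloat16, trust_remote_code=True, device_map="auto"
)
model = PeftModel.from_pretrained(base, adapter_path)
# model = model.merge_and_unload()  # optionally fold the adapter into the base weights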