=======
root@zf4909066b964784aa197ec174fed0d1-task0-0:/# python -m fastchat.serve.controller --host 0.0.0.0
2025-01-16 14:27:20 | INFO | controller | args: Namespace(host='0.0.0.0', port=21001, dispatch_method='shortest_queue', ssl=False)
2025-01-16 14:27:20 | ERROR | stderr | INFO:     Started server process [57]
2025-01-16 14:27:20 | ERROR | stderr | INFO:     Waiting for application startup.
2025-01-16 14:27:20 | ERROR | stderr | INFO:     Application startup complete.
2025-01-16 14:27:20 | ERROR | stderr | INFO:     Uvicorn running on http://0.0.0.0:21001 (Press CTRL+C to quit)
2025-01-16 14:28:07 | INFO | controller | Register a new worker: http://localhost:21002
2025-01-16 14:28:07 | INFO | controller | Register done: http://localhost:21002, {'model_names': ['glm-4-9b-chat'], 'speed': 1, 'queue_length': 0}
2025-01-16 14:28:07 | INFO | stdout | INFO:     127.0.0.1:43232 - "POST /register_worker HTTP/1.1" 200 OK
2025-01-16 14:28:52 | INFO | controller | Receive heart beat. http://localhost:21002
2025-01-16 14:28:52 | INFO | stdout | INFO:     127.0.0.1:43284 - "POST /receive_heart_beat HTTP/1.1" 200 OK
2025-01-16 14:29:33 | INFO | stdout | INFO:     127.0.0.1:43336 - "POST /list_models HTTP/1.1" 200 OK

======
root@zf4909066b964784aa197ec174fed0d1-task0-0:/# python -m fastchat.serve.model_worker --model-path /dataset/glm-4-9b-chat/ --host 0.0.0.0 --num-gpus 4 --max-gpu-memory 15GiB
2025-01-16 14:27:53 | INFO | model_worker | args: Namespace(host='0.0.0.0', port=21002, worker_address='http://localhost:21002', controller_address='http://localhost:21001', model_path='/dataset/glm-4-9b-chat/', revision='main', device='cuda', gpus=None, num_gpus=4, max_gpu_memory='15GiB', dtype=None, load_8bit=False, cpu_offloading=False, gptq_ckpt=None, gptq_wbits=16, gptq_groupsize=-1, gptq_act_order=False, awq_ckpt=None, awq_wbits=16, awq_groupsize=-1, enable_exllama=False, exllama_max_seq_len=4096, exllama_gpu_split=None, exllama_cache_8bit=False, enable_xft=False, xft_max_seq_len=4096, xft_dtype=None, model_names=None, conv_template=None, embed_in_truncate=False, limit_worker_concurrency=5, stream_interval=2, no_register=False, seed=None, debug=False, ssl=False)
2025-01-16 14:27:53 | INFO | model_worker | Loading the model ['glm-4-9b-chat'] on worker a0d32b20 ...
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards:   0%|          | 0/10 [00:00<?, ?it/s]

======
root@zf4909066b964784aa197ec174fed0d1-task0-0:/# curl http://localhost:8000/v1/completions \
>   -H "Content-Type: application/json" \
>   -d '{
>     "model": "glm-4-9b-chat",
>     "prompt": "Once upon a time",
>     "max_tokens": 41,
>     "temperature": 0.5}'
Internal Server Error
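
To see what is actually failing behind curl's bare "Internal Server Error", it helps to replay the request from Python and print the full response body. Below is a minimal debugging sketch, assuming the OpenAI-compatible server is fastchat.serve.openai_api_server listening on its documented default port 8000 (its launch command is not captured in the panes above). It first asks the API server which models it can see, then retries the same completion request from the transcript:

    # Hypothetical debugging script; host and port assume FastChat's
    # documented defaults (openai_api_server at http://localhost:8000).
    import requests

    BASE = "http://localhost:8000"

    # 1. Confirm the API server can see the worker's model. If
    #    'glm-4-9b-chat' is absent here, the worker never finished
    #    loading or registering, which by itself would explain the 500.
    models = requests.get(f"{BASE}/v1/models", timeout=30)
    print(models.status_code, models.text)

    # 2. Replay the failing completion request with the same parameters
    #    as the curl above, and dump the raw body, which usually carries
    #    the server-side error message that curl does not show.
    resp = requests.post(
        f"{BASE}/v1/completions",
        json={
            "model": "glm-4-9b-chat",
            "prompt": "Once upon a time",
            "max_tokens": 41,
            "temperature": 0.5,
        },
        timeout=120,
    )
    print(resp.status_code)  # 500 in the failing run above
    print(resp.text)         # full error body instead of curl's one-liner

Note that the model worker pane above cuts off at 0/10 checkpoint shards, so the /v1/models check is also useful for distinguishing a not-yet-loaded model from a genuine inference error.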