unitorch.cli.fastapis
InfoFastAPI
Tip
core/fastapi/info is the section for configuration of InfoFastAPI.
Bases: GenericFastAPI
Source code in src/unitorch/cli/fastapis/info.py
def __init__(self, config: Config):
    """Bind the config and expose the ``/status`` route.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/info``; the ``router`` option gives the route
            prefix and the ``device`` option (default ``"cpu"``) is stored
            for later use by ``status``.
    """
    self.config = config
    config.set_default_section("core/fastapi/info")
    prefix = config.getoption("router", "/core/fastapi/info")
    self._device = config.getoption("device", "cpu")
    api_router = APIRouter(prefix=prefix)
    # Only the status endpoint is registered for this service.
    api_router.add_api_route("/status", self.status, methods=["GET"])
    self._router = api_router
    self._lock = asyncio.Lock()
|
config
instance-attribute
_device
instance-attribute
_device = getoption('device', 'cpu')
_router
instance-attribute
_router = APIRouter(prefix=router)
start
Source code in src/unitorch/cli/fastapis/info.py
def start(self):
    """Return the fixed acknowledgement string; nothing is loaded here."""
    return "start success"
|
stop
Source code in src/unitorch/cli/fastapis/info.py
def stop(self):
    """Return the fixed acknowledgement string; nothing is released here."""
    return "stop success"
|
status
Source code in src/unitorch/cli/fastapis/info.py
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
def status(self):
    """Report host memory and, for non-CPU devices, CUDA memory in GiB.

    Returns:
        A dict keyed by ``"cpu"`` plus either ``"cuda"`` (single device) or
        one ``"cuda:<device>"`` entry per element when ``self._device`` is a
        list. Each value holds ``total``, ``free`` and ``used``. For the
        host entry, ``free`` is psutil's ``available`` figure.
    """
    gib = 1024**3
    host = psutil.virtual_memory()
    stats = {
        "cpu": {
            "total": host.total / gib,
            "free": host.available / gib,
            "used": host.used / gib,
        }
    }
    if self._device == "cpu":
        return stats

    # Normalise the single-device and multi-device cases to (key, device).
    if isinstance(self._device, list):
        targets = [(f"cuda:{device}", device) for device in self._device]
    else:
        targets = [("cuda", self._device)]

    for key, device in targets:
        free, total = torch.cuda.mem_get_info(device)
        total = total / gib
        free = free / gib
        stats[key] = {"total": total, "free": free, "used": total - free}
    return stats
|
BRIAFastAPI
Tip
core/fastapi/bria is the section for configuration of BRIAFastAPI.
Bases: GenericFastAPI
Source code in src/unitorch/cli/fastapis/bria.py
80
81
82
83
84
85
86
87
88
89
def __init__(self, config: Config):
    """Bind the config and register the service's HTTP routes.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/bria`` and its ``router`` option gives the
            route prefix.
    """
    self.config = config
    config.set_default_section("core/fastapi/bria")
    prefix = config.getoption("router", "/core/fastapi/bria")
    self._pipe = None  # no pipeline is loaded at construction time
    api_router = APIRouter(prefix=prefix)
    endpoints = (
        ("/generate", self.generate, ["POST"]),
        ("/status", self.status, ["GET"]),
        ("/start", self.start, ["GET"]),
        ("/stop", self.stop, ["GET"]),
    )
    for path, handler, methods in endpoints:
        api_router.add_api_route(path, handler, methods=methods)
    self._router = api_router
    self._lock = asyncio.Lock()
|
config
instance-attribute
_router
instance-attribute
_router = APIRouter(prefix=router)
start
Source code in src/unitorch/cli/fastapis/bria.py
def start(self):
    """Build the BRIA segmentation pipeline and keep it on the instance.

    Returns:
        The literal string ``"start success"``.
    """
    weight_url = "https://huggingface.co/datasets/fuliucansheng/hubfiles/resolve/main/bria_rmbg2.0_pytorch_model.bin"
    pipeline = BRIAForSegmentationPipeline.from_config(
        self.config,
        pretrained_weight_path=weight_url,
    )
    self._pipe = pipeline
    return "start success"
|
stop
Source code in src/unitorch/cli/fastapis/bria.py
103
104
105
106
107
108
def stop(self):
    """Release the loaded pipeline, if any, and report success.

    The pipeline is moved to CPU before the reference is dropped. When no
    pipeline is loaded (stop before start, or stop called twice) the move
    is skipped instead of raising ``AttributeError`` on ``None``.

    Returns:
        The literal string ``"stop success"``.
    """
    if self._pipe is not None:
        self._pipe.to("cpu")
    # Drop the reference before collecting so the memory can be reclaimed.
    self._pipe = None
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"
|
status
Source code in src/unitorch/cli/fastapis/bria.py
def status(self):
    """Return ``"running"`` when a pipeline is loaded, else ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"
|
generate
async
generate(image: UploadFile, threshold: float = 0.5)
Source code in src/unitorch/cli/fastapis/bria.py
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
async def generate(
    self,
    image: UploadFile,
    threshold: float = 0.5,
):
    """Run the pipeline on an uploaded image and return the mask as PNG.

    Args:
        image: Uploaded file; its bytes are decoded with ``Image.open``.
        threshold: Forwarded to the pipeline as ``threshold``.

    Returns:
        A ``StreamingResponse`` with media type ``image/png``.

    Raises:
        AssertionError: If no pipeline is loaded.
    """
    assert self._pipe is not None
    raw = await image.read()
    picture = Image.open(io.BytesIO(raw))
    # The lock lets only one request use the pipeline at a time.
    async with self._lock:
        mask = self._pipe(picture, threshold=threshold)
    encoded = io.BytesIO()
    mask.save(encoded, format="PNG")
    body = io.BytesIO(encoded.getvalue())
    return StreamingResponse(body, media_type="image/png")
|
ClipForClassificationFastAPI
Tip
core/fastapi/clip is the section for configuration of ClipForClassificationFastAPI.
Bases: GenericFastAPI
Source code in src/unitorch/cli/fastapis/clip.py
638
639
640
641
642
643
644
645
646
647
def __init__(self, config: Config):
    """Bind the config and register the service's HTTP routes.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/clip`` and its ``router`` option gives the
            route prefix.
    """
    self.config = config
    config.set_default_section("core/fastapi/clip")
    prefix = config.getoption("router", "/core/fastapi/clip")
    self._pipe = None  # no pipeline is loaded at construction time
    api_router = APIRouter(prefix=prefix)
    endpoints = (
        ("/generate", self.generate, ["POST"]),
        ("/status", self.status, ["GET"]),
        ("/start", self.start, ["GET"]),
        ("/stop", self.stop, ["GET"]),
    )
    for path, handler, methods in endpoints:
        api_router.add_api_route(path, handler, methods=methods)
    self._router = api_router
    self._lock = asyncio.Lock()
|
config
instance-attribute
_router
instance-attribute
_router = APIRouter(prefix=router)
start
start(pretrained_name: str = 'clip-vit-base-patch16')
Source code in src/unitorch/cli/fastapis/clip.py
def start(self, pretrained_name: str = "clip-vit-base-patch16"):
    """Build the CLIP classification pipeline and keep it on the instance.

    Args:
        pretrained_name: Forwarded to
            ``ClipForClassificationPipeline.from_config``.

    Returns:
        The literal string ``"start success"``.
    """
    pipeline = ClipForClassificationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"
|
stop
Source code in src/unitorch/cli/fastapis/clip.py
661
662
663
664
665
666
def stop(self):
    """Release the loaded pipeline, if any, and report success.

    The pipeline is moved to CPU before the reference is dropped. When no
    pipeline is loaded (stop before start, or stop called twice) the move
    is skipped instead of raising ``AttributeError`` on ``None``.

    Returns:
        The literal string ``"stop success"``.
    """
    if self._pipe is not None:
        self._pipe.to("cpu")
    # Drop the reference before collecting so the memory can be reclaimed.
    self._pipe = None
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"
|
status
Source code in src/unitorch/cli/fastapis/clip.py
def status(self):
    """Return ``"running"`` when a pipeline is loaded, else ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"
|
generate
async
generate(
text: str,
image: UploadFile,
max_seq_length: Optional[int] = 512,
)
Source code in src/unitorch/cli/fastapis/clip.py
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
async def generate(
    self,
    text: str,
    image: UploadFile,
    max_seq_length: Optional[int] = 512,
):
    """Run the pipeline on a text and an uploaded image.

    Args:
        text: Passed to the pipeline as the first positional argument.
        image: Uploaded file; its bytes are decoded with ``Image.open``.
        max_seq_length: Forwarded to the pipeline under the same name.

    Returns:
        Whatever the pipeline call returns.

    Raises:
        AssertionError: If no pipeline is loaded.
    """
    assert self._pipe is not None
    raw = await image.read()
    picture = Image.open(io.BytesIO(raw))
    # The lock lets only one request use the pipeline at a time.
    async with self._lock:
        return self._pipe(text, picture, max_seq_length=max_seq_length)
|
ClipForTextClassificationFastAPI
Tip
core/fastapi/clip/text is the section for configuration of ClipForTextClassificationFastAPI.
Bases: GenericFastAPI
Source code in src/unitorch/cli/fastapis/clip.py
693
694
695
696
697
698
699
700
701
702
def __init__(self, config: Config):
    """Bind the config and register the service's HTTP routes.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/clip/text`` and its ``router`` option gives the
            route prefix.
    """
    self.config = config
    config.set_default_section("core/fastapi/clip/text")
    prefix = config.getoption("router", "/core/fastapi/clip/text")
    self._pipe = None  # no pipeline is loaded at construction time
    api_router = APIRouter(prefix=prefix)
    endpoints = (
        ("/generate", self.generate, ["POST"]),
        ("/status", self.status, ["GET"]),
        ("/start", self.start, ["GET"]),
        ("/stop", self.stop, ["GET"]),
    )
    for path, handler, methods in endpoints:
        api_router.add_api_route(path, handler, methods=methods)
    self._router = api_router
    self._lock = asyncio.Lock()
|
config
instance-attribute
_router
instance-attribute
_router = APIRouter(prefix=router)
start
start(pretrained_name: str = 'clip-vit-base-patch16')
Source code in src/unitorch/cli/fastapis/clip.py
def start(self, pretrained_name: str = "clip-vit-base-patch16"):
    """Build the CLIP text-classification pipeline and keep it on the instance.

    Args:
        pretrained_name: Forwarded to
            ``ClipForTextClassificationPipeline.from_config``.

    Returns:
        The literal string ``"start success"``.
    """
    pipeline = ClipForTextClassificationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"
|
stop
Source code in src/unitorch/cli/fastapis/clip.py
716
717
718
719
720
721
def stop(self):
    """Release the loaded pipeline, if any, and report success.

    The pipeline is moved to CPU before the reference is dropped. When no
    pipeline is loaded (stop before start, or stop called twice) the move
    is skipped instead of raising ``AttributeError`` on ``None``.

    Returns:
        The literal string ``"stop success"``.
    """
    if self._pipe is not None:
        self._pipe.to("cpu")
    # Drop the reference before collecting so the memory can be reclaimed.
    self._pipe = None
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"
|
status
Source code in src/unitorch/cli/fastapis/clip.py
def status(self):
    """Return ``"running"`` when a pipeline is loaded, else ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"
|
generate
async
generate(text: str, max_seq_length: Optional[int] = 512)
Source code in src/unitorch/cli/fastapis/clip.py
727
728
729
730
731
732
733
734
735
736
737
738
async def generate(
    self,
    text: str,
    max_seq_length: Optional[int] = 512,
):
    """Run the pipeline on a text.

    Args:
        text: Passed to the pipeline as the first positional argument.
        max_seq_length: Forwarded to the pipeline under the same name.

    Returns:
        Whatever the pipeline call returns.

    Raises:
        AssertionError: If no pipeline is loaded.
    """
    assert self._pipe is not None
    # The lock lets only one request use the pipeline at a time.
    async with self._lock:
        return self._pipe(text, max_seq_length=max_seq_length)
|
ClipForImageClassificationFastAPI
Tip
core/fastapi/clip/image is the section for configuration of ClipForImageClassificationFastAPI.
Bases: GenericFastAPI
Source code in src/unitorch/cli/fastapis/clip.py
744
745
746
747
748
749
750
751
752
753
def __init__(self, config: Config):
    """Bind the config and register the service's HTTP routes.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/clip/image`` and its ``router`` option gives the
            route prefix.
    """
    self.config = config
    config.set_default_section("core/fastapi/clip/image")
    prefix = config.getoption("router", "/core/fastapi/clip/image")
    self._pipe = None  # no pipeline is loaded at construction time
    api_router = APIRouter(prefix=prefix)
    endpoints = (
        ("/generate", self.generate, ["POST"]),
        ("/status", self.status, ["GET"]),
        ("/start", self.start, ["GET"]),
        ("/stop", self.stop, ["GET"]),
    )
    for path, handler, methods in endpoints:
        api_router.add_api_route(path, handler, methods=methods)
    self._router = api_router
    self._lock = asyncio.Lock()
|
config
instance-attribute
_router
instance-attribute
_router = APIRouter(prefix=router)
start
start(pretrained_name: str = 'clip-vit-base-patch16')
Source code in src/unitorch/cli/fastapis/clip.py
def start(self, pretrained_name: str = "clip-vit-base-patch16"):
    """Build the CLIP image-classification pipeline and keep it on the instance.

    Args:
        pretrained_name: Forwarded to
            ``ClipForImageClassificationPipeline.from_config``.

    Returns:
        The literal string ``"start success"``.
    """
    pipeline = ClipForImageClassificationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"
|
stop
Source code in src/unitorch/cli/fastapis/clip.py
767
768
769
770
771
772
def stop(self):
    """Release the loaded pipeline, if any, and report success.

    The pipeline is moved to CPU before the reference is dropped. When no
    pipeline is loaded (stop before start, or stop called twice) the move
    is skipped instead of raising ``AttributeError`` on ``None``.

    Returns:
        The literal string ``"stop success"``.
    """
    if self._pipe is not None:
        self._pipe.to("cpu")
    # Drop the reference before collecting so the memory can be reclaimed.
    self._pipe = None
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"
|
status
Source code in src/unitorch/cli/fastapis/clip.py
def status(self):
    """Return ``"running"`` when a pipeline is loaded, else ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"
|
generate
async
generate(image: UploadFile)
Source code in src/unitorch/cli/fastapis/clip.py
778
779
780
781
782
783
784
785
786
787
async def generate(
    self,
    image: UploadFile,
):
    """Run the pipeline on an uploaded image.

    Args:
        image: Uploaded file; its bytes are decoded with ``Image.open``.

    Returns:
        Whatever the pipeline call returns.

    Raises:
        AssertionError: If no pipeline is loaded.
    """
    assert self._pipe is not None
    raw = await image.read()
    picture = Image.open(io.BytesIO(raw))
    # The lock lets only one request use the pipeline at a time.
    async with self._lock:
        return self._pipe(picture)
|
ClipForMatchingFastAPI
Tip
core/fastapi/clip/matching is the section for configuration of ClipForMatchingFastAPI.
Bases: GenericFastAPI
Source code in src/unitorch/cli/fastapis/clip.py
793
794
795
796
797
798
799
800
801
802
def __init__(self, config: Config):
    """Bind the config and register the service's HTTP routes.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/clip/matching`` and its ``router`` option gives
            the route prefix.
    """
    self.config = config
    config.set_default_section("core/fastapi/clip/matching")
    prefix = config.getoption("router", "/core/fastapi/clip/matching")
    self._pipe = None  # no pipeline is loaded at construction time
    api_router = APIRouter(prefix=prefix)
    endpoints = (
        ("/generate", self.generate, ["POST"]),
        ("/status", self.status, ["GET"]),
        ("/start", self.start, ["GET"]),
        ("/stop", self.stop, ["GET"]),
    )
    for path, handler, methods in endpoints:
        api_router.add_api_route(path, handler, methods=methods)
    self._router = api_router
    self._lock = asyncio.Lock()
|
_router = APIRouter(prefix=router)
start(pretrained_name: str = 'clip-vit-base-patch16')
Source code in src/unitorch/cli/fastapis/clip.py
def start(self, pretrained_name: str = "clip-vit-base-patch16"):
    """Build the CLIP matching pipeline and keep it on the instance.

    Args:
        pretrained_name: Forwarded to ``ClipForMatchingPipeline.from_config``.

    Returns:
        The literal string ``"start success"``.
    """
    pipeline = ClipForMatchingPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"
|
Source code in src/unitorch/cli/fastapis/clip.py
816
817
818
819
820
821
def stop(self):
    """Release the loaded pipeline, if any, and report success.

    The pipeline is moved to CPU before the reference is dropped. When no
    pipeline is loaded (stop before start, or stop called twice) the move
    is skipped instead of raising ``AttributeError`` on ``None``.

    Returns:
        The literal string ``"stop success"``.
    """
    if self._pipe is not None:
        self._pipe.to("cpu")
    # Drop the reference before collecting so the memory can be reclaimed.
    self._pipe = None
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"
|
Source code in src/unitorch/cli/fastapis/clip.py
def status(self):
    """Return ``"running"`` when a pipeline is loaded, else ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"
|
generate(
text: str,
image: UploadFile,
max_seq_length: Optional[int] = 77,
lora_checkpoints: Optional[Union[str, List[str]]] = [],
lora_weights: Optional[Union[float, List[float]]] = [],
lora_alphas: Optional[Union[float, List[float]]] = [],
lora_urls: Optional[Union[str, List[str]]] = [],
lora_files: Optional[Union[str, List[str]]] = [],
)
Source code in src/unitorch/cli/fastapis/clip.py
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
async def generate(
    self,
    text: str,
    image: UploadFile,
    max_seq_length: Optional[int] = 77,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
):
    """Run the matching pipeline on a text and an uploaded image.

    Args:
        text: Passed to the pipeline as the first positional argument.
        image: Uploaded file; its bytes are decoded with ``Image.open``.
        max_seq_length: Forwarded to the pipeline under the same name.
        lora_checkpoints, lora_weights, lora_alphas, lora_urls, lora_files:
            Forwarded unchanged to the pipeline under the same names.

    Returns:
        Whatever the pipeline call returns.

    Raises:
        AssertionError: If no pipeline is loaded.
    """
    assert self._pipe is not None
    raw = await image.read()
    picture = Image.open(io.BytesIO(raw))
    options = {
        "max_seq_length": max_seq_length,
        "lora_checkpoints": lora_checkpoints,
        "lora_weights": lora_weights,
        "lora_alphas": lora_alphas,
        "lora_urls": lora_urls,
        "lora_files": lora_files,
    }
    # The lock lets only one request use the pipeline at a time.
    async with self._lock:
        return self._pipe(text, picture, **options)
|
DetrForDetectionFastAPI
Tip
core/fastapi/detr is the section for configuration of DetrForDetectionFastAPI.
Bases: GenericFastAPI
Source code in src/unitorch/cli/fastapis/detr.py
153
154
155
156
157
158
159
160
161
162
def __init__(self, config: Config):
    """Bind the config and register the service's HTTP routes.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/detr`` and its ``router`` option gives the
            route prefix.
    """
    self.config = config
    config.set_default_section("core/fastapi/detr")
    prefix = config.getoption("router", "/core/fastapi/detr")
    self._pipe = None  # no pipeline is loaded at construction time
    api_router = APIRouter(prefix=prefix)
    endpoints = (
        ("/generate", self.generate, ["POST"]),
        ("/status", self.status, ["GET"]),
        ("/start", self.start, ["GET"]),
        ("/stop", self.stop, ["GET"]),
    )
    for path, handler, methods in endpoints:
        api_router.add_api_route(path, handler, methods=methods)
    self._router = api_router
    self._lock = asyncio.Lock()
|
config
instance-attribute
_router
instance-attribute
_router = APIRouter(prefix=router)
start
start(pretrained_name: Optional[str] = 'detr-resnet-50')
Source code in src/unitorch/cli/fastapis/detr.py
def start(self, pretrained_name: Optional[str] = "detr-resnet-50"):
    """Build the DETR detection pipeline and keep it on the instance.

    Args:
        pretrained_name: Forwarded to ``DetrForDetectionPipeline.from_config``.

    Returns:
        The literal string ``"start success"``.
    """
    pipeline = DetrForDetectionPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"
|
stop
Source code in src/unitorch/cli/fastapis/detr.py
176
177
178
179
180
181
def stop(self):
    """Release the loaded pipeline, if any, and report success.

    The pipeline is moved to CPU before the reference is dropped. When no
    pipeline is loaded (stop before start, or stop called twice) the move
    is skipped instead of raising ``AttributeError`` on ``None``.

    Returns:
        The literal string ``"stop success"``.
    """
    if self._pipe is not None:
        self._pipe.to("cpu")
    # Drop the reference before collecting so the memory can be reclaimed.
    self._pipe = None
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"
|
status
Source code in src/unitorch/cli/fastapis/detr.py
def status(self):
    """Return ``"running"`` when a pipeline is loaded, else ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"
|
generate
async
generate(image: UploadFile, threshold: float = 0.5)
Source code in src/unitorch/cli/fastapis/detr.py
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
async def generate(
    self,
    image: UploadFile,
    threshold: float = 0.5,
):
    """Run the pipeline on an uploaded image and return the result as PNG.

    Args:
        image: Uploaded file; its bytes are decoded with ``Image.open``.
        threshold: Forwarded to the pipeline as ``threshold``.

    Returns:
        A ``StreamingResponse`` with media type ``image/png``.

    Raises:
        AssertionError: If no pipeline is loaded.
    """
    assert self._pipe is not None
    raw = await image.read()
    picture = Image.open(io.BytesIO(raw))
    # The lock lets only one request use the pipeline at a time.
    async with self._lock:
        rendered = self._pipe(picture, threshold=threshold)
    encoded = io.BytesIO()
    rendered.save(encoded, format="PNG")
    body = io.BytesIO(encoded.getvalue())
    return StreamingResponse(body, media_type="image/png")
|
DPTForDepthEstimationFastAPI
Tip
core/fastapi/dpt is the section for configuration of DPTForDepthEstimationFastAPI.
Bases: GenericFastAPI
Source code in src/unitorch/cli/fastapis/dpt.py
132
133
134
135
136
137
138
139
140
141
def __init__(self, config: Config):
    """Bind the config and register the service's HTTP routes.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/dpt`` and its ``router`` option gives the
            route prefix.
    """
    self.config = config
    config.set_default_section("core/fastapi/dpt")
    prefix = config.getoption("router", "/core/fastapi/dpt")
    self._pipe = None  # no pipeline is loaded at construction time
    api_router = APIRouter(prefix=prefix)
    endpoints = (
        ("/generate", self.generate, ["POST"]),
        ("/status", self.status, ["GET"]),
        ("/start", self.start, ["GET"]),
        ("/stop", self.stop, ["GET"]),
    )
    for path, handler, methods in endpoints:
        api_router.add_api_route(path, handler, methods=methods)
    self._router = api_router
    self._lock = asyncio.Lock()
|
config
instance-attribute
_router
instance-attribute
_router = APIRouter(prefix=router)
start
start(pretrained_name: Optional[str] = 'dpt-large')
Source code in src/unitorch/cli/fastapis/dpt.py
def start(self, pretrained_name: Optional[str] = "dpt-large"):
    """Build the DPT depth-estimation pipeline and keep it on the instance.

    Args:
        pretrained_name: Forwarded to
            ``DPTForDepthEstimationPipeline.from_config``.

    Returns:
        The literal string ``"start success"``.
    """
    pipeline = DPTForDepthEstimationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"
|
stop
Source code in src/unitorch/cli/fastapis/dpt.py
155
156
157
158
159
160
def stop(self):
    """Release the loaded pipeline, if any, and report success.

    The pipeline is moved to CPU before the reference is dropped. When no
    pipeline is loaded (stop before start, or stop called twice) the move
    is skipped instead of raising ``AttributeError`` on ``None``.

    Returns:
        The literal string ``"stop success"``.
    """
    if self._pipe is not None:
        self._pipe.to("cpu")
    # Drop the reference before collecting so the memory can be reclaimed.
    self._pipe = None
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"
|
status
Source code in src/unitorch/cli/fastapis/dpt.py
def status(self):
    """Return ``"running"`` when a pipeline is loaded, else ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"
|
generate
async
generate(image: UploadFile)
Source code in src/unitorch/cli/fastapis/dpt.py
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
async def generate(
    self,
    image: UploadFile,
):
    """Run the pipeline on an uploaded image and return the result as PNG.

    Args:
        image: Uploaded file; its bytes are decoded with ``Image.open``.

    Returns:
        A ``StreamingResponse`` with media type ``image/png``.

    Raises:
        AssertionError: If no pipeline is loaded.
    """
    assert self._pipe is not None
    raw = await image.read()
    picture = Image.open(io.BytesIO(raw))
    # The lock lets only one request use the pipeline at a time.
    async with self._lock:
        rendered = self._pipe(picture)
    encoded = io.BytesIO()
    rendered.save(encoded, format="PNG")
    body = io.BytesIO(encoded.getvalue())
    return StreamingResponse(body, media_type="image/png")
|
GroundingDinoForDetectionFastAPI
Tip
core/fastapi/grounding_dino is the section for configuration of GroundingDinoForDetectionFastAPI.
Bases: GenericFastAPI
Source code in src/unitorch/cli/fastapis/grounding_dino.py
181
182
183
184
185
186
187
188
189
190
def __init__(self, config: Config):
    """Bind the config and register the service's HTTP routes.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/grounding_dino`` and its ``router`` option gives
            the route prefix.
    """
    self.config = config
    config.set_default_section("core/fastapi/grounding_dino")
    prefix = config.getoption("router", "/core/fastapi/grounding_dino")
    self._pipe = None  # no pipeline is loaded at construction time
    api_router = APIRouter(prefix=prefix)
    endpoints = (
        ("/generate", self.generate, ["POST"]),
        ("/status", self.status, ["GET"]),
        ("/start", self.start, ["GET"]),
        ("/stop", self.stop, ["GET"]),
    )
    for path, handler, methods in endpoints:
        api_router.add_api_route(path, handler, methods=methods)
    self._router = api_router
    self._lock = asyncio.Lock()
|
config
instance-attribute
_router
instance-attribute
_router = APIRouter(prefix=router)
start
start(
pretrained_name: Optional[str] = "grounding-dino-tiny",
)
Source code in src/unitorch/cli/fastapis/grounding_dino.py
def start(self, pretrained_name: Optional[str] = "grounding-dino-tiny"):
    """Build the Grounding DINO detection pipeline and keep it on the instance.

    Args:
        pretrained_name: Forwarded to
            ``GroundingDinoForDetectionPipeline.from_config``.

    Returns:
        The literal string ``"start success"``.
    """
    pipeline = GroundingDinoForDetectionPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"
|
stop
Source code in src/unitorch/cli/fastapis/grounding_dino.py
204
205
206
207
208
209
def stop(self):
    """Release the loaded pipeline, if any, and report success.

    The pipeline is moved to CPU before the reference is dropped. When no
    pipeline is loaded (stop before start, or stop called twice) the move
    is skipped instead of raising ``AttributeError`` on ``None``.

    Returns:
        The literal string ``"stop success"``.
    """
    if self._pipe is not None:
        self._pipe.to("cpu")
    # Drop the reference before collecting so the memory can be reclaimed.
    self._pipe = None
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"
|
status
Source code in src/unitorch/cli/fastapis/grounding_dino.py
def status(self):
    """Return ``"running"`` when a pipeline is loaded, else ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"
|
generate
async
generate(
text: str,
image: UploadFile,
text_threshold: float = 0.25,
box_threshold: float = 0.25,
)
Source code in src/unitorch/cli/fastapis/grounding_dino.py
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
async def generate(
    self,
    text: str,
    image: UploadFile,
    text_threshold: float = 0.25,
    box_threshold: float = 0.25,
):
    """Run the pipeline on a text and an uploaded image, returning a PNG.

    Args:
        text: Passed to the pipeline as the first positional argument.
        image: Uploaded file; its bytes are decoded with ``Image.open``.
        text_threshold: Forwarded to the pipeline under the same name.
        box_threshold: Forwarded to the pipeline under the same name.

    Returns:
        A ``StreamingResponse`` with media type ``image/png``.

    Raises:
        AssertionError: If no pipeline is loaded.
    """
    assert self._pipe is not None
    raw = await image.read()
    picture = Image.open(io.BytesIO(raw))
    # The lock lets only one request use the pipeline at a time.
    async with self._lock:
        rendered = self._pipe(
            text,
            picture,
            text_threshold=text_threshold,
            box_threshold=box_threshold,
        )
    encoded = io.BytesIO()
    rendered.save(encoded, format="PNG")
    body = io.BytesIO(encoded.getvalue())
    return StreamingResponse(body, media_type="image/png")
|
LlamaForGenerationFastAPI
Tip
core/fastapi/llama is the section for configuration of LlamaForGenerationFastAPI.
Bases: GenericFastAPI
Source code in src/unitorch/cli/fastapis/llama.py
268
269
270
271
272
273
274
275
276
277
def __init__(self, config: Config):
    """Bind the config and register the service's HTTP routes.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/llama`` and its ``router`` option gives the
            route prefix.
    """
    self.config = config
    config.set_default_section("core/fastapi/llama")
    prefix = config.getoption("router", "/core/fastapi/llama")
    self._pipe = None  # no pipeline is loaded at construction time
    api_router = APIRouter(prefix=prefix)
    endpoints = (
        ("/generate", self.generate, ["POST"]),
        ("/status", self.status, ["GET"]),
        ("/start", self.start, ["GET"]),
        ("/stop", self.stop, ["GET"]),
    )
    for path, handler, methods in endpoints:
        api_router.add_api_route(path, handler, methods=methods)
    self._router = api_router
    self._lock = asyncio.Lock()
|
config
instance-attribute
_router
instance-attribute
_router = APIRouter(prefix=router)
start
start(pretrained_name: str = 'llama-3.2-1b-instruct')
Source code in src/unitorch/cli/fastapis/llama.py
def start(self, pretrained_name: str = "llama-3.2-1b-instruct"):
    """Build the Llama generation pipeline and keep it on the instance.

    Args:
        pretrained_name: Forwarded to
            ``LlamaForGenerationPipeline.from_config``.

    Returns:
        The literal string ``"start success"``.
    """
    pipeline = LlamaForGenerationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"
|
stop
Source code in src/unitorch/cli/fastapis/llama.py
291
292
293
294
295
296
def stop(self):
    """Release the loaded pipeline, if any, and report success.

    The pipeline is moved to CPU before the reference is dropped. When no
    pipeline is loaded (stop before start, or stop called twice) the move
    is skipped instead of raising ``AttributeError`` on ``None``.

    Returns:
        The literal string ``"stop success"``.
    """
    if self._pipe is not None:
        self._pipe.to("cpu")
    # Drop the reference before collecting so the memory can be reclaimed.
    self._pipe = None
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"
|
status
Source code in src/unitorch/cli/fastapis/llama.py
def status(self):
    """Return ``"running"`` when a pipeline is loaded, else ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"
|
generate
async
generate(
prompt: str,
max_seq_length: Optional[int] = 512,
num_beams: Optional[int] = 2,
decoder_start_token_id: Optional[int] = 1,
decoder_end_token_id: Optional[
Union[int, List[int]]
] = [2],
num_return_sequences: Optional[int] = 1,
min_gen_seq_length: Optional[int] = 0,
max_gen_seq_length: Optional[int] = 512,
repetition_penalty: Optional[float] = 1.0,
no_repeat_ngram_size: Optional[int] = 0,
early_stopping: Optional[bool] = True,
length_penalty: Optional[float] = 1.0,
num_beam_groups: Optional[int] = 1,
diversity_penalty: Optional[float] = 0.0,
do_sample: Optional[bool] = False,
temperature: Optional[float] = 1.0,
top_k: Optional[int] = 50,
top_p: Optional[float] = 1.0,
lora_checkpoints: Optional[Union[str, List[str]]] = [],
lora_weights: Optional[Union[float, List[float]]] = [],
lora_alphas: Optional[Union[float, List[float]]] = [],
lora_urls: Optional[Union[str, List[str]]] = [],
lora_files: Optional[Union[str, List[str]]] = [],
)
Source code in src/unitorch/cli/fastapis/llama.py
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
async def generate(
    self,
    prompt: str,
    max_seq_length: Optional[int] = 512,
    num_beams: Optional[int] = 2,
    decoder_start_token_id: Optional[int] = 1,
    decoder_end_token_id: Optional[Union[int, List[int]]] = [2],
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
):
    """Run the generation pipeline on a prompt.

    Args:
        prompt: Passed to the pipeline as the only positional argument.

    Every other parameter is forwarded unchanged to the pipeline as a
    keyword argument of the same name.

    Returns:
        Whatever the pipeline call returns.

    Raises:
        AssertionError: If no pipeline is loaded.
    """
    assert self._pipe is not None
    options = {
        "max_seq_length": max_seq_length,
        "num_beams": num_beams,
        "decoder_start_token_id": decoder_start_token_id,
        "decoder_end_token_id": decoder_end_token_id,
        "num_return_sequences": num_return_sequences,
        "min_gen_seq_length": min_gen_seq_length,
        "max_gen_seq_length": max_gen_seq_length,
        "repetition_penalty": repetition_penalty,
        "no_repeat_ngram_size": no_repeat_ngram_size,
        "early_stopping": early_stopping,
        "length_penalty": length_penalty,
        "num_beam_groups": num_beam_groups,
        "diversity_penalty": diversity_penalty,
        "do_sample": do_sample,
        "temperature": temperature,
        "top_k": top_k,
        "top_p": top_p,
        "lora_checkpoints": lora_checkpoints,
        "lora_weights": lora_weights,
        "lora_alphas": lora_alphas,
        "lora_urls": lora_urls,
        "lora_files": lora_files,
    }
    # The lock lets only one request use the pipeline at a time.
    async with self._lock:
        return self._pipe(prompt, **options)
|
LlavaMistralClipFastAPI
Tip
core/fastapi/llava/mistral_clip is the section for configuration of LlavaMistralClipFastAPI.
Bases: GenericFastAPI
Source code in src/unitorch/cli/fastapis/llava.py
544
545
546
547
548
549
550
551
552
553
def __init__(self, config: Config):
    """Bind the config and register the service's HTTP routes.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/llava/mistral_clip`` and its ``router`` option
            gives the route prefix.
    """
    self.config = config
    config.set_default_section("core/fastapi/llava/mistral_clip")
    prefix = config.getoption("router", "/core/fastapi/llava/mistral_clip")
    self._pipe = None  # no pipeline is loaded at construction time
    api_router = APIRouter(prefix=prefix)
    endpoints = (
        ("/generate", self.generate, ["POST"]),
        ("/status", self.status, ["GET"]),
        ("/start", self.start, ["GET"]),
        ("/stop", self.stop, ["GET"]),
    )
    for path, handler, methods in endpoints:
        api_router.add_api_route(path, handler, methods=methods)
    self._router = api_router
    self._lock = asyncio.Lock()
|
config
instance-attribute
_router
instance-attribute
_router = APIRouter(prefix=router)
start
Source code in src/unitorch/cli/fastapis/llava.py
def start(self):
    """Build the LLaVA (Mistral + CLIP) pipeline and keep it on the instance.

    The pretrained name is fixed to ``llava-v1.6-mistral-7b-hf``.

    Returns:
        The literal string ``"start success"``.
    """
    pipeline = LlavaMistralClipForGenerationPipeline.from_config(
        self.config, pretrained_name="llava-v1.6-mistral-7b-hf"
    )
    self._pipe = pipeline
    return "start success"
|
stop
Source code in src/unitorch/cli/fastapis/llava.py
567
568
569
570
571
572
def stop(self):
    """Release the loaded pipeline, if any, and report success.

    The pipeline is moved to CPU before the reference is dropped. When no
    pipeline is loaded (stop before start, or stop called twice) the move
    is skipped instead of raising ``AttributeError`` on ``None``.

    Returns:
        The literal string ``"stop success"``.
    """
    if self._pipe is not None:
        self._pipe.to("cpu")
    # Drop the reference before collecting so the memory can be reclaimed.
    self._pipe = None
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"
|
status
Source code in src/unitorch/cli/fastapis/llava.py
def status(self):
    """Return ``"running"`` when a pipeline is loaded, else ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"
|
generate
async
generate(text: str, image: UploadFile)
Source code in src/unitorch/cli/fastapis/llava.py
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
async def generate(
    self,
    text: str,
    image: UploadFile,
):
    """Caption an uploaded image using ``text`` as the instruction.

    Args:
        text: Instruction; wrapped in an ``[INST] <image> ... [/INST]``
            prompt before being passed to the pipeline.
        image: Uploaded file; its bytes are decoded with ``Image.open``.

    Returns:
        Whatever the pipeline call returns.

    Raises:
        AssertionError: If no pipeline is loaded.
    """
    assert self._pipe is not None
    raw = await image.read()
    picture = Image.open(io.BytesIO(raw))
    prompt = f"[INST] <image>\n {text} [/INST]"
    # No LoRA adapters are applied on this endpoint.
    lora_options = {
        "lora_checkpoints": [],
        "lora_weights": [],
        "lora_alphas": [],
        "lora_urls": [],
        "lora_files": [],
    }
    # The lock lets only one request use the pipeline at a time.
    async with self._lock:
        return self._pipe(prompt, picture, **lora_options)
|
LlavaLlamaSiglipFastAPI
Tip
core/fastapi/llava/joycaption2 is the section for configuration of LlavaLlamaSiglipFastAPI.
Bases: GenericFastAPI
Source code in src/unitorch/cli/fastapis/llava.py
603
604
605
606
607
608
609
610
611
612
def __init__(self, config: Config):
    """Bind the config and register the service's HTTP routes.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/llava/joycaption2`` and its ``router`` option
            gives the route prefix.
    """
    self.config = config
    config.set_default_section("core/fastapi/llava/joycaption2")
    prefix = config.getoption("router", "/core/fastapi/llava/joycaption2")
    self._pipe = None  # no pipeline is loaded at construction time
    api_router = APIRouter(prefix=prefix)
    endpoints = (
        ("/generate", self.generate, ["POST"]),
        ("/status", self.status, ["GET"]),
        ("/start", self.start, ["GET"]),
        ("/stop", self.stop, ["GET"]),
    )
    for path, handler, methods in endpoints:
        api_router.add_api_route(path, handler, methods=methods)
    self._router = api_router
    self._lock = asyncio.Lock()
|
config
instance-attribute
_router
instance-attribute
_router = APIRouter(prefix=router)
start
Source code in src/unitorch/cli/fastapis/llava.py
def start(self):
    """Build the LLaVA (Llama + SigLIP) pipeline and keep it on the instance.

    The pretrained name is fixed to ``llava-v1.6-joycaption-2``.

    Returns:
        The literal string ``"start success"``.
    """
    pipeline = LlavaLlamaSiglipForGenerationPipeline.from_config(
        self.config, pretrained_name="llava-v1.6-joycaption-2"
    )
    self._pipe = pipeline
    return "start success"
|
stop
Source code in src/unitorch/cli/fastapis/llava.py
626
627
628
629
630
631
def stop(self):
    """Release the loaded pipeline, if any, and report success.

    The pipeline is moved to CPU before the reference is dropped. When no
    pipeline is loaded (stop before start, or stop called twice) the move
    is skipped instead of raising ``AttributeError`` on ``None``.

    Returns:
        The literal string ``"stop success"``.
    """
    if self._pipe is not None:
        self._pipe.to("cpu")
    # Drop the reference before collecting so the memory can be reclaimed.
    self._pipe = None
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"
|
status
Source code in src/unitorch/cli/fastapis/llava.py
def status(self):
    """Return ``"running"`` when a pipeline is loaded, else ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"
|
generate
async
generate(text: str, image: UploadFile)
Source code in src/unitorch/cli/fastapis/llava.py
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
async def generate(
    self,
    text: str,
    image: UploadFile,
):
    """Caption an uploaded image using ``text`` as the user message.

    Args:
        text: User message; embedded in a chat-style prompt with a fixed
            system message before being passed to the pipeline.
        image: Uploaded file; its bytes are decoded with ``Image.open``.

    Returns:
        Whatever the pipeline call returns.

    Raises:
        AssertionError: If no pipeline is loaded.
    """
    assert self._pipe is not None
    image_bytes = await image.read()
    image = Image.open(io.BytesIO(image_bytes))
    # Fix: the user turn previously ended with "|eot_id|>" (missing "<"), so
    # it was not the same "<|eot_id|>" marker used after the system turn.
    # NOTE(review): the "\\n" sequences are literal backslash-n characters,
    # not newlines — left unchanged; confirm this is what the model expects.
    text = f"<|start_header_id|>system<|end_header_id|>\\n\\nCutting Knowledge Date: December 2023\\nToday Date: 26 July 2024\\n\\nYou are a helpful image captioner.<|eot_id|><|start_header_id|>user<|end_header_id|>\\n\\n<|reserved_special_token_70|><|reserved_special_token_69|><|reserved_special_token_71|>{text}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\\n\\n"
    # The lock lets only one request use the pipeline at a time.
    async with self._lock:
        caption = self._pipe(
            text,
            image,
            # No LoRA adapters are applied on this endpoint.
            lora_checkpoints=[],
            lora_weights=[],
            lora_alphas=[],
            lora_urls=[],
            lora_files=[],
        )
    return caption
|
Mask2FormerForSegmentationFastAPI
Tip
core/fastapi/mask2former is the section for configuration of Mask2FormerForSegmentationFastAPI.
Bases: GenericFastAPI
Source code in src/unitorch/cli/fastapis/mask2former.py
143
144
145
146
147
148
149
150
151
152
def __init__(self, config: Config):
    """Bind the config and register the service's HTTP routes.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/mask2former`` and its ``router`` option gives
            the route prefix.
    """
    self.config = config
    config.set_default_section("core/fastapi/mask2former")
    prefix = config.getoption("router", "/core/fastapi/mask2former")
    self._pipe = None  # no pipeline is loaded at construction time
    api_router = APIRouter(prefix=prefix)
    endpoints = (
        ("/generate", self.generate, ["POST"]),
        ("/status", self.status, ["GET"]),
        ("/start", self.start, ["GET"]),
        ("/stop", self.stop, ["GET"]),
    )
    for path, handler, methods in endpoints:
        api_router.add_api_route(path, handler, methods=methods)
    self._router = api_router
    self._lock = asyncio.Lock()
|
_router = APIRouter(prefix=router)
start(
pretrained_name: Optional[
str
] = "mask2former-swin-tiny-ade-semantic",
)
Source code in src/unitorch/cli/fastapis/mask2former.py
159
160
161
162
163
164
165
def start(
    self, pretrained_name: Optional[str] = "mask2former-swin-tiny-ade-semantic"
):
    """Build the Mask2Former segmentation pipeline and keep it on the instance.

    Args:
        pretrained_name: Forwarded to
            ``Mask2FormerForSegmentationPipeline.from_config``.

    Returns:
        The literal string ``"start success"``.
    """
    pipeline = Mask2FormerForSegmentationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"
|
Source code in src/unitorch/cli/fastapis/mask2former.py
168
169
170
171
172
173
def stop(self):
    """Release the loaded pipeline, if any, and report success.

    The pipeline is moved to CPU before the reference is dropped. When no
    pipeline is loaded (stop before start, or stop called twice) the move
    is skipped instead of raising ``AttributeError`` on ``None``.

    Returns:
        The literal string ``"stop success"``.
    """
    if self._pipe is not None:
        self._pipe.to("cpu")
    # Drop the reference before collecting so the memory can be reclaimed.
    self._pipe = None
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"
|
Source code in src/unitorch/cli/fastapis/mask2former.py
def status(self):
    """Return ``"running"`` when a pipeline is loaded, else ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"
|
generate(image: UploadFile)
Source code in src/unitorch/cli/fastapis/mask2former.py
179
180
181
182
183
184
185
186
187
188
async def generate(
    self,
    image: UploadFile,
):
    """Segment an uploaded image and return the masks as nested lists.

    Args:
        image: Uploaded file; its bytes are decoded with ``Image.open``.

    Returns:
        A list of ``(mask.tolist(), label)`` tuples, one per
        ``(mask, label)`` pair yielded by the pipeline result.

    Raises:
        AssertionError: If no pipeline is loaded.
    """
    assert self._pipe is not None
    raw = await image.read()
    picture = Image.open(io.BytesIO(raw))
    # The lock lets only one request use the pipeline at a time.
    async with self._lock:
        segments = self._pipe(picture)
    serialised = []
    for mask, label in segments:
        serialised.append((mask.tolist(), label))
    return serialised
|
MistralForGenerationFastAPI
Tip
core/fastapi/mistral is the section for configuration of MistralForGenerationFastAPI.
Bases: GenericFastAPI
Source code in src/unitorch/cli/fastapis/mistral.py
270
271
272
273
274
275
276
277
278
279
def __init__(self, config: Config):
    """Bind the config and register the service's HTTP routes.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/mistral`` and its ``router`` option gives the
            route prefix.
    """
    self.config = config
    config.set_default_section("core/fastapi/mistral")
    prefix = config.getoption("router", "/core/fastapi/mistral")
    self._pipe = None  # no pipeline is loaded at construction time
    api_router = APIRouter(prefix=prefix)
    endpoints = (
        ("/generate", self.generate, ["POST"]),
        ("/status", self.status, ["GET"]),
        ("/start", self.start, ["GET"]),
        ("/stop", self.stop, ["GET"]),
    )
    for path, handler, methods in endpoints:
        api_router.add_api_route(path, handler, methods=methods)
    self._router = api_router
    self._lock = asyncio.Lock()
|
config
instance-attribute
_router
instance-attribute
_router = APIRouter(prefix=router)
start
start(pretrained_name: str = 'mistral-7b-instruct-v0.1')
Source code in src/unitorch/cli/fastapis/mistral.py
def start(self, pretrained_name: str = "mistral-7b-instruct-v0.1"):
    """Build the Mistral generation pipeline and keep it on the instance.

    Args:
        pretrained_name: Forwarded to
            ``MistralForGenerationPipeline.from_config``.

    Returns:
        The literal string ``"start success"``.
    """
    pipeline = MistralForGenerationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"
|
stop
Source code in src/unitorch/cli/fastapis/mistral.py
293
294
295
296
297
298
def stop(self):
    """Release the loaded pipeline, if any, and report success.

    The pipeline is moved to CPU before the reference is dropped. When no
    pipeline is loaded (stop before start, or stop called twice) the move
    is skipped instead of raising ``AttributeError`` on ``None``.

    Returns:
        The literal string ``"stop success"``.
    """
    if self._pipe is not None:
        self._pipe.to("cpu")
    # Drop the reference before collecting so the memory can be reclaimed.
    self._pipe = None
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"
|
status
Source code in src/unitorch/cli/fastapis/mistral.py
def status(self):
    """Return ``"running"`` when a pipeline is loaded, else ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"
|
generate
async
generate(
prompt: str,
max_seq_length: Optional[int] = 512,
num_beams: Optional[int] = 2,
decoder_start_token_id: Optional[int] = 1,
decoder_end_token_id: Optional[
Union[int, List[int]]
] = 2,
num_return_sequences: Optional[int] = 1,
min_gen_seq_length: Optional[int] = 0,
max_gen_seq_length: Optional[int] = 512,
repetition_penalty: Optional[float] = 1.0,
no_repeat_ngram_size: Optional[int] = 0,
early_stopping: Optional[bool] = True,
length_penalty: Optional[float] = 1.0,
num_beam_groups: Optional[int] = 1,
diversity_penalty: Optional[float] = 0.0,
do_sample: Optional[bool] = False,
temperature: Optional[float] = 1.0,
top_k: Optional[int] = 50,
top_p: Optional[float] = 1.0,
lora_checkpoints: Optional[Union[str, List[str]]] = [],
lora_weights: Optional[Union[float, List[float]]] = [],
lora_alphas: Optional[Union[float, List[float]]] = [],
lora_urls: Optional[Union[str, List[str]]] = [],
lora_files: Optional[Union[str, List[str]]] = [],
)
Source code in src/unitorch/cli/fastapis/mistral.py
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
async def generate(
    self,
    prompt: str,
    max_seq_length: Optional[int] = 512,
    num_beams: Optional[int] = 2,
    decoder_start_token_id: Optional[int] = 1,
    decoder_end_token_id: Optional[Union[int, List[int]]] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
):
    """Run the generation pipeline on a prompt.

    Args:
        prompt: Passed to the pipeline as the only positional argument.

    Every other parameter is forwarded unchanged to the pipeline as a
    keyword argument of the same name.

    Returns:
        Whatever the pipeline call returns.

    Raises:
        AssertionError: If no pipeline is loaded.
    """
    assert self._pipe is not None
    options = {
        "max_seq_length": max_seq_length,
        "num_beams": num_beams,
        "decoder_start_token_id": decoder_start_token_id,
        "decoder_end_token_id": decoder_end_token_id,
        "num_return_sequences": num_return_sequences,
        "min_gen_seq_length": min_gen_seq_length,
        "max_gen_seq_length": max_gen_seq_length,
        "repetition_penalty": repetition_penalty,
        "no_repeat_ngram_size": no_repeat_ngram_size,
        "early_stopping": early_stopping,
        "length_penalty": length_penalty,
        "num_beam_groups": num_beam_groups,
        "diversity_penalty": diversity_penalty,
        "do_sample": do_sample,
        "temperature": temperature,
        "top_k": top_k,
        "top_p": top_p,
        "lora_checkpoints": lora_checkpoints,
        "lora_weights": lora_weights,
        "lora_alphas": lora_alphas,
        "lora_urls": lora_urls,
        "lora_files": lora_files,
    }
    # The lock lets only one request use the pipeline at a time.
    async with self._lock:
        return self._pipe(prompt, **options)
|
QWen3FastAPI
Tip
core/fastapi/qwen3 is the section for configuration of QWen3FastAPI.
Bases: GenericFastAPI
Source code in src/unitorch/cli/fastapis/qwen.py
267
268
269
270
271
272
273
274
275
276
def __init__(self, config: Config):
    """Store ``config`` and register the service's HTTP routes.

    The router prefix comes from the ``router`` option of the
    ``core/fastapi/qwen3`` section (default ``/core/fastapi/qwen3``). No
    pipeline is loaded here: ``_pipe`` starts out as ``None``.
    """
    self.config = config
    config.set_default_section("core/fastapi/qwen3")
    prefix = config.getoption("router", "/core/fastapi/qwen3")
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    # Same registration order as before: generate, status, start, stop.
    route_table = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in route_table:
        self._router.add_api_route(path, handler, methods=[verb])
    # Held by generate() around each pipeline call.
    self._lock = asyncio.Lock()
|
config
instance-attribute
_router
instance-attribute
_router = APIRouter(prefix=router)
start
start(pretrained_name: str = 'qwen3-4b-thinking')
Source code in src/unitorch/cli/fastapis/qwen.py
def start(self, pretrained_name: str = "qwen3-4b-thinking"):
    """Build the generation pipeline from the stored config and keep it.

    Args:
        pretrained_name: Forwarded to
            ``QWen3ForGenerationPipeline.from_config``.

    Returns:
        The literal string ``"start success"``.
    """
    pipeline = QWen3ForGenerationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"
|
stop
Source code in src/unitorch/cli/fastapis/qwen.py
290
291
292
293
294
295
def stop(self):
    """Unload the pipeline and release the memory it was holding.

    A loaded pipeline is moved to the CPU before its reference is dropped;
    a garbage-collection pass and ``torch.cuda.empty_cache()`` follow.
    Calling ``stop`` while nothing is loaded is a harmless no-op.

    Returns:
        The literal string ``"stop success"``.
    """
    pipe = self._pipe
    # Guard: stop() before start() (or twice in a row) used to call
    # ``None.to("cpu")`` and fail with AttributeError.
    if pipe is not None:
        pipe.to("cpu")
    # Drop every reference before collecting so the objects can be freed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"
|
status
Source code in src/unitorch/cli/fastapis/qwen.py
def status(self):
    """Report whether a pipeline is currently loaded.

    Returns:
        ``"running"`` when ``_pipe`` is set, ``"stopped"`` when it is ``None``.
    """
    if self._pipe is None:
        return "stopped"
    return "running"
|
generate
async
generate(
text: str,
use_chat_template: Optional[bool] = True,
max_seq_length: Optional[int] = 12800,
num_beams: Optional[int] = 2,
num_return_sequences: Optional[int] = 1,
min_gen_seq_length: Optional[int] = 0,
max_gen_seq_length: Optional[int] = 512,
do_sample: Optional[bool] = False,
temperature: Optional[float] = 1.0,
top_k: Optional[int] = 50,
top_p: Optional[float] = 1.0,
lora_checkpoints: Optional[Union[str, List[str]]] = [],
lora_weights: Optional[Union[float, List[float]]] = [],
lora_alphas: Optional[Union[float, List[float]]] = [],
lora_urls: Optional[Union[str, List[str]]] = [],
lora_files: Optional[Union[str, List[str]]] = [],
)
Source code in src/unitorch/cli/fastapis/qwen.py
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
async def generate(
    self,
    text: str,
    use_chat_template: Optional[bool] = True,
    max_seq_length: Optional[int] = 12800,
    num_beams: Optional[int] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
):
    """Run the loaded pipeline on ``text`` and return its result unchanged.

    Every argument other than ``text`` is forwarded to the pipeline as a
    keyword argument of the same name. The call is made while holding
    ``self._lock``, so concurrent requests are processed one at a time.

    Raises:
        AssertionError: If no pipeline is loaded (``self._pipe`` is ``None``).
    """
    assert self._pipe is not None
    generation_options = {
        "use_chat_template": use_chat_template,
        "max_seq_length": max_seq_length,
        "num_beams": num_beams,
        "num_return_sequences": num_return_sequences,
        "min_gen_seq_length": min_gen_seq_length,
        "max_gen_seq_length": max_gen_seq_length,
        "do_sample": do_sample,
        "temperature": temperature,
        "top_k": top_k,
        "top_p": top_p,
        "lora_checkpoints": lora_checkpoints,
        "lora_weights": lora_weights,
        "lora_alphas": lora_alphas,
        "lora_urls": lora_urls,
        "lora_files": lora_files,
    }
    async with self._lock:
        return self._pipe(text, **generation_options)
|
QWen3VLFastAPI
Tip
core/fastapi/qwen3_vl is the section for configuration of QWen3VLFastAPI.
Bases: GenericFastAPI
Source code in src/unitorch/cli/fastapis/qwen_vl.py
284
285
286
287
288
289
290
291
292
293
def __init__(self, config: Config):
    """Store ``config`` and register the service's HTTP routes.

    The router prefix comes from the ``router`` option of the
    ``core/fastapi/qwen3_vl`` section (default ``/core/fastapi/qwen3_vl``).
    No pipeline is loaded here: ``_pipe`` starts out as ``None``.
    """
    self.config = config
    config.set_default_section("core/fastapi/qwen3_vl")
    prefix = config.getoption("router", "/core/fastapi/qwen3_vl")
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    # Same registration order as before: generate, status, start, stop.
    route_table = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in route_table:
        self._router.add_api_route(path, handler, methods=[verb])
    # Held by generate() around each pipeline call.
    self._lock = asyncio.Lock()
|
config
instance-attribute
_router
instance-attribute
_router = APIRouter(prefix=router)
start
start(pretrained_name: str = 'qwen3-vl-8b-instruct')
Source code in src/unitorch/cli/fastapis/qwen_vl.py
def start(self, pretrained_name: str = "qwen3-vl-8b-instruct"):
    """Build the vision-language pipeline from the stored config and keep it.

    Args:
        pretrained_name: Forwarded to
            ``QWen3VLForGenerationPipeline.from_config``.

    Returns:
        The literal string ``"start success"``.
    """
    pipeline = QWen3VLForGenerationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"
|
stop
Source code in src/unitorch/cli/fastapis/qwen_vl.py
307
308
309
310
311
312
def stop(self):
    """Unload the pipeline and release the memory it was holding.

    A loaded pipeline is moved to the CPU before its reference is dropped;
    a garbage-collection pass and ``torch.cuda.empty_cache()`` follow.
    Calling ``stop`` while nothing is loaded is a harmless no-op.

    Returns:
        The literal string ``"stop success"``.
    """
    pipe = self._pipe
    # Guard: stop() before start() (or twice in a row) used to call
    # ``None.to("cpu")`` and fail with AttributeError.
    if pipe is not None:
        pipe.to("cpu")
    # Drop every reference before collecting so the objects can be freed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"
|
status
Source code in src/unitorch/cli/fastapis/qwen_vl.py
def status(self):
    """Report whether a pipeline is currently loaded.

    Returns:
        ``"running"`` when ``_pipe`` is set, ``"stopped"`` when it is ``None``.
    """
    if self._pipe is None:
        return "stopped"
    return "running"
|
generate
async
generate(
text: str,
image: UploadFile = File(...),
use_chat_template: Optional[bool] = True,
max_seq_length: Optional[int] = 12800,
num_beams: Optional[int] = 2,
num_return_sequences: Optional[int] = 1,
min_gen_seq_length: Optional[int] = 0,
max_gen_seq_length: Optional[int] = 512,
do_sample: Optional[bool] = False,
temperature: Optional[float] = 1.0,
top_k: Optional[int] = 50,
top_p: Optional[float] = 1.0,
lora_checkpoints: Optional[Union[str, List[str]]] = [],
lora_weights: Optional[Union[float, List[float]]] = [],
lora_alphas: Optional[Union[float, List[float]]] = [],
lora_urls: Optional[Union[str, List[str]]] = [],
lora_files: Optional[Union[str, List[str]]] = [],
)
Source code in src/unitorch/cli/fastapis/qwen_vl.py
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
async def generate(
    self,
    text: str,
    image: UploadFile = File(...),
    use_chat_template: Optional[bool] = True,
    max_seq_length: Optional[int] = 12800,
    num_beams: Optional[int] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
):
    """Run the loaded pipeline on ``text`` plus the uploaded image.

    The upload is read fully, decoded with PIL and converted to RGB, then
    passed to the pipeline as ``images``. The remaining arguments are
    forwarded as keyword arguments of the same name. The pipeline call is
    made while holding ``self._lock``.

    Returns:
        Whatever the pipeline returns, unchanged.

    Raises:
        AssertionError: If no pipeline is loaded (``self._pipe`` is ``None``).
    """
    assert self._pipe is not None
    raw_bytes = await image.read()
    rgb_image = Image.open(io.BytesIO(raw_bytes)).convert("RGB")
    generation_options = {
        "use_chat_template": use_chat_template,
        "max_seq_length": max_seq_length,
        "num_beams": num_beams,
        "num_return_sequences": num_return_sequences,
        "min_gen_seq_length": min_gen_seq_length,
        "max_gen_seq_length": max_gen_seq_length,
        "do_sample": do_sample,
        "temperature": temperature,
        "top_k": top_k,
        "top_p": top_p,
        "lora_checkpoints": lora_checkpoints,
        "lora_weights": lora_weights,
        "lora_alphas": lora_alphas,
        "lora_urls": lora_urls,
        "lora_files": lora_files,
    }
    async with self._lock:
        return self._pipe(text, images=rgb_image, **generation_options)
|
SamForSegmentationFastAPI
Tip
core/fastapi/sam is the section for configuration of SamForSegmentationFastAPI.
Bases: GenericFastAPI
Source code in src/unitorch/cli/fastapis/sam.py
242
243
244
245
246
247
248
249
250
251
def __init__(self, config: Config):
    """Store ``config`` and register the service's HTTP routes.

    The router prefix comes from the ``router`` option of the
    ``core/fastapi/sam`` section (default ``/core/fastapi/sam``). No
    pipeline is loaded here: ``_pipe`` starts out as ``None``.
    """
    self.config = config
    config.set_default_section("core/fastapi/sam")
    prefix = config.getoption("router", "/core/fastapi/sam")
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    # Same registration order as before: generate, status, start, stop.
    route_table = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in route_table:
        self._router.add_api_route(path, handler, methods=[verb])
    # Held by generate() around each pipeline call.
    self._lock = asyncio.Lock()
|
config
instance-attribute
_router
instance-attribute
_router = APIRouter(prefix=router)
start
start(pretrained_name: Optional[str] = 'sam-vit-base')
Source code in src/unitorch/cli/fastapis/sam.py
def start(self, pretrained_name: Optional[str] = "sam-vit-base"):
    """Build the segmentation pipeline from the stored config and keep it.

    Args:
        pretrained_name: Forwarded to
            ``SamForSegmentationPipeline.from_config``.

    Returns:
        The literal string ``"start success"``.
    """
    pipeline = SamForSegmentationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"
|
stop
Source code in src/unitorch/cli/fastapis/sam.py
265
266
267
268
269
270
def stop(self):
    """Unload the pipeline and release the memory it was holding.

    A loaded pipeline is moved to the CPU before its reference is dropped;
    a garbage-collection pass and ``torch.cuda.empty_cache()`` follow.
    Calling ``stop`` while nothing is loaded is a harmless no-op.

    Returns:
        The literal string ``"stop success"``.
    """
    pipe = self._pipe
    # Guard: stop() before start() (or twice in a row) used to call
    # ``None.to("cpu")`` and fail with AttributeError.
    if pipe is not None:
        pipe.to("cpu")
    # Drop every reference before collecting so the objects can be freed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"
|
status
Source code in src/unitorch/cli/fastapis/sam.py
def status(self):
    """Report whether a pipeline is currently loaded.

    Returns:
        ``"running"`` when ``_pipe`` is set, ``"stopped"`` when it is ``None``.
    """
    if self._pipe is None:
        return "stopped"
    return "running"
|
generate
async
generate(
image: UploadFile,
points: Optional[List] = None,
boxes: Optional[List] = None,
mask_threshold: float = 0.1,
lora_checkpoints: Optional[Union[str, List[str]]] = [],
lora_weights: Optional[Union[float, List[float]]] = [],
lora_alphas: Optional[Union[float, List[float]]] = [],
lora_urls: Optional[Union[str, List[str]]] = [],
lora_files: Optional[Union[str, List[str]]] = [],
)
Source code in src/unitorch/cli/fastapis/sam.py
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
async def generate(
    self,
    image: UploadFile,
    points: Optional[List] = None,
    boxes: Optional[List] = None,
    mask_threshold: float = 0.1,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
):
    """Segment the uploaded image and stream the mask back as PNG.

    The upload is read fully and decoded with PIL, then handed to the
    pipeline together with the remaining arguments (forwarded as keyword
    arguments of the same name) while holding ``self._lock``.

    Returns:
        A ``StreamingResponse`` with media type ``image/png``. Its body is the
        PNG-encoded mask, or empty when the pipeline returns ``None``.

    Raises:
        AssertionError: If no pipeline is loaded (``self._pipe`` is ``None``).
    """
    assert self._pipe is not None
    payload = await image.read()
    source_image = Image.open(io.BytesIO(payload))
    async with self._lock:
        mask_image = self._pipe(
            source_image,
            points=points,
            boxes=boxes,
            mask_threshold=mask_threshold,
            lora_checkpoints=lora_checkpoints,
            lora_weights=lora_weights,
            lora_alphas=lora_alphas,
            lora_urls=lora_urls,
            lora_files=lora_files,
        )
    png_stream = io.BytesIO()
    if mask_image is not None:
        mask_image.save(png_stream, format="PNG")
        # Rewind so the response streams the PNG from its first byte.
        png_stream.seek(0)
    return StreamingResponse(png_stream, media_type="image/png")
|
SegformerForSegmentationFastAPI
Tip
core/fastapi/segformer is the section for configuration of SegformerForSegmentationFastAPI.
Bases: GenericFastAPI
Source code in src/unitorch/cli/fastapis/segformer.py
151
152
153
154
155
156
157
158
159
160
def __init__(self, config: Config):
    """Store ``config`` and register the service's HTTP routes.

    The router prefix comes from the ``router`` option of the
    ``core/fastapi/segformer`` section (default ``/core/fastapi/segformer``).
    No pipeline is loaded here: ``_pipe`` starts out as ``None``.
    """
    self.config = config
    config.set_default_section("core/fastapi/segformer")
    prefix = config.getoption("router", "/core/fastapi/segformer")
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    # Same registration order as before: generate, status, start, stop.
    route_table = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in route_table:
        self._router.add_api_route(path, handler, methods=[verb])
    # Held by generate() around each pipeline call.
    self._lock = asyncio.Lock()
|
_router = APIRouter(prefix=router)
start(
pretrained_name: Optional[
str
] = "segformer-swin-tiny-ade-semantic",
)
Source code in src/unitorch/cli/fastapis/segformer.py
167
168
169
170
171
172
173
def start(
    self, pretrained_name: Optional[str] = "segformer-swin-tiny-ade-semantic"
):
    """Build the segmentation pipeline from the stored config and keep it.

    Args:
        pretrained_name: Forwarded to
            ``SegformerForSegmentationPipeline.from_config``.

    Returns:
        The literal string ``"start success"``.
    """
    pipeline = SegformerForSegmentationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"
|
Source code in src/unitorch/cli/fastapis/segformer.py
176
177
178
179
180
181
def stop(self):
    """Unload the pipeline and release the memory it was holding.

    A loaded pipeline is moved to the CPU before its reference is dropped;
    a garbage-collection pass and ``torch.cuda.empty_cache()`` follow.
    Calling ``stop`` while nothing is loaded is a harmless no-op.

    Returns:
        The literal string ``"stop success"``.
    """
    pipe = self._pipe
    # Guard: stop() before start() (or twice in a row) used to call
    # ``None.to("cpu")`` and fail with AttributeError.
    if pipe is not None:
        pipe.to("cpu")
    # Drop every reference before collecting so the objects can be freed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"
|
Source code in src/unitorch/cli/fastapis/segformer.py
def status(self):
    """Report whether a pipeline is currently loaded.

    Returns:
        ``"running"`` when ``_pipe`` is set, ``"stopped"`` when it is ``None``.
    """
    if self._pipe is None:
        return "stopped"
    return "running"
|
generate(image: UploadFile)
Source code in src/unitorch/cli/fastapis/segformer.py
187
188
189
190
191
192
193
194
195
196
async def generate(
    self,
    image: UploadFile,
):
    """Segment the uploaded image and return the masks as plain lists.

    The upload is read fully and decoded with PIL, then passed to the
    pipeline while holding ``self._lock``. The pipeline result is iterated
    as ``(mask, label)`` pairs and each mask is converted with ``.tolist()``.

    Returns:
        A list of ``(mask_as_list, label)`` tuples, in pipeline order.

    Raises:
        AssertionError: If no pipeline is loaded (``self._pipe`` is ``None``).
    """
    assert self._pipe is not None
    payload = await image.read()
    picture = Image.open(io.BytesIO(payload))
    async with self._lock:
        segments = self._pipe(picture)
    serialised = []
    for mask, label in segments:
        serialised.append((mask.tolist(), label))
    return serialised
|
Siglip2ForMatchingFastAPI
Tip
core/fastapi/siglip is the section for configuration of Siglip2ForMatchingFastAPI.
Bases: GenericFastAPI
Source code in src/unitorch/cli/fastapis/siglip.py
207
208
209
210
211
212
213
214
215
216
def __init__(self, config: Config):
    """Store ``config`` and register the service's HTTP routes.

    The router prefix comes from the ``router`` option of the
    ``core/fastapi/siglip`` section (default ``/core/fastapi/siglip``). No
    pipeline is loaded here: ``_pipe`` starts out as ``None``.
    """
    self.config = config
    config.set_default_section("core/fastapi/siglip")
    prefix = config.getoption("router", "/core/fastapi/siglip")
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    # Same registration order as before: generate, status, start, stop.
    route_table = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in route_table:
        self._router.add_api_route(path, handler, methods=[verb])
    # Held by generate() around each pipeline call.
    self._lock = asyncio.Lock()
|
_router = APIRouter(prefix=router)
start(pretrained_name: str = 'siglip-base-patch16-224')
Source code in src/unitorch/cli/fastapis/siglip.py
def start(self, pretrained_name: str = "siglip-base-patch16-224"):
    """Build the matching pipeline from the stored config and keep it.

    Args:
        pretrained_name: Forwarded to
            ``Siglip2ForMatchingPipeline.from_config``.

    Returns:
        The literal string ``"start success"``.
    """
    pipeline = Siglip2ForMatchingPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"
|
Source code in src/unitorch/cli/fastapis/siglip.py
230
231
232
233
234
235
def stop(self):
    """Unload the pipeline and release the memory it was holding.

    A loaded pipeline is moved to the CPU before its reference is dropped;
    a garbage-collection pass and ``torch.cuda.empty_cache()`` follow.
    Calling ``stop`` while nothing is loaded is a harmless no-op.

    Returns:
        The literal string ``"stop success"``.
    """
    pipe = self._pipe
    # Guard: stop() before start() (or twice in a row) used to call
    # ``None.to("cpu")`` and fail with AttributeError.
    if pipe is not None:
        pipe.to("cpu")
    # Drop every reference before collecting so the objects can be freed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"
|
Source code in src/unitorch/cli/fastapis/siglip.py
def status(self):
    """Report whether a pipeline is currently loaded.

    Returns:
        ``"running"`` when ``_pipe`` is set, ``"stopped"`` when it is ``None``.
    """
    if self._pipe is None:
        return "stopped"
    return "running"
|
generate(
text: str,
image: UploadFile,
lora_checkpoints: Optional[Union[str, List[str]]] = [],
lora_weights: Optional[Union[float, List[float]]] = [],
lora_alphas: Optional[Union[float, List[float]]] = [],
lora_urls: Optional[Union[str, List[str]]] = [],
lora_files: Optional[Union[str, List[str]]] = [],
)
Source code in src/unitorch/cli/fastapis/siglip.py
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
async def generate(
    self,
    text: str,
    image: UploadFile,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
):
    """Run the loaded pipeline on ``text`` and the uploaded image.

    The upload is read fully and decoded with PIL. ``text`` and the decoded
    image are passed positionally, the LoRA arguments as keyword arguments
    of the same name. The pipeline call is made while holding ``self._lock``.

    Returns:
        Whatever the pipeline returns, unchanged.

    Raises:
        AssertionError: If no pipeline is loaded (``self._pipe`` is ``None``).
    """
    assert self._pipe is not None
    payload = await image.read()
    picture = Image.open(io.BytesIO(payload))
    lora_options = {
        "lora_checkpoints": lora_checkpoints,
        "lora_weights": lora_weights,
        "lora_alphas": lora_alphas,
        "lora_urls": lora_urls,
        "lora_files": lora_files,
    }
    async with self._lock:
        return self._pipe(text, picture, **lora_options)
|
WanForText2VideoFastAPI
Tip
core/fastapi/wan/text2video is the section for configuration of WanForText2VideoFastAPI.
Bases: GenericFastAPI
Source code in src/unitorch/cli/fastapis/wan/text2video.py
322
323
324
325
326
327
328
329
330
331
def __init__(self, config: Config):
    """Store ``config`` and register the service's HTTP routes.

    The router prefix comes from the ``router`` option of the
    ``core/fastapi/wan/text2video`` section (default
    ``/core/fastapi/wan/text2video``). No pipeline is loaded here: ``_pipe``
    starts out as ``None``.
    """
    self.config = config
    config.set_default_section("core/fastapi/wan/text2video")
    prefix = config.getoption("router", "/core/fastapi/wan/text2video")
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    # Same registration order as before; note that /start is a POST here.
    route_table = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "POST"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in route_table:
        self._router.add_api_route(path, handler, methods=[verb])
    # Held by generate() around each pipeline call.
    self._lock = asyncio.Lock()
|
config
instance-attribute
_router
instance-attribute
_router = APIRouter(prefix=router)
start
start(
pretrained_name: Optional[str] = "wan-v2.2-t2v-14b",
pretrained_lora_names: Optional[
Union[str, List[str]]
] = None,
pretrained_lora_weights: Optional[
Union[float, List[float]]
] = 1.0,
pretrained_lora_alphas: Optional[
Union[float, List[float]]
] = 32.0,
)
Source code in src/unitorch/cli/fastapis/wan/text2video.py
338
339
340
341
342
343
344
345
346
347
348
349
350
351
def start(
    self,
    pretrained_name: Optional[str] = "wan-v2.2-t2v-14b",
    pretrained_lora_names: Optional[Union[str, List[str]]] = None,
    pretrained_lora_weights: Optional[Union[float, List[float]]] = 1.0,
    pretrained_lora_alphas: Optional[Union[float, List[float]]] = 32.0,
):
    """Build the text-to-video pipeline from the stored config and keep it.

    All four arguments are forwarded, under the same names, to
    ``WanForText2VideoFastAPIPipeline.from_config``.

    Returns:
        The literal string ``"start success"``.
    """
    load_options = {
        "pretrained_name": pretrained_name,
        "pretrained_lora_names": pretrained_lora_names,
        "pretrained_lora_weights": pretrained_lora_weights,
        "pretrained_lora_alphas": pretrained_lora_alphas,
    }
    self._pipe = WanForText2VideoFastAPIPipeline.from_config(
        self.config, **load_options
    )
    return "start success"
|
stop
Source code in src/unitorch/cli/fastapis/wan/text2video.py
354
355
356
357
358
359
def stop(self):
    """Unload the pipeline and release the memory it was holding.

    A loaded pipeline is moved to the CPU before its reference is dropped;
    a garbage-collection pass and ``torch.cuda.empty_cache()`` follow.
    Calling ``stop`` while nothing is loaded is a harmless no-op.

    Returns:
        The literal string ``"stop success"``.
    """
    pipe = self._pipe
    # Guard: stop() before start() (or twice in a row) used to call
    # ``None.to("cpu")`` and fail with AttributeError.
    if pipe is not None:
        pipe.to("cpu")
    # Drop every reference before collecting so the objects can be freed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"
|
status
Source code in src/unitorch/cli/fastapis/wan/text2video.py
def status(self):
    """Report whether a pipeline is currently loaded.

    Returns:
        ``"running"`` when ``_pipe`` is set, ``"stopped"`` when it is ``None``.
    """
    if self._pipe is None:
        return "stopped"
    return "running"
|
generate
async
generate(
text: str,
neg_text: Optional[str] = "",
height: Optional[int] = 480,
width: Optional[int] = 832,
num_frames: Optional[int] = 81,
num_fps: Optional[int] = 16,
guidance_scale: Optional[float] = 5.0,
num_timesteps: Optional[int] = 50,
seed: Optional[int] = 1123,
)
Source code in src/unitorch/cli/fastapis/wan/text2video.py
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
async def generate(
    self,
    text: str,
    neg_text: Optional[str] = "",
    height: Optional[int] = 480,
    width: Optional[int] = 832,
    num_frames: Optional[int] = 81,
    num_fps: Optional[int] = 16,
    guidance_scale: Optional[float] = 5.0,
    num_timesteps: Optional[int] = 50,
    seed: Optional[int] = 1123,
):
    """Generate a video for ``text`` and stream it back as an MP4 attachment.

    The pipeline is called while holding ``self._lock``, with every argument
    other than ``text`` forwarded as a keyword argument of the same name. Its
    return value is opened as a file path and read fully into memory.

    Returns:
        A ``StreamingResponse`` with media type ``video/mp4`` and a
        ``Content-Disposition`` header naming the file ``output.mp4``.

    Raises:
        AssertionError: If no pipeline is loaded (``self._pipe`` is ``None``).
    """
    assert self._pipe is not None
    async with self._lock:
        video_path = self._pipe(
            text,
            neg_text=neg_text,
            height=height,
            width=width,
            num_frames=num_frames,
            num_fps=num_fps,
            guidance_scale=guidance_scale,
            num_timesteps=num_timesteps,
            seed=seed,
        )
    with open(video_path, "rb") as video_file:
        # A BytesIO built from bytes starts at position 0, ready to stream.
        payload = io.BytesIO(video_file.read())
    return StreamingResponse(
        payload,
        media_type="video/mp4",
        headers={"Content-Disposition": "attachment; filename=output.mp4"},
    )
|
WanForImage2VideoFastAPI
Tip
core/fastapi/wan/image2video is the section for configuration of WanForImage2VideoFastAPI.
Bases: GenericFastAPI
Source code in src/unitorch/cli/fastapis/wan/image2video.py
326
327
328
329
330
331
332
333
334
335
def __init__(self, config: Config):
    """Store ``config`` and register the service's HTTP routes.

    The router prefix comes from the ``router`` option of the
    ``core/fastapi/wan/image2video`` section (default
    ``/core/fastapi/wan/image2video``). No pipeline is loaded here:
    ``_pipe`` starts out as ``None``.
    """
    self.config = config
    config.set_default_section("core/fastapi/wan/image2video")
    prefix = config.getoption("router", "/core/fastapi/wan/image2video")
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    # Same registration order as before; note that /start is a POST here.
    route_table = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "POST"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in route_table:
        self._router.add_api_route(path, handler, methods=[verb])
    # Held by generate() around each pipeline call.
    self._lock = asyncio.Lock()
|
config
instance-attribute
_router
instance-attribute
_router = APIRouter(prefix=router)
start
start(
pretrained_name: Optional[str] = "wan-v2.2-i2v-14b",
pretrained_lora_names: Optional[
Union[str, List[str]]
] = None,
pretrained_lora_weights: Optional[
Union[float, List[float]]
] = 1.0,
pretrained_lora_alphas: Optional[
Union[float, List[float]]
] = 32.0,
)
Source code in src/unitorch/cli/fastapis/wan/image2video.py
342
343
344
345
346
347
348
349
350
351
352
353
354
355
def start(
    self,
    pretrained_name: Optional[str] = "wan-v2.2-i2v-14b",
    pretrained_lora_names: Optional[Union[str, List[str]]] = None,
    pretrained_lora_weights: Optional[Union[float, List[float]]] = 1.0,
    pretrained_lora_alphas: Optional[Union[float, List[float]]] = 32.0,
):
    """Build the image-to-video pipeline from the stored config and keep it.

    All four arguments are forwarded, under the same names, to
    ``WanForImage2VideoFastAPIPipeline.from_config``.

    Returns:
        The literal string ``"start success"``.
    """
    load_options = {
        "pretrained_name": pretrained_name,
        "pretrained_lora_names": pretrained_lora_names,
        "pretrained_lora_weights": pretrained_lora_weights,
        "pretrained_lora_alphas": pretrained_lora_alphas,
    }
    self._pipe = WanForImage2VideoFastAPIPipeline.from_config(
        self.config, **load_options
    )
    return "start success"
|
stop
Source code in src/unitorch/cli/fastapis/wan/image2video.py
358
359
360
361
362
363
def stop(self):
    """Unload the pipeline and release the memory it was holding.

    A loaded pipeline is moved to the CPU before its reference is dropped;
    a garbage-collection pass and ``torch.cuda.empty_cache()`` follow.
    Calling ``stop`` while nothing is loaded is a harmless no-op.

    Returns:
        The literal string ``"stop success"``.
    """
    pipe = self._pipe
    # Guard: stop() before start() (or twice in a row) used to call
    # ``None.to("cpu")`` and fail with AttributeError.
    if pipe is not None:
        pipe.to("cpu")
    # Drop every reference before collecting so the objects can be freed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"
|
status
Source code in src/unitorch/cli/fastapis/wan/image2video.py
def status(self):
    """Report whether a pipeline is currently loaded.

    Returns:
        ``"running"`` when ``_pipe`` is set, ``"stopped"`` when it is ``None``.
    """
    if self._pipe is None:
        return "stopped"
    return "running"
|
generate
async
generate(
text: str,
image: UploadFile,
neg_text: Optional[str] = "",
num_frames: Optional[int] = 81,
num_fps: Optional[int] = 16,
guidance_scale: Optional[float] = 5.0,
strength: Optional[float] = 1.0,
num_timesteps: Optional[int] = 50,
seed: Optional[int] = 1123,
)
Source code in src/unitorch/cli/fastapis/wan/image2video.py
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
async def generate(
    self,
    text: str,
    image: UploadFile,
    neg_text: Optional[str] = "",
    num_frames: Optional[int] = 81,
    num_fps: Optional[int] = 16,
    guidance_scale: Optional[float] = 5.0,
    strength: Optional[float] = 1.0,
    num_timesteps: Optional[int] = 50,
    seed: Optional[int] = 1123,
):
    """Generate a video from ``text`` and an uploaded image; stream it as MP4.

    The upload is read fully and decoded with PIL. ``text`` and the decoded
    image are passed positionally, the remaining arguments as keyword
    arguments of the same name, while holding ``self._lock``. The pipeline's
    return value is opened as a file path and read fully into memory.

    Returns:
        A ``StreamingResponse`` with media type ``video/mp4`` and a
        ``Content-Disposition`` header naming the file ``output.mp4``.

    Raises:
        AssertionError: If no pipeline is loaded (``self._pipe`` is ``None``).
    """
    assert self._pipe is not None
    upload_bytes = await image.read()
    first_frame = Image.open(io.BytesIO(upload_bytes))
    async with self._lock:
        video_path = self._pipe(
            text,
            first_frame,
            neg_text=neg_text,
            num_frames=num_frames,
            num_fps=num_fps,
            guidance_scale=guidance_scale,
            strength=strength,
            num_timesteps=num_timesteps,
            seed=seed,
        )
    with open(video_path, "rb") as video_file:
        # A BytesIO built from bytes starts at position 0, ready to stream.
        payload = io.BytesIO(video_file.read())
    return StreamingResponse(
        payload,
        media_type="video/mp4",
        headers={"Content-Disposition": "attachment; filename=output.mp4"},
    )
|
QWenImageText2ImageFastAPI
Tip
core/fastapi/qwen_image/text2image is the section for configuration of QWenImageText2ImageFastAPI.
Bases: GenericFastAPI
Source code in src/unitorch/cli/fastapis/qwen_image/text2image.py
346
347
348
349
350
351
352
353
354
355
def __init__(self, config: Config):
    """Store ``config`` and register the service's HTTP routes.

    The router prefix comes from the ``router`` option of the
    ``core/fastapi/qwen_image/text2image`` section (default
    ``/core/fastapi/qwen_image/text2image``). No pipeline is loaded here:
    ``_pipe`` starts out as ``None``.
    """
    self.config = config
    config.set_default_section("core/fastapi/qwen_image/text2image")
    prefix = config.getoption("router", "/core/fastapi/qwen_image/text2image")
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    # NOTE(review): /generate is registered as GET here, whereas the other
    # services in this module use POST -- kept as-is; confirm it is intended.
    route_table = (
        ("/generate", self.generate, "GET"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "POST"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in route_table:
        self._router.add_api_route(path, handler, methods=[verb])
    # Held by generate() around each pipeline call.
    self._lock = asyncio.Lock()
|
config
instance-attribute
_router
instance-attribute
_router = APIRouter(prefix=router)
start
start(
pretrained_name: Optional[str] = "qwen-image",
pretrained_lora_names: Optional[
Union[str, List[str]]
] = None,
pretrained_lora_weights: Optional[
Union[float, List[float]]
] = 1.0,
pretrained_lora_alphas: Optional[
Union[float, List[float]]
] = 32.0,
)
Source code in src/unitorch/cli/fastapis/qwen_image/text2image.py
362
363
364
365
366
367
368
369
370
371
372
373
374
375
def start(
    self,
    pretrained_name: Optional[str] = "qwen-image",
    pretrained_lora_names: Optional[Union[str, List[str]]] = None,
    pretrained_lora_weights: Optional[Union[float, List[float]]] = 1.0,
    pretrained_lora_alphas: Optional[Union[float, List[float]]] = 32.0,
):
    """Build the text-to-image pipeline from the stored config and keep it.

    All four arguments are forwarded, under the same names, to
    ``QWenImageForText2ImageFastAPIPipeline.from_config``.

    Returns:
        The literal string ``"start success"``.
    """
    load_options = {
        "pretrained_name": pretrained_name,
        "pretrained_lora_names": pretrained_lora_names,
        "pretrained_lora_weights": pretrained_lora_weights,
        "pretrained_lora_alphas": pretrained_lora_alphas,
    }
    self._pipe = QWenImageForText2ImageFastAPIPipeline.from_config(
        self.config, **load_options
    )
    return "start success"
|
stop
Source code in src/unitorch/cli/fastapis/qwen_image/text2image.py
378
379
380
381
382
383
def stop(self):
    """Unload the pipeline and release the memory it was holding.

    A loaded pipeline is moved to the CPU before its reference is dropped;
    a garbage-collection pass and ``torch.cuda.empty_cache()`` follow.
    Calling ``stop`` while nothing is loaded is a harmless no-op.

    Returns:
        The literal string ``"stop success"``.
    """
    pipe = self._pipe
    # Guard: stop() before start() (or twice in a row) used to call
    # ``None.to("cpu")`` and fail with AttributeError.
    if pipe is not None:
        pipe.to("cpu")
    # Drop every reference before collecting so the objects can be freed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"
|
status
Source code in src/unitorch/cli/fastapis/qwen_image/text2image.py
def status(self):
    """Report whether a pipeline is currently loaded.

    Returns:
        ``"running"`` when ``_pipe`` is set, ``"stopped"`` when it is ``None``.
    """
    if self._pipe is None:
        return "stopped"
    return "running"
|
generate
async
generate(
text: str,
height: Optional[int] = 512,
width: Optional[int] = 512,
guidance_scale: Optional[float] = 4.0,
num_timesteps: Optional[int] = 50,
seed: Optional[int] = 1123,
)
Source code in src/unitorch/cli/fastapis/qwen_image/text2image.py
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
async def generate(
    self,
    text: str,
    height: Optional[int] = 512,
    width: Optional[int] = 512,
    guidance_scale: Optional[float] = 4.0,
    num_timesteps: Optional[int] = 50,
    seed: Optional[int] = 1123,
):
    """Generate an image for ``text`` and stream it back as PNG.

    The pipeline is called while holding ``self._lock``, with every argument
    other than ``text`` forwarded as a keyword argument of the same name. Its
    result is saved in PNG format into an in-memory buffer.

    Returns:
        A ``StreamingResponse`` with media type ``image/png``.

    Raises:
        AssertionError: If no pipeline is loaded (``self._pipe`` is ``None``).
    """
    assert self._pipe is not None
    async with self._lock:
        rendered = self._pipe(
            text,
            height=height,
            width=width,
            guidance_scale=guidance_scale,
            num_timesteps=num_timesteps,
            seed=seed,
        )
    png_stream = io.BytesIO()
    rendered.save(png_stream, format="PNG")
    # Rewind so the response streams the PNG from its first byte.
    png_stream.seek(0)
    return StreamingResponse(png_stream, media_type="image/png")
|
QWenImageEditingFastAPI
Tip
core/fastapi/qwen_image/editing is the section for configuration of QWenImageEditingFastAPI.
Bases: GenericFastAPI
Source code in src/unitorch/cli/fastapis/qwen_image/image_editing.py
367
368
369
370
371
372
373
374
375
376
def __init__(self, config: Config):
    """Store ``config`` and register the service's HTTP routes.

    The router prefix comes from the ``router`` option of the
    ``core/fastapi/qwen_image/editing`` section (default
    ``/core/fastapi/qwen_image/editing``). No pipeline is loaded here:
    ``_pipe`` starts out as ``None``.
    """
    self.config = config
    config.set_default_section("core/fastapi/qwen_image/editing")
    prefix = config.getoption("router", "/core/fastapi/qwen_image/editing")
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    # Same registration order as before; note that /start is a POST here.
    route_table = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "POST"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in route_table:
        self._router.add_api_route(path, handler, methods=[verb])
    # Held by generate() around each pipeline call.
    self._lock = asyncio.Lock()
|
config
instance-attribute
_router
instance-attribute
_router = APIRouter(prefix=router)
start
start(
pretrained_name: Optional[str] = "qwen-image-editing",
pretrained_lora_names: Optional[
Union[str, List[str]]
] = None,
pretrained_lora_weights: Optional[
Union[float, List[float]]
] = 1.0,
pretrained_lora_alphas: Optional[
Union[float, List[float]]
] = 32.0,
)
Source code in src/unitorch/cli/fastapis/qwen_image/image_editing.py
383
384
385
386
387
388
389
390
391
392
393
394
395
396
def start(
    self,
    pretrained_name: Optional[str] = "qwen-image-editing",
    pretrained_lora_names: Optional[Union[str, List[str]]] = None,
    pretrained_lora_weights: Optional[Union[float, List[float]]] = 1.0,
    pretrained_lora_alphas: Optional[Union[float, List[float]]] = 32.0,
):
    """Build the image-editing pipeline from the stored config and keep it.

    All four arguments are forwarded, under the same names, to
    ``QWenImageForImageEditingFastAPIPipeline.from_config``.

    Returns:
        The literal string ``"start success"``.
    """
    load_options = {
        "pretrained_name": pretrained_name,
        "pretrained_lora_names": pretrained_lora_names,
        "pretrained_lora_weights": pretrained_lora_weights,
        "pretrained_lora_alphas": pretrained_lora_alphas,
    }
    self._pipe = QWenImageForImageEditingFastAPIPipeline.from_config(
        self.config, **load_options
    )
    return "start success"
|
stop
Source code in src/unitorch/cli/fastapis/qwen_image/image_editing.py
399
400
401
402
403
404
def stop(self):
    """Unload the pipeline and release the memory it was holding.

    A loaded pipeline is moved to the CPU before its reference is dropped;
    a garbage-collection pass and ``torch.cuda.empty_cache()`` follow.
    Calling ``stop`` while nothing is loaded is a harmless no-op.

    Returns:
        The literal string ``"stop success"``.
    """
    pipe = self._pipe
    # Guard: stop() before start() (or twice in a row) used to call
    # ``None.to("cpu")`` and fail with AttributeError.
    if pipe is not None:
        pipe.to("cpu")
    # Drop every reference before collecting so the objects can be freed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"
|
status
Source code in src/unitorch/cli/fastapis/qwen_image/image_editing.py
def status(self):
    """Report whether a pipeline is currently loaded.

    Returns:
        ``"running"`` when ``_pipe`` is set, ``"stopped"`` when it is ``None``.
    """
    if self._pipe is None:
        return "stopped"
    return "running"
|
generate
async
generate(
text: str,
image: UploadFile,
height: Optional[int] = 512,
width: Optional[int] = 512,
guidance_scale: Optional[float] = 2.5,
num_timesteps: Optional[int] = 50,
seed: Optional[int] = 1123,
)
Source code in src/unitorch/cli/fastapis/qwen_image/image_editing.py
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
async def generate(
    self,
    text: str,
    image: UploadFile,
    height: Optional[int] = 512,
    width: Optional[int] = 512,
    guidance_scale: Optional[float] = 2.5,
    num_timesteps: Optional[int] = 50,
    seed: Optional[int] = 1123,
):
    """Edit the uploaded image according to ``text`` and stream the PNG result.

    The upload is read fully and decoded with PIL, then passed to the
    pipeline as ``image`` together with the remaining arguments (forwarded
    as keyword arguments of the same name) while holding ``self._lock``. The
    pipeline's result is saved in PNG format into an in-memory buffer.

    Returns:
        A ``StreamingResponse`` with media type ``image/png``.

    Raises:
        AssertionError: If no pipeline is loaded (``self._pipe`` is ``None``).
    """
    assert self._pipe is not None
    upload_bytes = await image.read()
    source_image = Image.open(io.BytesIO(upload_bytes))
    async with self._lock:
        edited = self._pipe(
            text,
            image=source_image,
            height=height,
            width=width,
            guidance_scale=guidance_scale,
            num_timesteps=num_timesteps,
            seed=seed,
        )
    png_stream = io.BytesIO()
    edited.save(png_stream, format="PNG")
    # Rewind so the response streams the PNG from its first byte.
    png_stream.seek(0)
    return StreamingResponse(png_stream, media_type="image/png")
|
QWen3VLLMFastAPI
Tip
core/fastapi/vllm/qwen3 is the section for configuration of QWen3VLLMFastAPI.
Bases: GenericFastAPI
FastAPI service for QWen3 text generation powered by vLLM.
Exposes /generate, /status, /start, and /stop endpoints
under a configurable router prefix (default /core/fastapi/vllm/qwen3).
Source code in src/unitorch/cli/fastapis/qwen_vllm.py
26
27
28
29
30
31
32
33
34
35
def __init__(self, config: Config):
    """Store ``config`` and register the service's HTTP routes.

    The router prefix comes from the ``router`` option of the
    ``core/fastapi/vllm/qwen3`` section (default
    ``/core/fastapi/vllm/qwen3``). No engine is loaded here: ``_pipe``
    starts out as ``None``.
    """
    self.config = config
    config.set_default_section("core/fastapi/vllm/qwen3")
    prefix = config.getoption("router", "/core/fastapi/vllm/qwen3")
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    # Same registration order as before: generate, status, start, stop.
    route_table = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in route_table:
        self._router.add_api_route(path, handler, methods=[verb])
    self._lock = asyncio.Lock()
|
config
instance-attribute
_router
instance-attribute
_router = APIRouter(prefix=router)
start
start(pretrained_name: str = 'qwen3-4b-thinking')
Loads and starts the vLLM QWen3 engine.
Parameters:
| Name | Type | Description | Default |
| --- | --- | --- | --- |
| pretrained_name | str | Pretrained model name to load. Defaults to "qwen3-4b-thinking". | 'qwen3-4b-thinking' |
Source code in src/unitorch/cli/fastapis/qwen_vllm.py
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
def start(self, pretrained_name: str = "qwen3-4b-thinking"):
    """
    Loads and starts the vLLM QWen3 engine.

    When ``pretrained_vllm_infos`` has a ``pretrained_name_or_path`` entry for
    the requested name, the name is also written to the ``pretrained_name``
    option of the ``core/fastapi/vllm/qwen3`` config section before the
    engine is built.

    Args:
        pretrained_name (str): Pretrained model name to load. Defaults to ``"qwen3-4b-thinking"``.

    Returns:
        The literal string ``"start success"``.
    """
    known_path = nested_dict_value(
        pretrained_vllm_infos, pretrained_name, "pretrained_name_or_path"
    )
    self.config.set_default_section("core/fastapi/vllm/qwen3")
    if known_path is not None:
        self.config.set(
            "core/fastapi/vllm/qwen3", "pretrained_name", pretrained_name
        )
    engine = QWen3VLLMForGeneration.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = engine
    return "start success"
|
stop
Stops and unloads the vLLM engine, releasing GPU memory.
Source code in src/unitorch/cli/fastapis/qwen_vllm.py
63
64
65
66
67
68
69
70
71
72
73
74
75
def stop(self):
    """
    Stops and unloads the vLLM engine, releasing GPU memory.

    The engine reference is dropped, a garbage-collection pass runs, and
    ``torch.cuda.empty_cache()`` is attempted on a best-effort basis: any
    exception from importing torch or emptying the cache is ignored.

    Returns:
        The literal string ``"stop success"``.
    """
    del self._pipe
    gc.collect()
    try:
        import torch
    except Exception:
        # Best effort only: without torch there is no cache to empty.
        torch = None
    if torch is not None:
        try:
            torch.cuda.empty_cache()
        except Exception:
            pass
    self._pipe = None
    return "stop success"
|
status
Returns "running" if the engine is loaded, otherwise "stopped".
Source code in src/unitorch/cli/fastapis/qwen_vllm.py
def status(self):
    """Reports ``"running"`` while an engine is loaded and ``"stopped"`` otherwise."""
    if self._pipe is None:
        return "stopped"
    return "running"
|
generate
async
generate(
text: str,
use_chat_template: Optional[bool] = True,
max_gen_seq_length: Optional[int] = 512,
min_gen_seq_length: Optional[int] = 0,
num_return_sequences: Optional[int] = 1,
num_beams: Optional[int] = 1,
do_sample: Optional[bool] = False,
temperature: Optional[float] = 1.0,
top_k: Optional[int] = 50,
top_p: Optional[float] = 1.0,
repetition_penalty: Optional[float] = 1.0,
stop: Optional[Union[str, List[str]]] = None,
)
Generates a text completion for the given prompt.
Parameters:
| Name |
Type |
Description |
Default |
text
|
str
|
Input prompt or JSON-encoded message list (when use_chat_template=True).
|
required
|
use_chat_template
|
bool
|
Apply chat template formatting. Defaults to True.
|
True
|
max_gen_seq_length
|
int
|
Maximum tokens to generate. Defaults to 512.
|
512
|
min_gen_seq_length
|
int
|
Minimum tokens to generate. Defaults to 0.
|
0
|
num_return_sequences
|
int
|
Number of completions to return. Defaults to 1.
|
1
|
num_beams
|
int
|
Beam search width. Defaults to 1.
|
1
|
do_sample
|
bool
|
Enable sampling-based decoding. Defaults to False.
|
False
|
temperature
|
float
|
Sampling temperature. Defaults to 1.0.
|
1.0
|
top_k
|
int
|
Top-k sampling. Defaults to 50.
|
50
|
top_p
|
float
|
Top-p (nucleus) sampling. Defaults to 1.0.
|
1.0
|
repetition_penalty
|
float
|
Repetition penalty. Defaults to 1.0.
|
1.0
|
stop
|
str or List[str]
|
Stop string(s) to end generation.
|
None
|
Returns:
| Type |
Description |
|
|
str or List[str]: Generated text. Single string when num_return_sequences=1.
|
Source code in src/unitorch/cli/fastapis/qwen_vllm.py
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
async def generate(
    self,
    text: str,
    use_chat_template: Optional[bool] = True,
    max_gen_seq_length: Optional[int] = 512,
    min_gen_seq_length: Optional[int] = 0,
    num_return_sequences: Optional[int] = 1,
    num_beams: Optional[int] = 1,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
    repetition_penalty: Optional[float] = 1.0,
    stop: Optional[Union[str, List[str]]] = None,
):
    """
    Generates a text completion for the given prompt.

    Args:
        text (str): Input prompt or JSON-encoded message list (when ``use_chat_template=True``).
        use_chat_template (bool): Apply chat template formatting. Defaults to True.
        max_gen_seq_length (int): Maximum tokens to generate. Defaults to 512.
        min_gen_seq_length (int): Minimum tokens to generate. Defaults to 0.
        num_return_sequences (int): Number of completions to return. Defaults to 1.
        num_beams (int): Beam search width. Defaults to 1.
        do_sample (bool): Enable sampling-based decoding. Defaults to False.
        temperature (float): Sampling temperature. Defaults to 1.0.
        top_k (int): Top-k sampling. Defaults to 50.
        top_p (float): Top-p (nucleus) sampling. Defaults to 1.0.
        repetition_penalty (float): Repetition penalty. Defaults to 1.0.
        stop (str or List[str], optional): Stop string(s) to end generation.

    Returns:
        str or List[str]: Generated text. Single string when ``num_return_sequences=1``.

    Raises:
        AssertionError: If the engine has not been started.
    """
    # Bind the engine once so the rest of the request keeps using the object
    # that passed the check, even if ``_pipe`` is rebound while this
    # coroutine waits for the lock.
    pipe = self._pipe
    if pipe is None:
        # Explicit raise (same type and message as the former ``assert``) so
        # the guard is not stripped when Python runs with ``-O``.
        raise AssertionError("Service not started. Call /start first.")
    processor = pipe.processor
    prompt = (
        processor.chat_template(messages=json.loads(text))
        if use_chat_template
        else text
    )
    inputs = processor.generation_inputs(text=prompt)
    # Add a leading dimension of size 1 before handing the ids to the engine.
    input_ids = inputs.input_ids.unsqueeze(0)
    async with self._lock:
        outputs = pipe.generate(
            input_ids=input_ids,
            max_gen_seq_length=max_gen_seq_length,
            min_gen_seq_length=min_gen_seq_length,
            num_return_sequences=num_return_sequences,
            num_beams=num_beams,
            do_sample=do_sample,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            stop=stop,
        )
    decoded = processor.detokenize(sequences=outputs.sequences)
    # Only one prompt was submitted, so take the first entry of the result.
    sequences = decoded[0]
    return sequences[0] if num_return_sequences == 1 else sequences
|
QWen3VLVLLMFastAPI
Tip
core/fastapi/vllm/qwen3_vl is the section for configuration of QWen3VLVLLMFastAPI.
Bases: GenericFastAPI
FastAPI service for QWen3-VL vision-language generation powered by vLLM.
Exposes /generate, /status, /start, and /stop endpoints
under a configurable router prefix (default /core/fastapi/vllm/qwen3_vl).
Accepts both text-only and multimodal (text + image) generation requests.
Source code in src/unitorch/cli/fastapis/qwen_vl_vllm.py
29
30
31
32
33
34
35
36
37
38
def __init__(self, config: Config):
    """
    Sets up the QWen3-VL vLLM service without loading any engine.

    Stores the config, reads the route prefix from the ``router`` option of
    the ``core/fastapi/vllm/qwen3_vl`` section and registers the four
    endpoints.

    Args:
        config (Config): Configuration object used for this service.
    """
    self.config = config
    config.set_default_section("core/fastapi/vllm/qwen3_vl")
    prefix = config.getoption("router", "/core/fastapi/vllm/qwen3_vl")
    # No engine yet; ``start`` is responsible for creating it.
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    endpoints = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in endpoints:
        self._router.add_api_route(path, handler, methods=[verb])
    # Held around the engine's generate call in ``generate``.
    self._lock = asyncio.Lock()
|
config
instance-attribute
_router
instance-attribute
_router = APIRouter(prefix=router)
start
start(pretrained_name: str = 'qwen3-vl-2b-instruct')
Loads and starts the vLLM QWen3-VL multimodal engine.
Parameters:
| Name |
Type |
Description |
Default |
pretrained_name
|
str
|
Pretrained model name to load. Defaults to "qwen3-vl-2b-instruct".
|
'qwen3-vl-2b-instruct'
|
Source code in src/unitorch/cli/fastapis/qwen_vl_vllm.py
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
def start(self, pretrained_name: str = "qwen3-vl-2b-instruct"):
    """
    Loads and starts the vLLM QWen3-VL multimodal engine.

    Args:
        pretrained_name (str): Pretrained model name to load. Defaults to ``"qwen3-vl-2b-instruct"``.

    Returns:
        str: The literal ``"start success"`` once the engine object exists.
    """
    section = "core/fastapi/vllm/qwen3_vl"
    self.config.set_default_section(section)
    # The requested name is always written to the config before loading.
    self.config.set(section, "pretrained_name", pretrained_name)
    engine = QWen3VLVLLMForGeneration.from_config(
        self.config,
        pretrained_name=pretrained_name,
    )
    self._pipe = engine
    return "start success"
|
stop
Stops and unloads the vLLM engine, releasing GPU memory.
Source code in src/unitorch/cli/fastapis/qwen_vl_vllm.py
62
63
64
65
66
67
68
69
70
71
72
73
74
def stop(self):
    """
    Stops and unloads the vLLM engine, releasing GPU memory.

    The engine reference is dropped by rebinding ``_pipe`` to ``None``
    instead of deleting the attribute. ``_pipe`` therefore stays defined for
    the whole call, so a concurrent ``status`` request never sees a missing
    attribute, and calling ``stop`` repeatedly is safe.

    Returns:
        str: The literal ``"stop success"``.
    """
    # Rebinding drops the reference just like ``del`` did, without ever
    # leaving the instance without a ``_pipe`` attribute.
    self._pipe = None
    gc.collect()
    try:
        import torch

        torch.cuda.empty_cache()
    except Exception:
        # Deliberate best-effort: failing to import torch or to clear the
        # CUDA cache must not turn a successful unload into an error.
        pass
    return "stop success"
|
status
Returns "running" if the engine is loaded, otherwise "stopped".
Source code in src/unitorch/cli/fastapis/qwen_vl_vllm.py
def status(self):
    """Reports ``"running"`` while an engine is loaded and ``"stopped"`` otherwise."""
    if self._pipe is None:
        return "stopped"
    return "running"
|
generate
async
generate(
text: str,
image: Optional[UploadFile] = File(default=None),
use_chat_template: Optional[bool] = True,
max_gen_seq_length: Optional[int] = 512,
min_gen_seq_length: Optional[int] = 0,
num_return_sequences: Optional[int] = 1,
do_sample: Optional[bool] = False,
temperature: Optional[float] = 1.0,
top_k: Optional[int] = 50,
top_p: Optional[float] = 1.0,
repetition_penalty: Optional[float] = 1.0,
stop: Optional[Union[str, List[str]]] = None,
)
Generates a text completion for the given prompt and optional image.
Parameters:
| Name |
Type |
Description |
Default |
text
|
str
|
Input prompt or JSON-encoded message list (when use_chat_template=True).
|
required
|
image
|
UploadFile
|
Uploaded image file for multimodal generation.
|
File(default=None)
|
use_chat_template
|
bool
|
Apply chat template formatting. Defaults to True.
|
True
|
max_gen_seq_length
|
int
|
Maximum tokens to generate. Defaults to 512.
|
512
|
min_gen_seq_length
|
int
|
Minimum tokens to generate. Defaults to 0.
|
0
|
num_return_sequences
|
int
|
Number of completions to return. Defaults to 1.
|
1
|
do_sample
|
bool
|
Enable sampling-based decoding. Defaults to False.
|
False
|
temperature
|
float
|
Sampling temperature. Defaults to 1.0.
|
1.0
|
top_k
|
int
|
Top-k sampling. Defaults to 50.
|
50
|
top_p
|
float
|
Top-p (nucleus) sampling. Defaults to 1.0.
|
1.0
|
repetition_penalty
|
float
|
Repetition penalty. Defaults to 1.0.
|
1.0
|
stop
|
str or List[str]
|
Stop string(s) to end generation.
|
None
|
Returns:
| Type |
Description |
|
|
str or List[str]: Generated text. Single string when num_return_sequences=1.
|
Source code in src/unitorch/cli/fastapis/qwen_vl_vllm.py
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
async def generate(
    self,
    text: str,
    image: Optional[UploadFile] = File(default=None),
    use_chat_template: Optional[bool] = True,
    max_gen_seq_length: Optional[int] = 512,
    min_gen_seq_length: Optional[int] = 0,
    num_return_sequences: Optional[int] = 1,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
    repetition_penalty: Optional[float] = 1.0,
    stop: Optional[Union[str, List[str]]] = None,
):
    """
    Generates a text completion for the given prompt and optional image.

    Args:
        text (str): Input prompt or JSON-encoded message list (when ``use_chat_template=True``).
        image (UploadFile, optional): Uploaded image file for multimodal generation.
        use_chat_template (bool): Apply chat template formatting. Defaults to True.
        max_gen_seq_length (int): Maximum tokens to generate. Defaults to 512.
        min_gen_seq_length (int): Minimum tokens to generate. Defaults to 0.
        num_return_sequences (int): Number of completions to return. Defaults to 1.
        do_sample (bool): Enable sampling-based decoding. Defaults to False.
        temperature (float): Sampling temperature. Defaults to 1.0.
        top_k (int): Top-k sampling. Defaults to 50.
        top_p (float): Top-p (nucleus) sampling. Defaults to 1.0.
        repetition_penalty (float): Repetition penalty. Defaults to 1.0.
        stop (str or List[str], optional): Stop string(s) to end generation.

    Returns:
        str or List[str]: Generated text. Single string when ``num_return_sequences=1``.

    Raises:
        AssertionError: If the engine has not been started.
    """
    # Bind the engine once: this coroutine awaits (upload read, lock) before
    # the engine is used, and ``_pipe`` may be rebound in the meantime.
    pipe = self._pipe
    if pipe is None:
        # Explicit raise (same type and message as the former ``assert``) so
        # the guard is not stripped when Python runs with ``-O``.
        raise AssertionError("Service not started. Call /start first.")
    pil_image = None
    if image is not None:
        content = await image.read()
        pil_image = Image.open(io.BytesIO(content)).convert("RGB")
    has_image = pil_image is not None
    processor = pipe.processor
    prompt = (
        processor.chat_template(messages=json.loads(text))
        if use_chat_template
        else text
    )
    inputs = processor.generation_inputs(
        text=prompt,
        images=[pil_image] if has_image else [],
    )
    # Add a leading dimension of size 1 before handing tensors to the engine.
    input_ids = inputs.input_ids.unsqueeze(0)
    # Image tensors are read from the processor output only when an image
    # was uploaded; text-only requests pass ``None`` for both.
    pixel_values = inputs.pixel_values.unsqueeze(0) if has_image else None
    image_grid_thw = inputs.image_grid_thw if has_image else None
    async with self._lock:
        outputs = pipe.generate(
            input_ids=input_ids,
            pixel_values=pixel_values,
            image_grid_thw=image_grid_thw,
            max_gen_seq_length=max_gen_seq_length,
            min_gen_seq_length=min_gen_seq_length,
            num_return_sequences=num_return_sequences,
            do_sample=do_sample,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            stop=stop,
        )
    decoded = processor.detokenize(sequences=outputs.sequences)
    # Only one prompt was submitted, so take the first entry of the result.
    sequences = decoded[0]
    return sequences[0] if num_return_sequences == 1 else sequences
|