Skip to content

unitorch.cli.fastapis¤

InfoFastAPI¤

Tip

core/fastapi/info is the section for configuration of InfoFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/info.py
14
15
16
17
18
19
20
21
def __init__(self, config: Config):
    """Bind the config and expose the ``/status`` route.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/info``; the ``router`` and ``device`` options are
            then read from it.
    """
    self.config = config
    config.set_default_section("core/fastapi/info")
    prefix = config.getoption("router", "/core/fastapi/info")
    # Device(s) whose memory status() reports; "cpu" means host memory only.
    self._device = config.getoption("device", "cpu")
    self._lock = asyncio.Lock()
    self._router = APIRouter(prefix=prefix)
    self._router.add_api_route("/status", self.status, methods=["GET"])

config instance-attribute ¤

config = config

_device instance-attribute ¤

_device = getoption('device', 'cpu')

_router instance-attribute ¤

_router = APIRouter(prefix=router)

_lock instance-attribute ¤

_lock = Lock()

router property ¤

router

start ¤

start()
Source code in src/unitorch/cli/fastapis/info.py
27
28
def start(self):
    """Return the fixed acknowledgement string; nothing else is done."""
    message = "start success"
    return message

stop ¤

stop()
Source code in src/unitorch/cli/fastapis/info.py
30
31
def stop(self):
    """Return the fixed acknowledgement string; nothing else is done."""
    message = "stop success"
    return message

status ¤

status()
Source code in src/unitorch/cli/fastapis/info.py
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
def status(self):
    """Report host memory and, for non-CPU devices, CUDA memory in GiB.

    Returns:
        A dict with a ``"cpu"`` entry built from ``psutil.virtual_memory()``.
        When ``self._device`` is a list, one ``"cuda:<device>"`` entry is
        added per element; for any other non-``"cpu"`` value a single
        ``"cuda"`` entry is added. Every entry has ``total``, ``free`` and
        ``used`` keys.
    """
    gib = 1024**3
    host = psutil.virtual_memory()
    # NOTE(review): positional fields 0/1/3 are presumably total/available/
    # used per psutil's svmem layout — confirm against the psutil docs.
    report = {
        "cpu": {
            "total": host[0] / gib,
            "free": host[1] / gib,
            "used": host[3] / gib,
        }
    }
    if self._device == "cpu":
        return report

    def cuda_usage(device):
        # mem_get_info yields (free, total) in bytes.
        free_bytes, total_bytes = torch.cuda.mem_get_info(device)
        total = total_bytes / gib
        free = free_bytes / gib
        return {"total": total, "free": free, "used": total - free}

    if isinstance(self._device, list):
        for device in self._device:
            report[f"cuda:{device}"] = cuda_usage(device)
    else:
        report["cuda"] = cuda_usage(self._device)
    return report

BRIAFastAPI¤

Tip

core/fastapi/bria is the section for configuration of BRIAFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/bria.py
80
81
82
83
84
85
86
87
88
89
90
def __init__(self, config: Config):
    """Bind the config and register this service's HTTP routes.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/bria`` and the ``router`` option is read from it.
    """
    self.config = config
    config.set_default_section("core/fastapi/bria")
    prefix = config.getoption("router", "/core/fastapi/bria")
    self._pipe = None  # no pipeline until start() assigns one
    self._lock = asyncio.Lock()
    self._router = APIRouter(prefix=prefix)
    routes = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in routes:
        self._router.add_api_route(path, handler, methods=[verb])

config instance-attribute ¤

config = config

_pipe instance-attribute ¤

_pipe = None

_router instance-attribute ¤

_router = APIRouter(prefix=router)

_lock instance-attribute ¤

_lock = Lock()

router property ¤

router

start ¤

start()
Source code in src/unitorch/cli/fastapis/bria.py
 96
 97
 98
 99
100
101
def start(self):
    """Build the segmentation pipeline from the stored config.

    Returns:
        The string ``"start success"``.
    """
    weight_url = "https://huggingface.co/datasets/fuliucansheng/hubfiles/resolve/main/bria_rmbg2.0_pytorch_model.bin"
    pipeline = BRIAForSegmentationPipeline.from_config(
        self.config,
        pretrained_weight_path=weight_url,
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()
Source code in src/unitorch/cli/fastapis/bria.py
103
104
105
106
107
108
109
def stop(self):
    """Release the loaded pipeline and clear the CUDA cache.

    Safe to call when no pipeline is loaded (e.g. stop requested before
    start): the move to CPU is skipped and only the cleanup steps run.

    Returns:
        The string ``"stop success"``.
    """
    pipe = self._pipe
    if pipe is not None:
        # Move the pipeline to CPU before dropping it.
        pipe.to("cpu")
    # Drop both references before collecting so the object can be reclaimed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()
Source code in src/unitorch/cli/fastapis/bria.py
111
112
def status(self):
    """Return ``"running"`` if a pipeline is loaded, else ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate async ¤

generate(image: UploadFile, threshold: float = 0.5)
Source code in src/unitorch/cli/fastapis/bria.py
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
async def generate(
    self,
    image: UploadFile,
    threshold: float = 0.5,
):
    """Run the loaded pipeline on an uploaded image and return a PNG.

    Args:
        image: Uploaded file; its bytes are opened with ``Image.open``.
        threshold: Forwarded to the pipeline unchanged.

    Returns:
        A ``StreamingResponse`` with media type ``image/png`` carrying the
        pipeline output saved as PNG.

    Raises:
        AssertionError: If no pipeline is loaded (``self._pipe`` is None).
    """
    assert self._pipe is not None
    payload = await image.read()
    picture = Image.open(io.BytesIO(payload))
    # Hold the lock so only one request drives the pipeline at a time.
    async with self._lock:
        mask = self._pipe(picture, threshold=threshold)

    encoded = io.BytesIO()
    mask.save(encoded, format="PNG")
    png_bytes = encoded.getvalue()

    return StreamingResponse(io.BytesIO(png_bytes), media_type="image/png")

ClipForClassificationFastAPI¤

Tip

core/fastapi/clip is the section for configuration of ClipForClassificationFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/clip.py
638
639
640
641
642
643
644
645
646
647
648
def __init__(self, config: Config):
    """Bind the config and register this service's HTTP routes.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/clip`` and the ``router`` option is read from it.
    """
    self.config = config
    config.set_default_section("core/fastapi/clip")
    prefix = config.getoption("router", "/core/fastapi/clip")
    self._pipe = None  # no pipeline until start() assigns one
    self._lock = asyncio.Lock()
    self._router = APIRouter(prefix=prefix)
    routes = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in routes:
        self._router.add_api_route(path, handler, methods=[verb])

config instance-attribute ¤

config = config

_pipe instance-attribute ¤

_pipe = None

_router instance-attribute ¤

_router = APIRouter(prefix=router)

_lock instance-attribute ¤

_lock = Lock()

router property ¤

router

start ¤

start(pretrained_name: str = 'clip-vit-base-patch16')
Source code in src/unitorch/cli/fastapis/clip.py
654
655
656
657
658
659
def start(self, pretrained_name: str = "clip-vit-base-patch16"):
    """Build the classification pipeline from the stored config.

    Args:
        pretrained_name: Forwarded to ``from_config`` unchanged.

    Returns:
        The string ``"start success"``.
    """
    pipeline = ClipForClassificationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()
Source code in src/unitorch/cli/fastapis/clip.py
661
662
663
664
665
666
667
def stop(self):
    """Release the loaded pipeline and clear the CUDA cache.

    Safe to call when no pipeline is loaded (e.g. stop requested before
    start): the move to CPU is skipped and only the cleanup steps run.

    Returns:
        The string ``"stop success"``.
    """
    pipe = self._pipe
    if pipe is not None:
        # Move the pipeline to CPU before dropping it.
        pipe.to("cpu")
    # Drop both references before collecting so the object can be reclaimed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()
Source code in src/unitorch/cli/fastapis/clip.py
669
670
def status(self):
    """Return ``"running"`` if a pipeline is loaded, else ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate async ¤

generate(
    text: str,
    image: UploadFile,
    max_seq_length: Optional[int] = 512,
)
Source code in src/unitorch/cli/fastapis/clip.py
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
async def generate(
    self,
    text: str,
    image: UploadFile,
    max_seq_length: Optional[int] = 512,
):
    """Run the loaded pipeline on a text / uploaded-image pair.

    Args:
        text: Passed to the pipeline as its first argument.
        image: Uploaded file; its bytes are opened with ``Image.open``.
        max_seq_length: Forwarded to the pipeline unchanged.

    Returns:
        Whatever the pipeline call returns.

    Raises:
        AssertionError: If no pipeline is loaded (``self._pipe`` is None).
    """
    assert self._pipe is not None
    payload = await image.read()
    picture = Image.open(io.BytesIO(payload))
    # Hold the lock so only one request drives the pipeline at a time.
    async with self._lock:
        outcome = self._pipe(text, picture, max_seq_length=max_seq_length)
    return outcome

ClipForTextClassificationFastAPI¤

Tip

core/fastapi/clip/text is the section for configuration of ClipForTextClassificationFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/clip.py
693
694
695
696
697
698
699
700
701
702
703
def __init__(self, config: Config):
    """Bind the config and register this service's HTTP routes.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/clip/text`` and the ``router`` option is read
            from it.
    """
    self.config = config
    config.set_default_section("core/fastapi/clip/text")
    prefix = config.getoption("router", "/core/fastapi/clip/text")
    self._pipe = None  # no pipeline until start() assigns one
    self._lock = asyncio.Lock()
    self._router = APIRouter(prefix=prefix)
    routes = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in routes:
        self._router.add_api_route(path, handler, methods=[verb])

config instance-attribute ¤

config = config

_pipe instance-attribute ¤

_pipe = None

_router instance-attribute ¤

_router = APIRouter(prefix=router)

_lock instance-attribute ¤

_lock = Lock()

router property ¤

router

start ¤

start(pretrained_name: str = 'clip-vit-base-patch16')
Source code in src/unitorch/cli/fastapis/clip.py
709
710
711
712
713
714
def start(self, pretrained_name: str = "clip-vit-base-patch16"):
    """Build the text-classification pipeline from the stored config.

    Args:
        pretrained_name: Forwarded to ``from_config`` unchanged.

    Returns:
        The string ``"start success"``.
    """
    pipeline = ClipForTextClassificationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()
Source code in src/unitorch/cli/fastapis/clip.py
716
717
718
719
720
721
722
def stop(self):
    """Release the loaded pipeline and clear the CUDA cache.

    Safe to call when no pipeline is loaded (e.g. stop requested before
    start): the move to CPU is skipped and only the cleanup steps run.

    Returns:
        The string ``"stop success"``.
    """
    pipe = self._pipe
    if pipe is not None:
        # Move the pipeline to CPU before dropping it.
        pipe.to("cpu")
    # Drop both references before collecting so the object can be reclaimed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()
Source code in src/unitorch/cli/fastapis/clip.py
724
725
def status(self):
    """Return ``"running"`` if a pipeline is loaded, else ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate async ¤

generate(text: str, max_seq_length: Optional[int] = 512)
Source code in src/unitorch/cli/fastapis/clip.py
727
728
729
730
731
732
733
734
735
736
737
738
739
async def generate(
    self,
    text: str,
    max_seq_length: Optional[int] = 512,
):
    """Run the loaded pipeline on a text input.

    Args:
        text: Passed to the pipeline as its first argument.
        max_seq_length: Forwarded to the pipeline unchanged.

    Returns:
        Whatever the pipeline call returns.

    Raises:
        AssertionError: If no pipeline is loaded (``self._pipe`` is None).
    """
    assert self._pipe is not None
    # Hold the lock so only one request drives the pipeline at a time.
    async with self._lock:
        outcome = self._pipe(text, max_seq_length=max_seq_length)
    return outcome

ClipForImageClassificationFastAPI¤

Tip

core/fastapi/clip/image is the section for configuration of ClipForImageClassificationFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/clip.py
744
745
746
747
748
749
750
751
752
753
754
def __init__(self, config: Config):
    """Bind the config and register this service's HTTP routes.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/clip/image`` and the ``router`` option is read
            from it.
    """
    self.config = config
    config.set_default_section("core/fastapi/clip/image")
    prefix = config.getoption("router", "/core/fastapi/clip/image")
    self._pipe = None  # no pipeline until start() assigns one
    self._lock = asyncio.Lock()
    self._router = APIRouter(prefix=prefix)
    routes = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in routes:
        self._router.add_api_route(path, handler, methods=[verb])

config instance-attribute ¤

config = config

_pipe instance-attribute ¤

_pipe = None

_router instance-attribute ¤

_router = APIRouter(prefix=router)

_lock instance-attribute ¤

_lock = Lock()

router property ¤

router

start ¤

start(pretrained_name: str = 'clip-vit-base-patch16')
Source code in src/unitorch/cli/fastapis/clip.py
760
761
762
763
764
765
def start(self, pretrained_name: str = "clip-vit-base-patch16"):
    """Build the image-classification pipeline from the stored config.

    Args:
        pretrained_name: Forwarded to ``from_config`` unchanged.

    Returns:
        The string ``"start success"``.
    """
    pipeline = ClipForImageClassificationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()
Source code in src/unitorch/cli/fastapis/clip.py
767
768
769
770
771
772
773
def stop(self):
    """Release the loaded pipeline and clear the CUDA cache.

    Safe to call when no pipeline is loaded (e.g. stop requested before
    start): the move to CPU is skipped and only the cleanup steps run.

    Returns:
        The string ``"stop success"``.
    """
    pipe = self._pipe
    if pipe is not None:
        # Move the pipeline to CPU before dropping it.
        pipe.to("cpu")
    # Drop both references before collecting so the object can be reclaimed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()
Source code in src/unitorch/cli/fastapis/clip.py
775
776
def status(self):
    """Return ``"running"`` if a pipeline is loaded, else ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate async ¤

generate(image: UploadFile)
Source code in src/unitorch/cli/fastapis/clip.py
778
779
780
781
782
783
784
785
786
787
788
async def generate(
    self,
    image: UploadFile,
):
    """Run the loaded pipeline on an uploaded image.

    Args:
        image: Uploaded file; its bytes are opened with ``Image.open``.

    Returns:
        Whatever the pipeline call returns.

    Raises:
        AssertionError: If no pipeline is loaded (``self._pipe`` is None).
    """
    assert self._pipe is not None
    payload = await image.read()
    picture = Image.open(io.BytesIO(payload))
    # Hold the lock so only one request drives the pipeline at a time.
    async with self._lock:
        outcome = self._pipe(picture)
    return outcome

ClipForMatchingFastAPI¤

Tip

core/fastapi/clip/matching is the section for configuration of ClipForMatchingFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/clip.py
793
794
795
796
797
798
799
800
801
802
803
def __init__(self, config: Config):
    """Bind the config and register this service's HTTP routes.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/clip/matching`` and the ``router`` option is read
            from it.
    """
    self.config = config
    config.set_default_section("core/fastapi/clip/matching")
    prefix = config.getoption("router", "/core/fastapi/clip/matching")
    self._pipe = None  # no pipeline until start() assigns one
    self._lock = asyncio.Lock()
    self._router = APIRouter(prefix=prefix)
    routes = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in routes:
        self._router.add_api_route(path, handler, methods=[verb])

config instance-attribute ¤

config = config

_pipe instance-attribute ¤

_pipe = None

_router instance-attribute ¤

_router = APIRouter(prefix=router)

_lock instance-attribute ¤

_lock = Lock()

router property ¤

router

start ¤

start(pretrained_name: str = 'clip-vit-base-patch16')
Source code in src/unitorch/cli/fastapis/clip.py
809
810
811
812
813
814
def start(self, pretrained_name: str = "clip-vit-base-patch16"):
    """Build the matching pipeline from the stored config.

    Args:
        pretrained_name: Forwarded to ``from_config`` unchanged.

    Returns:
        The string ``"start success"``.
    """
    pipeline = ClipForMatchingPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()
Source code in src/unitorch/cli/fastapis/clip.py
816
817
818
819
820
821
822
def stop(self):
    """Release the loaded pipeline and clear the CUDA cache.

    Safe to call when no pipeline is loaded (e.g. stop requested before
    start): the move to CPU is skipped and only the cleanup steps run.

    Returns:
        The string ``"stop success"``.
    """
    pipe = self._pipe
    if pipe is not None:
        # Move the pipeline to CPU before dropping it.
        pipe.to("cpu")
    # Drop both references before collecting so the object can be reclaimed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()
Source code in src/unitorch/cli/fastapis/clip.py
824
825
def status(self):
    """Return ``"running"`` if a pipeline is loaded, else ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate async ¤

generate(
    text: str,
    image: UploadFile,
    max_seq_length: Optional[int] = 77,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
)
Source code in src/unitorch/cli/fastapis/clip.py
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
async def generate(
    self,
    text: str,
    image: UploadFile,
    max_seq_length: Optional[int] = 77,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
):
    """Run the loaded pipeline on a text / uploaded-image pair.

    Args:
        text: Passed to the pipeline as its first argument.
        image: Uploaded file; its bytes are opened with ``Image.open``.
        max_seq_length: Forwarded to the pipeline unchanged.
        lora_checkpoints: Forwarded to the pipeline unchanged.
        lora_weights: Forwarded to the pipeline unchanged.
        lora_alphas: Forwarded to the pipeline unchanged.
        lora_urls: Forwarded to the pipeline unchanged.
        lora_files: Forwarded to the pipeline unchanged.

    Returns:
        Whatever the pipeline call returns.

    Raises:
        AssertionError: If no pipeline is loaded (``self._pipe`` is None).
    """
    assert self._pipe is not None
    payload = await image.read()
    picture = Image.open(io.BytesIO(payload))
    options = dict(
        max_seq_length=max_seq_length,
        lora_checkpoints=lora_checkpoints,
        lora_weights=lora_weights,
        lora_alphas=lora_alphas,
        lora_urls=lora_urls,
        lora_files=lora_files,
    )
    # Hold the lock so only one request drives the pipeline at a time.
    async with self._lock:
        outcome = self._pipe(text, picture, **options)
    return outcome

DetrForDetectionFastAPI¤

Tip

core/fastapi/detr is the section for configuration of DetrForDetectionFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/detr.py
153
154
155
156
157
158
159
160
161
162
163
def __init__(self, config: Config):
    """Bind the config and register this service's HTTP routes.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/detr`` and the ``router`` option is read from it.
    """
    self.config = config
    config.set_default_section("core/fastapi/detr")
    prefix = config.getoption("router", "/core/fastapi/detr")
    self._pipe = None  # no pipeline until start() assigns one
    self._lock = asyncio.Lock()
    self._router = APIRouter(prefix=prefix)
    routes = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in routes:
        self._router.add_api_route(path, handler, methods=[verb])

config instance-attribute ¤

config = config

_pipe instance-attribute ¤

_pipe = None

_router instance-attribute ¤

_router = APIRouter(prefix=router)

_lock instance-attribute ¤

_lock = Lock()

router property ¤

router

start ¤

start(pretrained_name: Optional[str] = 'detr-resnet-50')
Source code in src/unitorch/cli/fastapis/detr.py
169
170
171
172
173
174
def start(self, pretrained_name: Optional[str] = "detr-resnet-50"):
    """Build the detection pipeline from the stored config.

    Args:
        pretrained_name: Forwarded to ``from_config`` unchanged.

    Returns:
        The string ``"start success"``.
    """
    pipeline = DetrForDetectionPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()
Source code in src/unitorch/cli/fastapis/detr.py
176
177
178
179
180
181
182
def stop(self):
    """Release the loaded pipeline and clear the CUDA cache.

    Safe to call when no pipeline is loaded (e.g. stop requested before
    start): the move to CPU is skipped and only the cleanup steps run.

    Returns:
        The string ``"stop success"``.
    """
    pipe = self._pipe
    if pipe is not None:
        # Move the pipeline to CPU before dropping it.
        pipe.to("cpu")
    # Drop both references before collecting so the object can be reclaimed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()
Source code in src/unitorch/cli/fastapis/detr.py
184
185
def status(self):
    """Return ``"running"`` if a pipeline is loaded, else ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate async ¤

generate(image: UploadFile, threshold: float = 0.5)
Source code in src/unitorch/cli/fastapis/detr.py
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
async def generate(
    self,
    image: UploadFile,
    threshold: float = 0.5,
):
    """Run the loaded pipeline on an uploaded image and return a PNG.

    Args:
        image: Uploaded file; its bytes are opened with ``Image.open``.
        threshold: Forwarded to the pipeline unchanged.

    Returns:
        A ``StreamingResponse`` with media type ``image/png`` carrying the
        pipeline output saved as PNG.

    Raises:
        AssertionError: If no pipeline is loaded (``self._pipe`` is None).
    """
    assert self._pipe is not None
    payload = await image.read()
    picture = Image.open(io.BytesIO(payload))
    # Hold the lock so only one request drives the pipeline at a time.
    async with self._lock:
        rendered = self._pipe(picture, threshold=threshold)

    encoded = io.BytesIO()
    rendered.save(encoded, format="PNG")
    png_bytes = encoded.getvalue()

    return StreamingResponse(io.BytesIO(png_bytes), media_type="image/png")

DPTForDepthEstimationFastAPI¤

Tip

core/fastapi/dpt is the section for configuration of DPTForDepthEstimationFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/dpt.py
132
133
134
135
136
137
138
139
140
141
142
def __init__(self, config: Config):
    """Bind the config and register this service's HTTP routes.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/dpt`` and the ``router`` option is read from it.
    """
    self.config = config
    config.set_default_section("core/fastapi/dpt")
    prefix = config.getoption("router", "/core/fastapi/dpt")
    self._pipe = None  # no pipeline until start() assigns one
    self._lock = asyncio.Lock()
    self._router = APIRouter(prefix=prefix)
    routes = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in routes:
        self._router.add_api_route(path, handler, methods=[verb])

config instance-attribute ¤

config = config

_pipe instance-attribute ¤

_pipe = None

_router instance-attribute ¤

_router = APIRouter(prefix=router)

_lock instance-attribute ¤

_lock = Lock()

router property ¤

router

start ¤

start(pretrained_name: Optional[str] = 'dpt-large')
Source code in src/unitorch/cli/fastapis/dpt.py
148
149
150
151
152
153
def start(self, pretrained_name: Optional[str] = "dpt-large"):
    """Build the depth-estimation pipeline from the stored config.

    Args:
        pretrained_name: Forwarded to ``from_config`` unchanged.

    Returns:
        The string ``"start success"``.
    """
    pipeline = DPTForDepthEstimationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()
Source code in src/unitorch/cli/fastapis/dpt.py
155
156
157
158
159
160
161
def stop(self):
    """Release the loaded pipeline and clear the CUDA cache.

    Safe to call when no pipeline is loaded (e.g. stop requested before
    start): the move to CPU is skipped and only the cleanup steps run.

    Returns:
        The string ``"stop success"``.
    """
    pipe = self._pipe
    if pipe is not None:
        # Move the pipeline to CPU before dropping it.
        pipe.to("cpu")
    # Drop both references before collecting so the object can be reclaimed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()
Source code in src/unitorch/cli/fastapis/dpt.py
163
164
def status(self):
    """Return ``"running"`` if a pipeline is loaded, else ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate async ¤

generate(image: UploadFile)
Source code in src/unitorch/cli/fastapis/dpt.py
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
async def generate(
    self,
    image: UploadFile,
):
    """Run the loaded pipeline on an uploaded image and return a PNG.

    Args:
        image: Uploaded file; its bytes are opened with ``Image.open``.

    Returns:
        A ``StreamingResponse`` with media type ``image/png`` carrying the
        pipeline output saved as PNG.

    Raises:
        AssertionError: If no pipeline is loaded (``self._pipe`` is None).
    """
    assert self._pipe is not None
    payload = await image.read()
    picture = Image.open(io.BytesIO(payload))
    # Hold the lock so only one request drives the pipeline at a time.
    async with self._lock:
        rendered = self._pipe(picture)

    encoded = io.BytesIO()
    rendered.save(encoded, format="PNG")
    png_bytes = encoded.getvalue()

    return StreamingResponse(io.BytesIO(png_bytes), media_type="image/png")

GroundingDinoForDetectionFastAPI¤

Tip

core/fastapi/grounding_dino is the section for configuration of GroundingDinoForDetectionFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/grounding_dino.py
181
182
183
184
185
186
187
188
189
190
191
def __init__(self, config: Config):
    """Bind the config and register this service's HTTP routes.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/grounding_dino`` and the ``router`` option is
            read from it.
    """
    self.config = config
    config.set_default_section("core/fastapi/grounding_dino")
    prefix = config.getoption("router", "/core/fastapi/grounding_dino")
    self._pipe = None  # no pipeline until start() assigns one
    self._lock = asyncio.Lock()
    self._router = APIRouter(prefix=prefix)
    routes = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in routes:
        self._router.add_api_route(path, handler, methods=[verb])

config instance-attribute ¤

config = config

_pipe instance-attribute ¤

_pipe = None

_router instance-attribute ¤

_router = APIRouter(prefix=router)

_lock instance-attribute ¤

_lock = Lock()

router property ¤

router

start ¤

start(
    pretrained_name: Optional[str] = "grounding-dino-tiny",
)
Source code in src/unitorch/cli/fastapis/grounding_dino.py
197
198
199
200
201
202
def start(self, pretrained_name: Optional[str] = "grounding-dino-tiny"):
    """Build the detection pipeline from the stored config.

    Args:
        pretrained_name: Forwarded to ``from_config`` unchanged.

    Returns:
        The string ``"start success"``.
    """
    pipeline = GroundingDinoForDetectionPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()
Source code in src/unitorch/cli/fastapis/grounding_dino.py
204
205
206
207
208
209
210
def stop(self):
    """Release the loaded pipeline and clear the CUDA cache.

    Safe to call when no pipeline is loaded (e.g. stop requested before
    start): the move to CPU is skipped and only the cleanup steps run.

    Returns:
        The string ``"stop success"``.
    """
    pipe = self._pipe
    if pipe is not None:
        # Move the pipeline to CPU before dropping it.
        pipe.to("cpu")
    # Drop both references before collecting so the object can be reclaimed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()
Source code in src/unitorch/cli/fastapis/grounding_dino.py
212
213
def status(self):
    """Return ``"running"`` if a pipeline is loaded, else ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate async ¤

generate(
    text: str,
    image: UploadFile,
    text_threshold: float = 0.25,
    box_threshold: float = 0.25,
)
Source code in src/unitorch/cli/fastapis/grounding_dino.py
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
async def generate(
    self,
    text: str,
    image: UploadFile,
    text_threshold: float = 0.25,
    box_threshold: float = 0.25,
):
    """Run the loaded pipeline on a text / uploaded-image pair; return a PNG.

    Args:
        text: Passed to the pipeline as its first argument.
        image: Uploaded file; its bytes are opened with ``Image.open``.
        text_threshold: Forwarded to the pipeline unchanged.
        box_threshold: Forwarded to the pipeline unchanged.

    Returns:
        A ``StreamingResponse`` with media type ``image/png`` carrying the
        pipeline output saved as PNG.

    Raises:
        AssertionError: If no pipeline is loaded (``self._pipe`` is None).
    """
    assert self._pipe is not None
    payload = await image.read()
    picture = Image.open(io.BytesIO(payload))
    thresholds = dict(
        text_threshold=text_threshold,
        box_threshold=box_threshold,
    )
    # Hold the lock so only one request drives the pipeline at a time.
    async with self._lock:
        rendered = self._pipe(text, picture, **thresholds)

    encoded = io.BytesIO()
    rendered.save(encoded, format="PNG")
    png_bytes = encoded.getvalue()

    return StreamingResponse(io.BytesIO(png_bytes), media_type="image/png")

LlamaForGenerationFastAPI¤

Tip

core/fastapi/llama is the section for configuration of LlamaForGenerationFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/llama.py
268
269
270
271
272
273
274
275
276
277
278
def __init__(self, config: Config):
    """Bind the config and register this service's HTTP routes.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/llama`` and the ``router`` option is read from it.
    """
    self.config = config
    config.set_default_section("core/fastapi/llama")
    prefix = config.getoption("router", "/core/fastapi/llama")
    self._pipe = None  # no pipeline until start() assigns one
    self._lock = asyncio.Lock()
    self._router = APIRouter(prefix=prefix)
    routes = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in routes:
        self._router.add_api_route(path, handler, methods=[verb])

config instance-attribute ¤

config = config

_pipe instance-attribute ¤

_pipe = None

_router instance-attribute ¤

_router = APIRouter(prefix=router)

_lock instance-attribute ¤

_lock = Lock()

router property ¤

router

start ¤

start(pretrained_name: str = 'llama-3.2-1b-instruct')
Source code in src/unitorch/cli/fastapis/llama.py
284
285
286
287
288
289
def start(self, pretrained_name: str = "llama-3.2-1b-instruct"):
    """Build the generation pipeline from the stored config.

    Args:
        pretrained_name: Forwarded to ``from_config`` unchanged.

    Returns:
        The string ``"start success"``.
    """
    pipeline = LlamaForGenerationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()
Source code in src/unitorch/cli/fastapis/llama.py
291
292
293
294
295
296
297
def stop(self):
    """Release the loaded pipeline and clear the CUDA cache.

    Safe to call when no pipeline is loaded (e.g. stop requested before
    start): the move to CPU is skipped and only the cleanup steps run.

    Returns:
        The string ``"stop success"``.
    """
    pipe = self._pipe
    if pipe is not None:
        # Move the pipeline to CPU before dropping it.
        pipe.to("cpu")
    # Drop both references before collecting so the object can be reclaimed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()
Source code in src/unitorch/cli/fastapis/llama.py
299
300
def status(self):
    """Return ``"running"`` if a pipeline is loaded, else ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate async ¤

generate(
    prompt: str,
    max_seq_length: Optional[int] = 512,
    num_beams: Optional[int] = 2,
    decoder_start_token_id: Optional[int] = 1,
    decoder_end_token_id: Optional[
        Union[int, List[int]]
    ] = [2],
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
)
Source code in src/unitorch/cli/fastapis/llama.py
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
async def generate(
    self,
    prompt: str,
    max_seq_length: Optional[int] = 512,
    num_beams: Optional[int] = 2,
    decoder_start_token_id: Optional[int] = 1,
    decoder_end_token_id: Optional[Union[int, List[int]]] = [2],
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
):
    """Run the loaded pipeline on a prompt.

    Args:
        prompt: Passed to the pipeline as its first argument.

    Every other parameter is forwarded to the pipeline unchanged as a
    keyword argument of the same name.

    Returns:
        Whatever the pipeline call returns.

    Raises:
        AssertionError: If no pipeline is loaded (``self._pipe`` is None).
    """
    assert self._pipe is not None
    options = dict(
        max_seq_length=max_seq_length,
        num_beams=num_beams,
        decoder_start_token_id=decoder_start_token_id,
        decoder_end_token_id=decoder_end_token_id,
        num_return_sequences=num_return_sequences,
        min_gen_seq_length=min_gen_seq_length,
        max_gen_seq_length=max_gen_seq_length,
        repetition_penalty=repetition_penalty,
        no_repeat_ngram_size=no_repeat_ngram_size,
        early_stopping=early_stopping,
        length_penalty=length_penalty,
        num_beam_groups=num_beam_groups,
        diversity_penalty=diversity_penalty,
        do_sample=do_sample,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        lora_checkpoints=lora_checkpoints,
        lora_weights=lora_weights,
        lora_alphas=lora_alphas,
        lora_urls=lora_urls,
        lora_files=lora_files,
    )
    # Hold the lock so only one request drives the pipeline at a time.
    async with self._lock:
        outcome = self._pipe(prompt, **options)
    return outcome

LlavaMistralClipFastAPI¤

Tip

core/fastapi/llava/mistral_clip is the section for configuration of LlavaMistralClipFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/llava.py
544
545
546
547
548
549
550
551
552
553
554
def __init__(self, config: Config):
    """Bind the config and register this service's HTTP routes.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/llava/mistral_clip`` and the ``router`` option is
            read from it.
    """
    self.config = config
    config.set_default_section("core/fastapi/llava/mistral_clip")
    prefix = config.getoption("router", "/core/fastapi/llava/mistral_clip")
    self._pipe = None  # no pipeline until start() assigns one
    self._lock = asyncio.Lock()
    self._router = APIRouter(prefix=prefix)
    routes = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in routes:
        self._router.add_api_route(path, handler, methods=[verb])

config instance-attribute ¤

config = config

_pipe instance-attribute ¤

_pipe = None

_router instance-attribute ¤

_router = APIRouter(prefix=router)

_lock instance-attribute ¤

_lock = Lock()

router property ¤

router

start ¤

start()
Source code in src/unitorch/cli/fastapis/llava.py
560
561
562
563
564
565
def start(self):
    """Build the generation pipeline from the stored config.

    The pretrained name is fixed to ``llava-v1.6-mistral-7b-hf``.

    Returns:
        The string ``"start success"``.
    """
    pipeline = LlavaMistralClipForGenerationPipeline.from_config(
        self.config, pretrained_name="llava-v1.6-mistral-7b-hf"
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()
Source code in src/unitorch/cli/fastapis/llava.py
567
568
569
570
571
572
573
def stop(self):
    """Release the loaded pipeline and clear the CUDA cache.

    Safe to call when no pipeline is loaded (e.g. stop requested before
    start): the move to CPU is skipped and only the cleanup steps run.

    Returns:
        The string ``"stop success"``.
    """
    pipe = self._pipe
    if pipe is not None:
        # Move the pipeline to CPU before dropping it.
        pipe.to("cpu")
    # Drop both references before collecting so the object can be reclaimed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()
Source code in src/unitorch/cli/fastapis/llava.py
575
576
def status(self):
    """Return ``"running"`` if a pipeline is loaded, else ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate async ¤

generate(text: str, image: UploadFile)
Source code in src/unitorch/cli/fastapis/llava.py
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
async def generate(
    self,
    text: str,
    image: UploadFile,
):
    """Run the loaded pipeline on an instruction / uploaded-image pair.

    Args:
        text: User text; it is wrapped in an ``[INST] ... [/INST]`` template
            containing an ``<image>`` placeholder before being passed on.
        image: Uploaded file; its bytes are opened with ``Image.open``.

    Returns:
        Whatever the pipeline call returns.

    Raises:
        AssertionError: If no pipeline is loaded (``self._pipe`` is None).
    """
    assert self._pipe is not None
    payload = await image.read()
    picture = Image.open(io.BytesIO(payload))
    prompt = f"[INST] <image>\n {text} [/INST]"
    # Every LoRA option is passed as an empty list.
    lora_options = dict(
        lora_checkpoints=[],
        lora_weights=[],
        lora_alphas=[],
        lora_urls=[],
        lora_files=[],
    )
    # Hold the lock so only one request drives the pipeline at a time.
    async with self._lock:
        caption = self._pipe(prompt, picture, **lora_options)
    return caption

LlavaLlamaSiglipFastAPI¤

Tip

core/fastapi/llava/joycaption2 is the section for configuration of LlavaLlamaSiglipFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/llava.py
603
604
605
606
607
608
609
610
611
612
613
def __init__(self, config: Config):
    """Bind the config and register this service's HTTP routes.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/llava/joycaption2`` and the ``router`` option is
            read from it.
    """
    self.config = config
    config.set_default_section("core/fastapi/llava/joycaption2")
    prefix = config.getoption("router", "/core/fastapi/llava/joycaption2")
    self._pipe = None  # no pipeline until start() assigns one
    self._lock = asyncio.Lock()
    self._router = APIRouter(prefix=prefix)
    routes = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in routes:
        self._router.add_api_route(path, handler, methods=[verb])

config instance-attribute ¤

config = config

_pipe instance-attribute ¤

_pipe = None

_router instance-attribute ¤

_router = APIRouter(prefix=router)

_lock instance-attribute ¤

_lock = Lock()

router property ¤

router

start ¤

start()
Source code in src/unitorch/cli/fastapis/llava.py
619
620
621
622
623
624
def start(self):
    """Build the generation pipeline from the stored config.

    The pretrained name is fixed to ``llava-v1.6-joycaption-2``.

    Returns:
        The string ``"start success"``.
    """
    pipeline = LlavaLlamaSiglipForGenerationPipeline.from_config(
        self.config, pretrained_name="llava-v1.6-joycaption-2"
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()
Source code in src/unitorch/cli/fastapis/llava.py
626
627
628
629
630
631
632
def stop(self):
    """Release the loaded pipeline and clear the CUDA cache.

    Safe to call when no pipeline is loaded (e.g. stop requested before
    start): the move to CPU is skipped and only the cleanup steps run.

    Returns:
        The string ``"stop success"``.
    """
    pipe = self._pipe
    if pipe is not None:
        # Move the pipeline to CPU before dropping it.
        pipe.to("cpu")
    # Drop both references before collecting so the object can be reclaimed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()
Source code in src/unitorch/cli/fastapis/llava.py
634
635
def status(self):
    """Report whether a pipeline is currently set on the instance.

    Returns:
        ``"running"`` when ``self._pipe`` is not ``None``, else ``"stopped"``.
    """
    if self._pipe is None:
        return "stopped"
    return "running"

generate async ¤

generate(text: str, image: UploadFile)
Source code in src/unitorch/cli/fastapis/llava.py
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
async def generate(
    self,
    text: str,
    image: UploadFile,
):
    """Caption an uploaded image with the started pipeline.

    The user text is wrapped in a chat prompt (system turn, user turn,
    then an open assistant turn) and passed to the pipeline together with
    the decoded image. All LoRA arguments are passed as empty lists.

    Args:
        text: Instruction inserted into the user turn of the prompt.
        image: Uploaded image file; its bytes are opened with PIL.

    Returns:
        Whatever the pipeline call returns.

    Raises:
        AssertionError: If no pipeline has been started.
    """
    assert self._pipe is not None
    image_bytes = await image.read()
    image = Image.open(io.BytesIO(image_bytes))
    # Fix: the user turn must be closed with "<|eot_id|>"; the original
    # emitted "|eot_id|>" (missing "<"), unlike the system turn above it.
    # NOTE(review): the "\\n" sequences are literal backslash-n characters,
    # not newlines. They are kept as-is here; confirm this is intended.
    text = (
        "<|start_header_id|>system<|end_header_id|>\\n\\n"
        "Cutting Knowledge Date: December 2023\\n"
        "Today Date: 26 July 2024\\n\\n"
        "You are a helpful image captioner.<|eot_id|>"
        "<|start_header_id|>user<|end_header_id|>\\n\\n"
        "<|reserved_special_token_70|><|reserved_special_token_69|>"
        "<|reserved_special_token_71|>"
        f"{text}<|eot_id|>"
        "<|start_header_id|>assistant<|end_header_id|>\\n\\n"
    )
    # Only one pipeline call runs at a time.
    async with self._lock:
        caption = self._pipe(
            text,
            image,
            lora_checkpoints=[],
            lora_weights=[],
            lora_alphas=[],
            lora_urls=[],
            lora_files=[],
        )

    return caption

Mask2FormerForSegmentationFastAPI¤

Tip

core/fastapi/mask2former is the section for configuration of Mask2FormerForSegmentationFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/mask2former.py
143
144
145
146
147
148
149
150
151
152
153
def __init__(self, config: Config):
    """Create the router, endpoints and lock for the Mask2Former service.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/mask2former`` before the ``router`` option
            (the URL prefix) is read from it.
    """
    self.config = config
    config.set_default_section("core/fastapi/mask2former")
    prefix = config.getoption("router", "/core/fastapi/mask2former")
    # The pipeline is not built at construction time.
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    endpoints = (
        ("/generate", self.generate, ["POST"]),
        ("/status", self.status, ["GET"]),
        ("/start", self.start, ["GET"]),
        ("/stop", self.stop, ["GET"]),
    )
    for path, handler, methods in endpoints:
        self._router.add_api_route(path, handler, methods=methods)
    # Held by ``generate`` around each pipeline call.
    self._lock = asyncio.Lock()

config instance-attribute ¤

config = config

_pipe instance-attribute ¤

_pipe = None

_router instance-attribute ¤

_router = APIRouter(prefix=router)

_lock instance-attribute ¤

_lock = Lock()

router property ¤

router

start ¤

start(
    pretrained_name: Optional[
        str
    ] = "mask2former-swin-tiny-ade-semantic",
)
Source code in src/unitorch/cli/fastapis/mask2former.py
159
160
161
162
163
164
165
166
def start(
    self, pretrained_name: Optional[str] = "mask2former-swin-tiny-ade-semantic"
):
    """Build the Mask2Former segmentation pipeline from the stored config.

    Args:
        pretrained_name: Name forwarded to ``from_config`` as
            ``pretrained_name``.

    Returns:
        The string ``"start success"``.
    """
    pipeline = Mask2FormerForSegmentationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()
Source code in src/unitorch/cli/fastapis/mask2former.py
168
169
170
171
172
173
174
def stop(self):
    """Release the loaded pipeline and free cached memory.

    The pipeline, if any, is moved to the CPU and the reference to it is
    dropped; then the garbage collector is run and the CUDA cache is
    emptied. Calling this when no pipeline is loaded (before ``start`` or
    twice in a row) no longer raises ``AttributeError`` on ``None``.

    Returns:
        The string ``"stop success"``.
    """
    if self._pipe is not None:
        self._pipe.to("cpu")
        # Drop the reference before collecting so the object can be freed.
        self._pipe = None
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()
Source code in src/unitorch/cli/fastapis/mask2former.py
176
177
def status(self):
    """Report whether a pipeline is currently set on the instance.

    Returns:
        ``"running"`` when ``self._pipe`` is not ``None``, else ``"stopped"``.
    """
    if self._pipe is None:
        return "stopped"
    return "running"

generate async ¤

generate(image: UploadFile)
Source code in src/unitorch/cli/fastapis/mask2former.py
179
180
181
182
183
184
185
186
187
188
189
async def generate(
    self,
    image: UploadFile,
):
    """Segment an uploaded image with the started pipeline.

    Args:
        image: Uploaded image file; its bytes are opened with PIL.

    Returns:
        A list of ``(mask, label)`` tuples, one per pipeline result, with
        each mask converted through ``.tolist()``.

    Raises:
        AssertionError: If no pipeline has been started.
    """
    assert self._pipe is not None
    raw = await image.read()
    picture = Image.open(io.BytesIO(raw))
    # Only one pipeline call runs at a time.
    async with self._lock:
        segments = self._pipe(picture)

    response = []
    for mask, label in segments:
        response.append((mask.tolist(), label))
    return response

MistralForGenerationFastAPI¤

Tip

core/fastapi/mistral is the section for configuration of MistralForGenerationFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/mistral.py
270
271
272
273
274
275
276
277
278
279
280
def __init__(self, config: Config):
    """Create the router, endpoints and lock for the Mistral service.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/mistral`` before the ``router`` option (the
            URL prefix) is read from it.
    """
    self.config = config
    config.set_default_section("core/fastapi/mistral")
    prefix = config.getoption("router", "/core/fastapi/mistral")
    # The pipeline is not built at construction time.
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    endpoints = (
        ("/generate", self.generate, ["POST"]),
        ("/status", self.status, ["GET"]),
        ("/start", self.start, ["GET"]),
        ("/stop", self.stop, ["GET"]),
    )
    for path, handler, methods in endpoints:
        self._router.add_api_route(path, handler, methods=methods)
    # Held by ``generate`` around each pipeline call.
    self._lock = asyncio.Lock()

config instance-attribute ¤

config = config

_pipe instance-attribute ¤

_pipe = None

_router instance-attribute ¤

_router = APIRouter(prefix=router)

_lock instance-attribute ¤

_lock = Lock()

router property ¤

router

start ¤

start(pretrained_name: str = 'mistral-7b-instruct-v0.1')
Source code in src/unitorch/cli/fastapis/mistral.py
286
287
288
289
290
291
def start(self, pretrained_name: str = "mistral-7b-instruct-v0.1"):
    """Build the Mistral generation pipeline from the stored config.

    Args:
        pretrained_name: Name forwarded to ``from_config`` as
            ``pretrained_name``.

    Returns:
        The string ``"start success"``.
    """
    pipeline = MistralForGenerationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()
Source code in src/unitorch/cli/fastapis/mistral.py
293
294
295
296
297
298
299
def stop(self):
    """Release the loaded pipeline and free cached memory.

    The pipeline, if any, is moved to the CPU and the reference to it is
    dropped; then the garbage collector is run and the CUDA cache is
    emptied. Calling this when no pipeline is loaded (before ``start`` or
    twice in a row) no longer raises ``AttributeError`` on ``None``.

    Returns:
        The string ``"stop success"``.
    """
    if self._pipe is not None:
        self._pipe.to("cpu")
        # Drop the reference before collecting so the object can be freed.
        self._pipe = None
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()
Source code in src/unitorch/cli/fastapis/mistral.py
301
302
def status(self):
    """Report whether a pipeline is currently set on the instance.

    Returns:
        ``"running"`` when ``self._pipe`` is not ``None``, else ``"stopped"``.
    """
    if self._pipe is None:
        return "stopped"
    return "running"

generate async ¤

generate(
    prompt: str,
    max_seq_length: Optional[int] = 512,
    num_beams: Optional[int] = 2,
    decoder_start_token_id: Optional[int] = 1,
    decoder_end_token_id: Optional[
        Union[int, List[int]]
    ] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
)
Source code in src/unitorch/cli/fastapis/mistral.py
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
async def generate(
    self,
    prompt: str,
    max_seq_length: Optional[int] = 512,
    num_beams: Optional[int] = 2,
    decoder_start_token_id: Optional[int] = 1,
    decoder_end_token_id: Optional[Union[int, List[int]]] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
):
    """Run the started pipeline on a prompt.

    Every argument other than ``prompt`` is forwarded unchanged to the
    pipeline as a keyword argument of the same name.

    Args:
        prompt: Passed to the pipeline as its only positional argument.

    Returns:
        Whatever the pipeline call returns.

    Raises:
        AssertionError: If no pipeline has been started.
    """
    assert self._pipe is not None
    generation_kwargs = dict(
        max_seq_length=max_seq_length,
        num_beams=num_beams,
        decoder_start_token_id=decoder_start_token_id,
        decoder_end_token_id=decoder_end_token_id,
        num_return_sequences=num_return_sequences,
        min_gen_seq_length=min_gen_seq_length,
        max_gen_seq_length=max_gen_seq_length,
        repetition_penalty=repetition_penalty,
        no_repeat_ngram_size=no_repeat_ngram_size,
        early_stopping=early_stopping,
        length_penalty=length_penalty,
        num_beam_groups=num_beam_groups,
        diversity_penalty=diversity_penalty,
        do_sample=do_sample,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        lora_checkpoints=lora_checkpoints,
        lora_weights=lora_weights,
        lora_alphas=lora_alphas,
        lora_urls=lora_urls,
        lora_files=lora_files,
    )
    # Only one pipeline call runs at a time.
    async with self._lock:
        output = self._pipe(prompt, **generation_kwargs)

    return output

QWen3FastAPI¤

Tip

core/fastapi/qwen3 is the section for configuration of QWen3FastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/qwen.py
267
268
269
270
271
272
273
274
275
276
277
def __init__(self, config: Config):
    """Create the router, endpoints and lock for the QWen3 service.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/qwen3`` before the ``router`` option (the URL
            prefix) is read from it.
    """
    self.config = config
    config.set_default_section("core/fastapi/qwen3")
    prefix = config.getoption("router", "/core/fastapi/qwen3")
    # The pipeline is not built at construction time.
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    endpoints = (
        ("/generate", self.generate, ["POST"]),
        ("/status", self.status, ["GET"]),
        ("/start", self.start, ["GET"]),
        ("/stop", self.stop, ["GET"]),
    )
    for path, handler, methods in endpoints:
        self._router.add_api_route(path, handler, methods=methods)
    # Held by ``generate`` around each pipeline call.
    self._lock = asyncio.Lock()

config instance-attribute ¤

config = config

_pipe instance-attribute ¤

_pipe = None

_router instance-attribute ¤

_router = APIRouter(prefix=router)

_lock instance-attribute ¤

_lock = Lock()

router property ¤

router

start ¤

start(pretrained_name: str = 'qwen3-4b-thinking')
Source code in src/unitorch/cli/fastapis/qwen.py
283
284
285
286
287
288
def start(self, pretrained_name: str = "qwen3-4b-thinking"):
    """Build the QWen3 generation pipeline from the stored config.

    Args:
        pretrained_name: Name forwarded to ``from_config`` as
            ``pretrained_name``.

    Returns:
        The string ``"start success"``.
    """
    pipeline = QWen3ForGenerationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()
Source code in src/unitorch/cli/fastapis/qwen.py
290
291
292
293
294
295
296
def stop(self):
    """Release the loaded pipeline and free cached memory.

    The pipeline, if any, is moved to the CPU and the reference to it is
    dropped; then the garbage collector is run and the CUDA cache is
    emptied. Calling this when no pipeline is loaded (before ``start`` or
    twice in a row) no longer raises ``AttributeError`` on ``None``.

    Returns:
        The string ``"stop success"``.
    """
    if self._pipe is not None:
        self._pipe.to("cpu")
        # Drop the reference before collecting so the object can be freed.
        self._pipe = None
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()
Source code in src/unitorch/cli/fastapis/qwen.py
298
299
def status(self):
    """Report whether a pipeline is currently set on the instance.

    Returns:
        ``"running"`` when ``self._pipe`` is not ``None``, else ``"stopped"``.
    """
    if self._pipe is None:
        return "stopped"
    return "running"

generate async ¤

generate(
    text: str,
    use_chat_template: Optional[bool] = True,
    max_seq_length: Optional[int] = 12800,
    num_beams: Optional[int] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
)
Source code in src/unitorch/cli/fastapis/qwen.py
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
async def generate(
    self,
    text: str,
    use_chat_template: Optional[bool] = True,
    max_seq_length: Optional[int] = 12800,
    num_beams: Optional[int] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
):
    """Run the started pipeline on a piece of text.

    Every argument other than ``text`` is forwarded unchanged to the
    pipeline as a keyword argument of the same name.

    Args:
        text: Passed to the pipeline as its only positional argument.

    Returns:
        Whatever the pipeline call returns.

    Raises:
        AssertionError: If no pipeline has been started.
    """
    assert self._pipe is not None
    generation_kwargs = dict(
        use_chat_template=use_chat_template,
        max_seq_length=max_seq_length,
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
        min_gen_seq_length=min_gen_seq_length,
        max_gen_seq_length=max_gen_seq_length,
        do_sample=do_sample,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        lora_checkpoints=lora_checkpoints,
        lora_weights=lora_weights,
        lora_alphas=lora_alphas,
        lora_urls=lora_urls,
        lora_files=lora_files,
    )
    # Only one pipeline call runs at a time.
    async with self._lock:
        output = self._pipe(text, **generation_kwargs)

    return output

QWen3VLFastAPI¤

Tip

core/fastapi/qwen3_vl is the section for configuration of QWen3VLFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/qwen_vl.py
284
285
286
287
288
289
290
291
292
293
294
def __init__(self, config: Config):
    """Create the router, endpoints and lock for the QWen3-VL service.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/qwen3_vl`` before the ``router`` option (the
            URL prefix) is read from it.
    """
    self.config = config
    config.set_default_section("core/fastapi/qwen3_vl")
    prefix = config.getoption("router", "/core/fastapi/qwen3_vl")
    # The pipeline is not built at construction time.
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    endpoints = (
        ("/generate", self.generate, ["POST"]),
        ("/status", self.status, ["GET"]),
        ("/start", self.start, ["GET"]),
        ("/stop", self.stop, ["GET"]),
    )
    for path, handler, methods in endpoints:
        self._router.add_api_route(path, handler, methods=methods)
    # Held by ``generate`` around each pipeline call.
    self._lock = asyncio.Lock()

config instance-attribute ¤

config = config

_pipe instance-attribute ¤

_pipe = None

_router instance-attribute ¤

_router = APIRouter(prefix=router)

_lock instance-attribute ¤

_lock = Lock()

router property ¤

router

start ¤

start(pretrained_name: str = 'qwen3-vl-8b-instruct')
Source code in src/unitorch/cli/fastapis/qwen_vl.py
300
301
302
303
304
305
def start(self, pretrained_name: str = "qwen3-vl-8b-instruct"):
    """Build the QWen3-VL generation pipeline from the stored config.

    Args:
        pretrained_name: Name forwarded to ``from_config`` as
            ``pretrained_name``.

    Returns:
        The string ``"start success"``.
    """
    pipeline = QWen3VLForGenerationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()
Source code in src/unitorch/cli/fastapis/qwen_vl.py
307
308
309
310
311
312
313
def stop(self):
    """Release the loaded pipeline and free cached memory.

    The pipeline, if any, is moved to the CPU and the reference to it is
    dropped; then the garbage collector is run and the CUDA cache is
    emptied. Calling this when no pipeline is loaded (before ``start`` or
    twice in a row) no longer raises ``AttributeError`` on ``None``.

    Returns:
        The string ``"stop success"``.
    """
    if self._pipe is not None:
        self._pipe.to("cpu")
        # Drop the reference before collecting so the object can be freed.
        self._pipe = None
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()
Source code in src/unitorch/cli/fastapis/qwen_vl.py
315
316
def status(self):
    """Report whether a pipeline is currently set on the instance.

    Returns:
        ``"running"`` when ``self._pipe`` is not ``None``, else ``"stopped"``.
    """
    if self._pipe is None:
        return "stopped"
    return "running"

generate async ¤

generate(
    text: str,
    image: UploadFile = File(...),
    use_chat_template: Optional[bool] = True,
    max_seq_length: Optional[int] = 12800,
    num_beams: Optional[int] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
)
Source code in src/unitorch/cli/fastapis/qwen_vl.py
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
async def generate(
    self,
    text: str,
    image: UploadFile = File(...),
    use_chat_template: Optional[bool] = True,
    max_seq_length: Optional[int] = 12800,
    num_beams: Optional[int] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
):
    """Run the started pipeline on text plus an uploaded image.

    The upload is opened with PIL and converted to RGB, then passed to the
    pipeline as ``images``. The remaining arguments are forwarded
    unchanged as keyword arguments of the same name.

    Args:
        text: Passed to the pipeline as its only positional argument.
        image: Uploaded image file.

    Returns:
        Whatever the pipeline call returns.

    Raises:
        AssertionError: If no pipeline has been started.
    """
    assert self._pipe is not None
    raw_bytes = await image.read()
    rgb_image = Image.open(io.BytesIO(raw_bytes)).convert("RGB")
    generation_kwargs = dict(
        images=rgb_image,
        use_chat_template=use_chat_template,
        max_seq_length=max_seq_length,
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
        min_gen_seq_length=min_gen_seq_length,
        max_gen_seq_length=max_gen_seq_length,
        do_sample=do_sample,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        lora_checkpoints=lora_checkpoints,
        lora_weights=lora_weights,
        lora_alphas=lora_alphas,
        lora_urls=lora_urls,
        lora_files=lora_files,
    )
    # Only one pipeline call runs at a time.
    async with self._lock:
        output = self._pipe(text, **generation_kwargs)

    return output

SamForSegmentationFastAPI¤

Tip

core/fastapi/sam is the section for configuration of SamForSegmentationFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/sam.py
242
243
244
245
246
247
248
249
250
251
252
def __init__(self, config: Config):
    """Create the router, endpoints and lock for the SAM service.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/sam`` before the ``router`` option (the URL
            prefix) is read from it.
    """
    self.config = config
    config.set_default_section("core/fastapi/sam")
    prefix = config.getoption("router", "/core/fastapi/sam")
    # The pipeline is not built at construction time.
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    endpoints = (
        ("/generate", self.generate, ["POST"]),
        ("/status", self.status, ["GET"]),
        ("/start", self.start, ["GET"]),
        ("/stop", self.stop, ["GET"]),
    )
    for path, handler, methods in endpoints:
        self._router.add_api_route(path, handler, methods=methods)
    # Held by ``generate`` around each pipeline call.
    self._lock = asyncio.Lock()

config instance-attribute ¤

config = config

_pipe instance-attribute ¤

_pipe = None

_router instance-attribute ¤

_router = APIRouter(prefix=router)

_lock instance-attribute ¤

_lock = Lock()

router property ¤

router

start ¤

start(pretrained_name: Optional[str] = 'sam-vit-base')
Source code in src/unitorch/cli/fastapis/sam.py
258
259
260
261
262
263
def start(self, pretrained_name: Optional[str] = "sam-vit-base"):
    """Build the SAM segmentation pipeline from the stored config.

    Args:
        pretrained_name: Name forwarded to ``from_config`` as
            ``pretrained_name``.

    Returns:
        The string ``"start success"``.
    """
    pipeline = SamForSegmentationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()
Source code in src/unitorch/cli/fastapis/sam.py
265
266
267
268
269
270
271
def stop(self):
    """Release the loaded pipeline and free cached memory.

    The pipeline, if any, is moved to the CPU and the reference to it is
    dropped; then the garbage collector is run and the CUDA cache is
    emptied. Calling this when no pipeline is loaded (before ``start`` or
    twice in a row) no longer raises ``AttributeError`` on ``None``.

    Returns:
        The string ``"stop success"``.
    """
    if self._pipe is not None:
        self._pipe.to("cpu")
        # Drop the reference before collecting so the object can be freed.
        self._pipe = None
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()
Source code in src/unitorch/cli/fastapis/sam.py
273
274
def status(self):
    """Report whether a pipeline is currently set on the instance.

    Returns:
        ``"running"`` when ``self._pipe`` is not ``None``, else ``"stopped"``.
    """
    if self._pipe is None:
        return "stopped"
    return "running"

generate async ¤

generate(
    image: UploadFile,
    points: Optional[List] = None,
    boxes: Optional[List] = None,
    mask_threshold: float = 0.1,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
)
Source code in src/unitorch/cli/fastapis/sam.py
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
async def generate(
    self,
    image: UploadFile,
    points: Optional[List] = None,
    boxes: Optional[List] = None,
    mask_threshold: float = 0.1,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
):
    """Segment an uploaded image and stream the mask back as PNG.

    The upload is opened with PIL and handed to the pipeline; all other
    arguments are forwarded unchanged as keyword arguments.

    Args:
        image: Uploaded image file.

    Returns:
        A ``StreamingResponse`` with media type ``image/png``. Its body is
        the PNG-encoded mask, or empty when the pipeline returns ``None``.

    Raises:
        AssertionError: If no pipeline has been started.
    """
    assert self._pipe is not None
    raw = await image.read()
    picture = Image.open(io.BytesIO(raw))
    # Only one pipeline call runs at a time.
    async with self._lock:
        mask = self._pipe(
            picture,
            points=points,
            boxes=boxes,
            mask_threshold=mask_threshold,
            lora_checkpoints=lora_checkpoints,
            lora_weights=lora_weights,
            lora_alphas=lora_alphas,
            lora_urls=lora_urls,
            lora_files=lora_files,
        )

    # Default to an empty body; replaced when the pipeline produced a mask.
    payload = io.BytesIO()
    if mask is not None:
        encoded = io.BytesIO()
        mask.save(encoded, format="PNG")
        payload = io.BytesIO(encoded.getvalue())

    return StreamingResponse(payload, media_type="image/png")

SegformerForSegmentationFastAPI¤

Tip

core/fastapi/segformer is the section for configuration of SegformerForSegmentationFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/segformer.py
151
152
153
154
155
156
157
158
159
160
161
def __init__(self, config: Config):
    """Create the router, endpoints and lock for the Segformer service.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/segformer`` before the ``router`` option (the
            URL prefix) is read from it.
    """
    self.config = config
    config.set_default_section("core/fastapi/segformer")
    prefix = config.getoption("router", "/core/fastapi/segformer")
    # The pipeline is not built at construction time.
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    endpoints = (
        ("/generate", self.generate, ["POST"]),
        ("/status", self.status, ["GET"]),
        ("/start", self.start, ["GET"]),
        ("/stop", self.stop, ["GET"]),
    )
    for path, handler, methods in endpoints:
        self._router.add_api_route(path, handler, methods=methods)
    # Held by ``generate`` around each pipeline call.
    self._lock = asyncio.Lock()

config instance-attribute ¤

config = config

_pipe instance-attribute ¤

_pipe = None

_router instance-attribute ¤

_router = APIRouter(prefix=router)

_lock instance-attribute ¤

_lock = Lock()

router property ¤

router

start ¤

start(
    pretrained_name: Optional[
        str
    ] = "segformer-swin-tiny-ade-semantic",
)
Source code in src/unitorch/cli/fastapis/segformer.py
167
168
169
170
171
172
173
174
def start(
    self, pretrained_name: Optional[str] = "segformer-swin-tiny-ade-semantic"
):
    """Build the Segformer segmentation pipeline from the stored config.

    Args:
        pretrained_name: Name forwarded to ``from_config`` as
            ``pretrained_name``.

    Returns:
        The string ``"start success"``.
    """
    # NOTE(review): the default name contains "swin", which looks copied
    # from a different model family. Confirm this checkpoint name exists.
    pipeline = SegformerForSegmentationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()
Source code in src/unitorch/cli/fastapis/segformer.py
176
177
178
179
180
181
182
def stop(self):
    """Release the loaded pipeline and free cached memory.

    The pipeline, if any, is moved to the CPU and the reference to it is
    dropped; then the garbage collector is run and the CUDA cache is
    emptied. Calling this when no pipeline is loaded (before ``start`` or
    twice in a row) no longer raises ``AttributeError`` on ``None``.

    Returns:
        The string ``"stop success"``.
    """
    if self._pipe is not None:
        self._pipe.to("cpu")
        # Drop the reference before collecting so the object can be freed.
        self._pipe = None
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()
Source code in src/unitorch/cli/fastapis/segformer.py
184
185
def status(self):
    """Report whether a pipeline is currently set on the instance.

    Returns:
        ``"running"`` when ``self._pipe`` is not ``None``, else ``"stopped"``.
    """
    if self._pipe is None:
        return "stopped"
    return "running"

generate async ¤

generate(image: UploadFile)
Source code in src/unitorch/cli/fastapis/segformer.py
187
188
189
190
191
192
193
194
195
196
197
async def generate(
    self,
    image: UploadFile,
):
    """Segment an uploaded image with the started pipeline.

    Args:
        image: Uploaded image file; its bytes are opened with PIL.

    Returns:
        A list of ``(mask, label)`` tuples, one per pipeline result, with
        each mask converted through ``.tolist()``.

    Raises:
        AssertionError: If no pipeline has been started.
    """
    assert self._pipe is not None
    raw = await image.read()
    picture = Image.open(io.BytesIO(raw))
    # Only one pipeline call runs at a time.
    async with self._lock:
        segments = self._pipe(picture)

    response = []
    for mask, label in segments:
        response.append((mask.tolist(), label))
    return response

Siglip2ForMatchingFastAPI¤

Tip

core/fastapi/siglip is the section for configuration of Siglip2ForMatchingFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/siglip.py
207
208
209
210
211
212
213
214
215
216
217
def __init__(self, config: Config):
    """Create the router, endpoints and lock for the Siglip matching service.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/siglip`` before the ``router`` option (the URL
            prefix) is read from it.
    """
    self.config = config
    config.set_default_section("core/fastapi/siglip")
    prefix = config.getoption("router", "/core/fastapi/siglip")
    # The pipeline is not built at construction time.
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    endpoints = (
        ("/generate", self.generate, ["POST"]),
        ("/status", self.status, ["GET"]),
        ("/start", self.start, ["GET"]),
        ("/stop", self.stop, ["GET"]),
    )
    for path, handler, methods in endpoints:
        self._router.add_api_route(path, handler, methods=methods)
    # Held by ``generate`` around each pipeline call.
    self._lock = asyncio.Lock()

config instance-attribute ¤

config = config

_pipe instance-attribute ¤

_pipe = None

_router instance-attribute ¤

_router = APIRouter(prefix=router)

_lock instance-attribute ¤

_lock = Lock()

router property ¤

router

start ¤

start(pretrained_name: str = 'siglip-base-patch16-224')
Source code in src/unitorch/cli/fastapis/siglip.py
223
224
225
226
227
228
def start(self, pretrained_name: str = "siglip-base-patch16-224"):
    """Build the Siglip2 matching pipeline from the stored config.

    Args:
        pretrained_name: Name forwarded to ``from_config`` as
            ``pretrained_name``.

    Returns:
        The string ``"start success"``.
    """
    pipeline = Siglip2ForMatchingPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()
Source code in src/unitorch/cli/fastapis/siglip.py
230
231
232
233
234
235
236
def stop(self):
    """Release the loaded pipeline and free cached memory.

    The pipeline, if any, is moved to the CPU and the reference to it is
    dropped; then the garbage collector is run and the CUDA cache is
    emptied. Calling this when no pipeline is loaded (before ``start`` or
    twice in a row) no longer raises ``AttributeError`` on ``None``.

    Returns:
        The string ``"stop success"``.
    """
    if self._pipe is not None:
        self._pipe.to("cpu")
        # Drop the reference before collecting so the object can be freed.
        self._pipe = None
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()
Source code in src/unitorch/cli/fastapis/siglip.py
238
239
def status(self):
    """Report whether a pipeline is currently set on the instance.

    Returns:
        ``"running"`` when ``self._pipe`` is not ``None``, else ``"stopped"``.
    """
    if self._pipe is None:
        return "stopped"
    return "running"

generate async ¤

generate(
    text: str,
    image: UploadFile,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
)
Source code in src/unitorch/cli/fastapis/siglip.py
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
async def generate(
    self,
    text: str,
    image: UploadFile,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
):
    """Run the started pipeline on a text and an uploaded image.

    The upload is opened with PIL and passed to the pipeline after the
    text; the LoRA arguments are forwarded unchanged as keyword arguments.

    Args:
        text: First positional argument to the pipeline.
        image: Uploaded image file.

    Returns:
        Whatever the pipeline call returns.

    Raises:
        AssertionError: If no pipeline has been started.
    """
    assert self._pipe is not None
    raw = await image.read()
    picture = Image.open(io.BytesIO(raw))
    lora_kwargs = dict(
        lora_checkpoints=lora_checkpoints,
        lora_weights=lora_weights,
        lora_alphas=lora_alphas,
        lora_urls=lora_urls,
        lora_files=lora_files,
    )
    # Only one pipeline call runs at a time.
    async with self._lock:
        output = self._pipe(text, picture, **lora_kwargs)

    return output

WanForText2VideoFastAPI¤

Tip

core/fastapi/wan/text2video is the section for configuration of WanForText2VideoFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/wan/text2video.py
322
323
324
325
326
327
328
329
330
331
332
def __init__(self, config: Config):
    """Create the router, endpoints and lock for the Wan text-to-video service.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/wan/text2video`` before the ``router`` option
            (the URL prefix) is read from it.
    """
    self.config = config
    config.set_default_section("core/fastapi/wan/text2video")
    prefix = config.getoption("router", "/core/fastapi/wan/text2video")
    # The pipeline is not built at construction time.
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    # ``/start`` is registered as POST on this service.
    endpoints = (
        ("/generate", self.generate, ["POST"]),
        ("/status", self.status, ["GET"]),
        ("/start", self.start, ["POST"]),
        ("/stop", self.stop, ["GET"]),
    )
    for path, handler, methods in endpoints:
        self._router.add_api_route(path, handler, methods=methods)
    # Held by ``generate`` around each pipeline call.
    self._lock = asyncio.Lock()

config instance-attribute ¤

config = config

_pipe instance-attribute ¤

_pipe = None

_router instance-attribute ¤

_router = APIRouter(prefix=router)

_lock instance-attribute ¤

_lock = Lock()

router property ¤

router

start ¤

start(
    pretrained_name: Optional[str] = "wan-v2.2-t2v-14b",
    pretrained_lora_names: Optional[
        Union[str, List[str]]
    ] = None,
    pretrained_lora_weights: Optional[
        Union[float, List[float]]
    ] = 1.0,
    pretrained_lora_alphas: Optional[
        Union[float, List[float]]
    ] = 32.0,
)
Source code in src/unitorch/cli/fastapis/wan/text2video.py
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
def start(
    self,
    pretrained_name: Optional[str] = "wan-v2.2-t2v-14b",
    pretrained_lora_names: Optional[Union[str, List[str]]] = None,
    pretrained_lora_weights: Optional[Union[float, List[float]]] = 1.0,
    pretrained_lora_alphas: Optional[Union[float, List[float]]] = 32.0,
):
    """Build the Wan text-to-video pipeline from the stored config.

    All four arguments are forwarded to ``from_config`` as keyword
    arguments of the same name.

    Returns:
        The string ``"start success"``.
    """
    build_kwargs = dict(
        pretrained_name=pretrained_name,
        pretrained_lora_names=pretrained_lora_names,
        pretrained_lora_weights=pretrained_lora_weights,
        pretrained_lora_alphas=pretrained_lora_alphas,
    )
    self._pipe = WanForText2VideoFastAPIPipeline.from_config(
        self.config, **build_kwargs
    )
    return "start success"

stop ¤

stop()
Source code in src/unitorch/cli/fastapis/wan/text2video.py
354
355
356
357
358
359
360
def stop(self):
    """Release the loaded pipeline and free cached memory.

    The pipeline, if any, is moved to the CPU and the reference to it is
    dropped; then the garbage collector is run and the CUDA cache is
    emptied. Calling this when no pipeline is loaded (before ``start`` or
    twice in a row) no longer raises ``AttributeError`` on ``None``.

    Returns:
        The string ``"stop success"``.
    """
    if self._pipe is not None:
        self._pipe.to("cpu")
        # Drop the reference before collecting so the object can be freed.
        self._pipe = None
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()
Source code in src/unitorch/cli/fastapis/wan/text2video.py
362
363
def status(self):
    """Report whether a pipeline is currently set on the instance.

    Returns:
        ``"running"`` when ``self._pipe`` is not ``None``, else ``"stopped"``.
    """
    if self._pipe is None:
        return "stopped"
    return "running"

generate async ¤

generate(
    text: str,
    neg_text: Optional[str] = "",
    height: Optional[int] = 480,
    width: Optional[int] = 832,
    num_frames: Optional[int] = 81,
    num_fps: Optional[int] = 16,
    guidance_scale: Optional[float] = 5.0,
    num_timesteps: Optional[int] = 50,
    seed: Optional[int] = 1123,
)
Source code in src/unitorch/cli/fastapis/wan/text2video.py
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
async def generate(
    self,
    text: str,
    neg_text: Optional[str] = "",
    height: Optional[int] = 480,
    width: Optional[int] = 832,
    num_frames: Optional[int] = 81,
    num_fps: Optional[int] = 16,
    guidance_scale: Optional[float] = 5.0,
    num_timesteps: Optional[int] = 50,
    seed: Optional[int] = 1123,
):
    """Generate a video from text and stream it back as an MP4 attachment.

    Every argument other than ``text`` is forwarded unchanged to the
    pipeline as a keyword argument. The pipeline's return value is used
    as a path: that file is read fully into memory and streamed.

    Args:
        text: Passed to the pipeline as its only positional argument.

    Returns:
        A ``StreamingResponse`` with media type ``video/mp4`` and a
        ``Content-Disposition`` header naming the file ``output.mp4``.

    Raises:
        AssertionError: If no pipeline has been started.
    """
    assert self._pipe is not None
    video_kwargs = dict(
        neg_text=neg_text,
        height=height,
        width=width,
        num_frames=num_frames,
        num_fps=num_fps,
        guidance_scale=guidance_scale,
        num_timesteps=num_timesteps,
        seed=seed,
    )
    # Only one pipeline call runs at a time.
    async with self._lock:
        video_path = self._pipe(text, **video_kwargs)
    # A BytesIO built from bytes starts at position 0, ready to stream.
    with open(video_path, "rb") as handle:
        payload = io.BytesIO(handle.read())
    return StreamingResponse(
        payload,
        media_type="video/mp4",
        headers={"Content-Disposition": "attachment; filename=output.mp4"},
    )

WanForImage2VideoFastAPI¤

Tip

core/fastapi/wan/image2video is the section for configuration of WanForImage2VideoFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/wan/image2video.py
326
327
328
329
330
331
332
333
334
335
336
def __init__(self, config: Config):
    """Create the router, endpoints and lock for the Wan image-to-video service.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/wan/image2video`` before the ``router`` option
            (the URL prefix) is read from it.
    """
    self.config = config
    config.set_default_section("core/fastapi/wan/image2video")
    prefix = config.getoption("router", "/core/fastapi/wan/image2video")
    # The pipeline is not built at construction time.
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    # ``/start`` is registered as POST on this service.
    endpoints = (
        ("/generate", self.generate, ["POST"]),
        ("/status", self.status, ["GET"]),
        ("/start", self.start, ["POST"]),
        ("/stop", self.stop, ["GET"]),
    )
    for path, handler, methods in endpoints:
        self._router.add_api_route(path, handler, methods=methods)
    # Held by ``generate`` around each pipeline call.
    self._lock = asyncio.Lock()

config instance-attribute ¤

config = config

_pipe instance-attribute ¤

_pipe = None

_router instance-attribute ¤

_router = APIRouter(prefix=router)

_lock instance-attribute ¤

_lock = Lock()

router property ¤

router

start ¤

start(
    pretrained_name: Optional[str] = "wan-v2.2-i2v-14b",
    pretrained_lora_names: Optional[
        Union[str, List[str]]
    ] = None,
    pretrained_lora_weights: Optional[
        Union[float, List[float]]
    ] = 1.0,
    pretrained_lora_alphas: Optional[
        Union[float, List[float]]
    ] = 32.0,
)
Source code in src/unitorch/cli/fastapis/wan/image2video.py
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
def start(
    self,
    pretrained_name: Optional[str] = "wan-v2.2-i2v-14b",
    pretrained_lora_names: Optional[Union[str, List[str]]] = None,
    pretrained_lora_weights: Optional[Union[float, List[float]]] = 1.0,
    pretrained_lora_alphas: Optional[Union[float, List[float]]] = 32.0,
):
    """Create the image-to-video pipeline and store it on the service.

    All arguments are forwarded unchanged to
    ``WanForImage2VideoFastAPIPipeline.from_config`` together with
    ``self.config``.

    Args:
        pretrained_name: Name of the pretrained model to load.
        pretrained_lora_names: Optional LoRA name or list of names.
        pretrained_lora_weights: LoRA weight or list of weights.
        pretrained_lora_alphas: LoRA alpha or list of alphas.

    Returns:
        str: The literal ``"start success"``.
    """
    pipeline_options = {
        "pretrained_name": pretrained_name,
        "pretrained_lora_names": pretrained_lora_names,
        "pretrained_lora_weights": pretrained_lora_weights,
        "pretrained_lora_alphas": pretrained_lora_alphas,
    }
    self._pipe = WanForImage2VideoFastAPIPipeline.from_config(
        self.config, **pipeline_options
    )
    return "start success"

stop ¤

stop()
Source code in src/unitorch/cli/fastapis/wan/image2video.py
358
359
360
361
362
363
364
def stop(self):
    """Unload the pipeline and release cached memory.

    If a pipeline is loaded it is moved to the CPU and the service drops its
    reference to it. Garbage collection and ``torch.cuda.empty_cache()`` run
    in every case. Calling this when nothing is loaded is a no-op instead of
    an ``AttributeError``.

    Returns:
        str: The literal ``"stop success"``.
    """
    if self._pipe is not None:
        self._pipe.to("cpu")
        # Rebind rather than ``del`` so the attribute always exists;
        # ``status`` reads ``self._pipe``.
        self._pipe = None
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()
Source code in src/unitorch/cli/fastapis/wan/image2video.py
366
367
def status(self):
    """Report ``"running"`` when a pipeline is loaded, else ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate async ¤

generate(
    text: str,
    image: UploadFile,
    neg_text: Optional[str] = "",
    num_frames: Optional[int] = 81,
    num_fps: Optional[int] = 16,
    guidance_scale: Optional[float] = 5.0,
    strength: Optional[float] = 1.0,
    num_timesteps: Optional[int] = 50,
    seed: Optional[int] = 1123,
)
Source code in src/unitorch/cli/fastapis/wan/image2video.py
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
async def generate(
    self,
    text: str,
    image: UploadFile,
    neg_text: Optional[str] = "",
    num_frames: Optional[int] = 81,
    num_fps: Optional[int] = 16,
    guidance_scale: Optional[float] = 5.0,
    strength: Optional[float] = 1.0,
    num_timesteps: Optional[int] = 50,
    seed: Optional[int] = 1123,
):
    """Generate a video from a prompt and an uploaded image.

    The upload is decoded with PIL and passed, with ``text`` and the
    remaining options, to the loaded pipeline while ``self._lock`` is held.
    The pipeline result is opened as a file path and its bytes are returned.

    Args:
        text: Prompt passed positionally to the pipeline.
        image: Uploaded image file; decoded before the pipeline call.
        neg_text: Forwarded to the pipeline as ``neg_text``.
        num_frames: Forwarded to the pipeline as ``num_frames``.
        num_fps: Forwarded to the pipeline as ``num_fps``.
        guidance_scale: Forwarded to the pipeline as ``guidance_scale``.
        strength: Forwarded to the pipeline as ``strength``.
        num_timesteps: Forwarded to the pipeline as ``num_timesteps``.
        seed: Forwarded to the pipeline as ``seed``.

    Returns:
        StreamingResponse: ``video/mp4`` body sent as attachment
        ``output.mp4``.

    Raises:
        AssertionError: If no pipeline has been loaded.
    """
    assert self._pipe is not None
    upload_bytes = await image.read()
    source_image = Image.open(io.BytesIO(upload_bytes))
    pipeline_options = {
        "neg_text": neg_text,
        "num_frames": num_frames,
        "num_fps": num_fps,
        "guidance_scale": guidance_scale,
        "strength": strength,
        "num_timesteps": num_timesteps,
        "seed": seed,
    }
    # NOTE(review): the pipeline call is synchronous, so this coroutine does
    # not yield while it runs.
    async with self._lock:
        video_path = self._pipe(text, source_image, **pipeline_options)
    with open(video_path, "rb") as video_file:
        payload = io.BytesIO(video_file.read())
    return StreamingResponse(
        payload,
        media_type="video/mp4",
        headers={"Content-Disposition": "attachment; filename=output.mp4"},
    )

QWenImageText2ImageFastAPI¤

Tip

core/fastapi/qwen_image/text2image is the section for configuration of QWenImageText2ImageFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/qwen_image/text2image.py
346
347
348
349
350
351
352
353
354
355
356
def __init__(self, config: Config):
    """Bind the service to ``config`` and register its HTTP routes.

    The route prefix is read from the ``router`` option of the
    ``core/fastapi/qwen_image/text2image`` section. No pipeline is created
    here; ``self._pipe`` stays ``None`` until ``start`` assigns one.

    Args:
        config (Config): Configuration object the service reads from.
    """
    self.config = config
    config.set_default_section("core/fastapi/qwen_image/text2image")
    prefix = config.getoption("router", "/core/fastapi/qwen_image/text2image")
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    # (path, handler, HTTP method) for every endpoint exposed by the service.
    endpoints = (
        ("/generate", self.generate, "GET"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "POST"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, method in endpoints:
        self._router.add_api_route(path, handler, methods=[method])
    # Held by ``generate`` around the pipeline call.
    self._lock = asyncio.Lock()

config instance-attribute ¤

config = config

_pipe instance-attribute ¤

_pipe = None

_router instance-attribute ¤

_router = APIRouter(prefix=router)

_lock instance-attribute ¤

_lock = Lock()

router property ¤

router

start ¤

start(
    pretrained_name: Optional[str] = "qwen-image",
    pretrained_lora_names: Optional[
        Union[str, List[str]]
    ] = None,
    pretrained_lora_weights: Optional[
        Union[float, List[float]]
    ] = 1.0,
    pretrained_lora_alphas: Optional[
        Union[float, List[float]]
    ] = 32.0,
)
Source code in src/unitorch/cli/fastapis/qwen_image/text2image.py
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
def start(
    self,
    pretrained_name: Optional[str] = "qwen-image",
    pretrained_lora_names: Optional[Union[str, List[str]]] = None,
    pretrained_lora_weights: Optional[Union[float, List[float]]] = 1.0,
    pretrained_lora_alphas: Optional[Union[float, List[float]]] = 32.0,
):
    """Create the text-to-image pipeline and store it on the service.

    All arguments are forwarded unchanged to
    ``QWenImageForText2ImageFastAPIPipeline.from_config`` together with
    ``self.config``.

    Args:
        pretrained_name: Name of the pretrained model to load.
        pretrained_lora_names: Optional LoRA name or list of names.
        pretrained_lora_weights: LoRA weight or list of weights.
        pretrained_lora_alphas: LoRA alpha or list of alphas.

    Returns:
        str: The literal ``"start success"``.
    """
    pipeline_options = {
        "pretrained_name": pretrained_name,
        "pretrained_lora_names": pretrained_lora_names,
        "pretrained_lora_weights": pretrained_lora_weights,
        "pretrained_lora_alphas": pretrained_lora_alphas,
    }
    self._pipe = QWenImageForText2ImageFastAPIPipeline.from_config(
        self.config, **pipeline_options
    )
    return "start success"

stop ¤

stop()
Source code in src/unitorch/cli/fastapis/qwen_image/text2image.py
378
379
380
381
382
383
384
def stop(self):
    """Unload the pipeline and release cached memory.

    If a pipeline is loaded it is moved to the CPU and the service drops its
    reference to it. Garbage collection and ``torch.cuda.empty_cache()`` run
    in every case. Calling this when nothing is loaded is a no-op instead of
    an ``AttributeError``.

    Returns:
        str: The literal ``"stop success"``.
    """
    if self._pipe is not None:
        self._pipe.to("cpu")
        # Rebind rather than ``del`` so the attribute always exists;
        # ``status`` reads ``self._pipe``.
        self._pipe = None
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()
Source code in src/unitorch/cli/fastapis/qwen_image/text2image.py
386
387
def status(self):
    """Report ``"running"`` when a pipeline is loaded, else ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate async ¤

generate(
    text: str,
    height: Optional[int] = 512,
    width: Optional[int] = 512,
    guidance_scale: Optional[float] = 4.0,
    num_timesteps: Optional[int] = 50,
    seed: Optional[int] = 1123,
)
Source code in src/unitorch/cli/fastapis/qwen_image/text2image.py
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
async def generate(
    self,
    text: str,
    height: Optional[int] = 512,
    width: Optional[int] = 512,
    guidance_scale: Optional[float] = 4.0,
    num_timesteps: Optional[int] = 50,
    seed: Optional[int] = 1123,
):
    """Generate an image from a text prompt and return it as PNG.

    The loaded pipeline is called with ``text`` and the remaining options
    while ``self._lock`` is held; its result is saved as PNG into memory.

    Args:
        text: Prompt passed positionally to the pipeline.
        height: Forwarded to the pipeline as ``height``.
        width: Forwarded to the pipeline as ``width``.
        guidance_scale: Forwarded to the pipeline as ``guidance_scale``.
        num_timesteps: Forwarded to the pipeline as ``num_timesteps``.
        seed: Forwarded to the pipeline as ``seed``.

    Returns:
        StreamingResponse: ``image/png`` body with the encoded image.

    Raises:
        AssertionError: If no pipeline has been loaded.
    """
    assert self._pipe is not None
    pipeline_options = {
        "height": height,
        "width": width,
        "guidance_scale": guidance_scale,
        "num_timesteps": num_timesteps,
        "seed": seed,
    }
    # NOTE(review): the pipeline call is synchronous, so this coroutine does
    # not yield while it runs.
    async with self._lock:
        rendered = self._pipe(text, **pipeline_options)

    encoded = io.BytesIO()
    rendered.save(encoded, format="PNG")
    png_stream = io.BytesIO(encoded.getvalue())
    return StreamingResponse(png_stream, media_type="image/png")

QWenImageEditingFastAPI¤

Tip

core/fastapi/qwen_image/editing is the section for configuration of QWenImageEditingFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/qwen_image/image_editing.py
367
368
369
370
371
372
373
374
375
376
377
def __init__(self, config: Config):
    """Bind the service to ``config`` and register its HTTP routes.

    The route prefix is read from the ``router`` option of the
    ``core/fastapi/qwen_image/editing`` section. No pipeline is created
    here; ``self._pipe`` stays ``None`` until ``start`` assigns one.

    Args:
        config (Config): Configuration object the service reads from.
    """
    self.config = config
    config.set_default_section("core/fastapi/qwen_image/editing")
    prefix = config.getoption("router", "/core/fastapi/qwen_image/editing")
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    # (path, handler, HTTP method) for every endpoint exposed by the service.
    endpoints = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "POST"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, method in endpoints:
        self._router.add_api_route(path, handler, methods=[method])
    # Held by ``generate`` around the pipeline call.
    self._lock = asyncio.Lock()

config instance-attribute ¤

config = config

_pipe instance-attribute ¤

_pipe = None

_router instance-attribute ¤

_router = APIRouter(prefix=router)

_lock instance-attribute ¤

_lock = Lock()

router property ¤

router

start ¤

start(
    pretrained_name: Optional[str] = "qwen-image-editing",
    pretrained_lora_names: Optional[
        Union[str, List[str]]
    ] = None,
    pretrained_lora_weights: Optional[
        Union[float, List[float]]
    ] = 1.0,
    pretrained_lora_alphas: Optional[
        Union[float, List[float]]
    ] = 32.0,
)
Source code in src/unitorch/cli/fastapis/qwen_image/image_editing.py
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
def start(
    self,
    pretrained_name: Optional[str] = "qwen-image-editing",
    pretrained_lora_names: Optional[Union[str, List[str]]] = None,
    pretrained_lora_weights: Optional[Union[float, List[float]]] = 1.0,
    pretrained_lora_alphas: Optional[Union[float, List[float]]] = 32.0,
):
    """Create the image-editing pipeline and store it on the service.

    All arguments are forwarded unchanged to
    ``QWenImageForImageEditingFastAPIPipeline.from_config`` together with
    ``self.config``.

    Args:
        pretrained_name: Name of the pretrained model to load.
        pretrained_lora_names: Optional LoRA name or list of names.
        pretrained_lora_weights: LoRA weight or list of weights.
        pretrained_lora_alphas: LoRA alpha or list of alphas.

    Returns:
        str: The literal ``"start success"``.
    """
    pipeline_options = {
        "pretrained_name": pretrained_name,
        "pretrained_lora_names": pretrained_lora_names,
        "pretrained_lora_weights": pretrained_lora_weights,
        "pretrained_lora_alphas": pretrained_lora_alphas,
    }
    self._pipe = QWenImageForImageEditingFastAPIPipeline.from_config(
        self.config, **pipeline_options
    )
    return "start success"

stop ¤

stop()
Source code in src/unitorch/cli/fastapis/qwen_image/image_editing.py
399
400
401
402
403
404
405
def stop(self):
    """Unload the pipeline and release cached memory.

    If a pipeline is loaded it is moved to the CPU and the service drops its
    reference to it. Garbage collection and ``torch.cuda.empty_cache()`` run
    in every case. Calling this when nothing is loaded is a no-op instead of
    an ``AttributeError``.

    Returns:
        str: The literal ``"stop success"``.
    """
    if self._pipe is not None:
        self._pipe.to("cpu")
        # Rebind rather than ``del`` so the attribute always exists;
        # ``status`` reads ``self._pipe``.
        self._pipe = None
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()
Source code in src/unitorch/cli/fastapis/qwen_image/image_editing.py
407
408
def status(self):
    """Report ``"running"`` when a pipeline is loaded, else ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate async ¤

generate(
    text: str,
    image: UploadFile,
    height: Optional[int] = 512,
    width: Optional[int] = 512,
    guidance_scale: Optional[float] = 2.5,
    num_timesteps: Optional[int] = 50,
    seed: Optional[int] = 1123,
)
Source code in src/unitorch/cli/fastapis/qwen_image/image_editing.py
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
async def generate(
    self,
    text: str,
    image: UploadFile,
    height: Optional[int] = 512,
    width: Optional[int] = 512,
    guidance_scale: Optional[float] = 2.5,
    num_timesteps: Optional[int] = 50,
    seed: Optional[int] = 1123,
):
    """Edit an uploaded image according to a text prompt and return a PNG.

    The upload is decoded with PIL and passed as ``image`` to the loaded
    pipeline, together with ``text`` and the remaining options, while
    ``self._lock`` is held. The pipeline result is saved as PNG into memory.

    Args:
        text: Prompt passed positionally to the pipeline.
        image: Uploaded image file; decoded before the pipeline call.
        height: Forwarded to the pipeline as ``height``.
        width: Forwarded to the pipeline as ``width``.
        guidance_scale: Forwarded to the pipeline as ``guidance_scale``.
        num_timesteps: Forwarded to the pipeline as ``num_timesteps``.
        seed: Forwarded to the pipeline as ``seed``.

    Returns:
        StreamingResponse: ``image/png`` body with the encoded image.

    Raises:
        AssertionError: If no pipeline has been loaded.
    """
    assert self._pipe is not None
    upload_bytes = await image.read()
    source_image = Image.open(io.BytesIO(upload_bytes))
    pipeline_options = {
        "image": source_image,
        "height": height,
        "width": width,
        "guidance_scale": guidance_scale,
        "num_timesteps": num_timesteps,
        "seed": seed,
    }
    # NOTE(review): the pipeline call is synchronous, so this coroutine does
    # not yield while it runs.
    async with self._lock:
        edited = self._pipe(text, **pipeline_options)

    encoded = io.BytesIO()
    edited.save(encoded, format="PNG")
    png_stream = io.BytesIO(encoded.getvalue())
    return StreamingResponse(png_stream, media_type="image/png")

QWen3VLLMFastAPI¤

Tip

core/fastapi/vllm/qwen3 is the section for configuration of QWen3VLLMFastAPI.

Bases: GenericFastAPI

FastAPI service for QWen3 text generation powered by vLLM.

Exposes /generate, /status, /start, and /stop endpoints under a configurable router prefix (default /core/fastapi/vllm/qwen3).

Source code in src/unitorch/cli/fastapis/qwen_vllm.py
26
27
28
29
30
31
32
33
34
35
36
def __init__(self, config: Config):
    """Bind the service to ``config`` and register its HTTP routes.

    The route prefix is read from the ``router`` option of the
    ``core/fastapi/vllm/qwen3`` section. No engine is created here;
    ``self._pipe`` stays ``None`` until ``start`` assigns one.

    Args:
        config (Config): Configuration object the service reads from.
    """
    self.config = config
    config.set_default_section("core/fastapi/vllm/qwen3")
    prefix = config.getoption("router", "/core/fastapi/vllm/qwen3")
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    # (path, handler, HTTP method) for every endpoint exposed by the service.
    endpoints = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, method in endpoints:
        self._router.add_api_route(path, handler, methods=[method])
    # Held by ``generate`` around the engine call.
    self._lock = asyncio.Lock()

config instance-attribute ¤

config = config

_pipe instance-attribute ¤

_pipe = None

_router instance-attribute ¤

_router = APIRouter(prefix=router)

_lock instance-attribute ¤

_lock = Lock()

router property ¤

router

start ¤

start(pretrained_name: str = 'qwen3-4b-thinking')

Loads and starts the vLLM QWen3 engine.

Parameters:

Name Type Description Default
pretrained_name str

Pretrained model name to load. Defaults to "qwen3-4b-thinking".

'qwen3-4b-thinking'
Source code in src/unitorch/cli/fastapis/qwen_vllm.py
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
def start(self, pretrained_name: str = "qwen3-4b-thinking"):
    """
    Loads and starts the vLLM QWen3 engine.

    When ``pretrained_name`` has a ``pretrained_name_or_path`` entry in
    ``pretrained_vllm_infos``, the name is also written to the
    ``pretrained_name`` option of the service's config section.

    Args:
        pretrained_name (str): Pretrained model name to load. Defaults to ``"qwen3-4b-thinking"``.

    Returns:
        str: The literal ``"start success"``.
    """
    section = "core/fastapi/vllm/qwen3"
    registered_path = nested_dict_value(
        pretrained_vllm_infos, pretrained_name, "pretrained_name_or_path"
    )
    self.config.set_default_section(section)
    name_is_registered = registered_path is not None
    if name_is_registered:
        self.config.set(section, "pretrained_name", pretrained_name)
    engine = QWen3VLLMForGeneration.from_config(
        self.config,
        pretrained_name=pretrained_name,
    )
    self._pipe = engine
    return "start success"

stop ¤

stop()

Stops and unloads the vLLM engine, releasing GPU memory.

Source code in src/unitorch/cli/fastapis/qwen_vllm.py
63
64
65
66
67
68
69
70
71
72
73
74
75
76
def stop(self):
    """
    Stops and unloads the vLLM engine.

    Drops the service's reference to the engine, runs garbage collection and,
    on a best-effort basis, asks torch to empty its CUDA cache.

    Returns:
        str: The literal ``"stop success"``.
    """
    # Rebind instead of ``del`` so the attribute never disappears; ``status``
    # reads ``self._pipe`` and would raise AttributeError in that window.
    self._pipe = None
    gc.collect()
    try:
        import torch

        torch.cuda.empty_cache()
    except Exception:
        # Deliberately best-effort: a missing or failing torch must not turn
        # an otherwise successful stop into an error.
        pass
    return "stop success"

status ¤

status()

Returns "running" if the engine is loaded, otherwise "stopped".

Source code in src/unitorch/cli/fastapis/qwen_vllm.py
78
79
80
def status(self):
    """Reports ``"running"`` while an engine is loaded and ``"stopped"`` otherwise."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate async ¤

generate(
    text: str,
    use_chat_template: Optional[bool] = True,
    max_gen_seq_length: Optional[int] = 512,
    min_gen_seq_length: Optional[int] = 0,
    num_return_sequences: Optional[int] = 1,
    num_beams: Optional[int] = 1,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
    repetition_penalty: Optional[float] = 1.0,
    stop: Optional[Union[str, List[str]]] = None,
)

Generates a text completion for the given prompt.

Parameters:

Name Type Description Default
text str

Input prompt or JSON-encoded message list (when use_chat_template=True).

required
use_chat_template bool

Apply chat template formatting. Defaults to True.

True
max_gen_seq_length int

Maximum tokens to generate. Defaults to 512.

512
min_gen_seq_length int

Minimum tokens to generate. Defaults to 0.

0
num_return_sequences int

Number of completions to return. Defaults to 1.

1
num_beams int

Beam search width. Defaults to 1.

1
do_sample bool

Enable sampling-based decoding. Defaults to False.

False
temperature float

Sampling temperature. Defaults to 1.0.

1.0
top_k int

Top-k sampling. Defaults to 50.

50
top_p float

Top-p (nucleus) sampling. Defaults to 1.0.

1.0
repetition_penalty float

Repetition penalty. Defaults to 1.0.

1.0
stop str or List[str]

Stop string(s) to end generation.

None

Returns:

Type Description

str or List[str]: Generated text. Single string when num_return_sequences=1.

Source code in src/unitorch/cli/fastapis/qwen_vllm.py
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
async def generate(
    self,
    text: str,
    use_chat_template: Optional[bool] = True,
    max_gen_seq_length: Optional[int] = 512,
    min_gen_seq_length: Optional[int] = 0,
    num_return_sequences: Optional[int] = 1,
    num_beams: Optional[int] = 1,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
    repetition_penalty: Optional[float] = 1.0,
    stop: Optional[Union[str, List[str]]] = None,
):
    """
    Generates a text completion for the given prompt.

    Args:
        text (str): Input prompt or JSON-encoded message list (when ``use_chat_template=True``).
        use_chat_template (bool): Apply chat template formatting. Defaults to True.
        max_gen_seq_length (int): Maximum tokens to generate. Defaults to 512.
        min_gen_seq_length (int): Minimum tokens to generate. Defaults to 0.
        num_return_sequences (int): Number of completions to return. Defaults to 1.
        num_beams (int): Beam search width. Defaults to 1.
        do_sample (bool): Enable sampling-based decoding. Defaults to False.
        temperature (float): Sampling temperature. Defaults to 1.0.
        top_k (int): Top-k sampling. Defaults to 50.
        top_p (float): Top-p (nucleus) sampling. Defaults to 1.0.
        repetition_penalty (float): Repetition penalty. Defaults to 1.0.
        stop (str or List[str], optional): Stop string(s) to end generation.

    Returns:
        str or List[str]: Generated text. Single string when ``num_return_sequences=1``.

    Raises:
        AssertionError: If the service has not been started.
    """
    assert self._pipe is not None, "Service not started. Call /start first."
    processor = self._pipe.processor
    # With the chat template enabled, ``text`` must be a JSON document that
    # is handed to the processor as ``messages``.
    prompt = (
        processor.chat_template(messages=json.loads(text))
        if use_chat_template
        else text
    )
    inputs = processor.generation_inputs(text=prompt)
    # Add a leading dimension of size one for the single request.
    input_ids = inputs.input_ids.unsqueeze(0)
    async with self._lock:
        outputs = self._pipe.generate(
            input_ids=input_ids,
            max_gen_seq_length=max_gen_seq_length,
            min_gen_seq_length=min_gen_seq_length,
            num_return_sequences=num_return_sequences,
            num_beams=num_beams,
            do_sample=do_sample,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            stop=stop,
        )
    decoded = processor.detokenize(sequences=outputs.sequences)
    # Only one prompt was submitted, so take the first entry's completions.
    sequences = decoded[0]
    return sequences[0] if num_return_sequences == 1 else sequences

QWen3VLVLLMFastAPI¤

Tip

core/fastapi/vllm/qwen3_vl is the section for configuration of QWen3VLVLLMFastAPI.

Bases: GenericFastAPI

FastAPI service for QWen3-VL vision-language generation powered by vLLM.

Exposes /generate, /status, /start, and /stop endpoints under a configurable router prefix (default /core/fastapi/vllm/qwen3_vl). Accepts both text-only and multimodal (text + image) generation requests.

Source code in src/unitorch/cli/fastapis/qwen_vl_vllm.py
29
30
31
32
33
34
35
36
37
38
39
def __init__(self, config: Config):
    """Bind the service to ``config`` and register its HTTP routes.

    The route prefix is read from the ``router`` option of the
    ``core/fastapi/vllm/qwen3_vl`` section. No engine is created here;
    ``self._pipe`` stays ``None`` until ``start`` assigns one.

    Args:
        config (Config): Configuration object the service reads from.
    """
    self.config = config
    config.set_default_section("core/fastapi/vllm/qwen3_vl")
    prefix = config.getoption("router", "/core/fastapi/vllm/qwen3_vl")
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    # (path, handler, HTTP method) for every endpoint exposed by the service.
    endpoints = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, method in endpoints:
        self._router.add_api_route(path, handler, methods=[method])
    # Held by ``generate`` around the engine call.
    self._lock = asyncio.Lock()

config instance-attribute ¤

config = config

_pipe instance-attribute ¤

_pipe = None

_router instance-attribute ¤

_router = APIRouter(prefix=router)

_lock instance-attribute ¤

_lock = Lock()

router property ¤

router

start ¤

start(pretrained_name: str = 'qwen3-vl-2b-instruct')

Loads and starts the vLLM QWen3-VL multimodal engine.

Parameters:

Name Type Description Default
pretrained_name str

Pretrained model name to load. Defaults to "qwen3-vl-2b-instruct".

'qwen3-vl-2b-instruct'
Source code in src/unitorch/cli/fastapis/qwen_vl_vllm.py
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
def start(self, pretrained_name: str = "qwen3-vl-2b-instruct"):
    """
    Loads and starts the vLLM QWen3-VL multimodal engine.

    The name is written to the ``pretrained_name`` option of the service's
    config section before the engine is built from the config.

    Args:
        pretrained_name (str): Pretrained model name to load. Defaults to ``"qwen3-vl-2b-instruct"``.

    Returns:
        str: The literal ``"start success"``.
    """
    section = "core/fastapi/vllm/qwen3_vl"
    self.config.set_default_section(section)
    self.config.set(section, "pretrained_name", pretrained_name)
    engine = QWen3VLVLLMForGeneration.from_config(
        self.config,
        pretrained_name=pretrained_name,
    )
    self._pipe = engine
    return "start success"

stop ¤

stop()

Stops and unloads the vLLM engine, releasing GPU memory.

Source code in src/unitorch/cli/fastapis/qwen_vl_vllm.py
62
63
64
65
66
67
68
69
70
71
72
73
74
75
def stop(self):
    """
    Stops and unloads the vLLM engine.

    Drops the service's reference to the engine, runs garbage collection and,
    on a best-effort basis, asks torch to empty its CUDA cache.

    Returns:
        str: The literal ``"stop success"``.
    """
    # Rebind instead of ``del`` so the attribute never disappears; ``status``
    # reads ``self._pipe`` and would raise AttributeError in that window.
    self._pipe = None
    gc.collect()
    try:
        import torch

        torch.cuda.empty_cache()
    except Exception:
        # Deliberately best-effort: a missing or failing torch must not turn
        # an otherwise successful stop into an error.
        pass
    return "stop success"

status ¤

status()

Returns "running" if the engine is loaded, otherwise "stopped".

Source code in src/unitorch/cli/fastapis/qwen_vl_vllm.py
77
78
79
def status(self):
    """Reports ``"running"`` while an engine is loaded and ``"stopped"`` otherwise."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate async ¤

generate(
    text: str,
    image: Optional[UploadFile] = File(default=None),
    use_chat_template: Optional[bool] = True,
    max_gen_seq_length: Optional[int] = 512,
    min_gen_seq_length: Optional[int] = 0,
    num_return_sequences: Optional[int] = 1,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
    repetition_penalty: Optional[float] = 1.0,
    stop: Optional[Union[str, List[str]]] = None,
)

Generates a text completion for the given prompt and optional image.

Parameters:

Name Type Description Default
text str

Input prompt or JSON-encoded message list (when use_chat_template=True).

required
image UploadFile

Uploaded image file for multimodal generation.

File(default=None)
use_chat_template bool

Apply chat template formatting. Defaults to True.

True
max_gen_seq_length int

Maximum tokens to generate. Defaults to 512.

512
min_gen_seq_length int

Minimum tokens to generate. Defaults to 0.

0
num_return_sequences int

Number of completions to return. Defaults to 1.

1
do_sample bool

Enable sampling-based decoding. Defaults to False.

False
temperature float

Sampling temperature. Defaults to 1.0.

1.0
top_k int

Top-k sampling. Defaults to 50.

50
top_p float

Top-p (nucleus) sampling. Defaults to 1.0.

1.0
repetition_penalty float

Repetition penalty. Defaults to 1.0.

1.0
stop str or List[str]

Stop string(s).

None

Returns:

Type Description

str or List[str]: Generated text. Single string when num_return_sequences=1.

Source code in src/unitorch/cli/fastapis/qwen_vl_vllm.py
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
async def generate(
    self,
    text: str,
    image: Optional[UploadFile] = File(default=None),
    use_chat_template: Optional[bool] = True,
    max_gen_seq_length: Optional[int] = 512,
    min_gen_seq_length: Optional[int] = 0,
    num_return_sequences: Optional[int] = 1,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
    repetition_penalty: Optional[float] = 1.0,
    stop: Optional[Union[str, List[str]]] = None,
):
    """
    Produces a text completion for a prompt, optionally conditioned on an image.

    Args:
        text (str): Input prompt or JSON-encoded message list (when ``use_chat_template=True``).
        image (UploadFile, optional): Uploaded image file for multimodal generation.
        use_chat_template (bool): Apply chat template formatting. Defaults to True.
        max_gen_seq_length (int): Maximum tokens to generate. Defaults to 512.
        min_gen_seq_length (int): Minimum tokens to generate. Defaults to 0.
        num_return_sequences (int): Number of completions to return. Defaults to 1.
        do_sample (bool): Enable sampling-based decoding. Defaults to False.
        temperature (float): Sampling temperature. Defaults to 1.0.
        top_k (int): Top-k sampling. Defaults to 50.
        top_p (float): Top-p (nucleus) sampling. Defaults to 1.0.
        repetition_penalty (float): Repetition penalty. Defaults to 1.0.
        stop (str or List[str], optional): Stop string(s).

    Returns:
        str or List[str]: Generated text. Single string when ``num_return_sequences=1``.

    Raises:
        AssertionError: If the service has not been started.
    """
    assert self._pipe is not None, "Service not started. Call /start first."

    # Decode the upload, if any, into an RGB PIL image.
    picture = None
    if image is not None:
        raw_bytes = await image.read()
        picture = Image.open(io.BytesIO(raw_bytes)).convert("RGB")
    has_picture = picture is not None

    processor = self._pipe.processor
    if use_chat_template:
        prompt = processor.chat_template(messages=json.loads(text))
    else:
        prompt = text
    inputs = processor.generation_inputs(
        text=prompt,
        images=[picture] if has_picture else [],
    )
    input_ids = inputs.input_ids.unsqueeze(0)
    # Image inputs are only read from the processor output when an image
    # was supplied; otherwise the engine receives None for both.
    if has_picture:
        pixel_values = inputs.pixel_values.unsqueeze(0)
        image_grid_thw = inputs.image_grid_thw
    else:
        pixel_values = None
        image_grid_thw = None

    generation_options = {
        "max_gen_seq_length": max_gen_seq_length,
        "min_gen_seq_length": min_gen_seq_length,
        "num_return_sequences": num_return_sequences,
        "do_sample": do_sample,
        "temperature": temperature,
        "top_k": top_k,
        "top_p": top_p,
        "repetition_penalty": repetition_penalty,
        "stop": stop,
    }
    async with self._lock:
        outputs = self._pipe.generate(
            input_ids=input_ids,
            pixel_values=pixel_values,
            image_grid_thw=image_grid_thw,
            **generation_options,
        )
    decoded = processor.detokenize(sequences=outputs.sequences)
    completions = decoded[0]
    if num_return_sequences == 1:
        return completions[0]
    return completions