unitorch.cli.fastapis¤

InfoFastAPI¤

Tip

core/fastapi/info is the section for configuration of InfoFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/info.py

def __init__(self, config: Config):
    """Set up the info service: config section, device option, router and lock.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/info`` before the ``router`` and ``device``
            options are read.
    """
    self.config = config
    # Plain literal: the previous f-string had no placeholders.
    config.set_default_section("core/fastapi/info")
    prefix = config.getoption("router", "/core/fastapi/info")
    self._device = config.getoption("device", "cpu")
    self._router = APIRouter(prefix=prefix)
    # Only the status endpoint is exposed by this constructor.
    self._router.add_api_route("/status", self.status, methods=["GET"])
    self._lock = asyncio.Lock()

config `instance-attribute` ¤

config = config

_device `instance-attribute` ¤

_device = getoption('device', 'cpu')

_router `instance-attribute` ¤

_router = APIRouter(prefix=router)

_lock `instance-attribute` ¤

_lock = Lock()

router `property` ¤

router

start ¤

start()

Source code in src/unitorch/cli/fastapis/info.py

def start(self):
    """Return the fixed acknowledgement string; no other work is done."""
    message = "start success"
    return message

stop ¤

stop()

Source code in src/unitorch/cli/fastapis/info.py

def stop(self):
    """Return the fixed acknowledgement string; no other work is done."""
    message = "stop success"
    return message

status ¤

status()

Source code in src/unitorch/cli/fastapis/info.py

def status(self):
    """Report host memory and, for non-CPU devices, CUDA memory in GiB.

    Returns:
        A dict with a ``"cpu"`` entry and, when ``self._device`` is not
        ``"cpu"``, CUDA entries: a single ``"cuda"`` entry when
        ``self._device`` is one device, or one ``"cuda:<device>"`` entry per
        item when it is a list. Every entry maps ``"total"``, ``"free"`` and
        ``"used"`` to a size in GiB.
    """
    gib = 1024**3

    def cuda_usage(device):
        # mem_get_info is unpacked as (free, total), both converted to GiB.
        free_bytes, total_bytes = torch.cuda.mem_get_info(device)
        total = total_bytes / gib
        free = free_bytes / gib
        return {"total": total, "free": free, "used": total - free}

    mem_info = psutil.virtual_memory()
    stats = {
        "cpu": {
            "total": mem_info.total / gib,
            # "free" is psutil's ``available`` field (position 1 of the tuple).
            "free": mem_info.available / gib,
            "used": mem_info.used / gib,
        }
    }
    if self._device == "cpu":
        return stats

    if isinstance(self._device, list):
        for device in self._device:
            stats[f"cuda:{device}"] = cuda_usage(device)
    else:
        stats["cuda"] = cuda_usage(self._device)
    return stats

BRIAFastAPI¤

Tip

core/fastapi/bria is the section for configuration of BRIAFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/bria.py

def __init__(self, config: Config):
    """Set up the BRIA service: config section, router, routes and lock.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/bria`` before the ``router`` option is read.
    """
    self.config = config
    # Plain literal: the previous f-string had no placeholders.
    config.set_default_section("core/fastapi/bria")
    prefix = config.getoption("router", "/core/fastapi/bria")
    # No pipeline is loaded until ``start`` assigns one.
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    routes = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, endpoint, verb in routes:
        self._router.add_api_route(path, endpoint, methods=[verb])
    self._lock = asyncio.Lock()

config `instance-attribute` ¤

config = config

_pipe `instance-attribute` ¤

_pipe = None

_router `instance-attribute` ¤

_router = APIRouter(prefix=router)

_lock `instance-attribute` ¤

_lock = Lock()

router `property` ¤

router

start ¤

start()

Source code in src/unitorch/cli/fastapis/bria.py

def start(self):
    """Build the BRIA segmentation pipeline from config and store it.

    Returns:
        The literal string ``"start success"``.
    """
    weight_url = "https://huggingface.co/datasets/fuliucansheng/hubfiles/resolve/main/bria_rmbg2.0_pytorch_model.bin"
    pipeline = BRIAForSegmentationPipeline.from_config(
        self.config, pretrained_weight_path=weight_url
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()

Source code in src/unitorch/cli/fastapis/bria.py

def stop(self):
    """Unload the pipeline and release cached CUDA memory.

    Safe to call when nothing is loaded: the move to CPU is skipped when
    ``self._pipe`` is ``None`` instead of failing on ``None.to``.

    Returns:
        The literal string ``"stop success"``.
    """
    pipe = self._pipe
    if pipe is not None:
        pipe.to("cpu")
    # Drop both references before collecting so the pipeline can be freed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()

Source code in src/unitorch/cli/fastapis/bria.py

def status(self):
    """Return ``"running"`` when a pipeline is loaded, otherwise ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate `async` ¤

generate(image: UploadFile, threshold: float = 0.5)

Source code in src/unitorch/cli/fastapis/bria.py

async def generate(
    self,
    image: UploadFile,
    threshold: float = 0.5,
):
    """Run the loaded pipeline on an uploaded image and stream back a PNG.

    Args:
        image: Uploaded file whose bytes are decoded with ``Image.open``.
        threshold: Forwarded to the pipeline as ``threshold``.

    Returns:
        A ``StreamingResponse`` with media type ``image/png`` carrying the
        pipeline output saved as PNG.

    Raises:
        AssertionError: If ``self._pipe`` is ``None`` (no pipeline loaded).
    """
    # Explicit raise instead of ``assert`` so the guard survives ``python -O``.
    if self._pipe is None:
        raise AssertionError("pipeline is not started")
    payload = await image.read()
    picture = Image.open(io.BytesIO(payload))
    # One inference at a time; the pipeline call itself is not awaited.
    async with self._lock:
        mask = self._pipe(picture, threshold=threshold)

    buffer = io.BytesIO()
    mask.save(buffer, format="PNG")
    # Rewind and stream the same buffer rather than copying its contents.
    buffer.seek(0)
    return StreamingResponse(buffer, media_type="image/png")

ClipForClassificationFastAPI¤

Tip

core/fastapi/clip is the section for configuration of ClipForClassificationFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/clip.py

def __init__(self, config: Config):
    """Set up the CLIP classification service: section, router, routes, lock.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/clip`` before the ``router`` option is read.
    """
    config.set_default_section("core/fastapi/clip")
    self.config = config
    # No pipeline is loaded until ``start`` assigns one.
    self._pipe = None
    self._lock = asyncio.Lock()
    prefix = config.getoption("router", "/core/fastapi/clip")
    self._router = APIRouter(prefix=prefix)
    routes = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, endpoint, verb in routes:
        self._router.add_api_route(path, endpoint, methods=[verb])

config `instance-attribute` ¤

config = config

_pipe `instance-attribute` ¤

_pipe = None

_router `instance-attribute` ¤

_router = APIRouter(prefix=router)

_lock `instance-attribute` ¤

_lock = Lock()

router `property` ¤

router

start ¤

start(pretrained_name: str = 'clip-vit-base-patch16')

Source code in src/unitorch/cli/fastapis/clip.py

def start(self, pretrained_name: str = "clip-vit-base-patch16"):
    """Build the CLIP classification pipeline from config and store it.

    Args:
        pretrained_name: Forwarded to ``from_config`` as ``pretrained_name``.

    Returns:
        The literal string ``"start success"``.
    """
    pipeline = ClipForClassificationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()

Source code in src/unitorch/cli/fastapis/clip.py

def stop(self):
    """Unload the pipeline and release cached CUDA memory.

    Safe to call when nothing is loaded: the move to CPU is skipped when
    ``self._pipe`` is ``None`` instead of failing on ``None.to``.

    Returns:
        The literal string ``"stop success"``.
    """
    pipe = self._pipe
    if pipe is not None:
        pipe.to("cpu")
    # Drop both references before collecting so the pipeline can be freed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()

Source code in src/unitorch/cli/fastapis/clip.py

def status(self):
    """Return ``"running"`` when a pipeline is loaded, otherwise ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate `async` ¤

generate(
    text: str,
    image: UploadFile,
    max_seq_length: Optional[int] = 512,
)

Source code in src/unitorch/cli/fastapis/clip.py

async def generate(
    self,
    text: str,
    image: UploadFile,
    max_seq_length: Optional[int] = 512,
):
    """Run the loaded pipeline on a text and an uploaded image.

    Args:
        text: Passed to the pipeline as the first positional argument.
        image: Uploaded file whose bytes are decoded with ``Image.open``.
        max_seq_length: Forwarded to the pipeline as ``max_seq_length``.

    Returns:
        Whatever the pipeline call returns, unchanged.

    Raises:
        AssertionError: If ``self._pipe`` is ``None`` (no pipeline loaded).
    """
    # Explicit raise instead of ``assert`` so the guard survives ``python -O``.
    if self._pipe is None:
        raise AssertionError("pipeline is not started")
    payload = await image.read()
    picture = Image.open(io.BytesIO(payload))
    # One inference at a time; the pipeline call itself is not awaited.
    async with self._lock:
        return self._pipe(
            text,
            picture,
            max_seq_length=max_seq_length,
        )

ClipForTextClassificationFastAPI¤

Tip

core/fastapi/clip/text is the section for configuration of ClipForTextClassificationFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/clip.py

def __init__(self, config: Config):
    """Set up the CLIP text service: section, router, routes and lock.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/clip/text`` before the ``router`` option is read.
    """
    config.set_default_section("core/fastapi/clip/text")
    self.config = config
    # No pipeline is loaded until ``start`` assigns one.
    self._pipe = None
    self._lock = asyncio.Lock()
    prefix = config.getoption("router", "/core/fastapi/clip/text")
    self._router = APIRouter(prefix=prefix)
    routes = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, endpoint, verb in routes:
        self._router.add_api_route(path, endpoint, methods=[verb])

config `instance-attribute` ¤

config = config

_pipe `instance-attribute` ¤

_pipe = None

_router `instance-attribute` ¤

_router = APIRouter(prefix=router)

_lock `instance-attribute` ¤

_lock = Lock()

router `property` ¤

router

start ¤

start(pretrained_name: str = 'clip-vit-base-patch16')

Source code in src/unitorch/cli/fastapis/clip.py

def start(self, pretrained_name: str = "clip-vit-base-patch16"):
    """Build the CLIP text classification pipeline from config and store it.

    Args:
        pretrained_name: Forwarded to ``from_config`` as ``pretrained_name``.

    Returns:
        The literal string ``"start success"``.
    """
    pipeline = ClipForTextClassificationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()

Source code in src/unitorch/cli/fastapis/clip.py

def stop(self):
    """Unload the pipeline and release cached CUDA memory.

    Safe to call when nothing is loaded: the move to CPU is skipped when
    ``self._pipe`` is ``None`` instead of failing on ``None.to``.

    Returns:
        The literal string ``"stop success"``.
    """
    pipe = self._pipe
    if pipe is not None:
        pipe.to("cpu")
    # Drop both references before collecting so the pipeline can be freed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()

Source code in src/unitorch/cli/fastapis/clip.py

def status(self):
    """Return ``"running"`` when a pipeline is loaded, otherwise ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate `async` ¤

generate(text: str, max_seq_length: Optional[int] = 512)

Source code in src/unitorch/cli/fastapis/clip.py

async def generate(
    self,
    text: str,
    max_seq_length: Optional[int] = 512,
):
    """Run the loaded pipeline on a text.

    Args:
        text: Passed to the pipeline as the first positional argument.
        max_seq_length: Forwarded to the pipeline as ``max_seq_length``.

    Returns:
        Whatever the pipeline call returns, unchanged.

    Raises:
        AssertionError: If ``self._pipe`` is ``None`` (no pipeline loaded).
    """
    # Explicit raise instead of ``assert`` so the guard survives ``python -O``.
    if self._pipe is None:
        raise AssertionError("pipeline is not started")
    # One inference at a time; the pipeline call itself is not awaited.
    async with self._lock:
        return self._pipe(
            text,
            max_seq_length=max_seq_length,
        )

ClipForImageClassificationFastAPI¤

Tip

core/fastapi/clip/image is the section for configuration of ClipForImageClassificationFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/clip.py

def __init__(self, config: Config):
    """Set up the CLIP image service: section, router, routes and lock.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/clip/image`` before the ``router`` option is read.
    """
    config.set_default_section("core/fastapi/clip/image")
    self.config = config
    # No pipeline is loaded until ``start`` assigns one.
    self._pipe = None
    self._lock = asyncio.Lock()
    prefix = config.getoption("router", "/core/fastapi/clip/image")
    self._router = APIRouter(prefix=prefix)
    routes = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, endpoint, verb in routes:
        self._router.add_api_route(path, endpoint, methods=[verb])

config `instance-attribute` ¤

config = config

_pipe `instance-attribute` ¤

_pipe = None

_router `instance-attribute` ¤

_router = APIRouter(prefix=router)

_lock `instance-attribute` ¤

_lock = Lock()

router `property` ¤

router

start ¤

start(pretrained_name: str = 'clip-vit-base-patch16')

Source code in src/unitorch/cli/fastapis/clip.py

def start(self, pretrained_name: str = "clip-vit-base-patch16"):
    """Build the CLIP image classification pipeline from config and store it.

    Args:
        pretrained_name: Forwarded to ``from_config`` as ``pretrained_name``.

    Returns:
        The literal string ``"start success"``.
    """
    pipeline = ClipForImageClassificationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()

Source code in src/unitorch/cli/fastapis/clip.py

def stop(self):
    """Unload the pipeline and release cached CUDA memory.

    Safe to call when nothing is loaded: the move to CPU is skipped when
    ``self._pipe`` is ``None`` instead of failing on ``None.to``.

    Returns:
        The literal string ``"stop success"``.
    """
    pipe = self._pipe
    if pipe is not None:
        pipe.to("cpu")
    # Drop both references before collecting so the pipeline can be freed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()

Source code in src/unitorch/cli/fastapis/clip.py

def status(self):
    """Return ``"running"`` when a pipeline is loaded, otherwise ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate `async` ¤

generate(image: UploadFile)

Source code in src/unitorch/cli/fastapis/clip.py

async def generate(
    self,
    image: UploadFile,
):
    """Run the loaded pipeline on an uploaded image.

    Args:
        image: Uploaded file whose bytes are decoded with ``Image.open``.

    Returns:
        Whatever the pipeline call returns, unchanged.

    Raises:
        AssertionError: If ``self._pipe`` is ``None`` (no pipeline loaded).
    """
    # Explicit raise instead of ``assert`` so the guard survives ``python -O``.
    if self._pipe is None:
        raise AssertionError("pipeline is not started")
    payload = await image.read()
    picture = Image.open(io.BytesIO(payload))
    # One inference at a time; the pipeline call itself is not awaited.
    async with self._lock:
        return self._pipe(picture)

ClipForMatchingFastAPI¤

Tip

core/fastapi/clip/matching is the section for configuration of ClipForMatchingFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/clip.py

def __init__(self, config: Config):
    """Set up the CLIP matching service: section, router, routes and lock.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/clip/matching`` before the ``router`` option is
            read.
    """
    config.set_default_section("core/fastapi/clip/matching")
    self.config = config
    # No pipeline is loaded until ``start`` assigns one.
    self._pipe = None
    self._lock = asyncio.Lock()
    prefix = config.getoption("router", "/core/fastapi/clip/matching")
    self._router = APIRouter(prefix=prefix)
    routes = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, endpoint, verb in routes:
        self._router.add_api_route(path, endpoint, methods=[verb])

config `instance-attribute` ¤

config = config

_pipe `instance-attribute` ¤

_pipe = None

_router `instance-attribute` ¤

_router = APIRouter(prefix=router)

_lock `instance-attribute` ¤

_lock = Lock()

router `property` ¤

router

start ¤

start(pretrained_name: str = 'clip-vit-base-patch16')

Source code in src/unitorch/cli/fastapis/clip.py

def start(self, pretrained_name: str = "clip-vit-base-patch16"):
    """Build the CLIP matching pipeline from config and store it.

    Args:
        pretrained_name: Forwarded to ``from_config`` as ``pretrained_name``.

    Returns:
        The literal string ``"start success"``.
    """
    pipeline = ClipForMatchingPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()

Source code in src/unitorch/cli/fastapis/clip.py

def stop(self):
    """Unload the pipeline and release cached CUDA memory.

    Safe to call when nothing is loaded: the move to CPU is skipped when
    ``self._pipe`` is ``None`` instead of failing on ``None.to``.

    Returns:
        The literal string ``"stop success"``.
    """
    pipe = self._pipe
    if pipe is not None:
        pipe.to("cpu")
    # Drop both references before collecting so the pipeline can be freed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()

Source code in src/unitorch/cli/fastapis/clip.py

def status(self):
    """Return ``"running"`` when a pipeline is loaded, otherwise ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate `async` ¤

generate(
    text: str,
    image: UploadFile,
    max_seq_length: Optional[int] = 77,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
)

Source code in src/unitorch/cli/fastapis/clip.py

async def generate(
    self,
    text: str,
    image: UploadFile,
    max_seq_length: Optional[int] = 77,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
):
    """Run the loaded pipeline on a text and an uploaded image.

    Args:
        text: Passed to the pipeline as the first positional argument.
        image: Uploaded file whose bytes are decoded with ``Image.open``.
        max_seq_length: Forwarded to the pipeline as ``max_seq_length``.
        lora_checkpoints: Forwarded to the pipeline unchanged.
        lora_weights: Forwarded to the pipeline unchanged.
        lora_alphas: Forwarded to the pipeline unchanged.
        lora_urls: Forwarded to the pipeline unchanged.
        lora_files: Forwarded to the pipeline unchanged.

    Returns:
        Whatever the pipeline call returns, unchanged.

    Raises:
        AssertionError: If ``self._pipe`` is ``None`` (no pipeline loaded).
    """
    # The list defaults are kept as-is for interface compatibility; this
    # method only forwards them and never mutates them.
    # Explicit raise instead of ``assert`` so the guard survives ``python -O``.
    if self._pipe is None:
        raise AssertionError("pipeline is not started")
    payload = await image.read()
    picture = Image.open(io.BytesIO(payload))
    # One inference at a time; the pipeline call itself is not awaited.
    async with self._lock:
        return self._pipe(
            text,
            picture,
            max_seq_length=max_seq_length,
            lora_checkpoints=lora_checkpoints,
            lora_weights=lora_weights,
            lora_alphas=lora_alphas,
            lora_urls=lora_urls,
            lora_files=lora_files,
        )

DetrForDetectionFastAPI¤

Tip

core/fastapi/detr is the section for configuration of DetrForDetectionFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/detr.py

def __init__(self, config: Config):
    """Set up the DETR detection service: section, router, routes and lock.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/detr`` before the ``router`` option is read.
    """
    config.set_default_section("core/fastapi/detr")
    self.config = config
    # No pipeline is loaded until ``start`` assigns one.
    self._pipe = None
    self._lock = asyncio.Lock()
    prefix = config.getoption("router", "/core/fastapi/detr")
    self._router = APIRouter(prefix=prefix)
    routes = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, endpoint, verb in routes:
        self._router.add_api_route(path, endpoint, methods=[verb])

config `instance-attribute` ¤

config = config

_pipe `instance-attribute` ¤

_pipe = None

_router `instance-attribute` ¤

_router = APIRouter(prefix=router)

_lock `instance-attribute` ¤

_lock = Lock()

router `property` ¤

router

start ¤

start(pretrained_name: Optional[str] = 'detr-resnet-50')

Source code in src/unitorch/cli/fastapis/detr.py

def start(self, pretrained_name: Optional[str] = "detr-resnet-50"):
    """Build the DETR detection pipeline from config and store it.

    Args:
        pretrained_name: Forwarded to ``from_config`` as ``pretrained_name``.

    Returns:
        The literal string ``"start success"``.
    """
    pipeline = DetrForDetectionPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()

Source code in src/unitorch/cli/fastapis/detr.py

def stop(self):
    """Unload the pipeline and release cached CUDA memory.

    Safe to call when nothing is loaded: the move to CPU is skipped when
    ``self._pipe`` is ``None`` instead of failing on ``None.to``.

    Returns:
        The literal string ``"stop success"``.
    """
    pipe = self._pipe
    if pipe is not None:
        pipe.to("cpu")
    # Drop both references before collecting so the pipeline can be freed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()

Source code in src/unitorch/cli/fastapis/detr.py

def status(self):
    """Return ``"running"`` when a pipeline is loaded, otherwise ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate `async` ¤

generate(image: UploadFile, threshold: float = 0.5)

Source code in src/unitorch/cli/fastapis/detr.py

async def generate(
    self,
    image: UploadFile,
    threshold: float = 0.5,
):
    """Run the loaded pipeline on an uploaded image and stream back a PNG.

    Args:
        image: Uploaded file whose bytes are decoded with ``Image.open``.
        threshold: Forwarded to the pipeline as ``threshold``.

    Returns:
        A ``StreamingResponse`` with media type ``image/png`` carrying the
        pipeline output saved as PNG.

    Raises:
        AssertionError: If ``self._pipe`` is ``None`` (no pipeline loaded).
    """
    # Explicit raise instead of ``assert`` so the guard survives ``python -O``.
    if self._pipe is None:
        raise AssertionError("pipeline is not started")
    payload = await image.read()
    picture = Image.open(io.BytesIO(payload))
    # One inference at a time; the pipeline call itself is not awaited.
    async with self._lock:
        result_image = self._pipe(picture, threshold=threshold)

    buffer = io.BytesIO()
    result_image.save(buffer, format="PNG")
    # Rewind and stream the same buffer rather than copying its contents.
    buffer.seek(0)
    return StreamingResponse(buffer, media_type="image/png")

DPTForDepthEstimationFastAPI¤

Tip

core/fastapi/dpt is the section for configuration of DPTForDepthEstimationFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/dpt.py

def __init__(self, config: Config):
    """Set up the DPT depth service: section, router, routes and lock.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/dpt`` before the ``router`` option is read.
    """
    config.set_default_section("core/fastapi/dpt")
    self.config = config
    # No pipeline is loaded until ``start`` assigns one.
    self._pipe = None
    self._lock = asyncio.Lock()
    prefix = config.getoption("router", "/core/fastapi/dpt")
    self._router = APIRouter(prefix=prefix)
    routes = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, endpoint, verb in routes:
        self._router.add_api_route(path, endpoint, methods=[verb])

config `instance-attribute` ¤

config = config

_pipe `instance-attribute` ¤

_pipe = None

_router `instance-attribute` ¤

_router = APIRouter(prefix=router)

_lock `instance-attribute` ¤

_lock = Lock()

router `property` ¤

router

start ¤

start(pretrained_name: Optional[str] = 'dpt-large')

Source code in src/unitorch/cli/fastapis/dpt.py

def start(self, pretrained_name: Optional[str] = "dpt-large"):
    """Build the DPT depth estimation pipeline from config and store it.

    Args:
        pretrained_name: Forwarded to ``from_config`` as ``pretrained_name``.

    Returns:
        The literal string ``"start success"``.
    """
    pipeline = DPTForDepthEstimationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()

Source code in src/unitorch/cli/fastapis/dpt.py

def stop(self):
    """Unload the pipeline and release cached CUDA memory.

    Safe to call when nothing is loaded: the move to CPU is skipped when
    ``self._pipe`` is ``None`` instead of failing on ``None.to``.

    Returns:
        The literal string ``"stop success"``.
    """
    pipe = self._pipe
    if pipe is not None:
        pipe.to("cpu")
    # Drop both references before collecting so the pipeline can be freed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()

Source code in src/unitorch/cli/fastapis/dpt.py

def status(self):
    """Return ``"running"`` when a pipeline is loaded, otherwise ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate `async` ¤

generate(image: UploadFile)

Source code in src/unitorch/cli/fastapis/dpt.py

async def generate(
    self,
    image: UploadFile,
):
    """Run the loaded pipeline on an uploaded image and stream back a PNG.

    Args:
        image: Uploaded file whose bytes are decoded with ``Image.open``.

    Returns:
        A ``StreamingResponse`` with media type ``image/png`` carrying the
        pipeline output saved as PNG.

    Raises:
        AssertionError: If ``self._pipe`` is ``None`` (no pipeline loaded).
    """
    # Explicit raise instead of ``assert`` so the guard survives ``python -O``.
    if self._pipe is None:
        raise AssertionError("pipeline is not started")
    payload = await image.read()
    picture = Image.open(io.BytesIO(payload))
    # One inference at a time; the pipeline call itself is not awaited.
    async with self._lock:
        result_image = self._pipe(picture)

    buffer = io.BytesIO()
    result_image.save(buffer, format="PNG")
    # Rewind and stream the same buffer rather than copying its contents.
    buffer.seek(0)
    return StreamingResponse(buffer, media_type="image/png")

GroundingDinoForDetectionFastAPI¤

Tip

core/fastapi/grounding_dino is the section for configuration of GroundingDinoForDetectionFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/grounding_dino.py

def __init__(self, config: Config):
    """Set up the Grounding DINO service: section, router, routes and lock.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/grounding_dino`` before the ``router`` option is
            read.
    """
    config.set_default_section("core/fastapi/grounding_dino")
    self.config = config
    # No pipeline is loaded until ``start`` assigns one.
    self._pipe = None
    self._lock = asyncio.Lock()
    prefix = config.getoption("router", "/core/fastapi/grounding_dino")
    self._router = APIRouter(prefix=prefix)
    routes = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, endpoint, verb in routes:
        self._router.add_api_route(path, endpoint, methods=[verb])

config `instance-attribute` ¤

config = config

_pipe `instance-attribute` ¤

_pipe = None

_router `instance-attribute` ¤

_router = APIRouter(prefix=router)

_lock `instance-attribute` ¤

_lock = Lock()

router `property` ¤

router

start ¤

start(
    pretrained_name: Optional[str] = "grounding-dino-tiny",
)

Source code in src/unitorch/cli/fastapis/grounding_dino.py

def start(self, pretrained_name: Optional[str] = "grounding-dino-tiny"):
    """Build the Grounding DINO detection pipeline from config and store it.

    Args:
        pretrained_name: Forwarded to ``from_config`` as ``pretrained_name``.

    Returns:
        The literal string ``"start success"``.
    """
    pipeline = GroundingDinoForDetectionPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()

Source code in src/unitorch/cli/fastapis/grounding_dino.py

def stop(self):
    """Unload the pipeline and release cached CUDA memory.

    Safe to call when nothing is loaded: the move to CPU is skipped when
    ``self._pipe`` is ``None`` instead of failing on ``None.to``.

    Returns:
        The literal string ``"stop success"``.
    """
    pipe = self._pipe
    if pipe is not None:
        pipe.to("cpu")
    # Drop both references before collecting so the pipeline can be freed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()

Source code in src/unitorch/cli/fastapis/grounding_dino.py

def status(self):
    """Return ``"running"`` when a pipeline is loaded, otherwise ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate `async` ¤

generate(
    text: str,
    image: UploadFile,
    text_threshold: float = 0.25,
    box_threshold: float = 0.25,
)

Source code in src/unitorch/cli/fastapis/grounding_dino.py

async def generate(
    self,
    text: str,
    image: UploadFile,
    text_threshold: float = 0.25,
    box_threshold: float = 0.25,
):
    """Run the loaded pipeline on a text and image and stream back a PNG.

    Args:
        text: Passed to the pipeline as the first positional argument.
        image: Uploaded file whose bytes are decoded with ``Image.open``.
        text_threshold: Forwarded to the pipeline as ``text_threshold``.
        box_threshold: Forwarded to the pipeline as ``box_threshold``.

    Returns:
        A ``StreamingResponse`` with media type ``image/png`` carrying the
        pipeline output saved as PNG.

    Raises:
        AssertionError: If ``self._pipe`` is ``None`` (no pipeline loaded).
    """
    # Explicit raise instead of ``assert`` so the guard survives ``python -O``.
    if self._pipe is None:
        raise AssertionError("pipeline is not started")
    payload = await image.read()
    picture = Image.open(io.BytesIO(payload))
    # One inference at a time; the pipeline call itself is not awaited.
    async with self._lock:
        result_image = self._pipe(
            text,
            picture,
            text_threshold=text_threshold,
            box_threshold=box_threshold,
        )

    buffer = io.BytesIO()
    result_image.save(buffer, format="PNG")
    # Rewind and stream the same buffer rather than copying its contents.
    buffer.seek(0)
    return StreamingResponse(buffer, media_type="image/png")

LlamaForGenerationFastAPI¤

Tip

core/fastapi/llama is the section for configuration of LlamaForGenerationFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/llama.py

def __init__(self, config: Config):
    """Set up the Llama generation service: section, router, routes, lock.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/llama`` before the ``router`` option is read.
    """
    config.set_default_section("core/fastapi/llama")
    self.config = config
    # No pipeline is loaded until ``start`` assigns one.
    self._pipe = None
    self._lock = asyncio.Lock()
    prefix = config.getoption("router", "/core/fastapi/llama")
    self._router = APIRouter(prefix=prefix)
    routes = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, endpoint, verb in routes:
        self._router.add_api_route(path, endpoint, methods=[verb])

config `instance-attribute` ¤

config = config

_pipe `instance-attribute` ¤

_pipe = None

_router `instance-attribute` ¤

_router = APIRouter(prefix=router)

_lock `instance-attribute` ¤

_lock = Lock()

router `property` ¤

router

start ¤

start(pretrained_name: str = 'llama-3.2-1b-instruct')

Source code in src/unitorch/cli/fastapis/llama.py

def start(self, pretrained_name: str = "llama-3.2-1b-instruct"):
    """Build the Llama generation pipeline from config and store it.

    Args:
        pretrained_name: Forwarded to ``from_config`` as ``pretrained_name``.

    Returns:
        The literal string ``"start success"``.
    """
    pipeline = LlamaForGenerationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()

Source code in src/unitorch/cli/fastapis/llama.py

def stop(self):
    """Unload the pipeline and release cached CUDA memory.

    Safe to call when nothing is loaded: the move to CPU is skipped when
    ``self._pipe`` is ``None`` instead of failing on ``None.to``.

    Returns:
        The literal string ``"stop success"``.
    """
    pipe = self._pipe
    if pipe is not None:
        pipe.to("cpu")
    # Drop both references before collecting so the pipeline can be freed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()

Source code in src/unitorch/cli/fastapis/llama.py

def status(self):
    """Return ``"running"`` when a pipeline is loaded, otherwise ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate `async` ¤

generate(
    prompt: str,
    max_seq_length: Optional[int] = 512,
    num_beams: Optional[int] = 2,
    decoder_start_token_id: Optional[int] = 1,
    decoder_end_token_id: Optional[
        Union[int, List[int]]
    ] = [2],
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
)

Source code in src/unitorch/cli/fastapis/llama.py

async def generate(
    self,
    prompt: str,
    max_seq_length: Optional[int] = 512,
    num_beams: Optional[int] = 2,
    decoder_start_token_id: Optional[int] = 1,
    decoder_end_token_id: Optional[Union[int, List[int]]] = [2],
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
):
    """Run the loaded generation pipeline on a prompt.

    Args:
        prompt: Passed to the pipeline as the first positional argument.

    Every other parameter is forwarded to the pipeline as a keyword argument
    of the same name, with its value unchanged.

    Returns:
        Whatever the pipeline call returns, unchanged.

    Raises:
        AssertionError: If ``self._pipe`` is ``None`` (no pipeline loaded).
    """
    # The list defaults are kept as-is for interface compatibility; this
    # method only forwards them and never mutates them.
    # Explicit raise instead of ``assert`` so the guard survives ``python -O``.
    if self._pipe is None:
        raise AssertionError("pipeline is not started")
    # One inference at a time; the pipeline call itself is not awaited.
    async with self._lock:
        return self._pipe(
            prompt,
            max_seq_length=max_seq_length,
            num_beams=num_beams,
            decoder_start_token_id=decoder_start_token_id,
            decoder_end_token_id=decoder_end_token_id,
            num_return_sequences=num_return_sequences,
            min_gen_seq_length=min_gen_seq_length,
            max_gen_seq_length=max_gen_seq_length,
            repetition_penalty=repetition_penalty,
            no_repeat_ngram_size=no_repeat_ngram_size,
            early_stopping=early_stopping,
            length_penalty=length_penalty,
            num_beam_groups=num_beam_groups,
            diversity_penalty=diversity_penalty,
            do_sample=do_sample,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            lora_checkpoints=lora_checkpoints,
            lora_weights=lora_weights,
            lora_alphas=lora_alphas,
            lora_urls=lora_urls,
            lora_files=lora_files,
        )

LlavaMistralClipFastAPI¤

Tip

core/fastapi/llava/mistral_clip is the section for configuration of LlavaMistralClipFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/llava.py

def __init__(self, config: Config):
    """Set up the LLaVA Mistral/CLIP service: section, router, routes, lock.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/llava/mistral_clip`` before the ``router`` option
            is read.
    """
    self.config = config
    # Plain literal: the previous f-string had no placeholders.
    config.set_default_section("core/fastapi/llava/mistral_clip")
    prefix = config.getoption("router", "/core/fastapi/llava/mistral_clip")
    # No pipeline is loaded until ``start`` assigns one.
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    routes = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, endpoint, verb in routes:
        self._router.add_api_route(path, endpoint, methods=[verb])
    self._lock = asyncio.Lock()

config `instance-attribute` ¤

config = config

_pipe `instance-attribute` ¤

_pipe = None

_router `instance-attribute` ¤

_router = APIRouter(prefix=router)

_lock `instance-attribute` ¤

_lock = Lock()

router `property` ¤

router

start ¤

start()

Source code in src/unitorch/cli/fastapis/llava.py

def start(self):
    """Build the LLaVA Mistral/CLIP generation pipeline and store it.

    The pretrained name is fixed to ``"llava-v1.6-mistral-7b-hf"``.

    Returns:
        The literal string ``"start success"``.
    """
    pipeline = LlavaMistralClipForGenerationPipeline.from_config(
        self.config, pretrained_name="llava-v1.6-mistral-7b-hf"
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()

Source code in src/unitorch/cli/fastapis/llava.py

def stop(self):
    """Unload the pipeline and release cached CUDA memory.

    Safe to call when nothing is loaded: the move to CPU is skipped when
    ``self._pipe`` is ``None`` instead of failing on ``None.to``.

    Returns:
        The literal string ``"stop success"``.
    """
    pipe = self._pipe
    if pipe is not None:
        pipe.to("cpu")
    # Drop both references before collecting so the pipeline can be freed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()

Source code in src/unitorch/cli/fastapis/llava.py

def status(self):
    """Return ``"running"`` when a pipeline is loaded, otherwise ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate `async` ¤

generate(text: str, image: UploadFile)

Source code in src/unitorch/cli/fastapis/llava.py

async def generate(
    self,
    text: str,
    image: UploadFile,
):
    """Run the loaded pipeline on an instruction text and an uploaded image.

    Args:
        text: User text, wrapped as ``[INST] <image>\\n {text} [/INST]``
            before being passed to the pipeline.
        image: Uploaded file whose bytes are decoded with ``Image.open``.

    Returns:
        Whatever the pipeline call returns, unchanged.

    Raises:
        AssertionError: If ``self._pipe`` is ``None`` (no pipeline loaded).
    """
    # Explicit raise instead of ``assert`` so the guard survives ``python -O``.
    if self._pipe is None:
        raise AssertionError("pipeline is not started")
    payload = await image.read()
    picture = Image.open(io.BytesIO(payload))
    prompt = f"[INST] <image>\n {text} [/INST]"
    # One inference at a time; the pipeline call itself is not awaited.
    async with self._lock:
        # All LoRA arguments are passed as empty lists by this endpoint.
        return self._pipe(
            prompt,
            picture,
            lora_checkpoints=[],
            lora_weights=[],
            lora_alphas=[],
            lora_urls=[],
            lora_files=[],
        )

LlavaLlamaSiglipFastAPI¤

Tip

core/fastapi/llava/joycaption2 is the section for configuration of LlavaLlamaSiglipFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/llava.py

def __init__(self, config: Config):
    """Set up the LLaVA JoyCaption service: section, router, routes, lock.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/llava/joycaption2`` before the ``router`` option
            is read.
    """
    self.config = config
    # Plain literal: the previous f-string had no placeholders.
    config.set_default_section("core/fastapi/llava/joycaption2")
    prefix = config.getoption("router", "/core/fastapi/llava/joycaption2")
    # No pipeline is loaded until ``start`` assigns one.
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    routes = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, endpoint, verb in routes:
        self._router.add_api_route(path, endpoint, methods=[verb])
    self._lock = asyncio.Lock()

config `instance-attribute` ¤

config = config

_pipe `instance-attribute` ¤

_pipe = None

_router `instance-attribute` ¤

_router = APIRouter(prefix=router)

_lock `instance-attribute` ¤

_lock = Lock()

router `property` ¤

router

start ¤

start()

Source code in src/unitorch/cli/fastapis/llava.py

def start(self):
    """Build the LLaVA Llama/SigLIP generation pipeline and store it.

    The pretrained name is fixed to ``"llava-v1.6-joycaption-2"``.

    Returns:
        The literal string ``"start success"``.
    """
    pipeline = LlavaLlamaSiglipForGenerationPipeline.from_config(
        self.config, pretrained_name="llava-v1.6-joycaption-2"
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()

Source code in src/unitorch/cli/fastapis/llava.py

def stop(self):
    """Unload the pipeline and release cached CUDA memory.

    Safe to call when nothing is loaded: the move to CPU is skipped when
    ``self._pipe`` is ``None`` instead of failing on ``None.to``.

    Returns:
        The literal string ``"stop success"``.
    """
    pipe = self._pipe
    if pipe is not None:
        pipe.to("cpu")
    # Drop both references before collecting so the pipeline can be freed.
    self._pipe = None
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()

Source code in src/unitorch/cli/fastapis/llava.py

def status(self):
    """Return ``"running"`` when a pipeline is loaded, otherwise ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate `async` ¤

generate(text: str, image: UploadFile)

Source code in src/unitorch/cli/fastapis/llava.py

async def generate(
    self,
    text: str,
    image: UploadFile,
):
    """Run the loaded captioning pipeline on a text and an uploaded image.

    Args:
        text: User text, embedded in the user turn of the chat-style prompt
            built below.
        image: Uploaded file whose bytes are decoded with ``Image.open``.

    Returns:
        Whatever the pipeline call returns, unchanged.

    Raises:
        AssertionError: If ``self._pipe`` is ``None`` (no pipeline loaded).
    """
    # Explicit raise instead of ``assert`` so the guard survives ``python -O``.
    if self._pipe is None:
        raise AssertionError("pipeline is not started")
    payload = await image.read()
    picture = Image.open(io.BytesIO(payload))
    # Fix: the user turn now ends with a well-formed ``<|eot_id|>`` token; the
    # previous prompt emitted ``|eot_id|>`` with the opening ``<`` missing.
    # NOTE(review): ``\\n`` yields a literal backslash + "n", not a newline.
    # Left unchanged here; confirm whether real newlines were intended.
    prompt = (
        "<|start_header_id|>system<|end_header_id|>\\n\\n"
        "Cutting Knowledge Date: December 2023\\nToday Date: 26 July 2024\\n\\n"
        "You are a helpful image captioner.<|eot_id|>"
        "<|start_header_id|>user<|end_header_id|>\\n\\n"
        "<|reserved_special_token_70|><|reserved_special_token_69|><|reserved_special_token_71|>"
        f"{text}<|eot_id|>"
        "<|start_header_id|>assistant<|end_header_id|>\\n\\n"
    )
    # One inference at a time; the pipeline call itself is not awaited.
    async with self._lock:
        # All LoRA arguments are passed as empty lists by this endpoint.
        return self._pipe(
            prompt,
            picture,
            lora_checkpoints=[],
            lora_weights=[],
            lora_alphas=[],
            lora_urls=[],
            lora_files=[],
        )

Mask2FormerForSegmentationFastAPI¤

Tip

core/fastapi/mask2former is the section for configuration of Mask2FormerForSegmentationFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/mask2former.py

def __init__(self, config: Config):
    """Set up the Mask2Former service: section, router, routes and lock.

    Args:
        config: Configuration object. Its default section is switched to
            ``core/fastapi/mask2former`` before the ``router`` option is read.
    """
    config.set_default_section("core/fastapi/mask2former")
    self.config = config
    # No pipeline is loaded until ``start`` assigns one.
    self._pipe = None
    self._lock = asyncio.Lock()
    prefix = config.getoption("router", "/core/fastapi/mask2former")
    self._router = APIRouter(prefix=prefix)
    routes = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, endpoint, verb in routes:
        self._router.add_api_route(path, endpoint, methods=[verb])

config `instance-attribute` ¤

config = config

_pipe `instance-attribute` ¤

_pipe = None

_router `instance-attribute` ¤

_router = APIRouter(prefix=router)

_lock `instance-attribute` ¤

_lock = Lock()

router `property` ¤

router

start ¤

start(
    pretrained_name: Optional[
        str
    ] = "mask2former-swin-tiny-ade-semantic",
)

Source code in src/unitorch/cli/fastapis/mask2former.py

def start(
    self, pretrained_name: Optional[str] = "mask2former-swin-tiny-ade-semantic"
):
    """Build the Mask2Former pipeline and keep it on the instance.

    Args:
        pretrained_name: Forwarded to
            ``Mask2FormerForSegmentationPipeline.from_config``.

    Returns:
        The string "start success".
    """
    pipeline = Mask2FormerForSegmentationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()

Source code in src/unitorch/cli/fastapis/mask2former.py

def stop(self):
    """Unload the pipeline and release the memory it held.

    Safe to call when no pipeline is loaded (never started, or already
    stopped); in that case only the collection passes run.

    Returns:
        The string "stop success".
    """
    # Swap to None in one step rather than `del`-ing the attribute, so a
    # concurrent request never finds `_pipe` missing, and so an idle
    # service does not fail on `None.to(...)`.
    pipe, self._pipe = self._pipe, None
    if pipe is not None:
        pipe.to("cpu")
        del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()

Source code in src/unitorch/cli/fastapis/mask2former.py

def status(self):
    """Return "running" when a pipeline is loaded, otherwise "stopped"."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate `async` ¤

generate(image: UploadFile)

Source code in src/unitorch/cli/fastapis/mask2former.py

async def generate(
    self,
    image: UploadFile,
):
    """Run the loaded pipeline on an uploaded image.

    The upload is read fully and opened with ``Image.open``; the pipeline
    call runs while holding ``self._lock``.

    Returns:
        A list of ``(mask.tolist(), label)`` pairs, one per pipeline result.

    Raises:
        AssertionError: if no pipeline is loaded.
    """
    assert self._pipe is not None
    payload = await image.read()
    picture = Image.open(io.BytesIO(payload))
    async with self._lock:
        segments = self._pipe(picture)

    output = []
    for mask, label in segments:
        output.append((mask.tolist(), label))
    return output

MistralForGenerationFastAPI¤

Tip

core/fastapi/mistral is the section for configuration of MistralForGenerationFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/mistral.py

def __init__(self, config: Config):
    """Store the config and register the Mistral HTTP endpoints.

    Options are read from the ``core/fastapi/mistral`` section; its
    ``router`` option supplies the route prefix. No pipeline is loaded
    here: ``_pipe`` stays ``None`` until ``start`` assigns one.
    """
    self.config = config
    config.set_default_section("core/fastapi/mistral")
    prefix = config.getoption("router", "/core/fastapi/mistral")
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    # (path, handler, HTTP verb) for each endpoint, in registration order.
    endpoints = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in endpoints:
        self._router.add_api_route(path, handler, methods=[verb])
    # Held by generate around the pipeline call.
    self._lock = asyncio.Lock()

config `instance-attribute` ¤

config = config

_pipe `instance-attribute` ¤

_pipe = None

_router `instance-attribute` ¤

_router = APIRouter(prefix=router)

_lock `instance-attribute` ¤

_lock = Lock()

router `property` ¤

router

start ¤

start(pretrained_name: str = 'mistral-7b-instruct-v0.1')

Source code in src/unitorch/cli/fastapis/mistral.py

def start(self, pretrained_name: str = "mistral-7b-instruct-v0.1"):
    """Build the Mistral generation pipeline and keep it on the instance.

    Args:
        pretrained_name: Forwarded to
            ``MistralForGenerationPipeline.from_config``.

    Returns:
        The string "start success".
    """
    pipeline = MistralForGenerationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()

Source code in src/unitorch/cli/fastapis/mistral.py

def stop(self):
    """Unload the pipeline and release the memory it held.

    Safe to call when no pipeline is loaded (never started, or already
    stopped); in that case only the collection passes run.

    Returns:
        The string "stop success".
    """
    # Swap to None in one step rather than `del`-ing the attribute, so a
    # concurrent request never finds `_pipe` missing, and so an idle
    # service does not fail on `None.to(...)`.
    pipe, self._pipe = self._pipe, None
    if pipe is not None:
        pipe.to("cpu")
        del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()

Source code in src/unitorch/cli/fastapis/mistral.py

def status(self):
    """Return "running" when a pipeline is loaded, otherwise "stopped"."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate `async` ¤

generate(
    prompt: str,
    max_seq_length: Optional[int] = 512,
    num_beams: Optional[int] = 2,
    decoder_start_token_id: Optional[int] = 1,
    decoder_end_token_id: Optional[
        Union[int, List[int]]
    ] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
)

Source code in src/unitorch/cli/fastapis/mistral.py

async def generate(
    self,
    prompt: str,
    max_seq_length: Optional[int] = 512,
    num_beams: Optional[int] = 2,
    decoder_start_token_id: Optional[int] = 1,
    decoder_end_token_id: Optional[Union[int, List[int]]] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
):
    """Run the loaded Mistral pipeline on ``prompt``.

    Every keyword argument is forwarded to the pipeline under the same
    name, unchanged. The pipeline call runs while holding ``self._lock``.

    Returns:
        Whatever the pipeline returns for this call.

    Raises:
        AssertionError: if no pipeline is loaded.
    """
    assert self._pipe is not None
    generation_options = dict(
        max_seq_length=max_seq_length,
        num_beams=num_beams,
        decoder_start_token_id=decoder_start_token_id,
        decoder_end_token_id=decoder_end_token_id,
        num_return_sequences=num_return_sequences,
        min_gen_seq_length=min_gen_seq_length,
        max_gen_seq_length=max_gen_seq_length,
        repetition_penalty=repetition_penalty,
        no_repeat_ngram_size=no_repeat_ngram_size,
        early_stopping=early_stopping,
        length_penalty=length_penalty,
        num_beam_groups=num_beam_groups,
        diversity_penalty=diversity_penalty,
        do_sample=do_sample,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        lora_checkpoints=lora_checkpoints,
        lora_weights=lora_weights,
        lora_alphas=lora_alphas,
        lora_urls=lora_urls,
        lora_files=lora_files,
    )
    async with self._lock:
        output = self._pipe(prompt, **generation_options)

    return output

QWen3FastAPI¤

Tip

core/fastapi/qwen3 is the section for configuration of QWen3FastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/qwen.py

def __init__(self, config: Config):
    """Store the config and register the QWen3 HTTP endpoints.

    Options are read from the ``core/fastapi/qwen3`` section; its
    ``router`` option supplies the route prefix. No pipeline is loaded
    here: ``_pipe`` stays ``None`` until ``start`` assigns one.
    """
    self.config = config
    config.set_default_section("core/fastapi/qwen3")
    prefix = config.getoption("router", "/core/fastapi/qwen3")
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    # (path, handler, HTTP verb) for each endpoint, in registration order.
    endpoints = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in endpoints:
        self._router.add_api_route(path, handler, methods=[verb])
    # Held by generate around the pipeline call.
    self._lock = asyncio.Lock()

config `instance-attribute` ¤

config = config

_pipe `instance-attribute` ¤

_pipe = None

_router `instance-attribute` ¤

_router = APIRouter(prefix=router)

_lock `instance-attribute` ¤

_lock = Lock()

router `property` ¤

router

start ¤

start(pretrained_name: str = 'qwen3-4b-thinking')

Source code in src/unitorch/cli/fastapis/qwen.py

def start(self, pretrained_name: str = "qwen3-4b-thinking"):
    """Build the QWen3 generation pipeline and keep it on the instance.

    Args:
        pretrained_name: Forwarded to
            ``QWen3ForGenerationPipeline.from_config``.

    Returns:
        The string "start success".
    """
    pipeline = QWen3ForGenerationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()

Source code in src/unitorch/cli/fastapis/qwen.py

def stop(self):
    """Unload the pipeline and release the memory it held.

    Safe to call when no pipeline is loaded (never started, or already
    stopped); in that case only the collection passes run.

    Returns:
        The string "stop success".
    """
    # Swap to None in one step rather than `del`-ing the attribute, so a
    # concurrent request never finds `_pipe` missing, and so an idle
    # service does not fail on `None.to(...)`.
    pipe, self._pipe = self._pipe, None
    if pipe is not None:
        pipe.to("cpu")
        del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()

Source code in src/unitorch/cli/fastapis/qwen.py

def status(self):
    """Return "running" when a pipeline is loaded, otherwise "stopped"."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate `async` ¤

generate(
    text: str,
    use_chat_template: Optional[bool] = True,
    max_seq_length: Optional[int] = 12800,
    num_beams: Optional[int] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
)

Source code in src/unitorch/cli/fastapis/qwen.py

async def generate(
    self,
    text: str,
    use_chat_template: Optional[bool] = True,
    max_seq_length: Optional[int] = 12800,
    num_beams: Optional[int] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
):
    """Run the loaded QWen3 pipeline on ``text``.

    Every keyword argument is forwarded to the pipeline under the same
    name, unchanged. The pipeline call runs while holding ``self._lock``.

    Returns:
        Whatever the pipeline returns for this call.

    Raises:
        AssertionError: if no pipeline is loaded.
    """
    assert self._pipe is not None
    generation_options = dict(
        use_chat_template=use_chat_template,
        max_seq_length=max_seq_length,
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
        min_gen_seq_length=min_gen_seq_length,
        max_gen_seq_length=max_gen_seq_length,
        do_sample=do_sample,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        lora_checkpoints=lora_checkpoints,
        lora_weights=lora_weights,
        lora_alphas=lora_alphas,
        lora_urls=lora_urls,
        lora_files=lora_files,
    )
    async with self._lock:
        output = self._pipe(text, **generation_options)

    return output

QWen3VLFastAPI¤

Tip

core/fastapi/qwen3_vl is the section for configuration of QWen3VLFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/qwen_vl.py

def __init__(self, config: Config):
    """Store the config and register the QWen3-VL HTTP endpoints.

    Options are read from the ``core/fastapi/qwen3_vl`` section; its
    ``router`` option supplies the route prefix. No pipeline is loaded
    here: ``_pipe`` stays ``None`` until ``start`` assigns one.
    """
    self.config = config
    config.set_default_section("core/fastapi/qwen3_vl")
    prefix = config.getoption("router", "/core/fastapi/qwen3_vl")
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    # (path, handler, HTTP verb) for each endpoint, in registration order.
    endpoints = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in endpoints:
        self._router.add_api_route(path, handler, methods=[verb])
    # Held by generate around the pipeline call.
    self._lock = asyncio.Lock()

config `instance-attribute` ¤

config = config

_pipe `instance-attribute` ¤

_pipe = None

_router `instance-attribute` ¤

_router = APIRouter(prefix=router)

_lock `instance-attribute` ¤

_lock = Lock()

router `property` ¤

router

start ¤

start(pretrained_name: str = 'qwen3-vl-8b-instruct')

Source code in src/unitorch/cli/fastapis/qwen_vl.py

def start(self, pretrained_name: str = "qwen3-vl-8b-instruct"):
    """Build the QWen3-VL generation pipeline and keep it on the instance.

    Args:
        pretrained_name: Forwarded to
            ``QWen3VLForGenerationPipeline.from_config``.

    Returns:
        The string "start success".
    """
    pipeline = QWen3VLForGenerationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()

Source code in src/unitorch/cli/fastapis/qwen_vl.py

def stop(self):
    """Unload the pipeline and release the memory it held.

    Safe to call when no pipeline is loaded (never started, or already
    stopped); in that case only the collection passes run.

    Returns:
        The string "stop success".
    """
    # Swap to None in one step rather than `del`-ing the attribute, so a
    # concurrent request never finds `_pipe` missing, and so an idle
    # service does not fail on `None.to(...)`.
    pipe, self._pipe = self._pipe, None
    if pipe is not None:
        pipe.to("cpu")
        del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()

Source code in src/unitorch/cli/fastapis/qwen_vl.py

def status(self):
    """Return "running" when a pipeline is loaded, otherwise "stopped"."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate `async` ¤

generate(
    text: str,
    image: UploadFile = File(...),
    use_chat_template: Optional[bool] = True,
    max_seq_length: Optional[int] = 12800,
    num_beams: Optional[int] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
)

Source code in src/unitorch/cli/fastapis/qwen_vl.py

async def generate(
    self,
    text: str,
    image: UploadFile = File(...),
    use_chat_template: Optional[bool] = True,
    max_seq_length: Optional[int] = 12800,
    num_beams: Optional[int] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
):
    """Run the loaded QWen3-VL pipeline on ``text`` and an uploaded image.

    The upload is read fully, opened with ``Image.open`` and converted to
    RGB, then passed to the pipeline as ``images``. All other keyword
    arguments are forwarded under the same name, unchanged. The pipeline
    call runs while holding ``self._lock``.

    Returns:
        Whatever the pipeline returns for this call.

    Raises:
        AssertionError: if no pipeline is loaded.
    """
    assert self._pipe is not None
    payload = await image.read()
    picture = Image.open(io.BytesIO(payload)).convert("RGB")
    generation_options = dict(
        images=picture,
        use_chat_template=use_chat_template,
        max_seq_length=max_seq_length,
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
        min_gen_seq_length=min_gen_seq_length,
        max_gen_seq_length=max_gen_seq_length,
        do_sample=do_sample,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        lora_checkpoints=lora_checkpoints,
        lora_weights=lora_weights,
        lora_alphas=lora_alphas,
        lora_urls=lora_urls,
        lora_files=lora_files,
    )
    async with self._lock:
        output = self._pipe(text, **generation_options)

    return output

SamForSegmentationFastAPI¤

Tip

core/fastapi/sam is the section for configuration of SamForSegmentationFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/sam.py

def __init__(self, config: Config):
    """Store the config and register the SAM HTTP endpoints.

    Options are read from the ``core/fastapi/sam`` section; its ``router``
    option supplies the route prefix. No pipeline is loaded here:
    ``_pipe`` stays ``None`` until ``start`` assigns one.
    """
    self.config = config
    config.set_default_section("core/fastapi/sam")
    prefix = config.getoption("router", "/core/fastapi/sam")
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    # (path, handler, HTTP verb) for each endpoint, in registration order.
    endpoints = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in endpoints:
        self._router.add_api_route(path, handler, methods=[verb])
    # Held by generate around the pipeline call.
    self._lock = asyncio.Lock()

config `instance-attribute` ¤

config = config

_pipe `instance-attribute` ¤

_pipe = None

_router `instance-attribute` ¤

_router = APIRouter(prefix=router)

_lock `instance-attribute` ¤

_lock = Lock()

router `property` ¤

router

start ¤

start(pretrained_name: Optional[str] = 'sam-vit-base')

Source code in src/unitorch/cli/fastapis/sam.py

def start(self, pretrained_name: Optional[str] = "sam-vit-base"):
    """Build the SAM segmentation pipeline and keep it on the instance.

    Args:
        pretrained_name: Forwarded to
            ``SamForSegmentationPipeline.from_config``.

    Returns:
        The string "start success".
    """
    pipeline = SamForSegmentationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()

Source code in src/unitorch/cli/fastapis/sam.py

def stop(self):
    """Unload the pipeline and release the memory it held.

    Safe to call when no pipeline is loaded (never started, or already
    stopped); in that case only the collection passes run.

    Returns:
        The string "stop success".
    """
    # Swap to None in one step rather than `del`-ing the attribute, so a
    # concurrent request never finds `_pipe` missing, and so an idle
    # service does not fail on `None.to(...)`.
    pipe, self._pipe = self._pipe, None
    if pipe is not None:
        pipe.to("cpu")
        del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()

Source code in src/unitorch/cli/fastapis/sam.py

def status(self):
    """Return "running" when a pipeline is loaded, otherwise "stopped"."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate `async` ¤

generate(
    image: UploadFile,
    points: Optional[List] = None,
    boxes: Optional[List] = None,
    mask_threshold: float = 0.1,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
)

Source code in src/unitorch/cli/fastapis/sam.py

async def generate(
    self,
    image: UploadFile,
    points: Optional[List] = None,
    boxes: Optional[List] = None,
    mask_threshold: float = 0.1,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
):
    """Run the loaded SAM pipeline on an uploaded image and stream a PNG.

    The upload is read fully and opened with ``Image.open``. All other
    arguments are forwarded to the pipeline under the same name. The
    pipeline call runs while holding ``self._lock``.

    Returns:
        A ``StreamingResponse`` with media type ``image/png``. Its body is
        the pipeline's mask image saved as PNG, or empty when the
        pipeline returns ``None``.

    Raises:
        AssertionError: if no pipeline is loaded.
    """
    assert self._pipe is not None
    payload = await image.read()
    picture = Image.open(io.BytesIO(payload))
    pipeline_options = dict(
        points=points,
        boxes=boxes,
        mask_threshold=mask_threshold,
        lora_checkpoints=lora_checkpoints,
        lora_weights=lora_weights,
        lora_alphas=lora_alphas,
        lora_urls=lora_urls,
        lora_files=lora_files,
    )
    async with self._lock:
        mask_image = self._pipe(picture, **pipeline_options)

    if mask_image is None:
        # No mask produced: respond with an empty body.
        body = io.BytesIO()
    else:
        encoded = io.BytesIO()
        mask_image.save(encoded, format="PNG")
        body = io.BytesIO(encoded.getvalue())

    return StreamingResponse(
        body,
        media_type="image/png",
    )

SegformerForSegmentationFastAPI¤

Tip

core/fastapi/segformer is the section for configuration of SegformerForSegmentationFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/segformer.py

def __init__(self, config: Config):
    """Store the config and register the Segformer HTTP endpoints.

    Options are read from the ``core/fastapi/segformer`` section; its
    ``router`` option supplies the route prefix. No pipeline is loaded
    here: ``_pipe`` stays ``None`` until ``start`` assigns one.
    """
    self.config = config
    config.set_default_section("core/fastapi/segformer")
    prefix = config.getoption("router", "/core/fastapi/segformer")
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    # (path, handler, HTTP verb) for each endpoint, in registration order.
    endpoints = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in endpoints:
        self._router.add_api_route(path, handler, methods=[verb])
    # Held by generate around the pipeline call.
    self._lock = asyncio.Lock()

config `instance-attribute` ¤

config = config

_pipe `instance-attribute` ¤

_pipe = None

_router `instance-attribute` ¤

_router = APIRouter(prefix=router)

_lock `instance-attribute` ¤

_lock = Lock()

router `property` ¤

router

start ¤

start(
    pretrained_name: Optional[
        str
    ] = "segformer-swin-tiny-ade-semantic",
)

Source code in src/unitorch/cli/fastapis/segformer.py

def start(
    self, pretrained_name: Optional[str] = "segformer-swin-tiny-ade-semantic"
):
    """Build the Segformer pipeline and keep it on the instance.

    Args:
        pretrained_name: Forwarded to
            ``SegformerForSegmentationPipeline.from_config``.

    Returns:
        The string "start success".
    """
    pipeline = SegformerForSegmentationPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()

Source code in src/unitorch/cli/fastapis/segformer.py

def stop(self):
    """Unload the pipeline and release the memory it held.

    Safe to call when no pipeline is loaded (never started, or already
    stopped); in that case only the collection passes run.

    Returns:
        The string "stop success".
    """
    # Swap to None in one step rather than `del`-ing the attribute, so a
    # concurrent request never finds `_pipe` missing, and so an idle
    # service does not fail on `None.to(...)`.
    pipe, self._pipe = self._pipe, None
    if pipe is not None:
        pipe.to("cpu")
        del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()

Source code in src/unitorch/cli/fastapis/segformer.py

def status(self):
    """Return "running" when a pipeline is loaded, otherwise "stopped"."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate `async` ¤

generate(image: UploadFile)

Source code in src/unitorch/cli/fastapis/segformer.py

async def generate(
    self,
    image: UploadFile,
):
    """Run the loaded pipeline on an uploaded image.

    The upload is read fully and opened with ``Image.open``; the pipeline
    call runs while holding ``self._lock``.

    Returns:
        A list of ``(mask.tolist(), label)`` pairs, one per pipeline result.

    Raises:
        AssertionError: if no pipeline is loaded.
    """
    assert self._pipe is not None
    payload = await image.read()
    picture = Image.open(io.BytesIO(payload))
    async with self._lock:
        segments = self._pipe(picture)

    output = []
    for mask, label in segments:
        output.append((mask.tolist(), label))
    return output

Siglip2ForMatchingFastAPI¤

Tip

core/fastapi/siglip is the section for configuration of Siglip2ForMatchingFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/siglip.py

def __init__(self, config: Config):
    """Store the config and register the Siglip2 HTTP endpoints.

    Options are read from the ``core/fastapi/siglip`` section; its
    ``router`` option supplies the route prefix. No pipeline is loaded
    here: ``_pipe`` stays ``None`` until ``start`` assigns one.
    """
    self.config = config
    config.set_default_section("core/fastapi/siglip")
    prefix = config.getoption("router", "/core/fastapi/siglip")
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    # (path, handler, HTTP verb) for each endpoint, in registration order.
    endpoints = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "GET"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in endpoints:
        self._router.add_api_route(path, handler, methods=[verb])
    # Held by generate around the pipeline call.
    self._lock = asyncio.Lock()

config `instance-attribute` ¤

config = config

_pipe `instance-attribute` ¤

_pipe = None

_router `instance-attribute` ¤

_router = APIRouter(prefix=router)

_lock `instance-attribute` ¤

_lock = Lock()

router `property` ¤

router

start ¤

start(pretrained_name: str = 'siglip-base-patch16-224')

Source code in src/unitorch/cli/fastapis/siglip.py

def start(self, pretrained_name: str = "siglip-base-patch16-224"):
    """Build the Siglip2 matching pipeline and keep it on the instance.

    Args:
        pretrained_name: Forwarded to
            ``Siglip2ForMatchingPipeline.from_config``.

    Returns:
        The string "start success".
    """
    pipeline = Siglip2ForMatchingPipeline.from_config(
        self.config, pretrained_name=pretrained_name
    )
    self._pipe = pipeline
    return "start success"

stop ¤

stop()

Source code in src/unitorch/cli/fastapis/siglip.py

def stop(self):
    """Unload the pipeline and release the memory it held.

    Safe to call when no pipeline is loaded (never started, or already
    stopped); in that case only the collection passes run.

    Returns:
        The string "stop success".
    """
    # Swap to None in one step rather than `del`-ing the attribute, so a
    # concurrent request never finds `_pipe` missing, and so an idle
    # service does not fail on `None.to(...)`.
    pipe, self._pipe = self._pipe, None
    if pipe is not None:
        pipe.to("cpu")
        del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()

Source code in src/unitorch/cli/fastapis/siglip.py

def status(self):
    """Return "running" when a pipeline is loaded, otherwise "stopped"."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate `async` ¤

generate(
    text: str,
    image: UploadFile,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
)

Source code in src/unitorch/cli/fastapis/siglip.py

async def generate(
    self,
    text: str,
    image: UploadFile,
    lora_checkpoints: Optional[Union[str, List[str]]] = [],
    lora_weights: Optional[Union[float, List[float]]] = [],
    lora_alphas: Optional[Union[float, List[float]]] = [],
    lora_urls: Optional[Union[str, List[str]]] = [],
    lora_files: Optional[Union[str, List[str]]] = [],
):
    """Run the loaded Siglip2 pipeline on ``text`` and an uploaded image.

    The upload is read fully and opened with ``Image.open``. The LoRA
    arguments are forwarded to the pipeline under the same name. The
    pipeline call runs while holding ``self._lock``.

    Returns:
        Whatever the pipeline returns for this call.

    Raises:
        AssertionError: if no pipeline is loaded.
    """
    assert self._pipe is not None
    payload = await image.read()
    picture = Image.open(io.BytesIO(payload))
    lora_options = dict(
        lora_checkpoints=lora_checkpoints,
        lora_weights=lora_weights,
        lora_alphas=lora_alphas,
        lora_urls=lora_urls,
        lora_files=lora_files,
    )
    async with self._lock:
        output = self._pipe(text, picture, **lora_options)

    return output

WanForText2VideoFastAPI¤

Tip

core/fastapi/wan/text2video is the section for configuration of WanForText2VideoFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/wan/text2video.py

def __init__(self, config: Config):
    """Store the config and register the Wan text-to-video endpoints.

    Options are read from the ``core/fastapi/wan/text2video`` section; its
    ``router`` option supplies the route prefix. No pipeline is loaded
    here: ``_pipe`` stays ``None`` until ``start`` assigns one.
    """
    self.config = config
    config.set_default_section("core/fastapi/wan/text2video")
    prefix = config.getoption("router", "/core/fastapi/wan/text2video")
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    # (path, handler, HTTP verb) for each endpoint, in registration order.
    endpoints = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "POST"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in endpoints:
        self._router.add_api_route(path, handler, methods=[verb])
    # Held by generate around the pipeline call.
    self._lock = asyncio.Lock()

config `instance-attribute` ¤

config = config

_pipe `instance-attribute` ¤

_pipe = None

_router `instance-attribute` ¤

_router = APIRouter(prefix=router)

_lock `instance-attribute` ¤

_lock = Lock()

router `property` ¤

router

start ¤

start(
    pretrained_name: Optional[str] = "wan-v2.2-t2v-14b",
    pretrained_lora_names: Optional[
        Union[str, List[str]]
    ] = None,
    pretrained_lora_weights: Optional[
        Union[float, List[float]]
    ] = 1.0,
    pretrained_lora_alphas: Optional[
        Union[float, List[float]]
    ] = 32.0,
)

Source code in src/unitorch/cli/fastapis/wan/text2video.py

def start(
    self,
    pretrained_name: Optional[str] = "wan-v2.2-t2v-14b",
    pretrained_lora_names: Optional[Union[str, List[str]]] = None,
    pretrained_lora_weights: Optional[Union[float, List[float]]] = 1.0,
    pretrained_lora_alphas: Optional[Union[float, List[float]]] = 32.0,
):
    """Build the Wan text-to-video pipeline and keep it on the instance.

    All arguments are forwarded under the same name to
    ``WanForText2VideoFastAPIPipeline.from_config``.

    Returns:
        The string "start success".
    """
    lora_options = {
        "pretrained_lora_names": pretrained_lora_names,
        "pretrained_lora_weights": pretrained_lora_weights,
        "pretrained_lora_alphas": pretrained_lora_alphas,
    }
    self._pipe = WanForText2VideoFastAPIPipeline.from_config(
        self.config,
        pretrained_name=pretrained_name,
        **lora_options,
    )
    return "start success"

stop ¤

stop()

Source code in src/unitorch/cli/fastapis/wan/text2video.py

def stop(self):
    """Unload the pipeline and release the memory it held.

    Safe to call when no pipeline is loaded (never started, or already
    stopped); in that case only the collection passes run.

    Returns:
        The string "stop success".
    """
    # Swap to None in one step rather than `del`-ing the attribute, so a
    # concurrent request never finds `_pipe` missing, and so an idle
    # service does not fail on `None.to(...)`.
    pipe, self._pipe = self._pipe, None
    if pipe is not None:
        pipe.to("cpu")
        del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()

Source code in src/unitorch/cli/fastapis/wan/text2video.py

def status(self):
    """Return "running" when a pipeline is loaded, otherwise "stopped"."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate `async` ¤

generate(
    text: str,
    neg_text: Optional[str] = "",
    height: Optional[int] = 480,
    width: Optional[int] = 832,
    num_frames: Optional[int] = 81,
    num_fps: Optional[int] = 16,
    guidance_scale: Optional[float] = 5.0,
    num_timesteps: Optional[int] = 50,
    seed: Optional[int] = 1123,
)

Source code in src/unitorch/cli/fastapis/wan/text2video.py

async def generate(
    self,
    text: str,
    neg_text: Optional[str] = "",
    height: Optional[int] = 480,
    width: Optional[int] = 832,
    num_frames: Optional[int] = 81,
    num_fps: Optional[int] = 16,
    guidance_scale: Optional[float] = 5.0,
    num_timesteps: Optional[int] = 50,
    seed: Optional[int] = 1123,
):
    """Run the loaded Wan text-to-video pipeline and stream the result.

    Every keyword argument is forwarded to the pipeline under the same
    name. The pipeline call runs while holding ``self._lock``; its result
    is then opened as a binary file and read fully into memory.

    Returns:
        A ``StreamingResponse`` with media type ``video/mp4`` and an
        attachment filename of ``output.mp4``.

    Raises:
        AssertionError: if no pipeline is loaded.
    """
    assert self._pipe is not None
    video_options = dict(
        neg_text=neg_text,
        height=height,
        width=width,
        num_frames=num_frames,
        num_fps=num_fps,
        guidance_scale=guidance_scale,
        num_timesteps=num_timesteps,
        seed=seed,
    )
    async with self._lock:
        video = self._pipe(text, **video_options)
    with open(video, "rb") as handle:
        # A BytesIO built from initial bytes starts at position 0.
        body = io.BytesIO(handle.read())
    return StreamingResponse(
        body,
        media_type="video/mp4",
        headers={"Content-Disposition": "attachment; filename=output.mp4"},
    )

WanForImage2VideoFastAPI¤

Tip

core/fastapi/wan/image2video is the section for configuration of WanForImage2VideoFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/wan/image2video.py

def __init__(self, config: Config):
    """Store the config and register the Wan image-to-video endpoints.

    Options are read from the ``core/fastapi/wan/image2video`` section;
    its ``router`` option supplies the route prefix. No pipeline is loaded
    here: ``_pipe`` stays ``None`` until ``start`` assigns one.
    """
    self.config = config
    config.set_default_section("core/fastapi/wan/image2video")
    prefix = config.getoption("router", "/core/fastapi/wan/image2video")
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    # (path, handler, HTTP verb) for each endpoint, in registration order.
    endpoints = (
        ("/generate", self.generate, "POST"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "POST"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in endpoints:
        self._router.add_api_route(path, handler, methods=[verb])
    # Held by generate around the pipeline call.
    self._lock = asyncio.Lock()

config `instance-attribute` ¤

config = config

_pipe `instance-attribute` ¤

_pipe = None

_router `instance-attribute` ¤

_router = APIRouter(prefix=router)

_lock `instance-attribute` ¤

_lock = Lock()

router `property` ¤

router

start ¤

start(
    pretrained_name: Optional[str] = "wan-v2.2-i2v-14b",
    pretrained_lora_names: Optional[
        Union[str, List[str]]
    ] = None,
    pretrained_lora_weights: Optional[
        Union[float, List[float]]
    ] = 1.0,
    pretrained_lora_alphas: Optional[
        Union[float, List[float]]
    ] = 32.0,
)

Source code in src/unitorch/cli/fastapis/wan/image2video.py

def start(
    self,
    pretrained_name: Optional[str] = "wan-v2.2-i2v-14b",
    pretrained_lora_names: Optional[Union[str, List[str]]] = None,
    pretrained_lora_weights: Optional[Union[float, List[float]]] = 1.0,
    pretrained_lora_alphas: Optional[Union[float, List[float]]] = 32.0,
):
    """Build the Wan image-to-video pipeline and keep it on the instance.

    All arguments are forwarded under the same name to
    ``WanForImage2VideoFastAPIPipeline.from_config``.

    Returns:
        The string "start success".
    """
    lora_options = {
        "pretrained_lora_names": pretrained_lora_names,
        "pretrained_lora_weights": pretrained_lora_weights,
        "pretrained_lora_alphas": pretrained_lora_alphas,
    }
    self._pipe = WanForImage2VideoFastAPIPipeline.from_config(
        self.config,
        pretrained_name=pretrained_name,
        **lora_options,
    )
    return "start success"

stop ¤

stop()

Source code in src/unitorch/cli/fastapis/wan/image2video.py

def stop(self):
    """Unload the pipeline and release the memory it held.

    Safe to call when no pipeline is loaded (never started, or already
    stopped); in that case only the collection passes run.

    Returns:
        The string "stop success".
    """
    # Swap to None in one step rather than `del`-ing the attribute, so a
    # concurrent request never finds `_pipe` missing, and so an idle
    # service does not fail on `None.to(...)`.
    pipe, self._pipe = self._pipe, None
    if pipe is not None:
        pipe.to("cpu")
        del pipe
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()

Source code in src/unitorch/cli/fastapis/wan/image2video.py

def status(self):
    """Return "running" when a pipeline is loaded, otherwise "stopped"."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate `async` ¤

generate(
    text: str,
    image: UploadFile,
    neg_text: Optional[str] = "",
    num_frames: Optional[int] = 81,
    num_fps: Optional[int] = 16,
    guidance_scale: Optional[float] = 5.0,
    strength: Optional[float] = 1.0,
    num_timesteps: Optional[int] = 50,
    seed: Optional[int] = 1123,
)

Source code in src/unitorch/cli/fastapis/wan/image2video.py

async def generate(
    self,
    text: str,
    image: UploadFile,
    neg_text: Optional[str] = "",
    num_frames: Optional[int] = 81,
    num_fps: Optional[int] = 16,
    guidance_scale: Optional[float] = 5.0,
    strength: Optional[float] = 1.0,
    num_timesteps: Optional[int] = 50,
    seed: Optional[int] = 1123,
):
    """Run the loaded Wan image-to-video pipeline and stream the result.

    The upload is read fully and opened with ``Image.open``. Every keyword
    argument is forwarded to the pipeline under the same name. The
    pipeline call runs while holding ``self._lock``; its result is then
    opened as a binary file and read fully into memory.

    Returns:
        A ``StreamingResponse`` with media type ``video/mp4`` and an
        attachment filename of ``output.mp4``.

    Raises:
        AssertionError: if no pipeline is loaded.
    """
    assert self._pipe is not None
    payload = await image.read()
    picture = Image.open(io.BytesIO(payload))
    video_options = dict(
        neg_text=neg_text,
        num_frames=num_frames,
        num_fps=num_fps,
        guidance_scale=guidance_scale,
        strength=strength,
        num_timesteps=num_timesteps,
        seed=seed,
    )
    async with self._lock:
        video = self._pipe(text, picture, **video_options)
    with open(video, "rb") as handle:
        # A BytesIO built from initial bytes starts at position 0.
        body = io.BytesIO(handle.read())
    return StreamingResponse(
        body,
        media_type="video/mp4",
        headers={"Content-Disposition": "attachment; filename=output.mp4"},
    )

QWenImageText2ImageFastAPI¤

Tip

core/fastapi/qwen_image/text2image is the section for configuration of QWenImageText2ImageFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/qwen_image/text2image.py

def __init__(self, config: Config):
    """Store the config and register the QWen-Image text-to-image endpoints.

    Options are read from the ``core/fastapi/qwen_image/text2image``
    section; its ``router`` option supplies the route prefix. No pipeline
    is loaded here: ``_pipe`` stays ``None`` until ``start`` assigns one.
    """
    self.config = config
    config.set_default_section("core/fastapi/qwen_image/text2image")
    prefix = config.getoption("router", "/core/fastapi/qwen_image/text2image")
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    # (path, handler, HTTP verb) for each endpoint, in registration order.
    endpoints = (
        ("/generate", self.generate, "GET"),
        ("/status", self.status, "GET"),
        ("/start", self.start, "POST"),
        ("/stop", self.stop, "GET"),
    )
    for path, handler, verb in endpoints:
        self._router.add_api_route(path, handler, methods=[verb])
    # Held by generate around the pipeline call.
    self._lock = asyncio.Lock()

config `instance-attribute` ¤

config = config

_pipe `instance-attribute` ¤

_pipe = None

_router `instance-attribute` ¤

_router = APIRouter(prefix=router)

_lock `instance-attribute` ¤

_lock = Lock()

router `property` ¤

router

start ¤

start(
    pretrained_name: Optional[str] = "qwen-image",
    pretrained_lora_names: Optional[
        Union[str, List[str]]
    ] = None,
    pretrained_lora_weights: Optional[
        Union[float, List[float]]
    ] = 1.0,
    pretrained_lora_alphas: Optional[
        Union[float, List[float]]
    ] = 32.0,
)

Source code in src/unitorch/cli/fastapis/qwen_image/text2image.py

def start(
    self,
    pretrained_name: Optional[str] = "qwen-image",
    pretrained_lora_names: Optional[Union[str, List[str]]] = None,
    pretrained_lora_weights: Optional[Union[float, List[float]]] = 1.0,
    pretrained_lora_alphas: Optional[Union[float, List[float]]] = 32.0,
):
    """
    Builds the text-to-image pipeline and stores it on the service.

    All arguments are forwarded unchanged, by keyword, to
    ``QWenImageForText2ImageFastAPIPipeline.from_config`` together with the
    service configuration.

    Args:
        pretrained_name (str, optional): Defaults to ``"qwen-image"``.
        pretrained_lora_names (str or List[str], optional): Defaults to None.
        pretrained_lora_weights (float or List[float], optional): Defaults to 1.0.
        pretrained_lora_alphas (float or List[float], optional): Defaults to 32.0.

    Returns:
        str: The literal ``"start success"``.
    """
    pipeline_options = {
        "pretrained_name": pretrained_name,
        "pretrained_lora_names": pretrained_lora_names,
        "pretrained_lora_weights": pretrained_lora_weights,
        "pretrained_lora_alphas": pretrained_lora_alphas,
    }
    self._pipe = QWenImageForText2ImageFastAPIPipeline.from_config(
        self.config, **pipeline_options
    )
    return "start success"

stop ¤

stop()

Source code in src/unitorch/cli/fastapis/qwen_image/text2image.py

def stop(self):
    """
    Unloads the pipeline, then runs garbage collection and a CUDA cache flush.

    Calling this while no pipeline is loaded is allowed; the move-to-CPU step
    is simply skipped.

    Returns:
        str: The literal ``"stop success"``.
    """
    if self._pipe is not None:
        # Move the pipeline to CPU before dropping the reference.
        self._pipe.to("cpu")
    # Rebind instead of ``del`` so ``_pipe`` is never a missing attribute for
    # a concurrent ``status`` call while cleanup runs.
    self._pipe = None
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()

Source code in src/unitorch/cli/fastapis/qwen_image/text2image.py

def status(self):
    """Returns ``"running"`` when a pipeline is loaded, otherwise ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate `async` ¤

generate(
    text: str,
    height: Optional[int] = 512,
    width: Optional[int] = 512,
    guidance_scale: Optional[float] = 4.0,
    num_timesteps: Optional[int] = 50,
    seed: Optional[int] = 1123,
)

Source code in src/unitorch/cli/fastapis/qwen_image/text2image.py

async def generate(
    self,
    text: str,
    height: Optional[int] = 512,
    width: Optional[int] = 512,
    guidance_scale: Optional[float] = 4.0,
    num_timesteps: Optional[int] = 50,
    seed: Optional[int] = 1123,
):
    """
    Generates an image for a text prompt and returns it as a PNG stream.

    Args:
        text (str): Prompt passed to the pipeline as its positional argument.
        height (int, optional): Forwarded to the pipeline. Defaults to 512.
        width (int, optional): Forwarded to the pipeline. Defaults to 512.
        guidance_scale (float, optional): Forwarded to the pipeline. Defaults to 4.0.
        num_timesteps (int, optional): Forwarded to the pipeline. Defaults to 50.
        seed (int, optional): Forwarded to the pipeline. Defaults to 1123.

    Returns:
        StreamingResponse: The generated image encoded as ``image/png``.

    Raises:
        HTTPException: 503 when no pipeline is loaded (``start`` not called yet).
    """
    if self._pipe is None:
        # Explicit check instead of ``assert``: asserts are stripped under
        # ``python -O`` and would surface as an opaque 500 otherwise.
        from fastapi import HTTPException

        raise HTTPException(
            status_code=503, detail="Service not started. Call /start first."
        )

    # One generation at a time.
    # NOTE(review): the pipeline call is synchronous, so the event loop is
    # blocked while it runs — confirm this is acceptable.
    async with self._lock:
        image = self._pipe(
            text,
            height=height,
            width=width,
            guidance_scale=guidance_scale,
            num_timesteps=num_timesteps,
            seed=seed,
        )

    buffer = io.BytesIO()
    image.save(buffer, format="PNG")
    # Rewind and stream the same buffer rather than copying its bytes into a
    # second BytesIO.
    buffer.seek(0)
    return StreamingResponse(buffer, media_type="image/png")

QWenImageEditingFastAPI¤

Tip

core/fastapi/qwen_image/editing is the section for configuration of QWenImageEditingFastAPI.

Bases: GenericFastAPI

Source code in src/unitorch/cli/fastapis/qwen_image/image_editing.py

def __init__(self, config: Config):
    """
    Sets up the image-editing service and registers its HTTP routes.

    Args:
        config (Config): Configuration object. Its default section is switched to
            ``core/fastapi/qwen_image/editing`` and the ``router`` option is read
            from it to obtain the route prefix.
    """
    self.config = config
    config.set_default_section("core/fastapi/qwen_image/editing")
    prefix = config.getoption("router", "/core/fastapi/qwen_image/editing")
    # No pipeline exists until ``start`` builds one.
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    # (path, handler, allowed HTTP methods), registered in this order.
    endpoints = (
        ("/generate", self.generate, ["POST"]),
        ("/status", self.status, ["GET"]),
        ("/start", self.start, ["POST"]),
        ("/stop", self.stop, ["GET"]),
    )
    for path, handler, methods in endpoints:
        self._router.add_api_route(path, handler, methods=methods)
    # Held by ``generate`` around the pipeline call.
    self._lock = asyncio.Lock()

config `instance-attribute` ¤

config = config

_pipe `instance-attribute` ¤

_pipe = None

_router `instance-attribute` ¤

_router = APIRouter(prefix=router)

_lock `instance-attribute` ¤

_lock = Lock()

router `property` ¤

router

start ¤

start(
    pretrained_name: Optional[str] = "qwen-image-editing",
    pretrained_lora_names: Optional[
        Union[str, List[str]]
    ] = None,
    pretrained_lora_weights: Optional[
        Union[float, List[float]]
    ] = 1.0,
    pretrained_lora_alphas: Optional[
        Union[float, List[float]]
    ] = 32.0,
)

Source code in src/unitorch/cli/fastapis/qwen_image/image_editing.py

def start(
    self,
    pretrained_name: Optional[str] = "qwen-image-editing",
    pretrained_lora_names: Optional[Union[str, List[str]]] = None,
    pretrained_lora_weights: Optional[Union[float, List[float]]] = 1.0,
    pretrained_lora_alphas: Optional[Union[float, List[float]]] = 32.0,
):
    """
    Builds the image-editing pipeline and stores it on the service.

    All arguments are forwarded unchanged, by keyword, to
    ``QWenImageForImageEditingFastAPIPipeline.from_config`` together with the
    service configuration.

    Args:
        pretrained_name (str, optional): Defaults to ``"qwen-image-editing"``.
        pretrained_lora_names (str or List[str], optional): Defaults to None.
        pretrained_lora_weights (float or List[float], optional): Defaults to 1.0.
        pretrained_lora_alphas (float or List[float], optional): Defaults to 32.0.

    Returns:
        str: The literal ``"start success"``.
    """
    pipeline_options = {
        "pretrained_name": pretrained_name,
        "pretrained_lora_names": pretrained_lora_names,
        "pretrained_lora_weights": pretrained_lora_weights,
        "pretrained_lora_alphas": pretrained_lora_alphas,
    }
    self._pipe = QWenImageForImageEditingFastAPIPipeline.from_config(
        self.config, **pipeline_options
    )
    return "start success"

stop ¤

stop()

Source code in src/unitorch/cli/fastapis/qwen_image/image_editing.py

def stop(self):
    """
    Unloads the pipeline, then runs garbage collection and a CUDA cache flush.

    Calling this while no pipeline is loaded is allowed; the move-to-CPU step
    is simply skipped.

    Returns:
        str: The literal ``"stop success"``.
    """
    if self._pipe is not None:
        # Move the pipeline to CPU before dropping the reference.
        self._pipe.to("cpu")
    # Rebind instead of ``del`` so ``_pipe`` is never a missing attribute for
    # a concurrent ``status`` call while cleanup runs.
    self._pipe = None
    gc.collect()
    torch.cuda.empty_cache()
    return "stop success"

status ¤

status()

Source code in src/unitorch/cli/fastapis/qwen_image/image_editing.py

def status(self):
    """Returns ``"running"`` when a pipeline is loaded, otherwise ``"stopped"``."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate `async` ¤

generate(
    text: str,
    image: UploadFile,
    height: Optional[int] = 512,
    width: Optional[int] = 512,
    guidance_scale: Optional[float] = 2.5,
    num_timesteps: Optional[int] = 50,
    seed: Optional[int] = 1123,
)

Source code in src/unitorch/cli/fastapis/qwen_image/image_editing.py

async def generate(
    self,
    text: str,
    image: UploadFile,
    height: Optional[int] = 512,
    width: Optional[int] = 512,
    guidance_scale: Optional[float] = 2.5,
    num_timesteps: Optional[int] = 50,
    seed: Optional[int] = 1123,
):
    """
    Edits an uploaded image according to a text prompt and returns a PNG stream.

    Args:
        text (str): Prompt passed to the pipeline as its positional argument.
        image (UploadFile): Uploaded file; its bytes are opened with ``Image.open``
            and passed to the pipeline as ``image``.
        height (int, optional): Forwarded to the pipeline. Defaults to 512.
        width (int, optional): Forwarded to the pipeline. Defaults to 512.
        guidance_scale (float, optional): Forwarded to the pipeline. Defaults to 2.5.
        num_timesteps (int, optional): Forwarded to the pipeline. Defaults to 50.
        seed (int, optional): Forwarded to the pipeline. Defaults to 1123.

    Returns:
        StreamingResponse: The edited image encoded as ``image/png``.

    Raises:
        HTTPException: 503 when no pipeline is loaded (``start`` not called yet).
    """
    if self._pipe is None:
        # Explicit check instead of ``assert``: asserts are stripped under
        # ``python -O`` and would surface as an opaque 500 otherwise.
        from fastapi import HTTPException

        raise HTTPException(
            status_code=503, detail="Service not started. Call /start first."
        )

    image_bytes = await image.read()
    source_image = Image.open(io.BytesIO(image_bytes))

    # One generation at a time.
    # NOTE(review): the pipeline call is synchronous, so the event loop is
    # blocked while it runs — confirm this is acceptable.
    async with self._lock:
        edited = self._pipe(
            text,
            image=source_image,
            height=height,
            width=width,
            guidance_scale=guidance_scale,
            num_timesteps=num_timesteps,
            seed=seed,
        )

    buffer = io.BytesIO()
    edited.save(buffer, format="PNG")
    # Rewind and stream the same buffer rather than copying its bytes into a
    # second BytesIO.
    buffer.seek(0)
    return StreamingResponse(buffer, media_type="image/png")

QWen3VLLMFastAPI¤

Tip

core/fastapi/vllm/qwen3 is the section for configuration of QWen3VLLMFastAPI.

Bases: GenericFastAPI

FastAPI service for QWen3 text generation powered by vLLM.

Exposes /generate, /status, /start, and /stop endpoints under a configurable router prefix (default /core/fastapi/vllm/qwen3).

Source code in src/unitorch/cli/fastapis/qwen_vllm.py

def __init__(self, config: Config):
    """
    Sets up the QWen3 vLLM service and registers its HTTP routes.

    Args:
        config (Config): Configuration object. Its default section is switched to
            ``core/fastapi/vllm/qwen3`` and the ``router`` option is read from it
            to obtain the route prefix.
    """
    self.config = config
    config.set_default_section("core/fastapi/vllm/qwen3")
    prefix = config.getoption("router", "/core/fastapi/vllm/qwen3")
    # No engine exists until ``start`` builds one.
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    # (path, handler, allowed HTTP methods), registered in this order.
    endpoints = (
        ("/generate", self.generate, ["POST"]),
        ("/status", self.status, ["GET"]),
        ("/start", self.start, ["GET"]),
        ("/stop", self.stop, ["GET"]),
    )
    for path, handler, methods in endpoints:
        self._router.add_api_route(path, handler, methods=methods)
    # Held by ``generate`` around the engine call.
    self._lock = asyncio.Lock()

config `instance-attribute` ¤

config = config

_pipe `instance-attribute` ¤

_pipe = None

_router `instance-attribute` ¤

_router = APIRouter(prefix=router)

_lock `instance-attribute` ¤

_lock = Lock()

router `property` ¤

router

start ¤

start(pretrained_name: str = 'qwen3-4b-thinking')

Loads and starts the vLLM QWen3 engine.

Parameters:

Name	Type	Description	Default
`pretrained_name`	`str`	Pretrained model name to load. Defaults to `"qwen3-4b-thinking"`.	`'qwen3-4b-thinking'`

Source code in src/unitorch/cli/fastapis/qwen_vllm.py

def start(self, pretrained_name: str = "qwen3-4b-thinking"):
    """
    Loads and starts the vLLM QWen3 engine.

    The name is written to the ``pretrained_name`` option of the service's
    config section only when ``pretrained_vllm_infos`` has a
    ``pretrained_name_or_path`` entry for it; the engine is built with the
    given name either way.

    Args:
        pretrained_name (str): Pretrained model name to load. Defaults to ``"qwen3-4b-thinking"``.

    Returns:
        str: The literal ``"start success"``.
    """
    registered_path = nested_dict_value(
        pretrained_vllm_infos, pretrained_name, "pretrained_name_or_path"
    )
    section = "core/fastapi/vllm/qwen3"
    self.config.set_default_section(section)
    if registered_path is not None:
        self.config.set(section, "pretrained_name", pretrained_name)
    engine = QWen3VLLMForGeneration.from_config(
        self.config,
        pretrained_name=pretrained_name,
    )
    self._pipe = engine
    return "start success"

stop ¤

stop()

Stops and unloads the vLLM engine, releasing GPU memory.

Source code in src/unitorch/cli/fastapis/qwen_vllm.py

def stop(self):
    """
    Stops and unloads the vLLM engine, releasing GPU memory.

    Drops the service's reference to the engine, runs garbage collection and
    then attempts a CUDA cache flush.

    Returns:
        str: The literal ``"stop success"``.
    """
    # Rebind instead of ``del`` so ``_pipe`` is never a missing attribute for
    # a concurrent ``status`` call while cleanup runs.
    self._pipe = None
    gc.collect()
    try:
        import torch

        torch.cuda.empty_cache()
    except Exception:
        # Best-effort: a failed import or cache flush must not fail the stop.
        pass
    return "stop success"

status ¤

status()

Returns "running" if the engine is loaded, otherwise "stopped".

Source code in src/unitorch/cli/fastapis/qwen_vllm.py

def status(self):
    """Reports ``"running"`` while an engine is loaded and ``"stopped"`` otherwise."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate `async` ¤

generate(
    text: str,
    use_chat_template: Optional[bool] = True,
    max_gen_seq_length: Optional[int] = 512,
    min_gen_seq_length: Optional[int] = 0,
    num_return_sequences: Optional[int] = 1,
    num_beams: Optional[int] = 1,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
    repetition_penalty: Optional[float] = 1.0,
    stop: Optional[Union[str, List[str]]] = None,
)

Generates a text completion for the given prompt.

Parameters:

Name	Type	Description	Default
`text`	`str`	Input prompt or JSON-encoded message list (when `use_chat_template=True`).	required
`use_chat_template`	`bool`	Apply chat template formatting. Defaults to True.	`True`
`max_gen_seq_length`	`int`	Maximum tokens to generate. Defaults to 512.	`512`
`min_gen_seq_length`	`int`	Minimum tokens to generate. Defaults to 0.	`0`
`num_return_sequences`	`int`	Number of completions to return. Defaults to 1.	`1`
`num_beams`	`int`	Beam search width. Defaults to 1.	`1`
`do_sample`	`bool`	Enable sampling-based decoding. Defaults to False.	`False`
`temperature`	`float`	Sampling temperature. Defaults to 1.0.	`1.0`
`top_k`	`int`	Top-k sampling. Defaults to 50.	`50`
`top_p`	`float`	Top-p (nucleus) sampling. Defaults to 1.0.	`1.0`
`repetition_penalty`	`float`	Repetition penalty. Defaults to 1.0.	`1.0`
`stop`	`str or List[str]`	Stop string(s) to end generation.	`None`

Returns:

Type	Description
	str or List[str]: Generated text. Single string when `num_return_sequences=1`.

Source code in src/unitorch/cli/fastapis/qwen_vllm.py

async def generate(
    self,
    text: str,
    use_chat_template: Optional[bool] = True,
    max_gen_seq_length: Optional[int] = 512,
    min_gen_seq_length: Optional[int] = 0,
    num_return_sequences: Optional[int] = 1,
    num_beams: Optional[int] = 1,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
    repetition_penalty: Optional[float] = 1.0,
    stop: Optional[Union[str, List[str]]] = None,
):
    """
    Generates a text completion for the given prompt.

    Args:
        text (str): Input prompt or JSON-encoded message list (when ``use_chat_template=True``).
        use_chat_template (bool): Apply chat template formatting. Defaults to True.
        max_gen_seq_length (int): Maximum tokens to generate. Defaults to 512.
        min_gen_seq_length (int): Minimum tokens to generate. Defaults to 0.
        num_return_sequences (int): Number of completions to return. Defaults to 1.
        num_beams (int): Beam search width. Defaults to 1.
        do_sample (bool): Enable sampling-based decoding. Defaults to False.
        temperature (float): Sampling temperature. Defaults to 1.0.
        top_k (int): Top-k sampling. Defaults to 50.
        top_p (float): Top-p (nucleus) sampling. Defaults to 1.0.
        repetition_penalty (float): Repetition penalty. Defaults to 1.0.
        stop (str or List[str], optional): Stop string(s) to end generation.

    Returns:
        str or List[str]: Generated text. Single string when ``num_return_sequences=1``.

    Raises:
        HTTPException: 503 when the engine is not loaded (``start`` not called yet).
    """
    if self._pipe is None:
        # Explicit check instead of ``assert``: asserts are stripped under
        # ``python -O`` and would surface as an opaque 500 otherwise.
        from fastapi import HTTPException

        raise HTTPException(
            status_code=503, detail="Service not started. Call /start first."
        )

    processor = self._pipe.processor
    if use_chat_template:
        # ``text`` carries the JSON-encoded message list in this mode.
        prompt = processor.chat_template(messages=json.loads(text))
    else:
        prompt = text
    inputs = processor.generation_inputs(text=prompt)
    # The engine is given a batch of one.
    input_ids = inputs.input_ids.unsqueeze(0)

    # One generation at a time.
    # NOTE(review): the engine call is synchronous, so the event loop is
    # blocked while it runs — confirm this is acceptable.
    async with self._lock:
        outputs = self._pipe.generate(
            input_ids=input_ids,
            max_gen_seq_length=max_gen_seq_length,
            min_gen_seq_length=min_gen_seq_length,
            num_return_sequences=num_return_sequences,
            num_beams=num_beams,
            do_sample=do_sample,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            stop=stop,
        )
    decoded = processor.detokenize(sequences=outputs.sequences)
    # Only the first (and only) batch entry is returned.
    sequences = decoded[0]
    return sequences[0] if num_return_sequences == 1 else sequences

QWen3VLVLLMFastAPI¤

Tip

core/fastapi/vllm/qwen3_vl is the section for configuration of QWen3VLVLLMFastAPI.

Bases: GenericFastAPI

FastAPI service for QWen3-VL vision-language generation powered by vLLM.

Exposes /generate, /status, /start, and /stop endpoints under a configurable router prefix (default /core/fastapi/vllm/qwen3_vl). Accepts both text-only and multimodal (text + image) generation requests.

Source code in src/unitorch/cli/fastapis/qwen_vl_vllm.py

def __init__(self, config: Config):
    """
    Sets up the QWen3-VL vLLM service and registers its HTTP routes.

    Args:
        config (Config): Configuration object. Its default section is switched to
            ``core/fastapi/vllm/qwen3_vl`` and the ``router`` option is read from
            it to obtain the route prefix.
    """
    self.config = config
    config.set_default_section("core/fastapi/vllm/qwen3_vl")
    prefix = config.getoption("router", "/core/fastapi/vllm/qwen3_vl")
    # No engine exists until ``start`` builds one.
    self._pipe = None
    self._router = APIRouter(prefix=prefix)
    # (path, handler, allowed HTTP methods), registered in this order.
    endpoints = (
        ("/generate", self.generate, ["POST"]),
        ("/status", self.status, ["GET"]),
        ("/start", self.start, ["GET"]),
        ("/stop", self.stop, ["GET"]),
    )
    for path, handler, methods in endpoints:
        self._router.add_api_route(path, handler, methods=methods)
    # Held by ``generate`` around the engine call.
    self._lock = asyncio.Lock()

config `instance-attribute` ¤

config = config

_pipe `instance-attribute` ¤

_pipe = None

_router `instance-attribute` ¤

_router = APIRouter(prefix=router)

_lock `instance-attribute` ¤

_lock = Lock()

router `property` ¤

router

start ¤

start(pretrained_name: str = 'qwen3-vl-2b-instruct')

Loads and starts the vLLM QWen3-VL multimodal engine.

Parameters:

Name	Type	Description	Default
`pretrained_name`	`str`	Pretrained model name to load. Defaults to `"qwen3-vl-2b-instruct"`.	`'qwen3-vl-2b-instruct'`

Source code in src/unitorch/cli/fastapis/qwen_vl_vllm.py

def start(self, pretrained_name: str = "qwen3-vl-2b-instruct"):
    """
    Loads and starts the vLLM QWen3-VL multimodal engine.

    The name is written to the ``pretrained_name`` option of the service's
    config section before the engine is built from the configuration.

    Args:
        pretrained_name (str): Pretrained model name to load. Defaults to ``"qwen3-vl-2b-instruct"``.

    Returns:
        str: The literal ``"start success"``.
    """
    section = "core/fastapi/vllm/qwen3_vl"
    self.config.set_default_section(section)
    self.config.set(section, "pretrained_name", pretrained_name)
    engine = QWen3VLVLLMForGeneration.from_config(
        self.config,
        pretrained_name=pretrained_name,
    )
    self._pipe = engine
    return "start success"

stop ¤

stop()

Stops and unloads the vLLM engine, releasing GPU memory.

Source code in src/unitorch/cli/fastapis/qwen_vl_vllm.py

def stop(self):
    """
    Stops and unloads the vLLM engine, releasing GPU memory.

    Drops the service's reference to the engine, runs garbage collection and
    then attempts a CUDA cache flush.

    Returns:
        str: The literal ``"stop success"``.
    """
    # Rebind instead of ``del`` so ``_pipe`` is never a missing attribute for
    # a concurrent ``status`` call while cleanup runs.
    self._pipe = None
    gc.collect()
    try:
        import torch

        torch.cuda.empty_cache()
    except Exception:
        # Best-effort: a failed import or cache flush must not fail the stop.
        pass
    return "stop success"

status ¤

status()

Returns "running" if the engine is loaded, otherwise "stopped".

Source code in src/unitorch/cli/fastapis/qwen_vl_vllm.py

def status(self):
    """Reports ``"running"`` while an engine is loaded and ``"stopped"`` otherwise."""
    if self._pipe is None:
        return "stopped"
    return "running"

generate `async` ¤

generate(
    text: str,
    image: Optional[UploadFile] = File(default=None),
    use_chat_template: Optional[bool] = True,
    max_gen_seq_length: Optional[int] = 512,
    min_gen_seq_length: Optional[int] = 0,
    num_return_sequences: Optional[int] = 1,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
    repetition_penalty: Optional[float] = 1.0,
    stop: Optional[Union[str, List[str]]] = None,
)

Generates a text completion for the given prompt and optional image.

Parameters:

Name	Type	Description	Default
`text`	`str`	Input prompt or JSON-encoded message list (when `use_chat_template=True`).	required
`image`	`UploadFile`	Uploaded image file for multimodal generation.	`File(default=None)`
`use_chat_template`	`bool`	Apply chat template formatting. Defaults to True.	`True`
`max_gen_seq_length`	`int`	Maximum tokens to generate. Defaults to 512.	`512`
`min_gen_seq_length`	`int`	Minimum tokens to generate. Defaults to 0.	`0`
`num_return_sequences`	`int`	Number of completions to return. Defaults to 1.	`1`
`do_sample`	`bool`	Enable sampling-based decoding. Defaults to False.	`False`
`temperature`	`float`	Sampling temperature. Defaults to 1.0.	`1.0`
`top_k`	`int`	Top-k sampling. Defaults to 50.	`50`
`top_p`	`float`	Top-p (nucleus) sampling. Defaults to 1.0.	`1.0`
`repetition_penalty`	`float`	Repetition penalty. Defaults to 1.0.	`1.0`
`stop`	`str or List[str]`	Stop string(s).	`None`

Returns:

Type	Description
	str or List[str]: Generated text. Single string when `num_return_sequences=1`.

Source code in src/unitorch/cli/fastapis/qwen_vl_vllm.py

async def generate(
    self,
    text: str,
    image: Optional[UploadFile] = File(default=None),
    use_chat_template: Optional[bool] = True,
    max_gen_seq_length: Optional[int] = 512,
    min_gen_seq_length: Optional[int] = 0,
    num_return_sequences: Optional[int] = 1,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
    repetition_penalty: Optional[float] = 1.0,
    stop: Optional[Union[str, List[str]]] = None,
):
    """
    Generates a text completion for the given prompt and optional image.

    Args:
        text (str): Input prompt or JSON-encoded message list (when ``use_chat_template=True``).
        image (UploadFile, optional): Uploaded image file for multimodal generation.
        use_chat_template (bool): Apply chat template formatting. Defaults to True.
        max_gen_seq_length (int): Maximum tokens to generate. Defaults to 512.
        min_gen_seq_length (int): Minimum tokens to generate. Defaults to 0.
        num_return_sequences (int): Number of completions to return. Defaults to 1.
        do_sample (bool): Enable sampling-based decoding. Defaults to False.
        temperature (float): Sampling temperature. Defaults to 1.0.
        top_k (int): Top-k sampling. Defaults to 50.
        top_p (float): Top-p (nucleus) sampling. Defaults to 1.0.
        repetition_penalty (float): Repetition penalty. Defaults to 1.0.
        stop (str or List[str], optional): Stop string(s).

    Returns:
        str or List[str]: Generated text. Single string when ``num_return_sequences=1``.

    Raises:
        HTTPException: 503 when the engine is not loaded (``start`` not called yet).
    """
    if self._pipe is None:
        # Explicit check instead of ``assert``: asserts are stripped under
        # ``python -O`` and would surface as an opaque 500 otherwise.
        from fastapi import HTTPException

        raise HTTPException(
            status_code=503, detail="Service not started. Call /start first."
        )

    pil_image = None
    if image is not None:
        content = await image.read()
        pil_image = Image.open(io.BytesIO(content)).convert("RGB")
    has_image = pil_image is not None

    processor = self._pipe.processor
    if use_chat_template:
        # ``text`` carries the JSON-encoded message list in this mode.
        prompt = processor.chat_template(messages=json.loads(text))
    else:
        prompt = text
    inputs = processor.generation_inputs(
        text=prompt,
        images=[pil_image] if has_image else [],
    )
    # The engine is given a batch of one; image tensors are only read from
    # ``inputs`` when an image was uploaded.
    input_ids = inputs.input_ids.unsqueeze(0)
    pixel_values = inputs.pixel_values.unsqueeze(0) if has_image else None
    image_grid_thw = inputs.image_grid_thw if has_image else None

    # One generation at a time.
    # NOTE(review): the engine call is synchronous, so the event loop is
    # blocked while it runs — confirm this is acceptable.
    async with self._lock:
        outputs = self._pipe.generate(
            input_ids=input_ids,
            pixel_values=pixel_values,
            image_grid_thw=image_grid_thw,
            max_gen_seq_length=max_gen_seq_length,
            min_gen_seq_length=min_gen_seq_length,
            num_return_sequences=num_return_sequences,
            do_sample=do_sample,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            stop=stop,
        )
    decoded = processor.detokenize(sequences=outputs.sequences)
    # Only the first (and only) batch entry is returned.
    sequences = decoded[0]
    return sequences[0] if num_return_sequences == 1 else sequences

unitorch.cli.fastapis¤

InfoFastAPI¤

config instance-attribute ¤

_device instance-attribute ¤

_router instance-attribute ¤

_lock instance-attribute ¤

router property ¤

start ¤

stop ¤

status ¤

BRIAFastAPI¤

config instance-attribute ¤

_pipe instance-attribute ¤

_router instance-attribute ¤

_lock instance-attribute ¤

router property ¤

start ¤

stop ¤

status ¤

generate async ¤

ClipForClassificationFastAPI¤

config instance-attribute ¤

_pipe instance-attribute ¤

_router instance-attribute ¤

_lock instance-attribute ¤

router property ¤

start ¤

stop ¤

status ¤

generate async ¤

ClipForTextClassificationFastAPI¤

config instance-attribute ¤

_pipe instance-attribute ¤

_router instance-attribute ¤

_lock instance-attribute ¤

router property ¤

start ¤

stop ¤

status ¤

generate async ¤

ClipForImageClassificationFastAPI¤

config instance-attribute ¤

_pipe instance-attribute ¤

_router instance-attribute ¤

_lock instance-attribute ¤

router property ¤

start ¤

stop ¤

status ¤

generate async ¤

ClipForMatchingFastAPI¤

config instance-attribute ¤

_pipe instance-attribute ¤

_router instance-attribute ¤

_lock instance-attribute ¤

router property ¤

start ¤

stop ¤

status ¤

generate async ¤

DetrForDetectionFastAPI¤

config instance-attribute ¤

_pipe instance-attribute ¤

_router instance-attribute ¤

_lock instance-attribute ¤

router property ¤

start ¤

stop ¤

status ¤

generate async ¤

DPTForDepthEstimationFastAPI¤

config instance-attribute ¤

_pipe instance-attribute ¤

_router instance-attribute ¤

_lock instance-attribute ¤

router property ¤

start ¤

stop ¤

status ¤

generate async ¤

config `instance-attribute` ¤

_device `instance-attribute` ¤

_router `instance-attribute` ¤

_lock `instance-attribute` ¤

router `property` ¤

config `instance-attribute` ¤

_pipe `instance-attribute` ¤

_router `instance-attribute` ¤

_lock `instance-attribute` ¤

router `property` ¤

generate `async` ¤

config `instance-attribute` ¤

_pipe `instance-attribute` ¤

_router `instance-attribute` ¤

_lock `instance-attribute` ¤

router `property` ¤

generate `async` ¤

config `instance-attribute` ¤

_pipe `instance-attribute` ¤

_router `instance-attribute` ¤

_lock `instance-attribute` ¤

router `property` ¤

generate `async` ¤

config `instance-attribute` ¤

_pipe `instance-attribute` ¤

_router `instance-attribute` ¤

_lock `instance-attribute` ¤

router `property` ¤

generate `async` ¤

config `instance-attribute` ¤

_pipe `instance-attribute` ¤

_router `instance-attribute` ¤

_lock `instance-attribute` ¤

router `property` ¤

generate `async` ¤

config `instance-attribute` ¤

_pipe `instance-attribute` ¤

_router `instance-attribute` ¤

_lock `instance-attribute` ¤

router `property` ¤

generate `async` ¤

config `instance-attribute` ¤

_pipe `instance-attribute` ¤

_router `instance-attribute` ¤

_lock `instance-attribute` ¤

router `property` ¤

generate `async` ¤

config `instance-attribute` ¤

_pipe `instance-attribute` ¤

_router `instance-attribute` ¤

_lock `instance-attribute` ¤

router `property` ¤

generate `async` ¤

config `instance-attribute` ¤

_pipe `instance-attribute` ¤

_router `instance-attribute` ¤

_lock `instance-attribute` ¤

router `property` ¤

generate `async` ¤

config `instance-attribute` ¤

_pipe `instance-attribute` ¤

_router `instance-attribute` ¤

_lock `instance-attribute` ¤

router `property` ¤

generate `async` ¤

config `instance-attribute` ¤

_pipe `instance-attribute` ¤

_router `instance-attribute` ¤

_lock `instance-attribute` ¤

router `property` ¤

generate `async` ¤

config `instance-attribute` ¤

_pipe `instance-attribute` ¤

_router `instance-attribute` ¤

_lock `instance-attribute` ¤

router `property` ¤

generate `async` ¤

config `instance-attribute` ¤

_pipe `instance-attribute` ¤

_router `instance-attribute` ¤