refactor(gradio): replace CPU offload with direct GPU unload/lazy-load
Instead of shuttling models between devices with .to('cpu') / .to('cuda'),
models are now fully deleted from the GPU (no CPU intermediate) and reloaded on demand:
- _unload_i23d_worker(): del + gc.collect() + empty_cache()
- _ensure_i23d_worker(): lazy reload from pretrained if None
- _unload_tex_pipeline(): del + gc.collect() + empty_cache()
- _ensure_tex_pipeline(): lazy load from tex_conf if None
generation_all() flow in low_vram_mode:
shape gen → _unload_i23d_worker → _ensure_tex_pipeline →
texture gen → _unload_tex_pipeline
(shape model reloads on next _gen_shape call via _ensure_i23d_worker)
Startup: in low_vram_mode the tex_pipeline is NOT loaded (only tex_conf is stored),
reducing startup VRAM usage from ~13.5GB to ~7.25GB.
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -28,6 +28,7 @@ except Exception as e:
|
|||||||
print(f"Warning: Failed to apply torchvision fix: {e}")
|
print(f"Warning: Failed to apply torchvision fix: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
import gc
|
||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
import shutil
|
import shutil
|
||||||
@@ -213,6 +214,53 @@ height="{height}" width="100%" frameborder="0"></iframe>'
|
|||||||
</div>
|
</div>
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# VRAM management helpers (used when --low_vram_mode is set)
|
||||||
|
# Models are unloaded (del'd) before the other model runs, then reloaded
|
||||||
|
# on next request — no CPU intermediate, VRAM freed immediately.
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _unload_i23d_worker():
|
||||||
|
"""Delete shape model from GPU and free VRAM."""
|
||||||
|
global i23d_worker
|
||||||
|
del i23d_worker
|
||||||
|
i23d_worker = None
|
||||||
|
gc.collect()
|
||||||
|
torch.cuda.empty_cache()
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_i23d_worker():
|
||||||
|
"""Reload shape model to GPU if it was previously unloaded."""
|
||||||
|
global i23d_worker
|
||||||
|
if i23d_worker is None:
|
||||||
|
from hy3dshape import Hunyuan3DDiTFlowMatchingPipeline
|
||||||
|
logger.info("Reloading shape model to GPU...")
|
||||||
|
i23d_worker = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained(
|
||||||
|
args.model_path,
|
||||||
|
subfolder=args.subfolder,
|
||||||
|
use_safetensors=False,
|
||||||
|
device=args.device,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _unload_tex_pipeline():
|
||||||
|
"""Delete texture pipeline from GPU and free VRAM."""
|
||||||
|
global tex_pipeline
|
||||||
|
del tex_pipeline
|
||||||
|
tex_pipeline = None
|
||||||
|
gc.collect()
|
||||||
|
torch.cuda.empty_cache()
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_tex_pipeline():
|
||||||
|
"""Load texture pipeline to GPU if not already loaded."""
|
||||||
|
global tex_pipeline
|
||||||
|
if tex_pipeline is None and tex_conf is not None:
|
||||||
|
from hy3dpaint.textureGenPipeline import Hunyuan3DPaintPipeline
|
||||||
|
logger.info("Loading texture pipeline to GPU...")
|
||||||
|
tex_pipeline = Hunyuan3DPaintPipeline(tex_conf)
|
||||||
|
|
||||||
|
|
||||||
@spaces.GPU(duration=60)
|
@spaces.GPU(duration=60)
|
||||||
def _gen_shape(
|
def _gen_shape(
|
||||||
caption=None,
|
caption=None,
|
||||||
@@ -297,6 +345,9 @@ def _gen_shape(
|
|||||||
# image to white model
|
# image to white model
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
|
||||||
|
if args.low_vram_mode:
|
||||||
|
_ensure_i23d_worker()
|
||||||
|
|
||||||
generator = torch.Generator()
|
generator = torch.Generator()
|
||||||
generator = generator.manual_seed(int(seed))
|
generator = generator.manual_seed(int(seed))
|
||||||
outputs = i23d_worker(
|
outputs = i23d_worker(
|
||||||
@@ -379,18 +430,17 @@ def generation_all(
|
|||||||
|
|
||||||
text_path = os.path.join(save_folder, f'textured_mesh.obj')
|
text_path = os.path.join(save_folder, f'textured_mesh.obj')
|
||||||
|
|
||||||
# In low_vram_mode: offload shape model to CPU before texture gen to free VRAM,
|
# In low_vram_mode: unload shape model entirely (del, no CPU copy) to free VRAM,
|
||||||
# mirroring the sequential-load strategy in batch_generate.py.
|
# then load texture pipeline on demand. Shape model reloads lazily on next request.
|
||||||
if args.low_vram_mode:
|
if args.low_vram_mode:
|
||||||
i23d_worker.to('cpu')
|
_unload_i23d_worker()
|
||||||
torch.cuda.empty_cache()
|
_ensure_tex_pipeline()
|
||||||
|
|
||||||
path_textured = tex_pipeline(mesh_path=path, image_path=image, output_mesh_path=text_path, save_glb=False)
|
path_textured = tex_pipeline(mesh_path=path, image_path=image, output_mesh_path=text_path, save_glb=False)
|
||||||
|
|
||||||
# Restore shape model to GPU so subsequent requests don't need to reload from disk.
|
# Unload texture pipeline after use so VRAM is free for the next shape request.
|
||||||
if args.low_vram_mode:
|
if args.low_vram_mode:
|
||||||
i23d_worker.to('cuda')
|
_unload_tex_pipeline()
|
||||||
torch.cuda.empty_cache()
|
|
||||||
|
|
||||||
logger.info("---Texture Generation takes %s seconds ---" % (time.time() - tmp_time))
|
logger.info("---Texture Generation takes %s seconds ---" % (time.time() - tmp_time))
|
||||||
stats['time']['texture generation'] = time.time() - tmp_time
|
stats['time']['texture generation'] = time.time() - tmp_time
|
||||||
@@ -808,11 +858,13 @@ if __name__ == '__main__':
|
|||||||
# texgen_worker.enable_model_cpu_offload()
|
# texgen_worker.enable_model_cpu_offload()
|
||||||
|
|
||||||
from hy3dpaint.textureGenPipeline import Hunyuan3DPaintPipeline, Hunyuan3DPaintConfig
|
from hy3dpaint.textureGenPipeline import Hunyuan3DPaintPipeline, Hunyuan3DPaintConfig
|
||||||
conf = Hunyuan3DPaintConfig(max_num_view=9, resolution=512)
|
tex_conf = Hunyuan3DPaintConfig(max_num_view=9, resolution=512)
|
||||||
conf.realesrgan_ckpt_path = "hy3dpaint/ckpt/RealESRGAN_x4plus.pth"
|
tex_conf.realesrgan_ckpt_path = "hy3dpaint/ckpt/RealESRGAN_x4plus.pth"
|
||||||
conf.multiview_cfg_path = "hy3dpaint/cfgs/hunyuan-paint-pbr.yaml"
|
tex_conf.multiview_cfg_path = "hy3dpaint/cfgs/hunyuan-paint-pbr.yaml"
|
||||||
conf.custom_pipeline = "hy3dpaint/hunyuanpaintpbr"
|
tex_conf.custom_pipeline = "hy3dpaint/hunyuanpaintpbr"
|
||||||
tex_pipeline = Hunyuan3DPaintPipeline(conf)
|
if not args.low_vram_mode:
|
||||||
|
# Load immediately; in low_vram_mode we load on-demand per request.
|
||||||
|
tex_pipeline = Hunyuan3DPaintPipeline(tex_conf)
|
||||||
|
|
||||||
# Not help much, ignore for now.
|
# Not help much, ignore for now.
|
||||||
# if args.compile:
|
# if args.compile:
|
||||||
|
|||||||
Reference in New Issue
Block a user