Files
Hunyuan3D_2.1_Low_VRAM/gradio_app.py
Akasei 70289d04d7 fix: eliminate OOM on RTX 3080 via load_state_dict(assign=True) + low-VRAM mode
Root cause: torch.load() with mmap=True returns fp16 tensors, but
load_state_dict() without assign=True widens them fp16→fp32 in-place,
doubling CPU anon-rss (7 GB fp16 ckpt → 14 GB fp32 params). Combined
with the 2 GB Gradio server baseline, this exceeded the 15 GB physical
RAM limit on the second generation request.

Fix: add assign=True to all load_state_dict calls in pipelines.py and
autoencoders/model.py. With assign=True the mmap fp16 tensors are
assigned directly as model parameters without any fp16→fp32 copy.
When model.to('cuda') is then called, the mmap pages (file-backed,
evictable) are streamed directly to VRAM — CPU anon-rss stays near 0.

Peak RSS is now ~3.9 GB instead of 14.7 GB (killed) across all rounds.

gradio_app.py changes:
- low_vram_mode always takes the full-delete path (never CPU offload)
- glibc malloc tuning at startup (MALLOC_ARENA_MAX=1, malloc_trim)
- preemptive gc.collect(2) + malloc_trim + empty_cache at generation start
- _rlog() memory logging at each major step for monitoring

pipelines.py:
- load_state_dict(..., assign=True) for model, vae, conditioner
- del ckpt after state dict assignment to release mmap fd early

autoencoders/model.py:
- load_state_dict(..., assign=True) in from_single_file
- load_state_dict(..., assign=True) in init_from_ckpt

Verified: 4 consecutive Playwright WebUI rounds (shape+texture) pass
with no OOM. API two-round test also passes.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-03-17 02:03:43 +08:00

1074 lines
41 KiB
Python

# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
# except for the third-party components listed below.
# Hunyuan 3D does not impose any additional limitations beyond what is outlined
# in the respective licenses of these third-party components.
# Users must comply with all terms and conditions of original licenses of these third-party
# components and must ensure that the usage of the third party components adheres to
# all relevant laws and regulations.
# For avoidance of doubts, Hunyuan 3D means the large language models and
# their software and algorithms, including trained model weights, parameters (including
# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
# fine-tuning enabling code and other elements of the foregoing made publicly available
# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
# Apply torchvision compatibility fix before other imports
import sys

# Prepend both bundled package directories in a single step. The resulting
# search order is './hy3dpaint' first, then './hy3dshape'.
sys.path[:0] = ['./hy3dpaint', './hy3dshape']

# Best effort: the application keeps starting even when the fix module is
# missing or fails, and only prints a warning.
try:
    from torchvision_fix import apply_fix
    apply_fix()
except ImportError:
    print("Warning: torchvision_fix module not found, proceeding without compatibility fix")
except Exception as exc:
    print(f"Warning: Failed to apply torchvision fix: {exc}")
import gc
import os
import random
import shutil
import subprocess
import time
import ctypes
import ctypes.util
from glob import glob
from pathlib import Path
import gradio as gr
import torch
import trimesh
import uvicorn
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
import uuid
import numpy as np
from hy3dshape.utils import logger
from hy3dpaint.convert_utils import create_glb_with_pbr_materials
# ── glibc malloc tuning ───────────────────────────────────────────────────────
# Applied BEFORE any large allocation so glibc honours them from the start.
#   M_MMAP_THRESHOLD (-3): allocations > 1 MB use anonymous mmap instead of
#     the heap; when freed they are immediately returned to the OS via munmap,
#     eliminating heap fragmentation for PyTorch tensors (all >> 1 MB).
#   M_TRIM_THRESHOLD (-1): trim the top of the heap aggressively (128 KB).
#   M_ARENA_MAX (-8): limit to 1 arena so malloc_trim() can release
#     ALL freed pages, not just the main-thread arena.
#
# The MALLOC_* environment variables are read by glibc when the process
# starts, so setting them here only affects child processes; the running
# process is tuned through mallopt() below.
os.environ.setdefault("MALLOC_ARENA_MAX", "1")
os.environ.setdefault("MALLOC_MMAP_THRESHOLD_", "1048576")  # 1 MB

# Loading libc must not abort start-up on platforms where it cannot be found
# (the tuning is an optimisation, not a requirement).
try:
    _libc = ctypes.CDLL(ctypes.util.find_library("c") or "libc.so.6", use_errno=True)
except OSError:
    _libc = None

if _libc is not None:
    try:
        _libc.mallopt(-3, 1024 * 1024)  # M_MMAP_THRESHOLD = 1 MB (runtime)
        _libc.mallopt(-1, 128 * 1024)   # M_TRIM_THRESHOLD = 128 KB (trim aggressively)
        _libc.mallopt(-8, 1)            # M_ARENA_MAX = 1 (runtime)
    except (AttributeError, OSError):
        # Non-glibc C library without mallopt(): skip the tuning.
        pass
def _malloc_trim():
    """Call libc's ``malloc_trim(0)`` to hand free heap pages back to the OS.

    Best effort: any exception raised while looking up or calling the
    function is swallowed and the function returns ``None``.
    """
    try:
        release = _libc.malloc_trim
        release(0)
    except Exception:
        pass
# Allow CUDA allocator to use expandable segments, reducing fragmentation.
os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")

# Globals for lazy load/unload
i23d_worker = None
tex_pipeline = None
tex_conf = None

# Upper bound for seeds. Kept as an int because random.randint() rejects
# float bounds on Python 3.12+.
MAX_SEED = 10_000_000
ENV = "Local"  # "Huggingface"
if ENV == 'Huggingface':
    # Setup for running on the Huggingface platform:
    #   - change into the differentiable renderer folder and run the shell
    #     script that compiles the mesh painter;
    #   - install the custom rasterizer wheel via pip.
    # This assumes the Huggingface directory layout used in the commands below.
    import os
    import spaces
    import subprocess
    import sys
    import shlex

    print("cd /home/user/app/hy3dgen/texgen/differentiable_renderer/ && bash compile_mesh_painter.sh")
    os.system("cd /home/user/app/hy3dgen/texgen/differentiable_renderer/ && bash compile_mesh_painter.sh")
    print('install custom')
    subprocess.run(
        shlex.split("pip install custom_rasterizer-0.1-cp310-cp310-linux_x86_64.whl"),
        check=True,
    )
else:
    # Local run: provide a stand-in `spaces` namespace whose GPU decorator is
    # a no-op, so code decorated with `@spaces.GPU(...)` runs unmodified.
    class spaces:
        class GPU:
            def __init__(self, duration=60):
                # Stored only; nothing in this class reads it back.
                self.duration = duration

            def __call__(self, func):
                # Hand the decorated function back untouched.
                return func
def get_example_img_list():
    """
    Collect the example PNG images shipped with the app.

    Globs recursively below './assets/example_images/' (relative to the
    current working directory).

    Returns:
        list[str]: Matching file paths in sorted order.
    """
    print('Loading example img list ...')
    png_paths = glob('./assets/example_images/**/*.png', recursive=True)
    png_paths.sort()
    return png_paths
def get_example_txt_list():
    """
    Load and return a list of example text prompts.

    Reads './assets/example_prompts.txt' (relative to the current working
    directory) as UTF-8 and strips surrounding whitespace from every line.
    Blank lines are kept as empty strings.

    Returns:
        list[str]: One entry per line of the file.

    Raises:
        OSError: If the prompt file cannot be opened.
    """
    print('Loading example txt list ...')
    # The context manager closes the file deterministically instead of
    # leaving the handle to the garbage collector.
    with open('./assets/example_prompts.txt', encoding='utf-8') as prompt_file:
        return [line.strip() for line in prompt_file]
def gen_save_folder(max_size=200):
    """
    Create a fresh, uniquely named output folder inside SAVE_DIR.

    When SAVE_DIR already holds `max_size` or more sub-folders, the single
    folder with the smallest `st_ctime` is removed first.

    Args:
        max_size (int, optional): Folder count at which one old folder is evicted. Defaults to 200.

    Returns:
        str: Path to the newly created save folder.
    """
    os.makedirs(SAVE_DIR, exist_ok=True)

    existing = [entry for entry in Path(SAVE_DIR).iterdir() if entry.is_dir()]
    if len(existing) >= max_size:
        stalest = min(existing, key=lambda folder: folder.stat().st_ctime)
        shutil.rmtree(stalest)
        print(f"Removed the oldest folder: {stalest}")

    # A random UUID keeps concurrent requests from colliding on a name.
    new_folder = os.path.join(SAVE_DIR, str(uuid.uuid4()))
    os.makedirs(new_folder, exist_ok=True)
    print(f"Created new folder: {new_folder}")
    return new_folder
# Removed complex PBR conversion functions - using simple trimesh-based conversion
def export_mesh(mesh, save_folder, textured=False, type='glb'):
    """
    Write `mesh` into `save_folder` and return the resulting file path.

    The file is named 'textured_mesh.<type>' when `textured` is true and
    'white_mesh.<type>' otherwise.

    Args:
        mesh: Object exposing an `export(path, ...)` method (a trimesh mesh in this app).
        save_folder (str): Directory that receives the file.
        textured (bool, optional): Selects the file name and, for 'glb'/'obj',
            is forwarded as `include_normals`. Defaults to False.
        type (str, optional): File extension / format. Defaults to 'glb'.

    Returns:
        str: The full path to the exported mesh file.
    """
    stem = 'textured_mesh' if textured else 'white_mesh'
    path = os.path.join(save_folder, f'{stem}.{type}')

    # Only the 'glb' and 'obj' exports are given the include_normals keyword.
    if type in ('glb', 'obj'):
        mesh.export(path, include_normals=textured)
    else:
        mesh.export(path)
    return path
def quick_convert_with_obj2gltf(obj_path: str, glb_path: str) -> bool:
    """Convert a textured OBJ into a GLB with PBR materials.

    The texture files are expected next to the OBJ, sharing its base name:
    '<base>.jpg' (albedo), '<base>_metallic.jpg' and '<base>_roughness.jpg'.

    Args:
        obj_path: Path to the source OBJ file.
        glb_path: Path of the GLB file to write.

    Returns:
        True once `create_glb_with_pbr_materials` has returned without raising.
    """
    # Strip only the file extension; a plain str.replace('.obj', ...) would
    # also rewrite '.obj' occurring inside a directory name.
    base, _ext = os.path.splitext(obj_path)
    textures = {
        'albedo': f'{base}.jpg',
        'metallic': f'{base}_metallic.jpg',
        'roughness': f'{base}_roughness.jpg',
    }
    # Run the conversion.
    create_glb_with_pbr_materials(obj_path, textures, glb_path)
    return True
def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
    """Return `seed`, or a fresh random seed when `randomize_seed` is true.

    Args:
        seed: Seed to use when no randomisation is requested.
        randomize_seed: When true, draw a seed from [0, MAX_SEED] instead.

    Returns:
        The seed to use for generation.
    """
    if randomize_seed:
        # random.randint() raises TypeError for float bounds on Python 3.12+,
        # so coerce the module-level limit to int before drawing.
        seed = random.randint(0, int(MAX_SEED))
    return seed
def build_model_viewer_html(save_folder, height=660, width=790, textured=False):
    """Render the model-viewer HTML page for a save folder and return an iframe snippet.

    Reads the matching template below CURRENT_DIR, substitutes the
    '#height#', '#width#' and '#src#' placeholders, writes the result into
    `save_folder`, and returns HTML that embeds the page through '/static/'.

    Args:
        save_folder: Folder that receives the generated HTML file.
        height: Height used for the iframe; the template receives it minus an offset.
        width: Width substituted into the template.
        textured: Selects the textured or the white-mesh template and file names.

    Returns:
        str: A <div> wrapping an <iframe> that points at the generated page.
    """
    # Everything that differs between the textured and white-mesh variants.
    if textured:
        related_path = "./textured_mesh.glb"
        template_name = './assets/modelviewer-textured-template.html'
        html_name = 'textured_mesh.html'
        offset = 50
    else:
        related_path = "./white_mesh.glb"
        template_name = './assets/modelviewer-template.html'
        html_name = 'white_mesh.html'
        offset = 10
    output_html_path = os.path.join(save_folder, html_name)

    with open(os.path.join(CURRENT_DIR, template_name), 'r', encoding='utf-8') as src:
        template_html = src.read()

    with open(output_html_path, 'w', encoding='utf-8') as dst:
        rendered = (
            template_html
            .replace('#height#', f'{height - offset}')
            .replace('#width#', f'{width}')
            .replace('#src#', f'{related_path}/')
        )
        dst.write(rendered)

    # The page is served from SAVE_DIR, mounted under /static.
    rel_path = os.path.relpath(output_html_path, SAVE_DIR)
    iframe_tag = (
        f'<iframe src="/static/{rel_path}" '
        f'height="{height}" width="100%" frameborder="0"></iframe>'
    )
    print(
        f'Find html file {output_html_path}, '
        f'{os.path.exists(output_html_path)}, relative HTML path is /static/{rel_path}'
    )

    return (
        "\n"
        f"<div style='height: {height}; width: 100%;'>\n"
        f"{iframe_tag}\n"
        "</div>\n"
    )
# ---------------------------------------------------------------------------
# VRAM management helpers (used when --low_vram_mode is set)
#
# Two ways of getting the shape model out of VRAM before the texture pipeline
# is loaded are implemented below:
#
#   1. CPU offload: check available RAM via /proc/meminfo and, if at least
#      _RAM_THRESHOLD_GB is available, .to('cpu') the outgoing model
#      (fast, no disk reload needed later).
#   2. Full delete: del the outgoing model and reload it from disk later
#      (~20-30s).
#
# The CPU-offload path was designed so that machines with ≥32GB RAM can swap
# models instantly while 16GB machines safely fall back to disk reload.
# NOTE: the helpers only take the CPU-offload path when low_vram_mode is
# disabled; with --low_vram_mode set the outgoing model is always fully
# deleted.
# ---------------------------------------------------------------------------
# Approximate RAM required (GB) to hold one model in CPU while loading another.
# With mmap=True loading, staging a model needs ~0 extra heap RAM.
# So threshold = size of model in CPU RAM = ~7.5GB, plus 3GB headroom = 10.5GB.
# With 16GB total, we need at least ~10.5GB free to safely offload i23d to CPU.
# Compared against the MemAvailable reading in _can_offload_to_cpu().
_RAM_THRESHOLD_GB = 10.5
# Track whether i23d is offloaded to CPU RAM (vs deleted entirely).
# Written by _prepare_for_tex() and _ensure_i23d_worker().
_i23d_on_cpu = False
def _get_available_ram_gb():
    """Return the 'MemAvailable' figure from /proc/meminfo, in GB.

    Returns:
        float: The value divided by 1024 * 1024, or 0.0 when the file cannot
        be read, has no 'MemAvailable:' line, or the line cannot be parsed.
    """
    try:
        with open('/proc/meminfo') as meminfo:
            for line in meminfo:
                if line.startswith('MemAvailable:'):
                    # Second field is the amount (kB in /proc/meminfo).
                    return int(line.split()[1]) / (1024 * 1024)
    except (OSError, ValueError, IndexError):
        # Missing /proc (non-Linux), unreadable file or malformed line:
        # report "no RAM available" so callers take the conservative path.
        pass
    return 0.0
def _can_offload_to_cpu():
    """Report whether available RAM meets _RAM_THRESHOLD_GB, logging the decision.

    Returns:
        bool: True when the available RAM is at least the threshold.
    """
    available = _get_available_ram_gb()
    can = available >= _RAM_THRESHOLD_GB
    verdict = 'CPU offload (fast)' if can else 'full delete (safe)'
    logger.info(
        f"RAM check: {available:.1f}GB available, "
        f"need {_RAM_THRESHOLD_GB:.1f}GB for CPU offload → "
        f"{verdict}"
    )
    return can
def _prepare_for_tex():
    """Move the shape model out of the way, then make sure the texture pipeline is loaded.

    When a shape model is loaded it is either moved to CPU (only if
    low_vram_mode is disabled and the RAM check passes) or deleted outright.
    Afterwards the heap is trimmed and the CUDA cache emptied before the
    texture pipeline is loaded.
    """
    global i23d_worker, _i23d_on_cpu

    if i23d_worker is not None:
        # Short-circuit keeps the RAM check (and its log line) from running
        # when low_vram_mode is enabled.
        offload = not args.low_vram_mode and _can_offload_to_cpu()
        if offload:
            logger.info("Offloading shape model to CPU RAM (fast path)...")
            i23d_worker.to('cpu')
            _i23d_on_cpu = True
            gc.collect()
        else:
            logger.info("Deleting shape model entirely (low_vram path)...")
            del i23d_worker
            i23d_worker = None
            _i23d_on_cpu = False
            for _ in range(2):
                gc.collect()
        _malloc_trim()
        torch.cuda.empty_cache()

    _ensure_tex_pipeline()
def _ensure_i23d_worker():
    """Make sure the shape model is loaded on `args.device`.

    Three cases:
      * low_vram_mode disabled and the model parked on CPU: move it back.
      * no model loaded: load it from disk and apply the optional
        `--enable_flashvdm` / `--compile` settings to the new instance.
      * otherwise the model is already in place and nothing happens.
    """
    global i23d_worker, _i23d_on_cpu
    if not args.low_vram_mode and i23d_worker is not None and _i23d_on_cpu:
        logger.info("Restoring shape model from CPU to GPU (fast path)...")
        i23d_worker.to(args.device)
        _i23d_on_cpu = False
    elif i23d_worker is None:
        logger.info("Reloading shape model from disk to GPU...")
        # Start from the lowest possible memory baseline before loading.
        gc.collect()
        _malloc_trim()
        torch.cuda.empty_cache()
        from hy3dshape import Hunyuan3DDiTFlowMatchingPipeline
        i23d_worker = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained(
            args.model_path,
            subfolder=args.subfolder,
            use_safetensors=False,
            device=args.device,
        )
        # A freshly loaded pipeline has none of the optional settings; apply
        # them here so lazily loaded models honour the command-line flags.
        if args.enable_flashvdm:
            mc_algo = 'mc' if args.device in ['cpu', 'mps'] else args.mc_algo
            i23d_worker.enable_flashvdm(mc_algo=mc_algo)
        if args.compile:
            i23d_worker.compile()
        _i23d_on_cpu = False
    # else: already on GPU, nothing to do
def _unload_tex_pipeline():
    """Drop the global texture pipeline (if any) and release its memory."""
    global tex_pipeline
    if tex_pipeline is None:
        return

    logger.info("Unloading texture pipeline from memory...")
    del tex_pipeline
    tex_pipeline = None
    # Two collection passes, then trim the heap and empty the CUDA cache.
    for _ in range(2):
        gc.collect()
    _malloc_trim()
    torch.cuda.empty_cache()
def _ensure_tex_pipeline():
    """Construct the global texture pipeline from `tex_conf` unless it already exists.

    Does nothing when a pipeline is already loaded or when no texture
    configuration is available.
    """
    global tex_pipeline
    if tex_pipeline is not None or tex_conf is None:
        return

    # Free whatever can be freed before the pipeline is constructed.
    gc.collect()
    _malloc_trim()
    torch.cuda.empty_cache()
    from hy3dpaint.textureGenPipeline import Hunyuan3DPaintPipeline
    logger.info("Loading texture pipeline to GPU...")
    tex_pipeline = Hunyuan3DPaintPipeline(tex_conf)
@spaces.GPU(duration=60)
def _gen_shape(
    caption=None,
    image=None,
    mv_image_front=None,
    mv_image_back=None,
    mv_image_left=None,
    mv_image_right=None,
    steps=50,
    guidance_scale=7.5,
    seed=1234,
    octree_resolution=256,
    check_box_rembg=False,
    num_chunks=200000,
    randomize_seed: bool = False,
):
    """Generate an untextured mesh from an image, a caption or multi-view images.

    Args:
        caption: Text prompt; used to synthesise an image when `image` is None.
        image: Input image (single-image mode).
        mv_image_front / mv_image_back / mv_image_left / mv_image_right:
            View images used when MV_MODE is enabled.
        steps: Number of inference steps passed to the shape pipeline.
        guidance_scale: Guidance scale passed to the shape pipeline.
        seed: Seed for the torch generator (replaced when `randomize_seed`).
        octree_resolution: Octree resolution passed to the shape pipeline.
        check_box_rembg: Force background removal even for non-RGB input.
        num_chunks: Chunk count passed to the shape pipeline.
        randomize_seed: Draw a random seed instead of using `seed`.

    Returns:
        tuple: (mesh, main_image, save_folder, stats, seed).

    Raises:
        gr.Error: When no usable input is given or text-to-image is unavailable.
    """
    if not MV_MODE and image is None and caption is None:
        raise gr.Error("Please provide either a caption or an image.")
    if MV_MODE:
        if mv_image_front is None and mv_image_back is None \
                and mv_image_left is None and mv_image_right is None:
            raise gr.Error("Please provide at least one view image.")
        image = {}
        if mv_image_front:
            image['front'] = mv_image_front
        if mv_image_back:
            image['back'] = mv_image_back
        if mv_image_left:
            image['left'] = mv_image_left
        if mv_image_right:
            image['right'] = mv_image_right

    seed = int(randomize_seed_fn(seed, randomize_seed))
    octree_resolution = int(octree_resolution)
    if caption:
        print('prompt is', caption)
    save_folder = gen_save_folder()

    stats = {
        'model': {
            'shapegen': f'{args.model_path}/{args.subfolder}',
            'texgen': f'{args.texgen_model_path}',
        },
        'params': {
            'caption': caption,
            'steps': steps,
            'guidance_scale': guidance_scale,
            'seed': seed,
            'octree_resolution': octree_resolution,
            'check_box_rembg': check_box_rembg,
            'num_chunks': num_chunks,
        }
    }
    time_meta = {}

    if image is None:
        start_time = time.time()
        try:
            image = t2i_worker(caption)
        except Exception as e:
            # Keep the original exception as the cause so the real failure
            # (e.g. the worker not being defined) shows up in the traceback.
            raise gr.Error(
                "Text to 3D is disabled. "
                "Please enable it by `python gradio_app.py --enable_t23d`."
            ) from e
        time_meta['text2image'] = time.time() - start_time

    # remove disk io to make responding faster, uncomment at your will.
    # image.save(os.path.join(save_folder, 'input.png'))
    if MV_MODE:
        start_time = time.time()
        for k, v in image.items():
            if check_box_rembg or v.mode == "RGB":
                img = rmbg_worker(v.convert('RGB'))
                image[k] = img
        time_meta['remove background'] = time.time() - start_time
    else:
        if check_box_rembg or image.mode == "RGB":
            start_time = time.time()
            image = rmbg_worker(image.convert('RGB'))
            time_meta['remove background'] = time.time() - start_time

    # remove disk io to make responding faster, uncomment at your will.
    # image.save(os.path.join(save_folder, 'rembg.png'))

    # image to white model
    start_time = time.time()
    if args.low_vram_mode:
        _ensure_i23d_worker()
    generator = torch.Generator()
    generator = generator.manual_seed(int(seed))
    outputs = i23d_worker(
        image=image,
        num_inference_steps=steps,
        guidance_scale=guidance_scale,
        generator=generator,
        octree_resolution=octree_resolution,
        num_chunks=num_chunks,
        output_type='mesh'
    )
    time_meta['shape generation'] = time.time() - start_time
    logger.info("---Shape generation takes %s seconds ---" % (time.time() - start_time))

    tmp_start = time.time()
    mesh = export_to_trimesh(outputs)[0]
    time_meta['export to trimesh'] = time.time() - tmp_start

    stats['number_of_faces'] = mesh.faces.shape[0]
    stats['number_of_vertices'] = mesh.vertices.shape[0]
    stats['time'] = time_meta

    if MV_MODE:
        # Only "at least one view" is required above, so the front view may
        # be absent; fall back to the first supplied view instead of raising
        # KeyError after the whole generation has already run.
        main_image = image.get('front')
        if main_image is None:
            main_image = next(iter(image.values()))
    else:
        main_image = image
    return mesh, main_image, save_folder, stats, seed
@spaces.GPU(duration=60)
def generation_all(
    caption=None,
    image=None,
    mv_image_front=None,
    mv_image_back=None,
    mv_image_left=None,
    mv_image_right=None,
    steps=50,
    guidance_scale=7.5,
    seed=1234,
    octree_resolution=256,
    check_box_rembg=False,
    num_chunks=200000,
    randomize_seed: bool = False,
):
    """Generate a mesh, reduce its faces, texture it and convert the result to GLB.

    The parameters are forwarded unchanged to `_gen_shape`. Memory usage is
    logged after each major step.

    Returns:
        tuple: (update for the white-mesh file, update for the textured GLB
        file, model-viewer HTML, stats dict, seed).

    Raises:
        gr.Error: When no texture pipeline is available, or for the input
            errors raised by `_gen_shape`.
    """
    def _rss_mb():
        # Resident set size of this process in MB, read from /proc/self/status.
        try:
            with open('/proc/self/status') as _f:
                for _l in _f:
                    if _l.startswith('VmRSS:'):
                        return int(_l.split()[1]) // 1024
        except Exception:
            pass
        return 0

    def _rlog(label):
        vram = torch.cuda.memory_allocated() // (1024*1024)
        logger.info(f"[MEM] {label:40s} RSS={_rss_mb():6d} MB  VRAM={vram:5d} MB")

    # Proactively free any memory left over from previous generations so that
    # fresh model loading starts from the lowest possible RSS baseline.
    gc.collect(2)
    _malloc_trim()
    torch.cuda.empty_cache()
    _rlog("generation_all start")

    start_time_0 = time.time()
    mesh, image, save_folder, stats, seed = _gen_shape(
        caption,
        image,
        mv_image_front=mv_image_front,
        mv_image_back=mv_image_back,
        mv_image_left=mv_image_left,
        mv_image_right=mv_image_right,
        steps=steps,
        guidance_scale=guidance_scale,
        seed=seed,
        octree_resolution=octree_resolution,
        check_box_rembg=check_box_rembg,
        num_chunks=num_chunks,
        randomize_seed=randomize_seed,
    )
    _rlog("after _gen_shape")
    path = export_mesh(mesh, save_folder, textured=False)

    print(path)
    print('='*40)

    tmp_time = time.time()
    mesh = face_reduce_worker(mesh)

    # path = export_mesh(mesh, save_folder, textured=False, type='glb')
    path = export_mesh(mesh, save_folder, textured=False, type='obj')  # doing it this way also triggers a core dump

    logger.info("---Face Reduction takes %s seconds ---" % (time.time() - tmp_time))
    stats['time']['face reduction'] = time.time() - tmp_time
    _rlog("after face reduction")

    tmp_time = time.time()

    text_path = os.path.join(save_folder, 'textured_mesh.obj')
    # In low_vram_mode: delete shape model then load texture pipeline.
    if args.low_vram_mode:
        _prepare_for_tex()
        _rlog("after _prepare_for_tex (shape deleted, tex loaded)")
    if tex_pipeline is None:
        # Fail with a readable message instead of "'NoneType' is not callable".
        raise gr.Error("Texture generation is not available: the texture pipeline is not loaded.")
    try:
        path_textured = tex_pipeline(mesh_path=path, image_path=image, output_mesh_path=text_path, save_glb=False)
        _rlog("after tex_pipeline inference")
    finally:
        # Unload texture pipeline after use so VRAM is free for the next shape
        # request -- also when inference failed, otherwise the next request
        # would load the shape model on top of it.
        if args.low_vram_mode:
            _unload_tex_pipeline()
            _rlog("after _unload_tex_pipeline")

    logger.info("---Texture Generation takes %s seconds ---" % (time.time() - tmp_time))
    stats['time']['texture generation'] = time.time() - tmp_time

    tmp_time = time.time()
    # Convert textured OBJ to GLB using obj2gltf with PBR support
    glb_path_textured = os.path.join(save_folder, 'textured_mesh.glb')
    quick_convert_with_obj2gltf(path_textured, glb_path_textured)

    logger.info("---Convert textured OBJ to GLB takes %s seconds ---" % (time.time() - tmp_time))
    stats['time']['convert textured OBJ to GLB'] = time.time() - tmp_time
    stats['time']['total'] = time.time() - start_time_0
    model_viewer_html_textured = build_model_viewer_html(save_folder,
                                                         height=HTML_HEIGHT,
                                                         width=HTML_WIDTH, textured=True)
    if args.low_vram_mode:
        torch.cuda.empty_cache()
    _rlog("generation_all complete")
    return (
        gr.update(value=path),
        gr.update(value=glb_path_textured),
        model_viewer_html_textured,
        stats,
        seed,
    )
@spaces.GPU(duration=60)
def shape_generation(
    caption=None,
    image=None,
    mv_image_front=None,
    mv_image_back=None,
    mv_image_left=None,
    mv_image_right=None,
    steps=50,
    guidance_scale=7.5,
    seed=1234,
    octree_resolution=256,
    check_box_rembg=False,
    num_chunks=200000,
    randomize_seed: bool = False,
):
    """Generate an untextured mesh, export it and build its viewer page.

    All parameters are forwarded unchanged to `_gen_shape`.

    Returns:
        tuple: (update for the mesh file, model-viewer HTML, stats dict, seed).
    """
    started_at = time.time()

    shape_kwargs = dict(
        mv_image_front=mv_image_front,
        mv_image_back=mv_image_back,
        mv_image_left=mv_image_left,
        mv_image_right=mv_image_right,
        steps=steps,
        guidance_scale=guidance_scale,
        seed=seed,
        octree_resolution=octree_resolution,
        check_box_rembg=check_box_rembg,
        num_chunks=num_chunks,
        randomize_seed=randomize_seed,
    )
    mesh, image, save_folder, stats, seed = _gen_shape(caption, image, **shape_kwargs)

    stats['time']['total'] = time.time() - started_at
    # Attach the run statistics to the mesh before it is exported.
    mesh.metadata['extras'] = stats

    path = export_mesh(mesh, save_folder, textured=False)
    model_viewer_html = build_model_viewer_html(save_folder, height=HTML_HEIGHT, width=HTML_WIDTH)

    if args.low_vram_mode:
        torch.cuda.empty_cache()

    file_update = gr.update(value=path)
    return (file_update, model_viewer_html, stats, seed)
def build_app():
    """Build the Gradio Blocks UI and wire up its event handlers.

    Reads module-level configuration (MV_MODE, TURBO_MODE, HAS_TEXTUREGEN,
    SUPPORTED_FORMATS, HTML_* constants, example_is, args) and connects the
    buttons to `shape_generation`, `generation_all` and the local export
    handler.

    Returns:
        The assembled `gr.Blocks` demo.
    """
    title = 'Hunyuan3D-2: High Resolution Textured 3D Assets Generation'
    if MV_MODE:
        title = 'Hunyuan3D-2mv: Image to 3D Generation with 1-4 Views'
    if 'mini' in args.subfolder:
        title = 'Hunyuan3D-2mini: Strong 0.6B Image to Shape Generator'
    # NOTE(review): this unconditional assignment overrides every title chosen
    # above, and the string has no ':' for the Turbo replacement below.
    title = 'Hunyuan-3D-2.1'
    if TURBO_MODE:
        title = title.replace(':', '-Turbo: Fast ')
    title_html = f"""
<div style="font-size: 2em; font-weight: bold; text-align: center; margin-bottom: 5px">
{title}
</div>
<div align="center">
Tencent Hunyuan3D Team
</div>
"""
    custom_css = """
.app.svelte-wpkpf6.svelte-wpkpf6:not(.fill_width) {
max-width: 1480px;
}
.mv-image button .wrap {
font-size: 10px;
}
.mv-image .icon-wrap {
width: 20px;
}
"""
    with gr.Blocks(theme=gr.themes.Base(), title='Hunyuan-3D-2.1', analytics_enabled=False, css=custom_css) as demo:
        gr.HTML(title_html)
        with gr.Row():
            # ── Left column: inputs, generation buttons and options ──
            with gr.Column(scale=3):
                with gr.Tabs(selected='tab_img_prompt') as tabs_prompt:
                    with gr.Tab('Image Prompt', id='tab_img_prompt', visible=not MV_MODE) as tab_ip:
                        image = gr.Image(label='Image', type='pil', image_mode='RGBA', height=290)
                        # The text prompt tab is disabled, so caption is a constant None state.
                        caption = gr.State(None)
                    # with gr.Tab('Text Prompt', id='tab_txt_prompt', visible=HAS_T2I and not MV_MODE) as tab_tp:
                    #     caption = gr.Textbox(label='Text Prompt',
                    #                          placeholder='HunyuanDiT will be used to generate image.',
                    #                          info='Example: A 3D model of a cute cat, white background')
                    with gr.Tab('MultiView Prompt', visible=MV_MODE) as tab_mv:
                        # gr.Label('Please upload at least one front image.')
                        with gr.Row():
                            mv_image_front = gr.Image(label='Front', type='pil', image_mode='RGBA', height=140,
                                                      min_width=100, elem_classes='mv-image')
                            mv_image_back = gr.Image(label='Back', type='pil', image_mode='RGBA', height=140,
                                                     min_width=100, elem_classes='mv-image')
                        with gr.Row():
                            mv_image_left = gr.Image(label='Left', type='pil', image_mode='RGBA', height=140,
                                                     min_width=100, elem_classes='mv-image')
                            mv_image_right = gr.Image(label='Right', type='pil', image_mode='RGBA', height=140,
                                                      min_width=100, elem_classes='mv-image')
                with gr.Row():
                    btn = gr.Button(value='Gen Shape', variant='primary', min_width=100)
                    # Only shown when the texture generator loaded successfully.
                    btn_all = gr.Button(value='Gen Textured Shape',
                                        variant='primary',
                                        visible=HAS_TEXTUREGEN,
                                        min_width=100)
                with gr.Group():
                    # Hidden components carrying the generated file paths to the export handler.
                    file_out = gr.File(label="File", visible=False)
                    file_out2 = gr.File(label="File", visible=False)
                with gr.Tabs(selected='tab_options' if TURBO_MODE else 'tab_export'):
                    with gr.Tab("Options", id='tab_options', visible=TURBO_MODE):
                        gen_mode = gr.Radio(
                            label='Generation Mode',
                            info='Recommendation: Turbo for most cases, \
Fast for very complex cases, Standard seldom use.',
                            choices=['Turbo', 'Fast', 'Standard'],
                            value='Turbo')
                        decode_mode = gr.Radio(
                            label='Decoding Mode',
                            info='The resolution for exporting mesh from generated vectset',
                            choices=['Low', 'Standard', 'High'],
                            value='Standard')
                    with gr.Tab('Advanced Options', id='tab_advanced_options'):
                        with gr.Row():
                            check_box_rembg = gr.Checkbox(
                                value=True,
                                label='Remove Background',
                                min_width=100)
                            randomize_seed = gr.Checkbox(
                                label="Randomize seed",
                                value=True,
                                min_width=100)
                        seed = gr.Slider(
                            label="Seed",
                            minimum=0,
                            maximum=MAX_SEED,
                            step=1,
                            value=1234,
                            min_width=100,
                        )
                        with gr.Row():
                            num_steps = gr.Slider(maximum=100,
                                                  minimum=1,
                                                  value=5 if 'turbo' in args.subfolder else 30,
                                                  step=1, label='Inference Steps')
                            octree_resolution = gr.Slider(maximum=512,
                                                          minimum=16,
                                                          value=256,
                                                          label='Octree Resolution')
                        with gr.Row():
                            cfg_scale = gr.Number(value=5.0, label='Guidance Scale', min_width=100)
                            num_chunks = gr.Slider(maximum=5000000, minimum=1000, value=8000,
                                                   label='Number of Chunks', min_width=100)
                    with gr.Tab("Export", id='tab_export'):
                        with gr.Row():
                            file_type = gr.Dropdown(label='File Type',
                                                    choices=SUPPORTED_FORMATS,
                                                    value='glb', min_width=100)
                            reduce_face = gr.Checkbox(label='Simplify Mesh',
                                                      value=False, min_width=100)
                            export_texture = gr.Checkbox(label='Include Texture', value=False,
                                                         visible=False, min_width=100)
                        target_face_num = gr.Slider(maximum=1000000, minimum=100, value=10000,
                                                    label='Target Face Number')
                        with gr.Row():
                            confirm_export = gr.Button(value="Transform", min_width=100)
                            file_export = gr.DownloadButton(label="Download", variant='primary',
                                                            interactive=False, min_width=100)
            # ── Middle column: viewer panels and statistics ──
            with gr.Column(scale=6):
                with gr.Tabs(selected='gen_mesh_panel') as tabs_output:
                    with gr.Tab('Generated Mesh', id='gen_mesh_panel'):
                        html_gen_mesh = gr.HTML(HTML_OUTPUT_PLACEHOLDER, label='Output')
                    with gr.Tab('Exporting Mesh', id='export_mesh_panel'):
                        html_export_mesh = gr.HTML(HTML_OUTPUT_PLACEHOLDER, label='Output')
                    with gr.Tab('Mesh Statistic', id='stats_panel'):
                        stats = gr.Json({}, label='Mesh Stats')
            # ── Right column: example gallery ──
            with gr.Column(scale=3 if MV_MODE else 2):
                with gr.Tabs(selected='tab_img_gallery') as gallery:
                    with gr.Tab('Image to 3D Gallery',
                                id='tab_img_gallery',
                                visible=not MV_MODE) as tab_gi:
                        with gr.Row():
                            gr.Examples(examples=example_is, inputs=[image],
                                        label=None, examples_per_page=18)
        tab_ip.select(fn=lambda: gr.update(selected='tab_img_gallery'), outputs=gallery)
        #if HAS_T2I:
        #    tab_tp.select(fn=lambda: gr.update(selected='tab_txt_gallery'), outputs=gallery)
        # "Gen Shape": generate, then reset the export controls for an
        # untextured mesh, then switch to the generated-mesh panel.
        btn.click(
            shape_generation,
            inputs=[
                caption,
                image,
                mv_image_front,
                mv_image_back,
                mv_image_left,
                mv_image_right,
                num_steps,
                cfg_scale,
                seed,
                octree_resolution,
                check_box_rembg,
                num_chunks,
                randomize_seed,
            ],
            outputs=[file_out, html_gen_mesh, stats, seed]
        ).then(
            lambda: (gr.update(visible=False, value=False), gr.update(interactive=True), gr.update(interactive=True),
                     gr.update(interactive=False)),
            outputs=[export_texture, reduce_face, confirm_export, file_export],
        ).then(
            lambda: gr.update(selected='gen_mesh_panel'),
            outputs=[tabs_output],
        )
        # "Gen Textured Shape": same flow, but the export controls are set up
        # for a textured mesh (texture export on, face reduction locked).
        btn_all.click(
            generation_all,
            inputs=[
                caption,
                image,
                mv_image_front,
                mv_image_back,
                mv_image_left,
                mv_image_right,
                num_steps,
                cfg_scale,
                seed,
                octree_resolution,
                check_box_rembg,
                num_chunks,
                randomize_seed,
            ],
            outputs=[file_out, file_out2, html_gen_mesh, stats, seed]
        ).then(
            lambda: (gr.update(visible=True, value=True), gr.update(interactive=False), gr.update(interactive=True),
                     gr.update(interactive=False)),
            outputs=[export_texture, reduce_face, confirm_export, file_export],
        ).then(
            lambda: gr.update(selected='gen_mesh_panel'),
            outputs=[tabs_output],
        )

        def on_gen_mode_change(value):
            # Map the generation mode to a step count: Turbo 5, Fast 10, otherwise 30.
            if value == 'Turbo':
                return gr.update(value=5)
            elif value == 'Fast':
                return gr.update(value=10)
            else:
                return gr.update(value=30)
        gen_mode.change(on_gen_mode_change, inputs=[gen_mode], outputs=[num_steps])

        def on_decode_mode_change(value):
            # Map the decoding mode to an octree resolution: Low 196, Standard 256, otherwise 384.
            if value == 'Low':
                return gr.update(value=196)
            elif value == 'Standard':
                return gr.update(value=256)
            else:
                return gr.update(value=384)
        decode_mode.change(on_decode_mode_change, inputs=[decode_mode],
                           outputs=[octree_resolution])

        def on_export_click(file_out, file_out2, file_type,
                            reduce_face, export_texture, target_face_num):
            # Re-export the last generated mesh in the requested format and
            # build a preview page; returns (viewer HTML, download button update).
            if file_out is None:
                raise gr.Error('Please generate a mesh first.')
            print(f'exporting {file_out}')
            print(f'reduce face to {target_face_num}')
            if export_texture:
                # Textured export: load the textured file as-is, no clean-up steps.
                mesh = trimesh.load(file_out2)
                save_folder = gen_save_folder()
                path = export_mesh(mesh, save_folder, textured=True, type=file_type)
                # for preview
                save_folder = gen_save_folder()
                _ = export_mesh(mesh, save_folder, textured=True)
                model_viewer_html = build_model_viewer_html(save_folder,
                                                            height=HTML_HEIGHT,
                                                            width=HTML_WIDTH,
                                                            textured=True)
            else:
                # Untextured export: clean the mesh and optionally reduce faces first.
                mesh = trimesh.load(file_out)
                mesh = floater_remove_worker(mesh)
                mesh = degenerate_face_remove_worker(mesh)
                if reduce_face:
                    mesh = face_reduce_worker(mesh, target_face_num)
                save_folder = gen_save_folder()
                path = export_mesh(mesh, save_folder, textured=False, type=file_type)
                # for preview
                save_folder = gen_save_folder()
                _ = export_mesh(mesh, save_folder, textured=False)
                model_viewer_html = build_model_viewer_html(save_folder,
                                                            height=HTML_HEIGHT,
                                                            width=HTML_WIDTH,
                                                            textured=False)
            print(f'export to {path}')
            return model_viewer_html, gr.update(value=path, interactive=True)
        # "Transform": switch to the export panel first, then run the export.
        confirm_export.click(
            lambda: gr.update(selected='export_mesh_panel'),
            outputs=[tabs_output],
        ).then(
            on_export_click,
            inputs=[file_out, file_out2, file_type, reduce_face, export_texture, target_face_num],
            outputs=[html_export_mesh, file_export]
        )
    return demo
if __name__ == '__main__':
    # Script entry point: parse arguments, load the models that are not
    # deferred, build the Gradio UI and serve it through FastAPI/uvicorn.
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_path", type=str, default='tencent/Hunyuan3D-2.1')
    parser.add_argument("--subfolder", type=str, default='hunyuan3d-dit-v2-1')
    parser.add_argument("--texgen_model_path", type=str, default='tencent/Hunyuan3D-2.1')
    parser.add_argument('--port', type=int, default=8080)
    parser.add_argument('--host', type=str, default='0.0.0.0')
    parser.add_argument('--device', type=str, default='cuda')
    parser.add_argument('--mc_algo', type=str, default='mc')
    parser.add_argument('--cache-path', type=str, default='./save_dir')
    parser.add_argument('--enable_t23d', action='store_true')
    parser.add_argument('--disable_tex', action='store_true')
    parser.add_argument('--enable_flashvdm', action='store_true')
    parser.add_argument('--compile', action='store_true')
    parser.add_argument('--low_vram_mode', action='store_true')
    args = parser.parse_args()

    # Module-level configuration read by the functions defined above.
    SAVE_DIR = args.cache_path
    os.makedirs(SAVE_DIR, exist_ok=True)
    CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
    MV_MODE = 'mv' in args.model_path
    TURBO_MODE = 'turbo' in args.subfolder
    HTML_HEIGHT = 690 if MV_MODE else 650
    HTML_WIDTH = 500
    HTML_OUTPUT_PLACEHOLDER = f"""
<div style='height: {650}px; width: 100%; border-radius: 8px; border-color: #e5e7eb; border-style: solid; border-width: 1px; display: flex; justify-content: center; align-items: center;'>
<div style='text-align: center; font-size: 16px; color: #6b7280;'>
<p style="color: #8d8d8d;">Welcome to Hunyuan3D!</p>
<p style="color: #8d8d8d;">No mesh here.</p>
</div>
</div>
"""
    INPUT_MESH_HTML = """
<div style='height: 490px; width: 100%; border-radius: 8px;
border-color: #e5e7eb; order-style: solid; border-width: 1px;'>
</div>
"""
    example_is = get_example_img_list()
    example_ts = get_example_txt_list()
    SUPPORTED_FORMATS = ['glb', 'obj', 'ply', 'stl']

    # ── Texture generator (optional) ──
    HAS_TEXTUREGEN = False
    if not args.disable_tex:
        try:
            # Apply torchvision fix before importing basicsr/RealESRGAN
            print("Applying torchvision compatibility fix for texture generation...")
            try:
                from torchvision_fix import apply_fix
                fix_result = apply_fix()
                if not fix_result:
                    print("Warning: Torchvision fix may not have been applied successfully")
            except Exception as fix_error:
                print(f"Warning: Failed to apply torchvision fix: {fix_error}")
            # from hy3dgen.texgen import Hunyuan3DPaintPipeline
            # texgen_worker = Hunyuan3DPaintPipeline.from_pretrained(args.texgen_model_path)
            # if args.low_vram_mode:
            #     texgen_worker.enable_model_cpu_offload()
            from hy3dpaint.textureGenPipeline import Hunyuan3DPaintPipeline, Hunyuan3DPaintConfig
            tex_conf = Hunyuan3DPaintConfig(max_num_view=9, resolution=512)
            tex_conf.realesrgan_ckpt_path = "hy3dpaint/ckpt/RealESRGAN_x4plus.pth"
            tex_conf.multiview_cfg_path = "hy3dpaint/cfgs/hunyuan-paint-pbr.yaml"
            tex_conf.custom_pipeline = "hy3dpaint/hunyuanpaintpbr"
            if not args.low_vram_mode:
                # Load immediately; in low_vram_mode we load on-demand per request.
                tex_pipeline = Hunyuan3DPaintPipeline(tex_conf)
            # Not help much, ignore for now.
            # if args.compile:
            #     texgen_worker.models['delight_model'].pipeline.unet.compile()
            #     texgen_worker.models['delight_model'].pipeline.vae.compile()
            #     texgen_worker.models['multiview_model'].pipeline.unet.compile()
            #     texgen_worker.models['multiview_model'].pipeline.vae.compile()
            HAS_TEXTUREGEN = True
        except Exception as e:
            # Texture generation is optional: report the failure and keep
            # serving shape generation only.
            import traceback
            traceback.print_exc()
            print(f"Error loading texture generator: {e}")
            print("Failed to load texture generator.")
            print('Please try to install requirements by following README.md')
            HAS_TEXTUREGEN = False

    # ── Text-to-image (optional) ──
    HAS_T2I = True
    if args.enable_t23d:
        from hy3dgen.text2image import HunyuanDiTPipeline
        t2i_worker = HunyuanDiTPipeline('Tencent-Hunyuan/HunyuanDiT-v1.1-Diffusers-Distilled')
        HAS_T2I = True

    # ── Shape generator and mesh post-processing workers ──
    from hy3dshape import FaceReducer, FloaterRemover, DegenerateFaceRemover, MeshSimplifier, \
        Hunyuan3DDiTFlowMatchingPipeline
    from hy3dshape.pipelines import export_to_trimesh
    from hy3dshape.rembg import BackgroundRemover
    rmbg_worker = BackgroundRemover()
    if args.low_vram_mode:
        # Defer i23d loading to first request — saves ~7.25GB VRAM at startup
        # and avoids keeping it in RAM while tex pipeline loads.
        logger.info("low_vram_mode: shape model will be loaded on first request")
    else:
        i23d_worker = Hunyuan3DDiTFlowMatchingPipeline.from_pretrained(
            args.model_path,
            subfolder=args.subfolder,
            use_safetensors=False,
            device=args.device,
        )
    # In low_vram_mode no shape model exists yet at this point; calling
    # enable_flashvdm()/compile() on None would abort start-up, so the
    # optional settings are only applied to a model that is actually loaded.
    if i23d_worker is not None:
        if args.enable_flashvdm:
            mc_algo = 'mc' if args.device in ['cpu', 'mps'] else args.mc_algo
            i23d_worker.enable_flashvdm(mc_algo=mc_algo)
        if args.compile:
            i23d_worker.compile()
    floater_remove_worker = FloaterRemover()
    degenerate_face_remove_worker = DegenerateFaceRemover()
    face_reduce_worker = FaceReducer()

    # https://discuss.huggingface.co/t/how-to-serve-an-html-file/33921/2
    # create a FastAPI app
    app = FastAPI()
    # create a static directory to store the static files
    static_dir = Path(SAVE_DIR).absolute()
    static_dir.mkdir(parents=True, exist_ok=True)
    app.mount("/static", StaticFiles(directory=static_dir, html=True), name="static")
    shutil.copytree('./assets/env_maps', os.path.join(static_dir, 'env_maps'), dirs_exist_ok=True)
    if args.low_vram_mode:
        torch.cuda.empty_cache()
    demo = build_app()
    app = gr.mount_gradio_app(app, demo, path="/")
    uvicorn.run(app, host=args.host, port=args.port)