Huiwenshi
2025-06-13 23:53:14 +08:00
parent 70ee89e0a2
commit c88bee648e
581 changed files with 30365 additions and 1 deletion

View File

@@ -0,0 +1,13 @@
# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
# except for the third-party components listed below.
# Hunyuan 3D does not impose any additional limitations beyond what is outlined
# in the respective licenses of these third-party components.
# Users must comply with all terms and conditions of original licenses of these third-party
# components and must ensure that the usage of the third-party components adheres to
# all relevant laws and regulations.
# For avoidance of doubts, Hunyuan 3D means the large language models and
# their software and algorithms, including trained model weights, parameters (including
# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
# fine-tuning enabling code and other elements of the foregoing made publicly available
# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.

View File

@@ -0,0 +1,41 @@
# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
# except for the third-party components listed below.
# Hunyuan 3D does not impose any additional limitations beyond what is outlined
# in the respective licenses of these third-party components.
# Users must comply with all terms and conditions of original licenses of these third-party
# components and must ensure that the usage of the third-party components adheres to
# all relevant laws and regulations.
# For avoidance of doubts, Hunyuan 3D means the large language models and
# their software and algorithms, including trained model weights, parameters (including
# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
# fine-tuning enabling code and other elements of the foregoing made publicly available
# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
import numpy as np
from PIL import Image


class imageSuperNet:
    def __init__(self, config) -> None:
        from realesrgan import RealESRGANer
        from basicsr.archs.rrdbnet_arch import RRDBNet

        model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
        upsampler = RealESRGANer(
            scale=4,
            model_path=config.realesrgan_ckpt_path,
            dni_weight=None,
            model=model,
            tile=0,
            tile_pad=10,
            pre_pad=0,
            half=True,
            gpu_id=None,
        )
        self.upsampler = upsampler

    def __call__(self, image):
        output, _ = self.upsampler.enhance(np.array(image))
        output = Image.fromarray(output)
        return output
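
A minimal usage sketch for the class above, assuming a config object exposing the realesrgan_ckpt_path attribute read in __init__; the checkpoint path and file names below are placeholders, not part of this commit:

from types import SimpleNamespace
from PIL import Image

# Hypothetical config; only realesrgan_ckpt_path is required by imageSuperNet.
config = SimpleNamespace(realesrgan_ckpt_path="weights/RealESRGAN_x4plus.pth")
upscaler = imageSuperNet(config)
lowres = Image.open("input.png").convert("RGB")
highres = upscaler(lowres)  # returns a 4x-upscaled PIL image
highres.save("input_4x.png")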

View File

@@ -0,0 +1,127 @@
# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
# except for the third-party components listed below.
# Hunyuan 3D does not impose any additional limitations beyond what is outlined
# in the respective licenses of these third-party components.
# Users must comply with all terms and conditions of original licenses of these third-party
# components and must ensure that the usage of the third-party components adheres to
# all relevant laws and regulations.
# For avoidance of doubts, Hunyuan 3D means the large language models and
# their software and algorithms, including trained model weights, parameters (including
# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
# fine-tuning enabling code and other elements of the foregoing made publicly available
# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
import os
import torch
import random
import numpy as np
from PIL import Image
from typing import List
import huggingface_hub
from omegaconf import OmegaConf
from diffusers import DiffusionPipeline
from diffusers import EulerAncestralDiscreteScheduler, DDIMScheduler, UniPCMultistepScheduler


class multiviewDiffusionNet:
    def __init__(self, config) -> None:
        self.device = config.device

        cfg_path = config.multiview_cfg_path
        cfg = OmegaConf.load(cfg_path)
        self.cfg = cfg
        self.mode = self.cfg.model.params.stable_diffusion_config.custom_pipeline[2:]

        model_path = huggingface_hub.snapshot_download(
            repo_id=config.multiview_pretrained_path,
            allow_patterns=["hunyuan3d-paintpbr-v2-1/*"],
        )
        model_path = os.path.join(model_path, "hunyuan3d-paintpbr-v2-1")
        pipeline = DiffusionPipeline.from_pretrained(
            model_path,
            custom_pipeline="hunyuanpaintpbr",
            torch_dtype=torch.float16,
        )

        pipeline.scheduler = UniPCMultistepScheduler.from_config(pipeline.scheduler.config, timestep_spacing="trailing")
        pipeline.set_progress_bar_config(disable=True)
        pipeline.eval()
        setattr(pipeline, "view_size", cfg.model.params.get("view_size", 320))
        self.pipeline = pipeline.to(self.device)

        if hasattr(self.pipeline.unet, "use_dino") and self.pipeline.unet.use_dino:
            from hunyuanpaintpbr.modules import Dino_v2

            self.dino_v2 = Dino_v2(config.dino_ckpt_path).to(torch.float16)
            self.dino_v2 = self.dino_v2.to(self.device)

    def seed_everything(self, seed):
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        os.environ["PL_GLOBAL_SEED"] = str(seed)

    @torch.no_grad()
    def __call__(self, images, conditions, prompt=None, custom_view_size=None, resize_input=False):
        pils = self.forward_one(
            images, conditions, prompt=prompt, custom_view_size=custom_view_size, resize_input=resize_input
        )
        return pils

    def forward_one(self, input_images, control_images, prompt=None, custom_view_size=None, resize_input=False):
        self.seed_everything(0)
        custom_view_size = custom_view_size if custom_view_size is not None else self.pipeline.view_size
        if not isinstance(input_images, List):
            input_images = [input_images]

        if not resize_input:
            input_images = [
                input_image.resize((self.pipeline.view_size, self.pipeline.view_size)) for input_image in input_images
            ]
        else:
            input_images = [input_image.resize((custom_view_size, custom_view_size)) for input_image in input_images]

        for i in range(len(control_images)):
            control_images[i] = control_images[i].resize((custom_view_size, custom_view_size))
            if control_images[i].mode == "L":
                control_images[i] = control_images[i].point(lambda x: 255 if x > 1 else 0, mode="1")

        kwargs = dict(generator=torch.Generator(device=self.pipeline.device).manual_seed(0))

        num_view = len(control_images) // 2
        normal_image = [[control_images[i] for i in range(num_view)]]
        position_image = [[control_images[i + num_view] for i in range(num_view)]]

        kwargs["width"] = custom_view_size
        kwargs["height"] = custom_view_size
        kwargs["num_in_batch"] = num_view
        kwargs["images_normal"] = normal_image
        kwargs["images_position"] = position_image

        if hasattr(self.pipeline.unet, "use_dino") and self.pipeline.unet.use_dino:
            dino_hidden_states = self.dino_v2(input_images[0])
            kwargs["dino_hidden_states"] = dino_hidden_states

        sync_condition = None

        infer_steps_dict = {
            "EulerAncestralDiscreteScheduler": 30,
            "UniPCMultistepScheduler": 15,
            "DDIMScheduler": 50,
            "ShiftSNRScheduler": 15,
        }

        mvd_image = self.pipeline(
            input_images[0:1],
            num_inference_steps=infer_steps_dict[self.pipeline.scheduler.__class__.__name__],
            prompt=prompt,
            sync_condition=sync_condition,
            guidance_scale=3.0,
            **kwargs,
        ).images

        if "pbr" in self.mode:
            mvd_image = {"albedo": mvd_image[:num_view], "mr": mvd_image[num_view:]}
            # mvd_image = {'albedo': mvd_image[:num_view]}
        else:
            mvd_image = {"hdr": mvd_image}

        return mvd_image
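
A hedged usage sketch for multiviewDiffusionNet. The config attributes below are the ones __init__ reads; every path and repo id is a placeholder. Note that forward_one expects the control images to be the per-view normal maps followed by the per-view position maps, so the list length is twice the view count:

from types import SimpleNamespace
from PIL import Image

# Hypothetical config; placeholder values for the attributes read in __init__.
config = SimpleNamespace(
    device="cuda",
    multiview_cfg_path="cfgs/hunyuan-paint-pbr.yaml",    # assumed config file
    multiview_pretrained_path="tencent/Hunyuan3D-2.1",   # assumed HF repo id
    dino_ckpt_path="weights/dinov2",                     # only used when the UNet enables DINO
)
net = multiviewDiffusionNet(config)

ref = Image.open("reference.png").convert("RGB")
normals = [Image.open(f"normal_{i}.png") for i in range(6)]      # placeholder view renders
positions = [Image.open(f"position_{i}.png") for i in range(6)]
out = net(ref, normals + positions)  # {'albedo': [...], 'mr': [...]} in "pbr" mode, else {'hdr': [...]}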

View File

@@ -0,0 +1,135 @@
# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
# except for the third-party components listed below.
# Hunyuan 3D does not impose any additional limitations beyond what is outlined
# in the respective licenses of these third-party components.
# Users must comply with all terms and conditions of original licenses of these third-party
# components and must ensure that the usage of the third-party components adheres to
# all relevant laws and regulations.
# For avoidance of doubts, Hunyuan 3D means the large language models and
# their software and algorithms, including trained model weights, parameters (including
# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
# fine-tuning enabling code and other elements of the foregoing made publicly available
# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
import torch
import numpy as np


class ViewProcessor:
    def __init__(self, config, render):
        self.config = config
        self.render = render

    def render_normal_multiview(self, camera_elevs, camera_azims, use_abs_coor=True):
        normal_maps = []
        for elev, azim in zip(camera_elevs, camera_azims):
            normal_map = self.render.render_normal(elev, azim, use_abs_coor=use_abs_coor, return_type="pl")
            normal_maps.append(normal_map)
        return normal_maps

    def render_position_multiview(self, camera_elevs, camera_azims):
        position_maps = []
        for elev, azim in zip(camera_elevs, camera_azims):
            position_map = self.render.render_position(elev, azim, return_type="pl")
            position_maps.append(position_map)
        return position_maps

    def bake_view_selection(
        self, candidate_camera_elevs, candidate_camera_azims, candidate_view_weights, max_selected_view_num
    ):
        original_resolution = self.render.default_resolution
        self.render.set_default_render_resolution(1024)

        selected_camera_elevs = []
        selected_camera_azims = []
        selected_view_weights = []
        selected_alpha_maps = []
        viewed_tri_idxs = []
        viewed_masks = []

        # Compute the area of each triangle face
        face_areas = self.render.get_face_areas(from_one_index=True)
        total_area = face_areas.sum()
        face_area_ratios = face_areas / total_area

        candidate_view_num = len(candidate_camera_elevs)
        self.render.set_boundary_unreliable_scale(2)
        for elev, azim in zip(candidate_camera_elevs, candidate_camera_azims):
            viewed_tri_idx = self.render.render_alpha(elev, azim, return_type="np")
            viewed_tri_idxs.append(set(np.unique(viewed_tri_idx.flatten())))
            viewed_masks.append(viewed_tri_idx[0, :, :, 0] > 0)

        is_selected = [False for _ in range(candidate_view_num)]
        total_viewed_tri_idxs = set()
        total_viewed_area = 0.0
        for idx in range(6):
            selected_camera_elevs.append(candidate_camera_elevs[idx])
            selected_camera_azims.append(candidate_camera_azims[idx])
            selected_view_weights.append(candidate_view_weights[idx])
            selected_alpha_maps.append(viewed_masks[idx])
            is_selected[idx] = True
            total_viewed_tri_idxs.update(viewed_tri_idxs[idx])
        total_viewed_area = face_area_ratios[list(total_viewed_tri_idxs)].sum()

        for _ in range(max_selected_view_num - len(selected_view_weights)):
            max_inc = 0
            max_idx = -1
            for idx, (elev, azim, weight) in enumerate(
                zip(candidate_camera_elevs, candidate_camera_azims, candidate_view_weights)
            ):
                if is_selected[idx]:
                    continue
                new_tri_idxs = viewed_tri_idxs[idx] - total_viewed_tri_idxs
                new_inc_area = face_area_ratios[list(new_tri_idxs)].sum()
                if new_inc_area > max_inc:
                    max_inc = new_inc_area
                    max_idx = idx
            if max_inc > 0.01:
                is_selected[max_idx] = True
                selected_camera_elevs.append(candidate_camera_elevs[max_idx])
                selected_camera_azims.append(candidate_camera_azims[max_idx])
                selected_view_weights.append(candidate_view_weights[max_idx])
                selected_alpha_maps.append(viewed_masks[max_idx])
                total_viewed_tri_idxs = total_viewed_tri_idxs.union(viewed_tri_idxs[max_idx])
                total_viewed_area += max_inc
            else:
                break

        self.render.set_default_render_resolution(original_resolution)
        return selected_camera_elevs, selected_camera_azims, selected_view_weights

    def bake_from_multiview(self, views, camera_elevs, camera_azims, view_weights):
        project_textures, project_weighted_cos_maps = [], []
        project_boundary_maps = []
        for view, camera_elev, camera_azim, weight in zip(views, camera_elevs, camera_azims, view_weights):
            project_texture, project_cos_map, project_boundary_map = self.render.back_project(
                view, camera_elev, camera_azim
            )
            project_cos_map = weight * (project_cos_map**self.config.bake_exp)
            project_textures.append(project_texture)
            project_weighted_cos_maps.append(project_cos_map)
            project_boundary_maps.append(project_boundary_map)
        texture, ori_trust_map = self.render.fast_bake_texture(project_textures, project_weighted_cos_maps)
        return texture, ori_trust_map > 1e-8

    def texture_inpaint(self, texture, mask, default=None):
        if default is not None:
            mask = mask.astype(bool)
            inpaint_value = torch.tensor(default, dtype=texture.dtype, device=texture.device)
            texture[~mask] = inpaint_value
        else:
            texture_np = self.render.uv_inpaint(texture, mask)
            texture = torch.tensor(texture_np / 255).float().to(texture.device)
        return texture
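
The selection loop in bake_view_selection is a greedy set-cover over face area: six base views are always kept, then each round adds the candidate whose newly visible faces contribute the most area, stopping once the best marginal gain drops below 1%. A self-contained toy sketch of that rule, with illustrative data only and no renderer involved:

import numpy as np

face_area_ratios = np.array([0.4, 0.3, 0.2, 0.1])   # toy per-face area fractions
viewed_tri_idxs = [{0, 1}, {1, 2}, {2, 3}, {0, 3}]  # faces visible from each candidate view

selected, covered = [], set()
while len(selected) < len(viewed_tri_idxs):
    # Marginal area gain of each candidate view, counting only uncovered faces
    gains = [face_area_ratios[list(v - covered)].sum() for v in viewed_tri_idxs]
    best = int(np.argmax(gains))
    if gains[best] <= 0.01:  # same 1% stopping threshold as above
        break
    selected.append(best)
    covered |= viewed_tri_idxs[best]

print(selected)  # [0, 2] for this toy data: view 1 adds too little once 0 and 2 are chosen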

View File

@@ -0,0 +1,37 @@
# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
# except for the third-party components listed below.
# Hunyuan 3D does not impose any additional limitations beyond what is outlined
# in the respective licenses of these third-party components.
# Users must comply with all terms and conditions of original licenses of these third-party
# components and must ensure that the usage of the third-party components adheres to
# all relevant laws and regulations.
# For avoidance of doubts, Hunyuan 3D means the large language models and
# their software and algorithms, including trained model weights, parameters (including
# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
# fine-tuning enabling code and other elements of the foregoing made publicly available
# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
import trimesh
import pymeshlab


def remesh_mesh(mesh_path, remesh_path):
    mesh_simplify_trimesh(mesh_path, remesh_path)


def mesh_simplify_trimesh(inputpath, outputpath, target_count=40000):
    # First remove stray (disconnected) faces
    ms = pymeshlab.MeshSet()
    if inputpath.endswith(".glb"):
        ms.load_new_mesh(inputpath, load_in_a_single_layer=True)
    else:
        ms.load_new_mesh(inputpath)
    ms.save_current_mesh(outputpath.replace(".glb", ".obj"), save_textures=False)

    # Run quadric decimation to reduce the face count
    current = trimesh.load(outputpath.replace(".glb", ".obj"), force="mesh")
    face_num = current.faces.shape[0]
    if face_num > target_count:
        current = current.simplify_quadric_decimation(target_count)
    current.export(outputpath)
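
A hedged usage example; both paths are placeholders. mesh_simplify_trimesh first writes an intermediate .obj (derived from the output path by swapping the .glb suffix) and then exports the decimated mesh to the output path itself:

# Hypothetical paths; remeshed.obj is written as an intermediate, then remeshed.glb.
remesh_mesh("raw_scan.glb", "remeshed.glb")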

View File

@@ -0,0 +1,111 @@
# Torchvision compatibility fix for functional_tensor module
# This file helps resolve compatibility issues between different torchvision versions
import sys

import torch
import torchvision


def fix_torchvision_functional_tensor():
    """
    Fix the torchvision.transforms.functional_tensor import issue
    """
    try:
        # Check if the module exists in the expected location
        import torchvision.transforms.functional_tensor
        print("torchvision.transforms.functional_tensor is available")
        return True
    except ImportError:
        print("torchvision.transforms.functional_tensor not found, applying compatibility fix...")
        try:
            # Create a mock functional_tensor module with the required functions
            import torchvision.transforms.functional as F

            class FunctionalTensorMock:
                """Mock module to replace functional_tensor"""

                @staticmethod
                def _get_grayscale_weights(img):
                    """Helper to create grayscale weights based on image dimensions"""
                    weights = torch.tensor([0.299, 0.587, 0.114], device=img.device, dtype=img.dtype)
                    return weights.view(1, 3, 1, 1) if len(img.shape) == 4 else weights.view(3, 1, 1)

                @staticmethod
                def _try_import_fallback(module_names, attr_name):
                    """Helper to try importing from multiple modules"""
                    for module_name in module_names:
                        try:
                            module = __import__(module_name, fromlist=[attr_name])
                            if hasattr(module, attr_name):
                                return getattr(module, attr_name)
                        except ImportError:
                            continue
                    return None

                @staticmethod
                def rgb_to_grayscale(img, num_output_channels=1):
                    """Convert an RGB image to grayscale"""
                    if hasattr(F, 'rgb_to_grayscale'):
                        return F.rgb_to_grayscale(img, num_output_channels)
                    # Fallback implementation
                    weights = FunctionalTensorMock._get_grayscale_weights(img)
                    grayscale = torch.sum(img * weights, dim=-3, keepdim=True)
                    if num_output_channels == 3:
                        repeat_dims = (1, 3, 1, 1) if len(img.shape) == 4 else (3, 1, 1)
                        grayscale = grayscale.repeat(*repeat_dims)
                    return grayscale

                @staticmethod
                def resize(img, size, interpolation=2, antialias=None):
                    """Resize function wrapper"""
                    # Try v2.functional first, then regular functional, then torch.nn.functional
                    resize_func = FunctionalTensorMock._try_import_fallback([
                        'torchvision.transforms.v2.functional',
                        'torchvision.transforms.functional'
                    ], 'resize')
                    if resize_func:
                        try:
                            return resize_func(img, size, interpolation=interpolation, antialias=antialias)
                        except TypeError:
                            # Fallback for older versions without the antialias parameter
                            return resize_func(img, size, interpolation=interpolation)
                    # Final fallback using torch.nn.functional
                    import torch.nn.functional as torch_F
                    size = (size, size) if isinstance(size, int) else size
                    img_input = img.unsqueeze(0) if len(img.shape) == 3 else img
                    return torch_F.interpolate(img_input, size=size, mode='bilinear', align_corners=False)

                def __getattr__(self, name):
                    """Fall back to the regular functional modules"""
                    func = self._try_import_fallback([
                        'torchvision.transforms.functional',
                        'torchvision.transforms.v2.functional'
                    ], name)
                    if func:
                        return func
                    raise AttributeError(f"'{name}' not found in functional_tensor mock")

            # Create the mock module instance and monkey patch it into sys.modules
            sys.modules['torchvision.transforms.functional_tensor'] = FunctionalTensorMock()
            print("Applied compatibility fix: created functional_tensor mock module")
            return True
        except Exception as e:
            print(f"Failed to create functional_tensor mock: {e}")
            return False


def apply_fix():
    """Apply the torchvision compatibility fix"""
    print(f"Torchvision version: {torchvision.__version__}")
    return fix_torchvision_functional_tensor()


if __name__ == "__main__":
    apply_fix()
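
The fix only helps if it runs before any library that still does `from torchvision.transforms.functional_tensor import ...` (older basicsr releases, which RealESRGAN depends on, are the usual case). A minimal usage sketch, assuming this file is saved as torchvision_fix.py; that module name is a guess from its contents, not confirmed by this commit:

# Assumed module name for this file; adjust the import to wherever it lives.
from torchvision_fix import apply_fix

apply_fix()  # installs the functional_tensor mock if the real module is missing
from basicsr.archs.rrdbnet_arch import RRDBNet  # now imports cleanly on newer torchvision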

View File

@@ -0,0 +1,32 @@
# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
# except for the third-party components listed below.
# Hunyuan 3D does not impose any additional limitations beyond what is outlined
# in the respective licenses of these third-party components.
# Users must comply with all terms and conditions of original licenses of these third-party
# components and must ensure that the usage of the third-party components adheres to
# all relevant laws and regulations.
# For avoidance of doubts, Hunyuan 3D means the large language models and
# their software and algorithms, including trained model weights, parameters (including
# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
# fine-tuning enabling code and other elements of the foregoing made publicly available
# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
import trimesh
import xatlas


def mesh_uv_wrap(mesh):
    if isinstance(mesh, trimesh.Scene):
        mesh = mesh.dump(concatenate=True)

    if len(mesh.faces) > 500000000:
        raise ValueError("The mesh has more than 500,000,000 faces, which is not supported.")

    vmapping, indices, uvs = xatlas.parametrize(mesh.vertices, mesh.faces)

    mesh.vertices = mesh.vertices[vmapping]
    mesh.faces = indices
    mesh.visual.uv = uvs

    return mesh
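
A minimal usage sketch with placeholder file names:

# Hypothetical paths; any mesh format trimesh can read should work.
mesh = trimesh.load("model.glb", force="mesh")
mesh = mesh_uv_wrap(mesh)  # re-indexes vertices/faces and attaches xatlas UVs
mesh.export("model_with_uv.obj")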