Huiwenshi
2025-06-13 23:53:14 +08:00
parent 70ee89e0a2
commit c88bee648e
581 changed files with 30365 additions and 1 deletion

View File

@@ -0,0 +1,13 @@
# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
# except for the third-party components listed below.
# Hunyuan 3D does not impose any additional limitations beyond what is outlined
# in the respective licenses of these third-party components.
# Users must comply with all terms and conditions of original licenses of these third-party
# components and must ensure that the usage of the third-party components adheres to
# all relevant laws and regulations.
# For avoidance of doubts, Hunyuan 3D means the large language models and
# their software and algorithms, including trained model weights, parameters (including
# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
# fine-tuning enabling code and other elements of the foregoing made publicly available
# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.

View File

@@ -0,0 +1,41 @@
# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
# except for the third-party components listed below.
# Hunyuan 3D does not impose any additional limitations beyond what is outlined
# in the respective licenses of these third-party components.
# Users must comply with all terms and conditions of original licenses of these third-party
# components and must ensure that the usage of the third-party components adheres to
# all relevant laws and regulations.
# For avoidance of doubts, Hunyuan 3D means the large language models and
# their software and algorithms, including trained model weights, parameters (including
# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
# fine-tuning enabling code and other elements of the foregoing made publicly available
# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
import numpy as np
from PIL import Image


class imageSuperNet:
    def __init__(self, config) -> None:
        from realesrgan import RealESRGANer
        from basicsr.archs.rrdbnet_arch import RRDBNet

        model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
        upsampler = RealESRGANer(
            scale=4,
            model_path=config.realesrgan_ckpt_path,
            dni_weight=None,
            model=model,
            tile=0,
            tile_pad=10,
            pre_pad=0,
            half=True,
            gpu_id=None,
        )
        self.upsampler = upsampler

    def __call__(self, image):
        output, _ = self.upsampler.enhance(np.array(image))
        output = Image.fromarray(output)
        return output
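
A minimal usage sketch for the class above, assuming a config object exposing the realesrgan_ckpt_path attribute read in __init__; the checkpoint path and file names below are placeholders, not part of this commit:

from types import SimpleNamespace
from PIL import Image

# Hypothetical config; only realesrgan_ckpt_path is required by imageSuperNet.
config = SimpleNamespace(realesrgan_ckpt_path="weights/RealESRGAN_x4plus.pth")
upscaler = imageSuperNet(config)
lowres = Image.open("input.png").convert("RGB")
highres = upscaler(lowres)  # returns a 4x-upscaled PIL image
highres.save("input_4x.png")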

View File

@@ -0,0 +1,127 @@
# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
# except for the third-party components listed below.
# Hunyuan 3D does not impose any additional limitations beyond what is outlined
# in the respective licenses of these third-party components.
# Users must comply with all terms and conditions of original licenses of these third-party
# components and must ensure that the usage of the third-party components adheres to
# all relevant laws and regulations.
# For avoidance of doubts, Hunyuan 3D means the large language models and
# their software and algorithms, including trained model weights, parameters (including
# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
# fine-tuning enabling code and other elements of the foregoing made publicly available
# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
import os
import torch
import random
import numpy as np
from PIL import Image
from typing import List
import huggingface_hub
from omegaconf import OmegaConf
from diffusers import DiffusionPipeline
from diffusers import EulerAncestralDiscreteScheduler, DDIMScheduler, UniPCMultistepScheduler


class multiviewDiffusionNet:
    def __init__(self, config) -> None:
        self.device = config.device

        cfg_path = config.multiview_cfg_path
        cfg = OmegaConf.load(cfg_path)
        self.cfg = cfg
        self.mode = self.cfg.model.params.stable_diffusion_config.custom_pipeline[2:]

        model_path = huggingface_hub.snapshot_download(
            repo_id=config.multiview_pretrained_path,
            allow_patterns=["hunyuan3d-paintpbr-v2-1/*"],
        )
        model_path = os.path.join(model_path, "hunyuan3d-paintpbr-v2-1")
        pipeline = DiffusionPipeline.from_pretrained(
            model_path,
            custom_pipeline="hunyuanpaintpbr",
            torch_dtype=torch.float16,
        )

        pipeline.scheduler = UniPCMultistepScheduler.from_config(pipeline.scheduler.config, timestep_spacing="trailing")
        pipeline.set_progress_bar_config(disable=True)
        pipeline.eval()
        setattr(pipeline, "view_size", cfg.model.params.get("view_size", 320))
        self.pipeline = pipeline.to(self.device)

        if hasattr(self.pipeline.unet, "use_dino") and self.pipeline.unet.use_dino:
            from hunyuanpaintpbr.modules import Dino_v2

            self.dino_v2 = Dino_v2(config.dino_ckpt_path).to(torch.float16)
            self.dino_v2 = self.dino_v2.to(self.device)

    def seed_everything(self, seed):
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        os.environ["PL_GLOBAL_SEED"] = str(seed)

    @torch.no_grad()
    def __call__(self, images, conditions, prompt=None, custom_view_size=None, resize_input=False):
        pils = self.forward_one(
            images, conditions, prompt=prompt, custom_view_size=custom_view_size, resize_input=resize_input
        )
        return pils

    def forward_one(self, input_images, control_images, prompt=None, custom_view_size=None, resize_input=False):
        self.seed_everything(0)
        custom_view_size = custom_view_size if custom_view_size is not None else self.pipeline.view_size
        if not isinstance(input_images, List):
            input_images = [input_images]

        if not resize_input:
            input_images = [
                input_image.resize((self.pipeline.view_size, self.pipeline.view_size)) for input_image in input_images
            ]
        else:
            input_images = [input_image.resize((custom_view_size, custom_view_size)) for input_image in input_images]

        for i in range(len(control_images)):
            control_images[i] = control_images[i].resize((custom_view_size, custom_view_size))
            if control_images[i].mode == "L":
                control_images[i] = control_images[i].point(lambda x: 255 if x > 1 else 0, mode="1")

        kwargs = dict(generator=torch.Generator(device=self.pipeline.device).manual_seed(0))

        num_view = len(control_images) // 2
        normal_image = [[control_images[i] for i in range(num_view)]]
        position_image = [[control_images[i + num_view] for i in range(num_view)]]

        kwargs["width"] = custom_view_size
        kwargs["height"] = custom_view_size
        kwargs["num_in_batch"] = num_view
        kwargs["images_normal"] = normal_image
        kwargs["images_position"] = position_image

        if hasattr(self.pipeline.unet, "use_dino") and self.pipeline.unet.use_dino:
            dino_hidden_states = self.dino_v2(input_images[0])
            kwargs["dino_hidden_states"] = dino_hidden_states

        sync_condition = None

        infer_steps_dict = {
            "EulerAncestralDiscreteScheduler": 30,
            "UniPCMultistepScheduler": 15,
            "DDIMScheduler": 50,
            "ShiftSNRScheduler": 15,
        }

        mvd_image = self.pipeline(
            input_images[0:1],
            num_inference_steps=infer_steps_dict[self.pipeline.scheduler.__class__.__name__],
            prompt=prompt,
            sync_condition=sync_condition,
            guidance_scale=3.0,
            **kwargs,
        ).images

        if "pbr" in self.mode:
            mvd_image = {"albedo": mvd_image[:num_view], "mr": mvd_image[num_view:]}
            # mvd_image = {'albedo': mvd_image[:num_view]}
        else:
            mvd_image = {"hdr": mvd_image}

        return mvd_image
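
A hedged usage sketch for multiviewDiffusionNet. The config attributes below are the ones __init__ reads; every path and repo id is a placeholder. Note that forward_one expects the control images to be the per-view normal maps followed by the per-view position maps, so the list length is twice the view count:

from types import SimpleNamespace
from PIL import Image

# Hypothetical config; placeholder values for the attributes read in __init__.
config = SimpleNamespace(
    device="cuda",
    multiview_cfg_path="cfgs/hunyuan-paint-pbr.yaml",    # assumed config file
    multiview_pretrained_path="tencent/Hunyuan3D-2.1",   # assumed HF repo id
    dino_ckpt_path="weights/dinov2",                     # only used when the UNet enables DINO
)
net = multiviewDiffusionNet(config)

ref = Image.open("reference.png").convert("RGB")
normals = [Image.open(f"normal_{i}.png") for i in range(6)]      # placeholder view renders
positions = [Image.open(f"position_{i}.png") for i in range(6)]
out = net(ref, normals + positions)  # {'albedo': [...], 'mr': [...]} in "pbr" mode, else {'hdr': [...]}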

View File

@@ -0,0 +1,135 @@
# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
# except for the third-party components listed below.
# Hunyuan 3D does not impose any additional limitations beyond what is outlined
# in the respective licenses of these third-party components.
# Users must comply with all terms and conditions of original licenses of these third-party
# components and must ensure that the usage of the third-party components adheres to
# all relevant laws and regulations.
# For avoidance of doubts, Hunyuan 3D means the large language models and
# their software and algorithms, including trained model weights, parameters (including
# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
# fine-tuning enabling code and other elements of the foregoing made publicly available
# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
import torch
import numpy as np


class ViewProcessor:
    def __init__(self, config, render):
        self.config = config
        self.render = render

    def render_normal_multiview(self, camera_elevs, camera_azims, use_abs_coor=True):
        normal_maps = []
        for elev, azim in zip(camera_elevs, camera_azims):
            normal_map = self.render.render_normal(elev, azim, use_abs_coor=use_abs_coor, return_type="pl")
            normal_maps.append(normal_map)
        return normal_maps

    def render_position_multiview(self, camera_elevs, camera_azims):
        position_maps = []
        for elev, azim in zip(camera_elevs, camera_azims):
            position_map = self.render.render_position(elev, azim, return_type="pl")
            position_maps.append(position_map)
        return position_maps

    def bake_view_selection(
        self, candidate_camera_elevs, candidate_camera_azims, candidate_view_weights, max_selected_view_num
    ):
        original_resolution = self.render.default_resolution
        self.render.set_default_render_resolution(1024)

        selected_camera_elevs = []
        selected_camera_azims = []
        selected_view_weights = []
        selected_alpha_maps = []
        viewed_tri_idxs = []
        viewed_masks = []

        # Compute the area of each triangle face
        face_areas = self.render.get_face_areas(from_one_index=True)
        total_area = face_areas.sum()
        face_area_ratios = face_areas / total_area

        candidate_view_num = len(candidate_camera_elevs)
        self.render.set_boundary_unreliable_scale(2)
        for elev, azim in zip(candidate_camera_elevs, candidate_camera_azims):
            viewed_tri_idx = self.render.render_alpha(elev, azim, return_type="np")
            viewed_tri_idxs.append(set(np.unique(viewed_tri_idx.flatten())))
            viewed_masks.append(viewed_tri_idx[0, :, :, 0] > 0)

        is_selected = [False for _ in range(candidate_view_num)]
        total_viewed_tri_idxs = set()
        total_viewed_area = 0.0
        for idx in range(6):
            selected_camera_elevs.append(candidate_camera_elevs[idx])
            selected_camera_azims.append(candidate_camera_azims[idx])
            selected_view_weights.append(candidate_view_weights[idx])
            selected_alpha_maps.append(viewed_masks[idx])
            is_selected[idx] = True
            total_viewed_tri_idxs.update(viewed_tri_idxs[idx])
        total_viewed_area = face_area_ratios[list(total_viewed_tri_idxs)].sum()

        for _ in range(max_selected_view_num - len(selected_view_weights)):
            max_inc = 0
            max_idx = -1
            for idx, (elev, azim, weight) in enumerate(
                zip(candidate_camera_elevs, candidate_camera_azims, candidate_view_weights)
            ):
                if is_selected[idx]:
                    continue
                new_tri_idxs = viewed_tri_idxs[idx] - total_viewed_tri_idxs
                new_inc_area = face_area_ratios[list(new_tri_idxs)].sum()
                if new_inc_area > max_inc:
                    max_inc = new_inc_area
                    max_idx = idx
            if max_inc > 0.01:
                is_selected[max_idx] = True
                selected_camera_elevs.append(candidate_camera_elevs[max_idx])
                selected_camera_azims.append(candidate_camera_azims[max_idx])
                selected_view_weights.append(candidate_view_weights[max_idx])
                selected_alpha_maps.append(viewed_masks[max_idx])
                total_viewed_tri_idxs = total_viewed_tri_idxs.union(viewed_tri_idxs[max_idx])
                total_viewed_area += max_inc
            else:
                break

        self.render.set_default_render_resolution(original_resolution)
        return selected_camera_elevs, selected_camera_azims, selected_view_weights

    def bake_from_multiview(self, views, camera_elevs, camera_azims, view_weights):
        project_textures, project_weighted_cos_maps = [], []
        project_boundary_maps = []
        for view, camera_elev, camera_azim, weight in zip(views, camera_elevs, camera_azims, view_weights):
            project_texture, project_cos_map, project_boundary_map = self.render.back_project(
                view, camera_elev, camera_azim
            )
            project_cos_map = weight * (project_cos_map**self.config.bake_exp)
            project_textures.append(project_texture)
            project_weighted_cos_maps.append(project_cos_map)
            project_boundary_maps.append(project_boundary_map)
        texture, ori_trust_map = self.render.fast_bake_texture(project_textures, project_weighted_cos_maps)
        return texture, ori_trust_map > 1e-8

    def texture_inpaint(self, texture, mask, default=None):
        if default is not None:
            mask = mask.astype(bool)
            inpaint_value = torch.tensor(default, dtype=texture.dtype, device=texture.device)
            texture[~mask] = inpaint_value
        else:
            texture_np = self.render.uv_inpaint(texture, mask)
            texture = torch.tensor(texture_np / 255).float().to(texture.device)
        return texture
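
The selection loop in bake_view_selection is a greedy set-cover over face area: six base views are always kept, then each round adds the candidate whose newly visible faces contribute the most area, stopping once the best marginal gain drops below 1%. A self-contained toy sketch of that rule, with illustrative data only and no renderer involved:

import numpy as np

face_area_ratios = np.array([0.4, 0.3, 0.2, 0.1])   # toy per-face area fractions
viewed_tri_idxs = [{0, 1}, {1, 2}, {2, 3}, {0, 3}]  # faces visible from each candidate view

selected, covered = [], set()
while len(selected) < len(viewed_tri_idxs):
    # Marginal area gain of each candidate view, counting only uncovered faces
    gains = [face_area_ratios[list(v - covered)].sum() for v in viewed_tri_idxs]
    best = int(np.argmax(gains))
    if gains[best] <= 0.01:  # same 1% stopping threshold as above
        break
    selected.append(best)
    covered |= viewed_tri_idxs[best]

print(selected)  # [0, 2] for this toy data: view 1 adds too little once 0 and 2 are chosen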

View File

@@ -0,0 +1,37 @@
# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
# except for the third-party components listed below.
# Hunyuan 3D does not impose any additional limitations beyond what is outlined
# in the respective licenses of these third-party components.
# Users must comply with all terms and conditions of original licenses of these third-party
# components and must ensure that the usage of the third-party components adheres to
# all relevant laws and regulations.
# For avoidance of doubts, Hunyuan 3D means the large language models and
# their software and algorithms, including trained model weights, parameters (including
# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
# fine-tuning enabling code and other elements of the foregoing made publicly available
# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
import trimesh
import pymeshlab


def remesh_mesh(mesh_path, remesh_path):
    mesh_simplify_trimesh(mesh_path, remesh_path)


def mesh_simplify_trimesh(inputpath, outputpath, target_count=40000):
    # First remove stray (disconnected) faces
    ms = pymeshlab.MeshSet()
    if inputpath.endswith(".glb"):
        ms.load_new_mesh(inputpath, load_in_a_single_layer=True)
    else:
        ms.load_new_mesh(inputpath)
    ms.save_current_mesh(outputpath.replace(".glb", ".obj"), save_textures=False)

    # Run quadric decimation to reduce the face count
    current = trimesh.load(outputpath.replace(".glb", ".obj"), force="mesh")
    face_num = current.faces.shape[0]
    if face_num > target_count:
        current = current.simplify_quadric_decimation(target_count)
    current.export(outputpath)
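
A hedged usage example; both paths are placeholders. mesh_simplify_trimesh first writes an intermediate .obj (derived from the output path by swapping the .glb suffix) and then exports the decimated mesh to the output path itself:

# Hypothetical paths; remeshed.obj is written as an intermediate, then remeshed.glb.
remesh_mesh("raw_scan.glb", "remeshed.glb")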

View File

@@ -0,0 +1,111 @@
# Torchvision compatibility fix for functional_tensor module
# This file helps resolve compatibility issues between different torchvision versions
import sys

import torch
import torchvision


def fix_torchvision_functional_tensor():
    """
    Fix the torchvision.transforms.functional_tensor import issue
    """
    try:
        # Check if the module exists in the expected location
        import torchvision.transforms.functional_tensor
        print("torchvision.transforms.functional_tensor is available")
        return True
    except ImportError:
        print("torchvision.transforms.functional_tensor not found, applying compatibility fix...")
        try:
            # Create a mock functional_tensor module with the required functions
            import torchvision.transforms.functional as F

            class FunctionalTensorMock:
                """Mock module to replace functional_tensor"""

                @staticmethod
                def _get_grayscale_weights(img):
                    """Helper to create grayscale weights based on image dimensions"""
                    weights = torch.tensor([0.299, 0.587, 0.114], device=img.device, dtype=img.dtype)
                    return weights.view(1, 3, 1, 1) if len(img.shape) == 4 else weights.view(3, 1, 1)

                @staticmethod
                def _try_import_fallback(module_names, attr_name):
                    """Helper to try importing from multiple modules"""
                    for module_name in module_names:
                        try:
                            module = __import__(module_name, fromlist=[attr_name])
                            if hasattr(module, attr_name):
                                return getattr(module, attr_name)
                        except ImportError:
                            continue
                    return None

                @staticmethod
                def rgb_to_grayscale(img, num_output_channels=1):
                    """Convert an RGB image to grayscale"""
                    if hasattr(F, 'rgb_to_grayscale'):
                        return F.rgb_to_grayscale(img, num_output_channels)
                    # Fallback implementation
                    weights = FunctionalTensorMock._get_grayscale_weights(img)
                    grayscale = torch.sum(img * weights, dim=-3, keepdim=True)
                    if num_output_channels == 3:
                        repeat_dims = (1, 3, 1, 1) if len(img.shape) == 4 else (3, 1, 1)
                        grayscale = grayscale.repeat(*repeat_dims)
                    return grayscale

                @staticmethod
                def resize(img, size, interpolation=2, antialias=None):
                    """Resize function wrapper"""
                    # Try v2.functional first, then regular functional, then torch.nn.functional
                    resize_func = FunctionalTensorMock._try_import_fallback([
                        'torchvision.transforms.v2.functional',
                        'torchvision.transforms.functional'
                    ], 'resize')
                    if resize_func:
                        try:
                            return resize_func(img, size, interpolation=interpolation, antialias=antialias)
                        except TypeError:
                            # Fallback for older versions without the antialias parameter
                            return resize_func(img, size, interpolation=interpolation)
                    # Final fallback using torch.nn.functional
                    import torch.nn.functional as torch_F
                    size = (size, size) if isinstance(size, int) else size
                    img_input = img.unsqueeze(0) if len(img.shape) == 3 else img
                    return torch_F.interpolate(img_input, size=size, mode='bilinear', align_corners=False)

                def __getattr__(self, name):
                    """Fall back to the regular functional modules"""
                    func = self._try_import_fallback([
                        'torchvision.transforms.functional',
                        'torchvision.transforms.v2.functional'
                    ], name)
                    if func:
                        return func
                    raise AttributeError(f"'{name}' not found in functional_tensor mock")

            # Create the mock module instance and monkey patch it into sys.modules
            sys.modules['torchvision.transforms.functional_tensor'] = FunctionalTensorMock()
            print("Applied compatibility fix: created functional_tensor mock module")
            return True
        except Exception as e:
            print(f"Failed to create functional_tensor mock: {e}")
            return False


def apply_fix():
    """Apply the torchvision compatibility fix"""
    print(f"Torchvision version: {torchvision.__version__}")
    return fix_torchvision_functional_tensor()


if __name__ == "__main__":
    apply_fix()
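
The fix only helps if it runs before any library that still does `from torchvision.transforms.functional_tensor import ...` (older basicsr releases, which RealESRGAN depends on, are the usual case). A minimal usage sketch, assuming this file is saved as torchvision_fix.py; that module name is a guess from its contents, not confirmed by this commit:

# Assumed module name for this file; adjust the import to wherever it lives.
from torchvision_fix import apply_fix

apply_fix()  # installs the functional_tensor mock if the real module is missing
from basicsr.archs.rrdbnet_arch import RRDBNet  # now imports cleanly on newer torchvision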

View File

@@ -0,0 +1,32 @@
# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
# except for the third-party components listed below.
# Hunyuan 3D does not impose any additional limitations beyond what is outlined
# in the respective licenses of these third-party components.
# Users must comply with all terms and conditions of original licenses of these third-party
# components and must ensure that the usage of the third-party components adheres to
# all relevant laws and regulations.
# For avoidance of doubts, Hunyuan 3D means the large language models and
# their software and algorithms, including trained model weights, parameters (including
# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
# fine-tuning enabling code and other elements of the foregoing made publicly available
# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
import trimesh
import xatlas


def mesh_uv_wrap(mesh):
    if isinstance(mesh, trimesh.Scene):
        mesh = mesh.dump(concatenate=True)

    if len(mesh.faces) > 500000000:
        raise ValueError("The mesh has more than 500,000,000 faces, which is not supported.")

    vmapping, indices, uvs = xatlas.parametrize(mesh.vertices, mesh.faces)

    mesh.vertices = mesh.vertices[vmapping]
    mesh.faces = indices
    mesh.visual.uv = uvs

    return mesh
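
A minimal usage sketch with placeholder file names:

# Hypothetical paths; any mesh format trimesh can read should work.
mesh = trimesh.load("model.glb", force="mesh")
mesh = mesh_uv_wrap(mesh)  # re-indexes vertices/faces and attaches xatlas UVs
mesh.export("model_with_uv.obj")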