From b06e6ddf37a859b44bf9380dc9b6543393d5b8b3 Mon Sep 17 00:00:00 2001
From: s572915912 <54531516+s572915912@users.noreply.github.com>
Date: Fri, 11 Jul 2025 02:29:25 +0800
Subject: [PATCH 01/16] Update pipelines.py

---
 hy3dshape/hy3dshape/pipelines.py | 108 +++++++++++++++++++++++++++++++
 1 file changed, 108 insertions(+)

diff --git a/hy3dshape/hy3dshape/pipelines.py b/hy3dshape/hy3dshape/pipelines.py
index 71de472..0bb7c8f 100644
--- a/hy3dshape/hy3dshape/pipelines.py
+++ b/hy3dshape/hy3dshape/pipelines.py
@@ -781,3 +781,111 @@ class Hunyuan3DDiTFlowMatchingPipeline(Hunyuan3DDiTPipeline):
             box_v, mc_level, num_chunks, octree_resolution, mc_algo,
             enable_pbar=enable_pbar,
         )
+
+    @classmethod
+    @synchronize_timer('Hunyuan3DDiTFlowMatchingPipeline from Lightning Checkpoint')
+    def from_lightning_checkpoint(
+        cls,
+        ckpt_path: str,
+        config_path: str,
+        device: str = 'cuda',
+        dtype: torch.dtype = torch.float16,
+        **kwargs,
+    ):
+        """
+        Loads a model from a checkpoint created by the project's PyTorch Lightning training script.
+
+        This method correctly handles the nested configuration structure and state_dict prefixes
+        produced during training, and can intelligently load sharded checkpoints saved by Deepspeed.
+
+        Args:
+            ckpt_path (str): Path to the .ckpt checkpoint file or directory.
+            config_path (str): Path to the .yaml configuration file used for training.
+            device (str, optional): The device to load the model on. Defaults to 'cuda'.
+            dtype (torch.dtype, optional): The data type for the model. Defaults to torch.float16.
+
+        Returns:
+            Hunyuan3DDiTFlowMatchingPipeline: An instantiated pipeline ready for inference.
+        """
+        from omegaconf import OmegaConf
+        from hy3dshape.utils.misc import instantiate_from_config
+        from hy3dshape.schedulers import FlowMatchEulerDiscreteScheduler
+
+        logger.info(f"Loading model from Lightning checkpoint: {ckpt_path}")
+        logger.info(f"Using training config: {config_path}")
+
+        config = OmegaConf.load(config_path)
+
+        if os.path.isdir(ckpt_path):
+            # Assumes a Deepspeed-saved checkpoint directory
+            model_state_file = os.path.join(ckpt_path, 'checkpoint', 'mp_rank_00_model_states.pt')
+            if not os.path.exists(model_state_file):
+                raise FileNotFoundError(
+                    f"Could not find model weights file 'mp_rank_00_model_states.pt' in Deepspeed checkpoint directory: {os.path.join(ckpt_path, 'checkpoint')}"
+                )
+            
+            logger.info(f"Detected Deepspeed checkpoint directory, loading weights from: '{model_state_file}'")
+            ckpt = torch.load(model_state_file, map_location='cpu', weights_only=False)
+            # Deepspeed weights are often nested under the 'module' key
+            state_dict = ckpt.get('module', ckpt)
+        else:
+            # Standard .ckpt file
+            logger.info("Detected standard .ckpt file.")
+            ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=False)
+            state_dict = ckpt.get('state_dict', ckpt)
+
+        # 1. Instantiate components that were frozen during training.
+        #    They will load their own pretrained weights upon instantiation.
+        logger.info("Instantiating VAE, Conditioner, and ImageProcessor...")
+        vae = instantiate_from_config(config.model.params.first_stage_config)
+        conditioner = instantiate_from_config(config.model.params.cond_stage_config)
+        image_processor = instantiate_from_config(config.model.params.image_processor_cfg)
+
+        # 2. Instantiate the component that was trained (the Denoiser).
+        logger.info("Instantiating Denoiser...")
+        denoiser = instantiate_from_config(config.model.params.denoiser_cfg)
+        
+        # 3. Load weights only for the Denoiser from our training checkpoint.
+        possible_prefixes = ["model.model.", "_forward_module.model.", "model."]
+        denoiser_dict = {}
+        matched_prefix = None
+        for prefix in possible_prefixes:
+            sub_dict = {k.replace(prefix, ''): v for k, v in state_dict.items() if k.startswith(prefix)}
+            if sub_dict:
+                denoiser_dict = sub_dict
+                matched_prefix = prefix
+                break
+                
+        if denoiser_dict:
+            logger.info(f"Successfully matched Denoiser weight prefix: '{matched_prefix}'")
+            missing_keys, unexpected_keys = denoiser.load_state_dict(denoiser_dict, strict=False)
+            logger.info(" Successfully loaded weights for 'denoiser'.")
+            if missing_keys:
+                logger.warning(f"  - Missing keys: {missing_keys}")
+            if unexpected_keys:
+                logger.warning(f"  - Unexpected keys: {unexpected_keys}")
+        else:
+            logger.warning("Could not find weights for 'denoiser' in checkpoint. It will be randomly initialized.")
+
+        # 4. Instantiate a new, inference-compatible scheduler.
+        logger.info("Creating a new scheduler for inference...")
+        scheduler = FlowMatchEulerDiscreteScheduler()
+        
+        # 5. Assemble the final, healthy pipeline.
+        pipeline = cls(
+            model=denoiser,
+            vae=vae,
+            scheduler=scheduler,
+            conditioner=conditioner,
+            image_processor=image_processor,
+            **kwargs,
+        )
+        
+        # 6. Move all model components to the correct device and set to evaluation mode.
+        pipeline.to(torch.device(device), dtype=dtype)
+        pipeline.model.eval()
+        pipeline.vae.eval()
+        pipeline.conditioner.eval()
+        
+        logger.info("\n Pipeline successfully assembled from Lightning checkpoint!")
+        return pipeline

From e34a3ba752a3a2c2f77c5d8270aa4677392839aa Mon Sep 17 00:00:00 2001
From: s572915912 <54531516+s572915912@users.noreply.github.com>
Date: Fri, 11 Jul 2025 02:33:30 +0800
Subject: [PATCH 02/16] Create run_inference_with_fix.py

---
 run_inference_with_fix.py | 56 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 run_inference_with_fix.py

diff --git a/run_inference_with_fix.py b/run_inference_with_fix.py
new file mode 100644
index 0000000..a69f9a2
--- /dev/null
+++ b/run_inference_with_fix.py
@@ -0,0 +1,56 @@
+import os
+import sys
+import torch
+from PIL import Image
+
+# Add the project's sub-directory to the path to allow direct imports
+sys.path.insert(0, os.path.join(os.path.abspath('.'), 'hy3dshape'))
+from hy3dshape.pipelines import Hunyuan3DDiTFlowMatchingPipeline
+
+# --- 1. Set up the paths for your trained model ---
+
+# The training configuration file you used
+CONFIG_PATH = "./hy3dshape/configs/hunyuandit-mini-overfitting-flowmatching-dinol518-bf16-lr1e4-4096.yaml"
+
+# !!! IMPORTANT: Change this to the actual checkpoint directory you generated !!!
+# This should be the path to the directory, e.g., 'ckpt-step=00002000.ckpt'
+CKPT_PATH = "./hy3dshape/output_folder/dit/overfitting_depth_16_token_4096_lr1e4/ckpt/ckpt-step=00002000.ckpt"
+
+# The input image for inference
+IMAGE_PATH = "./assets/demo.png"
+
+# The path where the final 3D model will be saved
+OUTPUT_PATH = "./my_model_output.glb"
+
+
+if __name__ == '__main__':
+    # Setup device and data type
+    if not torch.cuda.is_available():
+        print("Warning: CUDA not available, running on CPU. This will be very slow.")
+        device = torch.device('cpu')
+        # Use float32 on CPU as it does not support bfloat16
+        dtype = torch.float32
+    else:
+        device = torch.device('cuda')
+        # Use the same precision as in training for best results
+        dtype = torch.bfloat16
+
+    print("\n--- Attempting to load the model using the new from_lightning_checkpoint method ---")
+
+    # Load the pipeline using the new, elegant class method
+    pipeline = Hunyuan3DDiTFlowMatchingPipeline.from_lightning_checkpoint(
+        ckpt_path=CKPT_PATH,
+        config_path=CONFIG_PATH,
+        device=str(device),
+        dtype=dtype,
+    )
+
+    print("\n Model loaded successfully! Starting inference...")
+    input_image = Image.open(IMAGE_PATH)
+
+    # Run inference
+    mesh_output = pipeline(image=input_image)[0]
+
+    # Save the result
+    mesh_output.export(OUTPUT_PATH)
+    print(f"\n Inference complete! The 3D model has been saved to: {OUTPUT_PATH}")

From 8cd92830fbac44a39490ee0d98214aad07ae749f Mon Sep 17 00:00:00 2001
From: s572915912 <54531516+s572915912@users.noreply.github.com>
Date: Fri, 11 Jul 2025 15:51:55 +0800
Subject: [PATCH 03/16] Update train_deepspeed.sh

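Auto-detect NCCL_SOCKET_IFNAME when it is unset instead of hard-coding eth1/bond1; also sets NCCL_IB_DISABLE=1 and drops the hard-coded UCX_NET_DEVICES and bonded NCCL_IB_HCA exports.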
---
 hy3dshape/scripts/train_deepspeed.sh | 36 +++++++++++++++++-----------
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/hy3dshape/scripts/train_deepspeed.sh b/hy3dshape/scripts/train_deepspeed.sh
index de8c61e..278cf73 100644
--- a/hy3dshape/scripts/train_deepspeed.sh
+++ b/hy3dshape/scripts/train_deepspeed.sh
@@ -1,11 +1,22 @@
 # If: ImportError: /usr/lib64/libstdc++.so.6: version `GLIBCXX_3.4.20' not found 
 # Do: ln /usr/local/gcc-8.3/lib64/libstdc++.so.6 -sf /usr/lib64/libstdc++.so.6
 
+# Try to auto-detect a suitable network interface if NCCL_SOCKET_IFNAME is not already set.
+if [ -z "$NCCL_SOCKET_IFNAME" ]; then
+    # Find the first physical-like interface by excluding common virtual/loopback names.
+    DETECTED_IFACE=$(ls /sys/class/net | grep -vE '^(lo|docker|veth|cali|tunl|kube|ib|usb)' | head -n 1)
+    if [ -n "$DETECTED_IFACE" ]; then
+        echo "NCCL_SOCKET_IFNAME is not set. Auto-detected and exporting: $DETECTED_IFACE"
+        export NCCL_SOCKET_IFNAME=$DETECTED_IFACE
+    else
+        echo "Warning: Could not auto-detect a network interface. You may need to set NCCL_SOCKET_IFNAME manually if NCCL fails."
+    fi
+fi
+
 export NCCL_IB_TIMEOUT=24
 export NCCL_NVLS_ENABLE=0
-NET_TYPE="high"
+NET_TYPE="high" 
 if [[ "${NET_TYPE}" = "low" ]]; then
-    export NCCL_SOCKET_IFNAME=eth1
     export NCCL_IB_GID_INDEX=3
     export NCCL_IB_HCA=mlx5_2:1,mlx5_2:1
     export NCCL_IB_SL=3
@@ -18,12 +29,10 @@ else
     export NCCL_IB_SL=3
     export NCCL_CHECK_DISABLE=1
     export NCCL_P2P_DISABLE=0
-    export NCCL_IB_DISABLE=0
+    export NCCL_IB_DISABLE=1
     export NCCL_LL_THRESHOLD=16384
     export NCCL_IB_CUDA_SUPPORT=1
-    export NCCL_SOCKET_IFNAME=bond1
-    export UCX_NET_DEVICES=bond1
-    export NCCL_IB_HCA=mlx5_bond_1,mlx5_bond_5,mlx5_bond_3,mlx5_bond_7,mlx5_bond_4,mlx5_bond_8,mlx5_bond_2,mlx5_bond_6
+    # DELETED: The hardcoded export NCCL_SOCKET_IFNAME and UCX_NET_DEVICES lines
     export NCCL_COLLNET_ENABLE=0
     export SHARP_COLL_ENABLE_SAT=0
     export NCCL_NET_GDR_LEVEL=2
@@ -40,12 +49,13 @@ master_ip=$4
 config=$5
 output_dir=$6
 
-
-echo node_num $node_num
-echo node_rank $node_rank
-echo master_ip $master_ip
-echo config $config
-echo output_dir $output_dir
+echo "--- Script Arguments ---"
+echo "node_num: $node_num"
+echo "node_rank: $node_rank"
+echo "master_ip: $master_ip"
+echo "config: $config"
+echo "output_dir: $output_dir"
+echo "----------------------"
 
 if test -d "$output_dir"; then
     cp $config $output_dir
@@ -58,7 +68,6 @@ NODE_RANK=$node_rank \
 HF_HUB_OFFLINE=0 \
 MASTER_PORT=12348 \
 MASTER_ADDR=$master_ip \
-NCCL_SOCKET_IFNAME=bond1 \
 NCCL_IB_GID_INDEX=3 \
 NCCL_NVLS_ENABLE=0 \
 python3 main.py \
@@ -67,4 +76,3 @@ python3 main.py \
     --config $config \
     --output_dir $output_dir \
     --deepspeed
-

From f2f19d74a8ca9e76d9dc15b567d4bd5b5ab3e17b Mon Sep 17 00:00:00 2001
From: s572915912 <54531516+s572915912@users.noreply.github.com>
Date: Fri, 11 Jul 2025 15:53:01 +0800
Subject: [PATCH 04/16] Update
 hunyuandit-mini-overfitting-flowmatching-dinol518-bf16-lr1e4-4096.yaml

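Add a comment explaining that pc_size + pc_sharpedge_size must match the point count expected by the pretrained ShapeVAE (81920 for the default model).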
---
 ...ng-flowmatching-dinol518-bf16-lr1e4-4096.yaml | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/hy3dshape/configs/hunyuandit-mini-overfitting-flowmatching-dinol518-bf16-lr1e4-4096.yaml b/hy3dshape/configs/hunyuandit-mini-overfitting-flowmatching-dinol518-bf16-lr1e4-4096.yaml
index bc3ec2a..193c5ae 100644
--- a/hy3dshape/configs/hunyuandit-mini-overfitting-flowmatching-dinol518-bf16-lr1e4-4096.yaml
+++ b/hy3dshape/configs/hunyuandit-mini-overfitting-flowmatching-dinol518-bf16-lr1e4-4096.yaml
@@ -33,6 +33,22 @@ dataset:
     std: &std [0.5, 0.5, 0.5]
 
     #! Point cloud sampling
+    # Important: The total number of points (pc_size + pc_sharpedge_size) is coupled with the
+    # architecture of the pretrained ShapeVAE in `first_stage_config`. You must ensure that
+    # the total number of points provided by the dataset matches the model's expectation.
+    #
+    # For the default pretrained ShapeVAE (`tencent/Hunyuan3D-2.1` VAE), the model
+    # implicitly expects a total of 81920 points.
+    #
+    # Correct configuration (for the default model):
+    #   pc_size: 81920
+    #   pc_sharpedge_size: 0
+    #
+    # Incorrect configuration that will cause a `split_with_sizes` RuntimeError:
+    #   pc_size: 10240
+    #   pc_sharpedge_size: 10240
+    #
+    # If you need to use a different number of points, you must retrain the ShapeVAE model first.
     pc_size: &pc_size 81920
     pc_sharpedge_size: &pc_sharpedge_size 0
     sharpedge_label: &sharpedge_label true

From af935af6881edeb2461a0e98fc3abf549eb88cd6 Mon Sep 17 00:00:00 2001
From: s572915912 <54531516+s572915912@users.noreply.github.com>
Date: Fri, 11 Jul 2025 16:36:46 +0800
Subject: [PATCH 05/16] Update train_deepspeed.sh

---
 hy3dshape/scripts/train_deepspeed.sh | 35 +++++++++++-----------------
 1 file changed, 13 insertions(+), 22 deletions(-)

diff --git a/hy3dshape/scripts/train_deepspeed.sh b/hy3dshape/scripts/train_deepspeed.sh
index 278cf73..ef9ff9d 100644
--- a/hy3dshape/scripts/train_deepspeed.sh
+++ b/hy3dshape/scripts/train_deepspeed.sh
@@ -1,22 +1,11 @@
 # If: ImportError: /usr/lib64/libstdc++.so.6: version `GLIBCXX_3.4.20' not found 
 # Do: ln /usr/local/gcc-8.3/lib64/libstdc++.so.6 -sf /usr/lib64/libstdc++.so.6
 
-# Try to auto-detect a suitable network interface if NCCL_SOCKET_IFNAME is not already set.
-if [ -z "$NCCL_SOCKET_IFNAME" ]; then
-    # Find the first physical-like interface by excluding common virtual/loopback names.
-    DETECTED_IFACE=$(ls /sys/class/net | grep -vE '^(lo|docker|veth|cali|tunl|kube|ib|usb)' | head -n 1)
-    if [ -n "$DETECTED_IFACE" ]; then
-        echo "NCCL_SOCKET_IFNAME is not set. Auto-detected and exporting: $DETECTED_IFACE"
-        export NCCL_SOCKET_IFNAME=$DETECTED_IFACE
-    else
-        echo "Warning: Could not auto-detect a network interface. You may need to set NCCL_SOCKET_IFNAME manually if NCCL fails."
-    fi
-fi
-
 export NCCL_IB_TIMEOUT=24
 export NCCL_NVLS_ENABLE=0
-NET_TYPE="high" 
+NET_TYPE="high"
 if [[ "${NET_TYPE}" = "low" ]]; then
+    export NCCL_SOCKET_IFNAME=eth1
     export NCCL_IB_GID_INDEX=3
     export NCCL_IB_HCA=mlx5_2:1,mlx5_2:1
     export NCCL_IB_SL=3
@@ -29,10 +18,12 @@ else
     export NCCL_IB_SL=3
     export NCCL_CHECK_DISABLE=1
     export NCCL_P2P_DISABLE=0
-    export NCCL_IB_DISABLE=1
+    export NCCL_IB_DISABLE=0
     export NCCL_LL_THRESHOLD=16384
     export NCCL_IB_CUDA_SUPPORT=1
-    # DELETED: The hardcoded export NCCL_SOCKET_IFNAME and UCX_NET_DEVICES lines
+    export NCCL_SOCKET_IFNAME=bond1
+    export UCX_NET_DEVICES=bond1
+    export NCCL_IB_HCA=mlx5_bond_1,mlx5_bond_5,mlx5_bond_3,mlx5_bond_7,mlx5_bond_4,mlx5_bond_8,mlx5_bond_2,mlx5_bond_6
     export NCCL_COLLNET_ENABLE=0
     export SHARP_COLL_ENABLE_SAT=0
     export NCCL_NET_GDR_LEVEL=2
@@ -49,13 +40,12 @@ master_ip=$4
 config=$5
 output_dir=$6
 
-echo "--- Script Arguments ---"
-echo "node_num: $node_num"
-echo "node_rank: $node_rank"
-echo "master_ip: $master_ip"
-echo "config: $config"
-echo "output_dir: $output_dir"
-echo "----------------------"
+
+echo node_num $node_num
+echo node_rank $node_rank
+echo master_ip $master_ip
+echo config $config
+echo output_dir $output_dir
 
 if test -d "$output_dir"; then
     cp $config $output_dir
@@ -68,6 +58,7 @@ NODE_RANK=$node_rank \
 HF_HUB_OFFLINE=0 \
 MASTER_PORT=12348 \
 MASTER_ADDR=$master_ip \
+NCCL_SOCKET_IFNAME=bond1 \
 NCCL_IB_GID_INDEX=3 \
 NCCL_NVLS_ENABLE=0 \
 python3 main.py \

From de7996251db38eded27eb999ebec5cf377a53138 Mon Sep 17 00:00:00 2001
From: s572915912 <54531516+s572915912@users.noreply.github.com>
Date: Fri, 11 Jul 2025 16:37:32 +0800
Subject: [PATCH 06/16] Update
 hunyuandit-mini-overfitting-flowmatching-dinol518-bf16-lr1e4-4096.yaml

---
 ...ng-flowmatching-dinol518-bf16-lr1e4-4096.yaml | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/hy3dshape/configs/hunyuandit-mini-overfitting-flowmatching-dinol518-bf16-lr1e4-4096.yaml b/hy3dshape/configs/hunyuandit-mini-overfitting-flowmatching-dinol518-bf16-lr1e4-4096.yaml
index 193c5ae..bc3ec2a 100644
--- a/hy3dshape/configs/hunyuandit-mini-overfitting-flowmatching-dinol518-bf16-lr1e4-4096.yaml
+++ b/hy3dshape/configs/hunyuandit-mini-overfitting-flowmatching-dinol518-bf16-lr1e4-4096.yaml
@@ -33,22 +33,6 @@ dataset:
     std: &std [0.5, 0.5, 0.5]
 
     #! Point cloud sampling
-    # Important: The total number of points (pc_size + pc_sharpedge_size) is coupled with the
-    # architecture of the pretrained ShapeVAE in `first_stage_config`. You must ensure that
-    # the total number of points provided by the dataset matches the model's expectation.
-    #
-    # For the default pretrained ShapeVAE (`tencent/Hunyuan3D-2.1` VAE), the model
-    # implicitly expects a total of 81920 points.
-    #
-    # Correct configuration (for the default model):
-    #   pc_size: 81920
-    #   pc_sharpedge_size: 0
-    #
-    # Incorrect configuration that will cause a `split_with_sizes` RuntimeError:
-    #   pc_size: 10240
-    #   pc_sharpedge_size: 10240
-    #
-    # If you need to use a different number of points, you must retrain the ShapeVAE model first.
     pc_size: &pc_size 81920
     pc_sharpedge_size: &pc_sharpedge_size 0
     sharpedge_label: &sharpedge_label true

From c6d4cb89e21ddf47fc7c125bef6e32e4b50d6180 Mon Sep 17 00:00:00 2001
From: s572915912 <54531516+s572915912@users.noreply.github.com>
Date: Fri, 11 Jul 2025 16:39:10 +0800
Subject: [PATCH 07/16] Update train_deepspeed.sh


From 6726877bbb6790e7936ab840c94de72e7f3a0b08 Mon Sep 17 00:00:00 2001
From: s572915912 <54531516+s572915912@users.noreply.github.com>
Date: Fri, 11 Jul 2025 16:40:01 +0800
Subject: [PATCH 08/16] Update train_deepspeed.sh


From 96349ad5d0e3a6db3c4a37bbe26239f3136a0614 Mon Sep 17 00:00:00 2001
From: s572915912 <54531516+s572915912@users.noreply.github.com>
Date: Fri, 11 Jul 2025 16:43:40 +0800
Subject: [PATCH 09/16] Update train_deepspeed.sh

---
 hy3dshape/scripts/train_deepspeed.sh | 35 +++++++++++++++++-----------
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/hy3dshape/scripts/train_deepspeed.sh b/hy3dshape/scripts/train_deepspeed.sh
index ef9ff9d..278cf73 100644
--- a/hy3dshape/scripts/train_deepspeed.sh
+++ b/hy3dshape/scripts/train_deepspeed.sh
@@ -1,11 +1,22 @@
 # If: ImportError: /usr/lib64/libstdc++.so.6: version `GLIBCXX_3.4.20' not found 
 # Do: ln /usr/local/gcc-8.3/lib64/libstdc++.so.6 -sf /usr/lib64/libstdc++.so.6
 
+# Try to auto-detect a suitable network interface if NCCL_SOCKET_IFNAME is not already set.
+if [ -z "$NCCL_SOCKET_IFNAME" ]; then
+    # Find the first physical-like interface by excluding common virtual/loopback names.
+    DETECTED_IFACE=$(ls /sys/class/net | grep -vE '^(lo|docker|veth|cali|tunl|kube|ib|usb)' | head -n 1)
+    if [ -n "$DETECTED_IFACE" ]; then
+        echo "NCCL_SOCKET_IFNAME is not set. Auto-detected and exporting: $DETECTED_IFACE"
+        export NCCL_SOCKET_IFNAME=$DETECTED_IFACE
+    else
+        echo "Warning: Could not auto-detect a network interface. You may need to set NCCL_SOCKET_IFNAME manually if NCCL fails."
+    fi
+fi
+
 export NCCL_IB_TIMEOUT=24
 export NCCL_NVLS_ENABLE=0
-NET_TYPE="high"
+NET_TYPE="high" 
 if [[ "${NET_TYPE}" = "low" ]]; then
-    export NCCL_SOCKET_IFNAME=eth1
     export NCCL_IB_GID_INDEX=3
     export NCCL_IB_HCA=mlx5_2:1,mlx5_2:1
     export NCCL_IB_SL=3
@@ -18,12 +29,10 @@ else
     export NCCL_IB_SL=3
     export NCCL_CHECK_DISABLE=1
     export NCCL_P2P_DISABLE=0
-    export NCCL_IB_DISABLE=0
+    export NCCL_IB_DISABLE=1
     export NCCL_LL_THRESHOLD=16384
     export NCCL_IB_CUDA_SUPPORT=1
-    export NCCL_SOCKET_IFNAME=bond1
-    export UCX_NET_DEVICES=bond1
-    export NCCL_IB_HCA=mlx5_bond_1,mlx5_bond_5,mlx5_bond_3,mlx5_bond_7,mlx5_bond_4,mlx5_bond_8,mlx5_bond_2,mlx5_bond_6
+    # DELETED: The hardcoded export NCCL_SOCKET_IFNAME and UCX_NET_DEVICES lines
     export NCCL_COLLNET_ENABLE=0
     export SHARP_COLL_ENABLE_SAT=0
     export NCCL_NET_GDR_LEVEL=2
@@ -40,12 +49,13 @@ master_ip=$4
 config=$5
 output_dir=$6
 
-
-echo node_num $node_num
-echo node_rank $node_rank
-echo master_ip $master_ip
-echo config $config
-echo output_dir $output_dir
+echo "--- Script Arguments ---"
+echo "node_num: $node_num"
+echo "node_rank: $node_rank"
+echo "master_ip: $master_ip"
+echo "config: $config"
+echo "output_dir: $output_dir"
+echo "----------------------"
 
 if test -d "$output_dir"; then
     cp $config $output_dir
@@ -58,7 +68,6 @@ NODE_RANK=$node_rank \
 HF_HUB_OFFLINE=0 \
 MASTER_PORT=12348 \
 MASTER_ADDR=$master_ip \
-NCCL_SOCKET_IFNAME=bond1 \
 NCCL_IB_GID_INDEX=3 \
 NCCL_NVLS_ENABLE=0 \
 python3 main.py \

From dc2ea32d76c8645e0d4453a83ff402511f8fe353 Mon Sep 17 00:00:00 2001
From: s572915912 <54531516+s572915912@users.noreply.github.com>
Date: Fri, 11 Jul 2025 16:47:40 +0800
Subject: [PATCH 10/16] Update
 hunyuandit-mini-overfitting-flowmatching-dinol518-bf16-lr1e4-4096.yaml

---
 ...ng-flowmatching-dinol518-bf16-lr1e4-4096.yaml | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/hy3dshape/configs/hunyuandit-mini-overfitting-flowmatching-dinol518-bf16-lr1e4-4096.yaml b/hy3dshape/configs/hunyuandit-mini-overfitting-flowmatching-dinol518-bf16-lr1e4-4096.yaml
index bc3ec2a..193c5ae 100644
--- a/hy3dshape/configs/hunyuandit-mini-overfitting-flowmatching-dinol518-bf16-lr1e4-4096.yaml
+++ b/hy3dshape/configs/hunyuandit-mini-overfitting-flowmatching-dinol518-bf16-lr1e4-4096.yaml
@@ -33,6 +33,22 @@ dataset:
     std: &std [0.5, 0.5, 0.5]
 
     #! Point cloud sampling
+    # Important: The total number of points (pc_size + pc_sharpedge_size) is coupled with the
+    # architecture of the pretrained ShapeVAE in `first_stage_config`. You must ensure that
+    # the total number of points provided by the dataset matches the model's expectation.
+    #
+    # For the default pretrained ShapeVAE (`tencent/Hunyuan3D-2.1` VAE), the model
+    # implicitly expects a total of 81920 points.
+    #
+    # Correct configuration (for the default model):
+    #   pc_size: 81920
+    #   pc_sharpedge_size: 0
+    #
+    # Incorrect configuration that will cause a `split_with_sizes` RuntimeError:
+    #   pc_size: 10240
+    #   pc_sharpedge_size: 10240
+    #
+    # If you need to use a different number of points, you must retrain the ShapeVAE model first.
     pc_size: &pc_size 81920
     pc_sharpedge_size: &pc_sharpedge_size 0
     sharpedge_label: &sharpedge_label true

From f0a008279e856e36cd79c1c019248f52528144b6 Mon Sep 17 00:00:00 2001
From: s572915912 <54531516+s572915912@users.noreply.github.com>
Date: Fri, 11 Jul 2025 16:51:33 +0800
Subject: [PATCH 11/16] Update pipelines.py

---
 hy3dshape/hy3dshape/pipelines.py | 108 -------------------------------
 1 file changed, 108 deletions(-)

diff --git a/hy3dshape/hy3dshape/pipelines.py b/hy3dshape/hy3dshape/pipelines.py
index 0bb7c8f..71de472 100644
--- a/hy3dshape/hy3dshape/pipelines.py
+++ b/hy3dshape/hy3dshape/pipelines.py
@@ -781,111 +781,3 @@ class Hunyuan3DDiTFlowMatchingPipeline(Hunyuan3DDiTPipeline):
             box_v, mc_level, num_chunks, octree_resolution, mc_algo,
             enable_pbar=enable_pbar,
         )
-
-    @classmethod
-    @synchronize_timer('Hunyuan3DDiTFlowMatchingPipeline from Lightning Checkpoint')
-    def from_lightning_checkpoint(
-        cls,
-        ckpt_path: str,
-        config_path: str,
-        device: str = 'cuda',
-        dtype: torch.dtype = torch.float16,
-        **kwargs,
-    ):
-        """
-        Loads a model from a checkpoint created by the project's PyTorch Lightning training script.
-
-        This method correctly handles the nested configuration structure and state_dict prefixes
-        produced during training, and can intelligently load sharded checkpoints saved by Deepspeed.
-
-        Args:
-            ckpt_path (str): Path to the .ckpt checkpoint file or directory.
-            config_path (str): Path to the .yaml configuration file used for training.
-            device (str, optional): The device to load the model on. Defaults to 'cuda'.
-            dtype (torch.dtype, optional): The data type for the model. Defaults to torch.float16.
-
-        Returns:
-            Hunyuan3DDiTFlowMatchingPipeline: An instantiated pipeline ready for inference.
-        """
-        from omegaconf import OmegaConf
-        from hy3dshape.utils.misc import instantiate_from_config
-        from hy3dshape.schedulers import FlowMatchEulerDiscreteScheduler
-
-        logger.info(f"Loading model from Lightning checkpoint: {ckpt_path}")
-        logger.info(f"Using training config: {config_path}")
-
-        config = OmegaConf.load(config_path)
-
-        if os.path.isdir(ckpt_path):
-            # Assumes a Deepspeed-saved checkpoint directory
-            model_state_file = os.path.join(ckpt_path, 'checkpoint', 'mp_rank_00_model_states.pt')
-            if not os.path.exists(model_state_file):
-                raise FileNotFoundError(
-                    f"Could not find model weights file 'mp_rank_00_model_states.pt' in Deepspeed checkpoint directory: {os.path.join(ckpt_path, 'checkpoint')}"
-                )
-            
-            logger.info(f"Detected Deepspeed checkpoint directory, loading weights from: '{model_state_file}'")
-            ckpt = torch.load(model_state_file, map_location='cpu', weights_only=False)
-            # Deepspeed weights are often nested under the 'module' key
-            state_dict = ckpt.get('module', ckpt)
-        else:
-            # Standard .ckpt file
-            logger.info("Detected standard .ckpt file.")
-            ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=False)
-            state_dict = ckpt.get('state_dict', ckpt)
-
-        # 1. Instantiate components that were frozen during training.
-        #    They will load their own pretrained weights upon instantiation.
-        logger.info("Instantiating VAE, Conditioner, and ImageProcessor...")
-        vae = instantiate_from_config(config.model.params.first_stage_config)
-        conditioner = instantiate_from_config(config.model.params.cond_stage_config)
-        image_processor = instantiate_from_config(config.model.params.image_processor_cfg)
-
-        # 2. Instantiate the component that was trained (the Denoiser).
-        logger.info("Instantiating Denoiser...")
-        denoiser = instantiate_from_config(config.model.params.denoiser_cfg)
-        
-        # 3. Load weights only for the Denoiser from our training checkpoint.
-        possible_prefixes = ["model.model.", "_forward_module.model.", "model."]
-        denoiser_dict = {}
-        matched_prefix = None
-        for prefix in possible_prefixes:
-            sub_dict = {k.replace(prefix, ''): v for k, v in state_dict.items() if k.startswith(prefix)}
-            if sub_dict:
-                denoiser_dict = sub_dict
-                matched_prefix = prefix
-                break
-                
-        if denoiser_dict:
-            logger.info(f"Successfully matched Denoiser weight prefix: '{matched_prefix}'")
-            missing_keys, unexpected_keys = denoiser.load_state_dict(denoiser_dict, strict=False)
-            logger.info(" Successfully loaded weights for 'denoiser'.")
-            if missing_keys:
-                logger.warning(f"  - Missing keys: {missing_keys}")
-            if unexpected_keys:
-                logger.warning(f"  - Unexpected keys: {unexpected_keys}")
-        else:
-            logger.warning("Could not find weights for 'denoiser' in checkpoint. It will be randomly initialized.")
-
-        # 4. Instantiate a new, inference-compatible scheduler.
-        logger.info("Creating a new scheduler for inference...")
-        scheduler = FlowMatchEulerDiscreteScheduler()
-        
-        # 5. Assemble the final, healthy pipeline.
-        pipeline = cls(
-            model=denoiser,
-            vae=vae,
-            scheduler=scheduler,
-            conditioner=conditioner,
-            image_processor=image_processor,
-            **kwargs,
-        )
-        
-        # 6. Move all model components to the correct device and set to evaluation mode.
-        pipeline.to(torch.device(device), dtype=dtype)
-        pipeline.model.eval()
-        pipeline.vae.eval()
-        pipeline.conditioner.eval()
-        
-        logger.info("\n Pipeline successfully assembled from Lightning checkpoint!")
-        return pipeline

From 7a9d765627b3f3131bfcb089ce54a6582256e33d Mon Sep 17 00:00:00 2001
From: s572915912 <54531516+s572915912@users.noreply.github.com>
Date: Fri, 11 Jul 2025 16:53:19 +0800
Subject: [PATCH 12/16] Update run_inference_with_fix.py

---
 run_inference_with_fix.py | 55 ---------------------------------------
 1 file changed, 55 deletions(-)

diff --git a/run_inference_with_fix.py b/run_inference_with_fix.py
index a69f9a2..8b13789 100644
--- a/run_inference_with_fix.py
+++ b/run_inference_with_fix.py
@@ -1,56 +1 @@
-import os
-import sys
-import torch
-from PIL import Image
 
-# Add the project's sub-directory to the path to allow direct imports
-sys.path.insert(0, os.path.join(os.path.abspath('.'), 'hy3dshape'))
-from hy3dshape.pipelines import Hunyuan3DDiTFlowMatchingPipeline
-
-# --- 1. Set up the paths for your trained model ---
-
-# The training configuration file you used
-CONFIG_PATH = "./hy3dshape/configs/hunyuandit-mini-overfitting-flowmatching-dinol518-bf16-lr1e4-4096.yaml"
-
-# !!! IMPORTANT: Change this to the actual checkpoint directory you generated !!!
-# This should be the path to the directory, e.g., 'ckpt-step=00002000.ckpt'
-CKPT_PATH = "./hy3dshape/output_folder/dit/overfitting_depth_16_token_4096_lr1e4/ckpt/ckpt-step=00002000.ckpt"
-
-# The input image for inference
-IMAGE_PATH = "./assets/demo.png"
-
-# The path where the final 3D model will be saved
-OUTPUT_PATH = "./my_model_output.glb"
-
-
-if __name__ == '__main__':
-    # Setup device and data type
-    if not torch.cuda.is_available():
-        print("Warning: CUDA not available, running on CPU. This will be very slow.")
-        device = torch.device('cpu')
-        # Use float32 on CPU as it does not support bfloat16
-        dtype = torch.float32
-    else:
-        device = torch.device('cuda')
-        # Use the same precision as in training for best results
-        dtype = torch.bfloat16
-
-    print("\n--- Attempting to load the model using the new from_lightning_checkpoint method ---")
-
-    # Load the pipeline using the new, elegant class method
-    pipeline = Hunyuan3DDiTFlowMatchingPipeline.from_lightning_checkpoint(
-        ckpt_path=CKPT_PATH,
-        config_path=CONFIG_PATH,
-        device=str(device),
-        dtype=dtype,
-    )
-
-    print("\n Model loaded successfully! Starting inference...")
-    input_image = Image.open(IMAGE_PATH)
-
-    # Run inference
-    mesh_output = pipeline(image=input_image)[0]
-
-    # Save the result
-    mesh_output.export(OUTPUT_PATH)
-    print(f"\n Inference complete! The 3D model has been saved to: {OUTPUT_PATH}")

From 8eff6d82337b4d5f8a90673682f28c450d30fc25 Mon Sep 17 00:00:00 2001
From: s572915912 <54531516+s572915912@users.noreply.github.com>
Date: Fri, 11 Jul 2025 16:54:43 +0800
Subject: [PATCH 13/16] Delete run_inference_with_fix.py

---
 run_inference_with_fix.py | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 run_inference_with_fix.py

diff --git a/run_inference_with_fix.py b/run_inference_with_fix.py
deleted file mode 100644
index 8b13789..0000000
--- a/run_inference_with_fix.py
+++ /dev/null
@@ -1 +0,0 @@
-

From f4e03076654ea0264755dbc0bd8d727b2d90efc4 Mon Sep 17 00:00:00 2001
From: s572915912 <54531516+s572915912@users.noreply.github.com>
Date: Fri, 11 Jul 2025 18:32:16 +0800
Subject: [PATCH 14/16] Update train_deepspeed.sh

---
 hy3dshape/scripts/train_deepspeed.sh | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/hy3dshape/scripts/train_deepspeed.sh b/hy3dshape/scripts/train_deepspeed.sh
index 278cf73..444e5cc 100644
--- a/hy3dshape/scripts/train_deepspeed.sh
+++ b/hy3dshape/scripts/train_deepspeed.sh
@@ -29,10 +29,9 @@ else
     export NCCL_IB_SL=3
     export NCCL_CHECK_DISABLE=1
     export NCCL_P2P_DISABLE=0
-    export NCCL_IB_DISABLE=1
+    export NCCL_IB_DISABLE=0
     export NCCL_LL_THRESHOLD=16384
     export NCCL_IB_CUDA_SUPPORT=1
-    # DELETED: The hardcoded export NCCL_SOCKET_IFNAME and UCX_NET_DEVICES lines
     export NCCL_COLLNET_ENABLE=0
     export SHARP_COLL_ENABLE_SAT=0
     export NCCL_NET_GDR_LEVEL=2

From d9fc4d31bfcf9fe89d8dc4ab41d22231efd356e1 Mon Sep 17 00:00:00 2001
From: s572915912 <54531516+s572915912@users.noreply.github.com>
Date: Wed, 6 Aug 2025 01:12:13 +0800
Subject: [PATCH 15/16] Update
 hunyuandit-mini-overfitting-flowmatching-dinol518-bf16-lr1e4-4096.yaml

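Set pc_size / pc_sharpedge_size explicitly in the ShapeVAE first_stage_config params and drop the long point-cloud sampling comment from the dataset section.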
---
 ...flowmatching-dinol518-bf16-lr1e4-4096.yaml | 21 +++++--------------
 1 file changed, 5 insertions(+), 16 deletions(-)

diff --git a/hy3dshape/configs/hunyuandit-mini-overfitting-flowmatching-dinol518-bf16-lr1e4-4096.yaml b/hy3dshape/configs/hunyuandit-mini-overfitting-flowmatching-dinol518-bf16-lr1e4-4096.yaml
index 193c5ae..b32a87e 100644
--- a/hy3dshape/configs/hunyuandit-mini-overfitting-flowmatching-dinol518-bf16-lr1e4-4096.yaml
+++ b/hy3dshape/configs/hunyuandit-mini-overfitting-flowmatching-dinol518-bf16-lr1e4-4096.yaml
@@ -33,22 +33,6 @@ dataset:
     std: &std [0.5, 0.5, 0.5]
 
     #! Point cloud sampling
-    # Important: The total number of points (pc_size + pc_sharpedge_size) is coupled with the
-    # architecture of the pretrained ShapeVAE in `first_stage_config`. You must ensure that
-    # the total number of points provided by the dataset matches the model's expectation.
-    #
-    # For the default pretrained ShapeVAE (`tencent/Hunyuan3D-2.1` VAE), the model
-    # implicitly expects a total of 81920 points.
-    #
-    # Correct configuration (for the default model):
-    #   pc_size: 81920
-    #   pc_sharpedge_size: 0
-    #
-    # Incorrect configuration that will cause a `split_with_sizes` RuntimeError:
-    #   pc_size: 10240
-    #   pc_sharpedge_size: 10240
-    #
-    # If you need to use a different number of points, you must retrain the ShapeVAE model first.
     pc_size: &pc_size 81920
     pc_sharpedge_size: &pc_sharpedge_size 0
     sharpedge_label: &sharpedge_label true
@@ -74,6 +58,11 @@ model:
     first_stage_config:
       target: hy3dshape.models.autoencoders.ShapeVAE
       from_pretrained: tencent/Hunyuan3D-2.1
+      # These values must match the dataset section's pc_size / pc_sharpedge_size.
+      params: 
+        pc_size: 81920              
+        pc_sharpedge_size: 0 
+
 
     cond_stage_config:
       target: hy3dshape.models.conditioner.SingleImageEncoder

From b3dd50ba3736ced7e733aa9b34054a4bf0b42829 Mon Sep 17 00:00:00 2001
From: s572915912 <54531516+s572915912@users.noreply.github.com>
Date: Wed, 6 Aug 2025 01:14:49 +0800
Subject: [PATCH 16/16] Update misc.py

Forward the config's `params` as keyword arguments to `from_pretrained` in instantiate_from_config.
---
 hy3dshape/hy3dshape/utils/misc.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/hy3dshape/hy3dshape/utils/misc.py b/hy3dshape/hy3dshape/utils/misc.py
index 55e1136..f875e6b 100644
--- a/hy3dshape/hy3dshape/utils/misc.py
+++ b/hy3dshape/hy3dshape/utils/misc.py
@@ -49,10 +49,12 @@ def instantiate_from_config(config, **kwargs):
     cls = get_obj_from_str(config["target"])
 
     if config.get("from_pretrained", None):
+        params_kwargs = config.get("params", {})
         return cls.from_pretrained(
                     config["from_pretrained"], 
                     use_safetensors=config.get('use_safetensors', False),
-                    variant=config.get('variant', 'fp16'))
+                    variant=config.get('variant', 'fp16'),
+                    **params_kwargs) 
 
     params = config.get("params", dict())
     # params.update(kwargs)