diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..35410ca
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/Hunyuan3D-2.1.iml b/.idea/Hunyuan3D-2.1.iml
new file mode 100644
index 0000000..41f56a8
--- /dev/null
+++ b/.idea/Hunyuan3D-2.1.iml
@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="jdk" jdkName="hunyuan" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="PyDocumentationSettings">
+    <option name="format" value="GOOGLE" />
+    <option name="myDocStringFormat" value="Google" />
+  </component>
+</module>
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..cfe85d6
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="hunyuan" project-jdk-type="Python SDK" />
+</project>
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..23b18b0
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/Hunyuan3D-2.1.iml" filepath="$PROJECT_DIR$/.idea/Hunyuan3D-2.1.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..35eb1dd
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/grid_neighbor.cpp b/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/grid_neighbor.cpp
index f02bcba..cbe21e1 100644
--- a/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/grid_neighbor.cpp
+++ b/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/grid_neighbor.cpp
@@ -312,7 +312,7 @@ std::vector<std::vector<torch::Tensor>> build_hierarchy(std::vector<torch::Tenso
     std::vector<torch::Tensor> view_layer_normals, int num_level, int resolution)
 {
     if (view_layer_positions.size() != 3 || num_level < 1) {
-        printf("Alert! We require 3 layers and at least 1 level! (%d %d)\n", view_layer_positions.size(), num_level);
+        printf("Alert! We require 3 layers and at least 1 level! (%zu %d)\n", view_layer_positions.size(), num_level);
         return {{},{},{},{}};
     }
 
@@ -394,8 +394,8 @@ std::vector<std::vector<torch::Tensor>> build_hierarchy(std::vector<torch::Tenso
     std::vector<torch::Tensor> grid_evencorners(grids.size());
     std::vector<torch::Tensor> grid_oddcorners(grids.size());
 
-    texture_positions[0] = torch::zeros({seq2pos.size() / 3, 3}, float_options);
-    texture_positions[1] = torch::zeros({seq2pos.size() / 3}, float_options);
+    texture_positions[0] = torch::zeros({static_cast<int64_t>(seq2pos.size() / 3), 3}, float_options);
+    texture_positions[1] = torch::zeros({static_cast<int64_t>(seq2pos.size() / 3)}, float_options);
     float* positions_out_ptr = texture_positions[0].data_ptr<float>();
     memcpy(positions_out_ptr, seq2pos.data(), sizeof(float) * seq2pos.size());
     positions_out_ptr = texture_positions[1].data_ptr<float>();
@@ -404,25 +404,25 @@ std::vector<std::vector<torch::Tensor>> build_hierarchy(std::vector<torch::Tenso
     }
 
     for (int i = 0; i < grids.size(); ++i) {
-        grid_neighbors[i] = torch::zeros({grids[i].seq2grid.size(), 9}, int64_options);
-        long* nptr = grid_neighbors[i].data_ptr<long>();
+        grid_neighbors[i] = torch::zeros({static_cast<int64_t>(grids[i].seq2grid.size()), 9}, int64_options);
+        int64_t* nptr = grid_neighbors[i].data_ptr<int64_t>();
         for (int j = 0; j < grids[i].seq2neighbor.size(); ++j) {
             nptr[j] = grids[i].seq2neighbor[j];
         }
 
-        grid_evencorners[i] = torch::zeros({grids[i].seq2evencorner.size()}, int64_options);
-        grid_oddcorners[i] = torch::zeros({grids[i].seq2oddcorner.size()}, int64_options);
-        long* dptr = grid_evencorners[i].data_ptr<long>();
+        grid_evencorners[i] = torch::zeros({static_cast<int64_t>(grids[i].seq2evencorner.size())}, int64_options);
+        grid_oddcorners[i] = torch::zeros({static_cast<int64_t>(grids[i].seq2oddcorner.size())}, int64_options); // must be allocated before data_ptr() is taken below
+        int64_t* dptr = grid_evencorners[i].data_ptr<int64_t>();
         for (int j = 0; j < grids[i].seq2evencorner.size(); ++j) {
             dptr[j] = grids[i].seq2evencorner[j];
         }
-        dptr = grid_oddcorners[i].data_ptr<long>();
+        dptr = grid_oddcorners[i].data_ptr<int64_t>();
         for (int j = 0; j < grids[i].seq2oddcorner.size(); ++j) {
             dptr[j] = grids[i].seq2oddcorner[j];
         }            
         if (i + 1 < grids.size()) {
-            grid_downsamples[i] = torch::zeros({grids[i].downsample_seq.size()}, int64_options);
-            long* dptr = grid_downsamples[i].data_ptr<long>();
+            grid_downsamples[i] = torch::zeros({static_cast<int64_t>(grids[i].downsample_seq.size())}, int64_options);
+            int64_t* dptr = grid_downsamples[i].data_ptr<int64_t>();
             for (int j = 0; j < grids[i].downsample_seq.size(); ++j) {
                 dptr[j] = grids[i].downsample_seq[j];
             }
@@ -439,7 +439,7 @@ std::vector<std::vector<torch::Tensor>> build_hierarchy_with_feat(
     int num_level, int resolution)
 {
     if (view_layer_positions.size() != 3 || num_level < 1) {
-        printf("Alert! We require 3 layers and at least 1 level! (%d %d)\n", view_layer_positions.size(), num_level);
+        printf("Alert! We require 3 layers and at least 1 level! (%zu %d)\n", view_layer_positions.size(), num_level);
         return {{},{},{},{}};
     }
 
@@ -534,9 +534,9 @@ std::vector<std::vector<torch::Tensor>> build_hierarchy_with_feat(
     std::vector<torch::Tensor> grid_evencorners(grids.size());
     std::vector<torch::Tensor> grid_oddcorners(grids.size());
 
-    texture_positions[0] = torch::zeros({seq2pos.size() / 3, 3}, float_options);
-    texture_positions[1] = torch::zeros({seq2pos.size() / 3}, float_options);
-    texture_feats[0] = torch::zeros({seq2feat.size() / feat_channel, feat_channel}, float_options);
+    texture_positions[0] = torch::zeros({static_cast<int64_t>(seq2pos.size() / 3), 3}, float_options);
+    texture_positions[1] = torch::zeros({static_cast<int64_t>(seq2pos.size() / 3)}, float_options);
+    texture_feats[0] = torch::zeros({static_cast<int64_t>(seq2feat.size() / feat_channel), static_cast<int64_t>(feat_channel)}, float_options);
     float* positions_out_ptr = texture_positions[0].data_ptr<float>();
     memcpy(positions_out_ptr, seq2pos.data(), sizeof(float) * seq2pos.size());
     positions_out_ptr = texture_positions[1].data_ptr<float>();
@@ -547,24 +547,24 @@ std::vector<std::vector<torch::Tensor>> build_hierarchy_with_feat(
     memcpy(feats_out_ptr, seq2feat.data(), sizeof(float) * seq2feat.size());
 
     for (int i = 0; i < grids.size(); ++i) {
-        grid_neighbors[i] = torch::zeros({grids[i].seq2grid.size(), 9}, int64_options);
-        long* nptr = grid_neighbors[i].data_ptr<long>();
+        grid_neighbors[i] = torch::zeros({static_cast<int64_t>(grids[i].seq2grid.size()), 9}, int64_options);
+        int64_t* nptr = grid_neighbors[i].data_ptr<int64_t>();
         for (int j = 0; j < grids[i].seq2neighbor.size(); ++j) {
             nptr[j] = grids[i].seq2neighbor[j];
         }
-        grid_evencorners[i] = torch::zeros({grids[i].seq2evencorner.size()}, int64_options);
-        grid_oddcorners[i] = torch::zeros({grids[i].seq2oddcorner.size()}, int64_options);
-        long* dptr = grid_evencorners[i].data_ptr<long>();
+        grid_evencorners[i] = torch::zeros({static_cast<int64_t>(grids[i].seq2evencorner.size())}, int64_options);
+        grid_oddcorners[i] = torch::zeros({static_cast<int64_t>(grids[i].seq2oddcorner.size())}, int64_options); // must be allocated before data_ptr() is taken below
+        int64_t* dptr = grid_evencorners[i].data_ptr<int64_t>();
         for (int j = 0; j < grids[i].seq2evencorner.size(); ++j) {
             dptr[j] = grids[i].seq2evencorner[j];
         }
-        dptr = grid_oddcorners[i].data_ptr<long>();
+        dptr = grid_oddcorners[i].data_ptr<int64_t>();
         for (int j = 0; j < grids[i].seq2oddcorner.size(); ++j) {
             dptr[j] = grids[i].seq2oddcorner[j];
         }
         if (i + 1 < grids.size()) {
-            grid_downsamples[i] = torch::zeros({grids[i].downsample_seq.size()}, int64_options);
-            long* dptr = grid_downsamples[i].data_ptr<long>();
+            grid_downsamples[i] = torch::zeros({static_cast<int64_t>(grids[i].downsample_seq.size())}, int64_options);
+            int64_t* dptr = grid_downsamples[i].data_ptr<int64_t>();
             for (int j = 0; j < grids[i].downsample_seq.size(); ++j) {
                 dptr[j] = grids[i].downsample_seq[j];
             }
diff --git a/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.cpp b/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.cpp
index b3ff69f..c02f1f9 100644
--- a/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.cpp
+++ b/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.cpp
@@ -1,6 +1,6 @@
 #include "rasterizer.h"
 
-void rasterizeTriangleCPU(int idx, float* vt0, float* vt1, float* vt2, int width, int height, INT64* zbuffer, float* d, float occlusion_truncation) {
+void rasterizeTriangleCPU(int idx, float* vt0, float* vt1, float* vt2, int width, int height, int64_t* zbuffer, float* d, float occlusion_truncation) {
     float x_min = std::min(vt0[0], std::min(vt1[0],vt2[0]));
     float x_max = std::max(vt0[0], std::max(vt1[0],vt2[0]));
     float y_min = std::min(vt0[1], std::min(vt1[1],vt2[1]));
@@ -18,7 +18,7 @@ void rasterizeTriangleCPU(int idx, float* vt0, float* vt1, float* vt2, int width
             if (isBarycentricCoordInBounds(baryCentricCoordinate)) {
                 int pixel = py * width + px;
                 if (zbuffer == 0) {
-                    zbuffer[pixel] = (INT64)(idx + 1);
+                    zbuffer[pixel] = (int64_t)(idx + 1);
                     continue;
                 }
 
@@ -29,7 +29,7 @@ void rasterizeTriangleCPU(int idx, float* vt0, float* vt1, float* vt2, int width
                 }
                 
                 int z_quantize = depth * (2<<17);
-                INT64 token = (INT64)z_quantize * MAXINT + (INT64)(idx + 1);
+                int64_t token = (int64_t)z_quantize * MAXINT + (int64_t)(idx + 1);
                 if (depth < depth_thres)
                     continue;
                 zbuffer[pixel] = std::min(zbuffer[pixel], token);
@@ -38,10 +38,10 @@ void rasterizeTriangleCPU(int idx, float* vt0, float* vt1, float* vt2, int width
     }
 }
 
-void barycentricFromImgcoordCPU(float* V, int* F, int* findices, INT64* zbuffer, int width, int height, int num_vertices, int num_faces,
+void barycentricFromImgcoordCPU(float* V, int* F, int* findices, int64_t* zbuffer, int width, int height, int num_vertices, int num_faces,
     float* barycentric_map, int pix)
 {
-    INT64 f = zbuffer[pix] % MAXINT;
+    int64_t f = zbuffer[pix] % MAXINT;
     if (f == (MAXINT-1)) {
         findices[pix] = 0;
         barycentric_map[pix * 3] = 0;
@@ -78,7 +78,7 @@ void barycentricFromImgcoordCPU(float* V, int* F, int* findices, INT64* zbuffer,
     barycentric_map[pix * 3 + 2] = barycentric[2];
 }
 
-void rasterizeImagecoordsKernelCPU(float* V, int* F, float* d, INT64* zbuffer, float occlusion_trunc, int width, int height, int num_vertices, int num_faces, int f)
+void rasterizeImagecoordsKernelCPU(float* V, int* F, float* d, int64_t* zbuffer, float occlusion_trunc, int width, int height, int num_vertices, int num_faces, int f)
 {
     float* vt0_ptr = V + (F[f * 3] * 4);
     float* vt1_ptr = V + (F[f * 3 + 1] * 4);
@@ -99,25 +99,25 @@ std::vector<torch::Tensor> rasterize_image_cpu(torch::Tensor V, torch::Tensor F,
     auto options = torch::TensorOptions().dtype(torch::kInt32).requires_grad(false);
     auto INT64_options = torch::TensorOptions().dtype(torch::kInt64).requires_grad(false);
     auto findices = torch::zeros({height, width}, options);
-    INT64 maxint = (INT64)MAXINT * (INT64)MAXINT + (MAXINT - 1);
-    auto z_min = torch::ones({height, width}, INT64_options) * (long)maxint;
+    int64_t maxint = (int64_t)MAXINT * (int64_t)MAXINT + (MAXINT - 1);
+    auto z_min = torch::ones({height, width}, INT64_options) * (int64_t)maxint;
 
     if (!use_depth_prior) {
         for (int i = 0; i < num_faces; ++i) {
             rasterizeImagecoordsKernelCPU(V.data_ptr<float>(), F.data_ptr<int>(), 0,
-                (INT64*)z_min.data_ptr<long>(), occlusion_truncation, width, height, num_vertices, num_faces, i); 
+                (int64_t*)z_min.data_ptr<int64_t>(), occlusion_truncation, width, height, num_vertices, num_faces, i); 
         }
     } else {
         for (int i = 0; i < num_faces; ++i)
             rasterizeImagecoordsKernelCPU(V.data_ptr<float>(), F.data_ptr<int>(), D.data_ptr<float>(),
-                (INT64*)z_min.data_ptr<long>(), occlusion_truncation, width, height, num_vertices, num_faces, i);
+                (int64_t*)z_min.data_ptr<int64_t>(), occlusion_truncation, width, height, num_vertices, num_faces, i);
     }
 
     auto float_options = torch::TensorOptions().dtype(torch::kFloat32).requires_grad(false);
     auto barycentric = torch::zeros({height, width, 3}, float_options);
     for (int i = 0; i < width * height; ++i)
         barycentricFromImgcoordCPU(V.data_ptr<float>(), F.data_ptr<int>(),
-            findices.data_ptr<int>(), (INT64*)z_min.data_ptr<long>(), width, height, num_vertices, num_faces, barycentric.data_ptr<float>(), i);
+            findices.data_ptr<int>(), (int64_t*)z_min.data_ptr<int64_t>(), width, height, num_vertices, num_faces, barycentric.data_ptr<float>(), i);
 
     return {findices, barycentric};
 }
diff --git a/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.h b/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.h
index cf4f987..8e198b1 100644
--- a/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.h
+++ b/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.h
@@ -5,8 +5,9 @@
 #include <vector>
 #include <ATen/ATen.h>
 #include <ATen/cuda/CUDAContext.h> // For CUDA context
+#include <cstdint>
 
-#define INT64 unsigned long long
+#define INT64 int64_t
 #define MAXINT 2147483647
 
 __host__ __device__ inline float calculateSignedArea2(float* a, float* b, float* c) {
diff --git a/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer_gpu.cu b/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer_gpu.cu
index 709c1b8..ba6ab91 100644
--- a/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer_gpu.cu
+++ b/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer_gpu.cu
@@ -1,6 +1,6 @@
 #include "rasterizer.h"
 
-__device__ void rasterizeTriangleGPU(int idx, float* vt0, float* vt1, float* vt2, int width, int height, INT64* zbuffer, float* d, float occlusion_truncation) {
+__device__ void rasterizeTriangleGPU(int idx, float* vt0, float* vt1, float* vt2, int width, int height, unsigned long long* zbuffer, float* d, float occlusion_truncation) { // unsigned long long: the only 64-bit unsigned type CUDA atomicMin/atomicExch overload on
     float x_min = std::min(vt0[0], std::min(vt1[0],vt2[0]));
     float x_max = std::max(vt0[0], std::max(vt1[0],vt2[0]));
     float y_min = std::min(vt0[1], std::min(vt1[1],vt2[1]));
@@ -18,7 +18,7 @@ __device__ void rasterizeTriangleGPU(int idx, float* vt0, float* vt1, float* vt2
             if (isBarycentricCoordInBounds(baryCentricCoordinate)) {
                 int pixel = py * width + px;
                 if (zbuffer == 0) {
-                    atomicExch(&zbuffer[pixel], (INT64)(idx + 1));
+                    atomicExch(&zbuffer[pixel], static_cast<unsigned long long>(idx + 1));
                     continue;
                 }
                 float depth = baryCentricCoordinate[0] * vt0[2] + baryCentricCoordinate[1] * vt1[2] + baryCentricCoordinate[2] * vt2[2];
@@ -28,7 +28,7 @@ __device__ void rasterizeTriangleGPU(int idx, float* vt0, float* vt1, float* vt2
                 }
                 
                 int z_quantize = depth * (2<<17);
-                INT64 token = (INT64)z_quantize * MAXINT + (INT64)(idx + 1);
+                unsigned long long token = static_cast<unsigned long long>(z_quantize) * MAXINT + static_cast<unsigned long long>(idx + 1);
                 if (depth < depth_thres)
                     continue;
                 atomicMin(&zbuffer[pixel], token);
@@ -37,13 +37,13 @@ __device__ void rasterizeTriangleGPU(int idx, float* vt0, float* vt1, float* vt2
     }
 }
 
-__global__ void barycentricFromImgcoordGPU(float* V, int* F, int* findices, INT64* zbuffer, int width, int height, int num_vertices, int num_faces,
+__global__ void barycentricFromImgcoordGPU(float* V, int* F, int* findices, unsigned long long* zbuffer, int width, int height, int num_vertices, int num_faces,
     float* barycentric_map)
 {
     int pix = blockIdx.x * blockDim.x + threadIdx.x;
     if (pix >= width * height)
         return;
-    INT64 f = zbuffer[pix] % MAXINT;
+    unsigned long long f = zbuffer[pix] % MAXINT;
     if (f == (MAXINT-1)) {
         findices[pix] = 0;
         barycentric_map[pix * 3] = 0;
@@ -80,7 +80,7 @@ __global__ void barycentricFromImgcoordGPU(float* V, int* F, int* findices, INT6
     barycentric_map[pix * 3 + 2] = barycentric[2];
 }
 
-__global__ void rasterizeImagecoordsKernelGPU(float* V, int* F, float* d, INT64* zbuffer, float occlusion_trunc, int width, int height, int num_vertices, int num_faces)
+__global__ void rasterizeImagecoordsKernelGPU(float* V, int* F, float* d, unsigned long long* zbuffer, float occlusion_trunc, int width, int height, int num_vertices, int num_faces)
 {
     int f = blockIdx.x * blockDim.x + threadIdx.x;
     if (f >= num_faces)
@@ -107,21 +107,21 @@ std::vector<torch::Tensor> rasterize_image_gpu(torch::Tensor V, torch::Tensor F,
     auto options = torch::TensorOptions().dtype(torch::kInt32).device(torch::kCUDA, device_id).requires_grad(false);
     auto INT64_options = torch::TensorOptions().dtype(torch::kInt64).device(torch::kCUDA, device_id).requires_grad(false);
     auto findices = torch::zeros({height, width}, options);
-    INT64 maxint = (INT64)MAXINT * (INT64)MAXINT + (MAXINT - 1);
-    auto z_min = torch::ones({height, width}, INT64_options) * (long)maxint;
+    unsigned long long maxint = static_cast<unsigned long long>(MAXINT) * static_cast<unsigned long long>(MAXINT) + (MAXINT - 1);
+    auto z_min = torch::ones({height, width}, INT64_options) * static_cast<int64_t>(maxint); // z_min is kInt64, so scale by a signed 64-bit scalar (value fits in int64_t)
 
     if (!use_depth_prior) {
         rasterizeImagecoordsKernelGPU<<<(num_faces+255)/256,256,0,at::cuda::getCurrentCUDAStream()>>>(V.data_ptr<float>(), F.data_ptr<int>(), 0,
-            (INT64*)z_min.data_ptr<long>(), occlusion_truncation, width, height, num_vertices, num_faces); 
+            reinterpret_cast<unsigned long long*>(z_min.data_ptr<int64_t>()), occlusion_truncation, width, height, num_vertices, num_faces); 
     } else {
         rasterizeImagecoordsKernelGPU<<<(num_faces+255)/256,256,0,at::cuda::getCurrentCUDAStream()>>>(V.data_ptr<float>(), F.data_ptr<int>(), D.data_ptr<float>(),
-            (INT64*)z_min.data_ptr<long>(), occlusion_truncation, width, height, num_vertices, num_faces); 
+            reinterpret_cast<unsigned long long*>(z_min.data_ptr<int64_t>()), occlusion_truncation, width, height, num_vertices, num_faces); 
     }
 
     auto float_options = torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCUDA, device_id).requires_grad(false);
     auto barycentric = torch::zeros({height, width, 3}, float_options);
     barycentricFromImgcoordGPU<<<(width * height + 255)/256, 256>>>(V.data_ptr<float>(), F.data_ptr<int>(),
-        findices.data_ptr<int>(), (INT64*)z_min.data_ptr<long>(), width, height, num_vertices, num_faces, barycentric.data_ptr<float>());
+        findices.data_ptr<int>(), reinterpret_cast<unsigned long long*>(z_min.data_ptr<int64_t>()), width, height, num_vertices, num_faces, barycentric.data_ptr<float>()); // data_ptr<T>() must match the tensor dtype (kInt64); reinterpret only for the kernel argument
 
     return {findices, barycentric};
 }