修复在windows系统中安装custom_rastorizer报错

2025-07-10 17:22:47 +08:00
parent 81b4d9f638
commit 7facf4b459
10 changed files with 89 additions and 46 deletions
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# 默认忽略的文件
+/shelf/
+/workspace.xml
+# 基于编辑器的 HTTP 客户端请求
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
--- a/.idea/Hunyuan3D-2.1.iml
+++ b/.idea/Hunyuan3D-2.1.iml
@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="jdk" jdkName="hunyuan" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="PyDocumentationSettings">
+    <option name="format" value="GOOGLE" />
+    <option name="myDocStringFormat" value="Google" />
+  </component>
+</module>
--- a/.idea/inspectionProfiles/profiles_settings.xml
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="hunyuan" project-jdk-type="Python SDK" />
+</project>
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/Hunyuan3D-2.1.iml" filepath="$PROJECT_DIR$/.idea/Hunyuan3D-2.1.iml" />
+    </modules>
+  </component>
+</project>
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>
--- a/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/grid_neighbor.cpp
+++ b/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/grid_neighbor.cpp
@@ -312,7 +312,7 @@ std::vector<std::vector<torch::Tensor>> build_hierarchy(std::vector<torch::Tenso
    std::vector<torch::Tensor> view_layer_normals, int num_level, int resolution)
 {
    if (view_layer_positions.size() != 3 || num_level < 1) {
-        printf("Alert! We require 3 layers and at least 1 level! (%d %d)\n", view_layer_positions.size(), num_level);
+        printf("Alert! We require 3 layers and at least 1 level! (%zu %d)\n", view_layer_positions.size(), num_level);
        return {{},{},{},{}};
    }

@@ -394,8 +394,8 @@ std::vector<std::vector<torch::Tensor>> build_hierarchy(std::vector<torch::Tenso
    std::vector<torch::Tensor> grid_evencorners(grids.size());
    std::vector<torch::Tensor> grid_oddcorners(grids.size());

-    texture_positions[0] = torch::zeros({seq2pos.size() / 3, 3}, float_options);
-    texture_positions[1] = torch::zeros({seq2pos.size() / 3}, float_options);
+    texture_positions[0] = torch::zeros({static_cast<int64_t>(seq2pos.size() / 3), 3}, float_options);
+    texture_positions[1] = torch::zeros({static_cast<int64_t>(seq2pos.size() / 3)}, float_options);
    float* positions_out_ptr = texture_positions[0].data_ptr<float>();
    memcpy(positions_out_ptr, seq2pos.data(), sizeof(float) * seq2pos.size());
    positions_out_ptr = texture_positions[1].data_ptr<float>();
@@ -404,25 +404,24 @@ std::vector<std::vector<torch::Tensor>> build_hierarchy(std::vector<torch::Tenso
    }

    for (int i = 0; i < grids.size(); ++i) {
-        grid_neighbors[i] = torch::zeros({grids[i].seq2grid.size(), 9}, int64_options);
-        long* nptr = grid_neighbors[i].data_ptr<long>();
+        grid_neighbors[i] = torch::zeros({static_cast<int64_t>(grids[i].seq2grid.size()), 9}, int64_options);
+        int64_t* nptr = grid_neighbors[i].data_ptr<int64_t>();
        for (int j = 0; j < grids[i].seq2neighbor.size(); ++j) {
            nptr[j] = grids[i].seq2neighbor[j];
        }

-        grid_evencorners[i] = torch::zeros({grids[i].seq2evencorner.size()}, int64_options);
-        grid_oddcorners[i] = torch::zeros({grids[i].seq2oddcorner.size()}, int64_options);
-        long* dptr = grid_evencorners[i].data_ptr<long>();
+        grid_evencorners[i] = torch::zeros({static_cast<int64_t>(grids[i].seq2evencorner.size())}, int64_options);
+        int64_t* dptr = grid_evencorners[i].data_ptr<int64_t>();
        for (int j = 0; j < grids[i].seq2evencorner.size(); ++j) {
            dptr[j] = grids[i].seq2evencorner[j];
        }
-        dptr = grid_oddcorners[i].data_ptr<long>();
+        dptr = grid_oddcorners[i].data_ptr<int64_t>();
        for (int j = 0; j < grids[i].seq2oddcorner.size(); ++j) {
            dptr[j] = grids[i].seq2oddcorner[j];
        }            
        if (i + 1 < grids.size()) {
-            grid_downsamples[i] = torch::zeros({grids[i].downsample_seq.size()}, int64_options);
-            long* dptr = grid_downsamples[i].data_ptr<long>();
+            grid_downsamples[i] = torch::zeros({static_cast<int64_t>(grids[i].downsample_seq.size())}, int64_options);
+            int64_t* dptr = grid_downsamples[i].data_ptr<int64_t>();
            for (int j = 0; j < grids[i].downsample_seq.size(); ++j) {
                dptr[j] = grids[i].downsample_seq[j];
            }
@@ -439,7 +438,7 @@ std::vector<std::vector<torch::Tensor>> build_hierarchy_with_feat(
    int num_level, int resolution)
 {
    if (view_layer_positions.size() != 3 || num_level < 1) {
-        printf("Alert! We require 3 layers and at least 1 level! (%d %d)\n", view_layer_positions.size(), num_level);
+        printf("Alert! We require 3 layers and at least 1 level! (%zu %d)\n", view_layer_positions.size(), num_level);
        return {{},{},{},{}};
    }

@@ -534,9 +533,9 @@ std::vector<std::vector<torch::Tensor>> build_hierarchy_with_feat(
    std::vector<torch::Tensor> grid_evencorners(grids.size());
    std::vector<torch::Tensor> grid_oddcorners(grids.size());

-    texture_positions[0] = torch::zeros({seq2pos.size() / 3, 3}, float_options);
-    texture_positions[1] = torch::zeros({seq2pos.size() / 3}, float_options);
-    texture_feats[0] = torch::zeros({seq2feat.size() / feat_channel, feat_channel}, float_options);
+    texture_positions[0] = torch::zeros({static_cast<int64_t>(seq2pos.size() / 3), 3}, float_options);
+    texture_positions[1] = torch::zeros({static_cast<int64_t>(seq2pos.size() / 3)}, float_options);
+    texture_feats[0] = torch::zeros({static_cast<int64_t>(seq2feat.size() / feat_channel), static_cast<int64_t>(feat_channel)}, float_options);
    float* positions_out_ptr = texture_positions[0].data_ptr<float>();
    memcpy(positions_out_ptr, seq2pos.data(), sizeof(float) * seq2pos.size());
    positions_out_ptr = texture_positions[1].data_ptr<float>();
@@ -547,24 +546,23 @@ std::vector<std::vector<torch::Tensor>> build_hierarchy_with_feat(
    memcpy(feats_out_ptr, seq2feat.data(), sizeof(float) * seq2feat.size());

    for (int i = 0; i < grids.size(); ++i) {
-        grid_neighbors[i] = torch::zeros({grids[i].seq2grid.size(), 9}, int64_options);
-        long* nptr = grid_neighbors[i].data_ptr<long>();
+        grid_neighbors[i] = torch::zeros({static_cast<int64_t>(grids[i].seq2grid.size()), 9}, int64_options);
+        int64_t* nptr = grid_neighbors[i].data_ptr<int64_t>();
        for (int j = 0; j < grids[i].seq2neighbor.size(); ++j) {
            nptr[j] = grids[i].seq2neighbor[j];
        }
-        grid_evencorners[i] = torch::zeros({grids[i].seq2evencorner.size()}, int64_options);
-        grid_oddcorners[i] = torch::zeros({grids[i].seq2oddcorner.size()}, int64_options);
-        long* dptr = grid_evencorners[i].data_ptr<long>();
+        grid_evencorners[i] = torch::zeros({static_cast<int64_t>(grids[i].seq2evencorner.size())}, int64_options);
+        int64_t* dptr = grid_evencorners[i].data_ptr<int64_t>();
        for (int j = 0; j < grids[i].seq2evencorner.size(); ++j) {
            dptr[j] = grids[i].seq2evencorner[j];
        }
-        dptr = grid_oddcorners[i].data_ptr<long>();
+        dptr = grid_oddcorners[i].data_ptr<int64_t>();
        for (int j = 0; j < grids[i].seq2oddcorner.size(); ++j) {
            dptr[j] = grids[i].seq2oddcorner[j];
        }
        if (i + 1 < grids.size()) {
-            grid_downsamples[i] = torch::zeros({grids[i].downsample_seq.size()}, int64_options);
-            long* dptr = grid_downsamples[i].data_ptr<long>();
+            grid_downsamples[i] = torch::zeros({static_cast<int64_t>(grids[i].downsample_seq.size())}, int64_options);
+            int64_t* dptr = grid_downsamples[i].data_ptr<int64_t>();
            for (int j = 0; j < grids[i].downsample_seq.size(); ++j) {
                dptr[j] = grids[i].downsample_seq[j];
            }
--- a/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.cpp
+++ b/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.cpp
@@ -1,6 +1,6 @@
 #include "rasterizer.h"

-void rasterizeTriangleCPU(int idx, float* vt0, float* vt1, float* vt2, int width, int height, INT64* zbuffer, float* d, float occlusion_truncation) {
+void rasterizeTriangleCPU(int idx, float* vt0, float* vt1, float* vt2, int width, int height, int64_t* zbuffer, float* d, float occlusion_truncation) {
    float x_min = std::min(vt0[0], std::min(vt1[0],vt2[0]));
    float x_max = std::max(vt0[0], std::max(vt1[0],vt2[0]));
    float y_min = std::min(vt0[1], std::min(vt1[1],vt2[1]));
@@ -18,7 +18,7 @@ void rasterizeTriangleCPU(int idx, float* vt0, float* vt1, float* vt2, int width
            if (isBarycentricCoordInBounds(baryCentricCoordinate)) {
                int pixel = py * width + px;
                if (zbuffer == 0) {
-                    zbuffer[pixel] = (INT64)(idx + 1);
+                    zbuffer[pixel] = (int64_t)(idx + 1);
                    continue;
                }

@@ -29,7 +29,7 @@ void rasterizeTriangleCPU(int idx, float* vt0, float* vt1, float* vt2, int width
                }
                
                int z_quantize = depth * (2<<17);
-                INT64 token = (INT64)z_quantize * MAXINT + (INT64)(idx + 1);
+                int64_t token = (int64_t)z_quantize * MAXINT + (int64_t)(idx + 1);
                if (depth < depth_thres)
                    continue;
                zbuffer[pixel] = std::min(zbuffer[pixel], token);
@@ -38,10 +38,10 @@ void rasterizeTriangleCPU(int idx, float* vt0, float* vt1, float* vt2, int width
    }
 }

-void barycentricFromImgcoordCPU(float* V, int* F, int* findices, INT64* zbuffer, int width, int height, int num_vertices, int num_faces,
+void barycentricFromImgcoordCPU(float* V, int* F, int* findices, int64_t* zbuffer, int width, int height, int num_vertices, int num_faces,
    float* barycentric_map, int pix)
 {
-    INT64 f = zbuffer[pix] % MAXINT;
+    int64_t f = zbuffer[pix] % MAXINT;
    if (f == (MAXINT-1)) {
        findices[pix] = 0;
        barycentric_map[pix * 3] = 0;
@@ -78,7 +78,7 @@ void barycentricFromImgcoordCPU(float* V, int* F, int* findices, INT64* zbuffer,
    barycentric_map[pix * 3 + 2] = barycentric[2];
 }

-void rasterizeImagecoordsKernelCPU(float* V, int* F, float* d, INT64* zbuffer, float occlusion_trunc, int width, int height, int num_vertices, int num_faces, int f)
+void rasterizeImagecoordsKernelCPU(float* V, int* F, float* d, int64_t* zbuffer, float occlusion_trunc, int width, int height, int num_vertices, int num_faces, int f)
 {
    float* vt0_ptr = V + (F[f * 3] * 4);
    float* vt1_ptr = V + (F[f * 3 + 1] * 4);
@@ -99,25 +99,25 @@ std::vector<torch::Tensor> rasterize_image_cpu(torch::Tensor V, torch::Tensor F,
    auto options = torch::TensorOptions().dtype(torch::kInt32).requires_grad(false);
    auto INT64_options = torch::TensorOptions().dtype(torch::kInt64).requires_grad(false);
    auto findices = torch::zeros({height, width}, options);
-    INT64 maxint = (INT64)MAXINT * (INT64)MAXINT + (MAXINT - 1);
-    auto z_min = torch::ones({height, width}, INT64_options) * (long)maxint;
+    int64_t maxint = (int64_t)MAXINT * (int64_t)MAXINT + (MAXINT - 1);
+    auto z_min = torch::ones({height, width}, INT64_options) * (int64_t)maxint;

    if (!use_depth_prior) {
        for (int i = 0; i < num_faces; ++i) {
            rasterizeImagecoordsKernelCPU(V.data_ptr<float>(), F.data_ptr<int>(), 0,
-                (INT64*)z_min.data_ptr<long>(), occlusion_truncation, width, height, num_vertices, num_faces, i); 
+                (int64_t*)z_min.data_ptr<int64_t>(), occlusion_truncation, width, height, num_vertices, num_faces, i); 
        }
    } else {
        for (int i = 0; i < num_faces; ++i)
            rasterizeImagecoordsKernelCPU(V.data_ptr<float>(), F.data_ptr<int>(), D.data_ptr<float>(),
-                (INT64*)z_min.data_ptr<long>(), occlusion_truncation, width, height, num_vertices, num_faces, i);
+                (int64_t*)z_min.data_ptr<int64_t>(), occlusion_truncation, width, height, num_vertices, num_faces, i);
    }

    auto float_options = torch::TensorOptions().dtype(torch::kFloat32).requires_grad(false);
    auto barycentric = torch::zeros({height, width, 3}, float_options);
    for (int i = 0; i < width * height; ++i)
        barycentricFromImgcoordCPU(V.data_ptr<float>(), F.data_ptr<int>(),
-            findices.data_ptr<int>(), (INT64*)z_min.data_ptr<long>(), width, height, num_vertices, num_faces, barycentric.data_ptr<float>(), i);
+            findices.data_ptr<int>(), (int64_t*)z_min.data_ptr<int64_t>(), width, height, num_vertices, num_faces, barycentric.data_ptr<float>(), i);

    return {findices, barycentric};
 }
--- a/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.h
+++ b/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer.h
@@ -5,8 +5,9 @@
 #include <vector>
 #include <ATen/ATen.h>
 #include <ATen/cuda/CUDAContext.h> // For CUDA context
+#include <cstdint>

-#define INT64 unsigned long long
+#define INT64 int64_t
 #define MAXINT 2147483647

 __host__ __device__ inline float calculateSignedArea2(float* a, float* b, float* c) {
--- a/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer_gpu.cu
+++ b/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel/rasterizer_gpu.cu
@@ -1,6 +1,6 @@
 #include "rasterizer.h"

-__device__ void rasterizeTriangleGPU(int idx, float* vt0, float* vt1, float* vt2, int width, int height, INT64* zbuffer, float* d, float occlusion_truncation) {
+__device__ void rasterizeTriangleGPU(int idx, float* vt0, float* vt1, float* vt2, int width, int height, uint64_t* zbuffer, float* d, float occlusion_truncation) {
    float x_min = std::min(vt0[0], std::min(vt1[0],vt2[0]));
    float x_max = std::max(vt0[0], std::max(vt1[0],vt2[0]));
    float y_min = std::min(vt0[1], std::min(vt1[1],vt2[1]));
@@ -18,7 +18,7 @@ __device__ void rasterizeTriangleGPU(int idx, float* vt0, float* vt1, float* vt2
            if (isBarycentricCoordInBounds(baryCentricCoordinate)) {
                int pixel = py * width + px;
                if (zbuffer == 0) {
-                    atomicExch(&zbuffer[pixel], (INT64)(idx + 1));
+                    atomicExch(&zbuffer[pixel], (uint64_t)(idx + 1));
                    continue;
                }
                float depth = baryCentricCoordinate[0] * vt0[2] + baryCentricCoordinate[1] * vt1[2] + baryCentricCoordinate[2] * vt2[2];
@@ -28,7 +28,7 @@ __device__ void rasterizeTriangleGPU(int idx, float* vt0, float* vt1, float* vt2
                }
                
                int z_quantize = depth * (2<<17);
-                INT64 token = (INT64)z_quantize * MAXINT + (INT64)(idx + 1);
+                uint64_t token = (uint64_t)z_quantize * MAXINT + (uint64_t)(idx + 1);
                if (depth < depth_thres)
                    continue;
                atomicMin(&zbuffer[pixel], token);
@@ -37,13 +37,13 @@ __device__ void rasterizeTriangleGPU(int idx, float* vt0, float* vt1, float* vt2
    }
 }

-__global__ void barycentricFromImgcoordGPU(float* V, int* F, int* findices, INT64* zbuffer, int width, int height, int num_vertices, int num_faces,
+__global__ void barycentricFromImgcoordGPU(float* V, int* F, int* findices, uint64_t* zbuffer, int width, int height, int num_vertices, int num_faces,
    float* barycentric_map)
 {
    int pix = blockIdx.x * blockDim.x + threadIdx.x;
    if (pix >= width * height)
        return;
-    INT64 f = zbuffer[pix] % MAXINT;
+    uint64_t f = zbuffer[pix] % MAXINT;
    if (f == (MAXINT-1)) {
        findices[pix] = 0;
        barycentric_map[pix * 3] = 0;
@@ -80,7 +80,7 @@ __global__ void barycentricFromImgcoordGPU(float* V, int* F, int* findices, INT6
    barycentric_map[pix * 3 + 2] = barycentric[2];
 }

-__global__ void rasterizeImagecoordsKernelGPU(float* V, int* F, float* d, INT64* zbuffer, float occlusion_trunc, int width, int height, int num_vertices, int num_faces)
+__global__ void rasterizeImagecoordsKernelGPU(float* V, int* F, float* d, uint64_t* zbuffer, float occlusion_trunc, int width, int height, int num_vertices, int num_faces)
 {
    int f = blockIdx.x * blockDim.x + threadIdx.x;
    if (f >= num_faces)
@@ -107,21 +107,21 @@ std::vector<torch::Tensor> rasterize_image_gpu(torch::Tensor V, torch::Tensor F,
    auto options = torch::TensorOptions().dtype(torch::kInt32).device(torch::kCUDA, device_id).requires_grad(false);
    auto INT64_options = torch::TensorOptions().dtype(torch::kInt64).device(torch::kCUDA, device_id).requires_grad(false);
    auto findices = torch::zeros({height, width}, options);
-    INT64 maxint = (INT64)MAXINT * (INT64)MAXINT + (MAXINT - 1);
-    auto z_min = torch::ones({height, width}, INT64_options) * (long)maxint;
+    uint64_t maxint = (uint64_t)MAXINT * (uint64_t)MAXINT + (MAXINT - 1);
+    auto z_min = torch::ones({height, width}, INT64_options) * (uint64_t)maxint;

    if (!use_depth_prior) {
        rasterizeImagecoordsKernelGPU<<<(num_faces+255)/256,256,0,at::cuda::getCurrentCUDAStream()>>>(V.data_ptr<float>(), F.data_ptr<int>(), 0,
-            (INT64*)z_min.data_ptr<long>(), occlusion_truncation, width, height, num_vertices, num_faces); 
+            (uint64_t*)z_min.data_ptr<uint64_t>(), occlusion_truncation, width, height, num_vertices, num_faces); 
    } else {
        rasterizeImagecoordsKernelGPU<<<(num_faces+255)/256,256,0,at::cuda::getCurrentCUDAStream()>>>(V.data_ptr<float>(), F.data_ptr<int>(), D.data_ptr<float>(),
-            (INT64*)z_min.data_ptr<long>(), occlusion_truncation, width, height, num_vertices, num_faces); 
+            (uint64_t*)z_min.data_ptr<uint64_t>(), occlusion_truncation, width, height, num_vertices, num_faces); 
    }

    auto float_options = torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCUDA, device_id).requires_grad(false);
    auto barycentric = torch::zeros({height, width, 3}, float_options);
    barycentricFromImgcoordGPU<<<(width * height + 255)/256, 256>>>(V.data_ptr<float>(), F.data_ptr<int>(),
-        findices.data_ptr<int>(), (INT64*)z_min.data_ptr<long>(), width, height, num_vertices, num_faces, barycentric.data_ptr<float>());
+        findices.data_ptr<int>(), (uint64_t*)z_min.data_ptr<uint64_t>(), width, height, num_vertices, num_faces, barycentric.data_ptr<float>());

    return {findices, barycentric};
 }