修复在 Windows 系统中安装 custom_rasterizer 报错

This commit is contained in:
qinmaohui
2025-07-10 17:22:47 +08:00
parent 81b4d9f638
commit 7facf4b459
10 changed files with 89 additions and 46 deletions

8
.idea/.gitignore generated vendored Normal file
View File

@@ -0,0 +1,8 @@
# 默认忽略的文件
/shelf/
/workspace.xml
# 基于编辑器的 HTTP 客户端请求
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

12
.idea/Hunyuan3D-2.1.iml generated Normal file
View File

@@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="hunyuan" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyDocumentationSettings">
<option name="format" value="GOOGLE" />
<option name="myDocStringFormat" value="Google" />
</component>
</module>

View File

@@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

4
.idea/misc.xml generated Normal file
View File

@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="hunyuan" project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml generated Normal file
View File

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/Hunyuan3D-2.1.iml" filepath="$PROJECT_DIR$/.idea/Hunyuan3D-2.1.iml" />
</modules>
</component>
</project>

6
.idea/vcs.xml generated Normal file
View File

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>

View File

@@ -312,7 +312,7 @@ std::vector<std::vector<torch::Tensor>> build_hierarchy(std::vector<torch::Tenso
std::vector<torch::Tensor> view_layer_normals, int num_level, int resolution)
{
if (view_layer_positions.size() != 3 || num_level < 1) {
printf("Alert! We require 3 layers and at least 1 level! (%d %d)\n", view_layer_positions.size(), num_level);
printf("Alert! We require 3 layers and at least 1 level! (%zu %d)\n", view_layer_positions.size(), num_level);
return {{},{},{},{}};
}
@@ -394,8 +394,8 @@ std::vector<std::vector<torch::Tensor>> build_hierarchy(std::vector<torch::Tenso
std::vector<torch::Tensor> grid_evencorners(grids.size());
std::vector<torch::Tensor> grid_oddcorners(grids.size());
texture_positions[0] = torch::zeros({seq2pos.size() / 3, 3}, float_options);
texture_positions[1] = torch::zeros({seq2pos.size() / 3}, float_options);
texture_positions[0] = torch::zeros({static_cast<int64_t>(seq2pos.size() / 3), 3}, float_options);
texture_positions[1] = torch::zeros({static_cast<int64_t>(seq2pos.size() / 3)}, float_options);
float* positions_out_ptr = texture_positions[0].data_ptr<float>();
memcpy(positions_out_ptr, seq2pos.data(), sizeof(float) * seq2pos.size());
positions_out_ptr = texture_positions[1].data_ptr<float>();
@@ -404,25 +404,24 @@ std::vector<std::vector<torch::Tensor>> build_hierarchy(std::vector<torch::Tenso
}
for (int i = 0; i < grids.size(); ++i) {
grid_neighbors[i] = torch::zeros({grids[i].seq2grid.size(), 9}, int64_options);
long* nptr = grid_neighbors[i].data_ptr<long>();
grid_neighbors[i] = torch::zeros({static_cast<int64_t>(grids[i].seq2grid.size()), 9}, int64_options);
int64_t* nptr = grid_neighbors[i].data_ptr<int64_t>();
for (int j = 0; j < grids[i].seq2neighbor.size(); ++j) {
nptr[j] = grids[i].seq2neighbor[j];
}
grid_evencorners[i] = torch::zeros({grids[i].seq2evencorner.size()}, int64_options);
grid_oddcorners[i] = torch::zeros({grids[i].seq2oddcorner.size()}, int64_options);
long* dptr = grid_evencorners[i].data_ptr<long>();
grid_evencorners[i] = torch::zeros({static_cast<int64_t>(grids[i].seq2evencorner.size())}, int64_options);
int64_t* dptr = grid_evencorners[i].data_ptr<int64_t>();
for (int j = 0; j < grids[i].seq2evencorner.size(); ++j) {
dptr[j] = grids[i].seq2evencorner[j];
}
dptr = grid_oddcorners[i].data_ptr<long>();
dptr = grid_oddcorners[i].data_ptr<int64_t>();
for (int j = 0; j < grids[i].seq2oddcorner.size(); ++j) {
dptr[j] = grids[i].seq2oddcorner[j];
}
if (i + 1 < grids.size()) {
grid_downsamples[i] = torch::zeros({grids[i].downsample_seq.size()}, int64_options);
long* dptr = grid_downsamples[i].data_ptr<long>();
grid_downsamples[i] = torch::zeros({static_cast<int64_t>(grids[i].downsample_seq.size())}, int64_options);
int64_t* dptr = grid_downsamples[i].data_ptr<int64_t>();
for (int j = 0; j < grids[i].downsample_seq.size(); ++j) {
dptr[j] = grids[i].downsample_seq[j];
}
@@ -439,7 +438,7 @@ std::vector<std::vector<torch::Tensor>> build_hierarchy_with_feat(
int num_level, int resolution)
{
if (view_layer_positions.size() != 3 || num_level < 1) {
printf("Alert! We require 3 layers and at least 1 level! (%d %d)\n", view_layer_positions.size(), num_level);
printf("Alert! We require 3 layers and at least 1 level! (%zu %d)\n", view_layer_positions.size(), num_level);
return {{},{},{},{}};
}
@@ -534,9 +533,9 @@ std::vector<std::vector<torch::Tensor>> build_hierarchy_with_feat(
std::vector<torch::Tensor> grid_evencorners(grids.size());
std::vector<torch::Tensor> grid_oddcorners(grids.size());
texture_positions[0] = torch::zeros({seq2pos.size() / 3, 3}, float_options);
texture_positions[1] = torch::zeros({seq2pos.size() / 3}, float_options);
texture_feats[0] = torch::zeros({seq2feat.size() / feat_channel, feat_channel}, float_options);
texture_positions[0] = torch::zeros({static_cast<int64_t>(seq2pos.size() / 3), 3}, float_options);
texture_positions[1] = torch::zeros({static_cast<int64_t>(seq2pos.size() / 3)}, float_options);
texture_feats[0] = torch::zeros({static_cast<int64_t>(seq2feat.size() / feat_channel), static_cast<int64_t>(feat_channel)}, float_options);
float* positions_out_ptr = texture_positions[0].data_ptr<float>();
memcpy(positions_out_ptr, seq2pos.data(), sizeof(float) * seq2pos.size());
positions_out_ptr = texture_positions[1].data_ptr<float>();
@@ -547,24 +546,23 @@ std::vector<std::vector<torch::Tensor>> build_hierarchy_with_feat(
memcpy(feats_out_ptr, seq2feat.data(), sizeof(float) * seq2feat.size());
for (int i = 0; i < grids.size(); ++i) {
grid_neighbors[i] = torch::zeros({grids[i].seq2grid.size(), 9}, int64_options);
long* nptr = grid_neighbors[i].data_ptr<long>();
grid_neighbors[i] = torch::zeros({static_cast<int64_t>(grids[i].seq2grid.size()), 9}, int64_options);
int64_t* nptr = grid_neighbors[i].data_ptr<int64_t>();
for (int j = 0; j < grids[i].seq2neighbor.size(); ++j) {
nptr[j] = grids[i].seq2neighbor[j];
}
grid_evencorners[i] = torch::zeros({grids[i].seq2evencorner.size()}, int64_options);
grid_oddcorners[i] = torch::zeros({grids[i].seq2oddcorner.size()}, int64_options);
long* dptr = grid_evencorners[i].data_ptr<long>();
grid_evencorners[i] = torch::zeros({static_cast<int64_t>(grids[i].seq2evencorner.size())}, int64_options);
int64_t* dptr = grid_evencorners[i].data_ptr<int64_t>();
for (int j = 0; j < grids[i].seq2evencorner.size(); ++j) {
dptr[j] = grids[i].seq2evencorner[j];
}
dptr = grid_oddcorners[i].data_ptr<long>();
dptr = grid_oddcorners[i].data_ptr<int64_t>();
for (int j = 0; j < grids[i].seq2oddcorner.size(); ++j) {
dptr[j] = grids[i].seq2oddcorner[j];
}
if (i + 1 < grids.size()) {
grid_downsamples[i] = torch::zeros({grids[i].downsample_seq.size()}, int64_options);
long* dptr = grid_downsamples[i].data_ptr<long>();
grid_downsamples[i] = torch::zeros({static_cast<int64_t>(grids[i].downsample_seq.size())}, int64_options);
int64_t* dptr = grid_downsamples[i].data_ptr<int64_t>();
for (int j = 0; j < grids[i].downsample_seq.size(); ++j) {
dptr[j] = grids[i].downsample_seq[j];
}

View File

@@ -1,6 +1,6 @@
#include "rasterizer.h"
void rasterizeTriangleCPU(int idx, float* vt0, float* vt1, float* vt2, int width, int height, INT64* zbuffer, float* d, float occlusion_truncation) {
void rasterizeTriangleCPU(int idx, float* vt0, float* vt1, float* vt2, int width, int height, int64_t* zbuffer, float* d, float occlusion_truncation) {
float x_min = std::min(vt0[0], std::min(vt1[0],vt2[0]));
float x_max = std::max(vt0[0], std::max(vt1[0],vt2[0]));
float y_min = std::min(vt0[1], std::min(vt1[1],vt2[1]));
@@ -18,7 +18,7 @@ void rasterizeTriangleCPU(int idx, float* vt0, float* vt1, float* vt2, int width
if (isBarycentricCoordInBounds(baryCentricCoordinate)) {
int pixel = py * width + px;
if (zbuffer == 0) {
zbuffer[pixel] = (INT64)(idx + 1);
zbuffer[pixel] = (int64_t)(idx + 1);
continue;
}
@@ -29,7 +29,7 @@ void rasterizeTriangleCPU(int idx, float* vt0, float* vt1, float* vt2, int width
}
int z_quantize = depth * (2<<17);
INT64 token = (INT64)z_quantize * MAXINT + (INT64)(idx + 1);
int64_t token = (int64_t)z_quantize * MAXINT + (int64_t)(idx + 1);
if (depth < depth_thres)
continue;
zbuffer[pixel] = std::min(zbuffer[pixel], token);
@@ -38,10 +38,10 @@ void rasterizeTriangleCPU(int idx, float* vt0, float* vt1, float* vt2, int width
}
}
void barycentricFromImgcoordCPU(float* V, int* F, int* findices, INT64* zbuffer, int width, int height, int num_vertices, int num_faces,
void barycentricFromImgcoordCPU(float* V, int* F, int* findices, int64_t* zbuffer, int width, int height, int num_vertices, int num_faces,
float* barycentric_map, int pix)
{
INT64 f = zbuffer[pix] % MAXINT;
int64_t f = zbuffer[pix] % MAXINT;
if (f == (MAXINT-1)) {
findices[pix] = 0;
barycentric_map[pix * 3] = 0;
@@ -78,7 +78,7 @@ void barycentricFromImgcoordCPU(float* V, int* F, int* findices, INT64* zbuffer,
barycentric_map[pix * 3 + 2] = barycentric[2];
}
void rasterizeImagecoordsKernelCPU(float* V, int* F, float* d, INT64* zbuffer, float occlusion_trunc, int width, int height, int num_vertices, int num_faces, int f)
void rasterizeImagecoordsKernelCPU(float* V, int* F, float* d, int64_t* zbuffer, float occlusion_trunc, int width, int height, int num_vertices, int num_faces, int f)
{
float* vt0_ptr = V + (F[f * 3] * 4);
float* vt1_ptr = V + (F[f * 3 + 1] * 4);
@@ -99,25 +99,25 @@ std::vector<torch::Tensor> rasterize_image_cpu(torch::Tensor V, torch::Tensor F,
auto options = torch::TensorOptions().dtype(torch::kInt32).requires_grad(false);
auto INT64_options = torch::TensorOptions().dtype(torch::kInt64).requires_grad(false);
auto findices = torch::zeros({height, width}, options);
INT64 maxint = (INT64)MAXINT * (INT64)MAXINT + (MAXINT - 1);
auto z_min = torch::ones({height, width}, INT64_options) * (long)maxint;
int64_t maxint = (int64_t)MAXINT * (int64_t)MAXINT + (MAXINT - 1);
auto z_min = torch::ones({height, width}, INT64_options) * (int64_t)maxint;
if (!use_depth_prior) {
for (int i = 0; i < num_faces; ++i) {
rasterizeImagecoordsKernelCPU(V.data_ptr<float>(), F.data_ptr<int>(), 0,
(INT64*)z_min.data_ptr<long>(), occlusion_truncation, width, height, num_vertices, num_faces, i);
(int64_t*)z_min.data_ptr<int64_t>(), occlusion_truncation, width, height, num_vertices, num_faces, i);
}
} else {
for (int i = 0; i < num_faces; ++i)
rasterizeImagecoordsKernelCPU(V.data_ptr<float>(), F.data_ptr<int>(), D.data_ptr<float>(),
(INT64*)z_min.data_ptr<long>(), occlusion_truncation, width, height, num_vertices, num_faces, i);
(int64_t*)z_min.data_ptr<int64_t>(), occlusion_truncation, width, height, num_vertices, num_faces, i);
}
auto float_options = torch::TensorOptions().dtype(torch::kFloat32).requires_grad(false);
auto barycentric = torch::zeros({height, width, 3}, float_options);
for (int i = 0; i < width * height; ++i)
barycentricFromImgcoordCPU(V.data_ptr<float>(), F.data_ptr<int>(),
findices.data_ptr<int>(), (INT64*)z_min.data_ptr<long>(), width, height, num_vertices, num_faces, barycentric.data_ptr<float>(), i);
findices.data_ptr<int>(), (int64_t*)z_min.data_ptr<int64_t>(), width, height, num_vertices, num_faces, barycentric.data_ptr<float>(), i);
return {findices, barycentric};
}

View File

@@ -5,8 +5,9 @@
#include <vector>
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h> // For CUDA context
#include <cstdint>
#define INT64 unsigned long long
#define INT64 int64_t
#define MAXINT 2147483647
__host__ __device__ inline float calculateSignedArea2(float* a, float* b, float* c) {

View File

@@ -1,6 +1,6 @@
#include "rasterizer.h"
__device__ void rasterizeTriangleGPU(int idx, float* vt0, float* vt1, float* vt2, int width, int height, INT64* zbuffer, float* d, float occlusion_truncation) {
__device__ void rasterizeTriangleGPU(int idx, float* vt0, float* vt1, float* vt2, int width, int height, uint64_t* zbuffer, float* d, float occlusion_truncation) {
float x_min = std::min(vt0[0], std::min(vt1[0],vt2[0]));
float x_max = std::max(vt0[0], std::max(vt1[0],vt2[0]));
float y_min = std::min(vt0[1], std::min(vt1[1],vt2[1]));
@@ -18,7 +18,7 @@ __device__ void rasterizeTriangleGPU(int idx, float* vt0, float* vt1, float* vt2
if (isBarycentricCoordInBounds(baryCentricCoordinate)) {
int pixel = py * width + px;
if (zbuffer == 0) {
atomicExch(&zbuffer[pixel], (INT64)(idx + 1));
atomicExch(&zbuffer[pixel], (uint64_t)(idx + 1));
continue;
}
float depth = baryCentricCoordinate[0] * vt0[2] + baryCentricCoordinate[1] * vt1[2] + baryCentricCoordinate[2] * vt2[2];
@@ -28,7 +28,7 @@ __device__ void rasterizeTriangleGPU(int idx, float* vt0, float* vt1, float* vt2
}
int z_quantize = depth * (2<<17);
INT64 token = (INT64)z_quantize * MAXINT + (INT64)(idx + 1);
uint64_t token = (uint64_t)z_quantize * MAXINT + (uint64_t)(idx + 1);
if (depth < depth_thres)
continue;
atomicMin(&zbuffer[pixel], token);
@@ -37,13 +37,13 @@ __device__ void rasterizeTriangleGPU(int idx, float* vt0, float* vt1, float* vt2
}
}
__global__ void barycentricFromImgcoordGPU(float* V, int* F, int* findices, INT64* zbuffer, int width, int height, int num_vertices, int num_faces,
__global__ void barycentricFromImgcoordGPU(float* V, int* F, int* findices, uint64_t* zbuffer, int width, int height, int num_vertices, int num_faces,
float* barycentric_map)
{
int pix = blockIdx.x * blockDim.x + threadIdx.x;
if (pix >= width * height)
return;
INT64 f = zbuffer[pix] % MAXINT;
uint64_t f = zbuffer[pix] % MAXINT;
if (f == (MAXINT-1)) {
findices[pix] = 0;
barycentric_map[pix * 3] = 0;
@@ -80,7 +80,7 @@ __global__ void barycentricFromImgcoordGPU(float* V, int* F, int* findices, INT6
barycentric_map[pix * 3 + 2] = barycentric[2];
}
__global__ void rasterizeImagecoordsKernelGPU(float* V, int* F, float* d, INT64* zbuffer, float occlusion_trunc, int width, int height, int num_vertices, int num_faces)
__global__ void rasterizeImagecoordsKernelGPU(float* V, int* F, float* d, uint64_t* zbuffer, float occlusion_trunc, int width, int height, int num_vertices, int num_faces)
{
int f = blockIdx.x * blockDim.x + threadIdx.x;
if (f >= num_faces)
@@ -107,21 +107,21 @@ std::vector<torch::Tensor> rasterize_image_gpu(torch::Tensor V, torch::Tensor F,
auto options = torch::TensorOptions().dtype(torch::kInt32).device(torch::kCUDA, device_id).requires_grad(false);
auto INT64_options = torch::TensorOptions().dtype(torch::kInt64).device(torch::kCUDA, device_id).requires_grad(false);
auto findices = torch::zeros({height, width}, options);
INT64 maxint = (INT64)MAXINT * (INT64)MAXINT + (MAXINT - 1);
auto z_min = torch::ones({height, width}, INT64_options) * (long)maxint;
uint64_t maxint = (uint64_t)MAXINT * (uint64_t)MAXINT + (MAXINT - 1);
auto z_min = torch::ones({height, width}, INT64_options) * (uint64_t)maxint;
if (!use_depth_prior) {
rasterizeImagecoordsKernelGPU<<<(num_faces+255)/256,256,0,at::cuda::getCurrentCUDAStream()>>>(V.data_ptr<float>(), F.data_ptr<int>(), 0,
(INT64*)z_min.data_ptr<long>(), occlusion_truncation, width, height, num_vertices, num_faces);
(uint64_t*)z_min.data_ptr<uint64_t>(), occlusion_truncation, width, height, num_vertices, num_faces);
} else {
rasterizeImagecoordsKernelGPU<<<(num_faces+255)/256,256,0,at::cuda::getCurrentCUDAStream()>>>(V.data_ptr<float>(), F.data_ptr<int>(), D.data_ptr<float>(),
(INT64*)z_min.data_ptr<long>(), occlusion_truncation, width, height, num_vertices, num_faces);
(uint64_t*)z_min.data_ptr<uint64_t>(), occlusion_truncation, width, height, num_vertices, num_faces);
}
auto float_options = torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCUDA, device_id).requires_grad(false);
auto barycentric = torch::zeros({height, width, 3}, float_options);
barycentricFromImgcoordGPU<<<(width * height + 255)/256, 256>>>(V.data_ptr<float>(), F.data_ptr<int>(),
findices.data_ptr<int>(), (INT64*)z_min.data_ptr<long>(), width, height, num_vertices, num_faces, barycentric.data_ptr<float>());
findices.data_ptr<int>(), (uint64_t*)z_min.data_ptr<uint64_t>(), width, height, num_vertices, num_faces, barycentric.data_ptr<float>());
return {findices, barycentric};
}