diff --git a/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel_for_windows/__init__.py b/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel_for_windows/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel_for_windows/grid_neighbor.cpp b/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel_for_windows/grid_neighbor.cpp
new file mode 100644
index 0000000..cbe21e1
--- /dev/null
+++ b/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel_for_windows/grid_neighbor.cpp
@@ -0,0 +1,572 @@
+#include "rasterizer.h"
+#include <unordered_set>
+
+inline int pos2key(float* p, int resolution) {
+    int x = (p[0] * 0.5 + 0.5) * resolution;
+    int y = (p[1] * 0.5 + 0.5) * resolution;
+    int z = (p[2] * 0.5 + 0.5) * resolution;
+    return (x * resolution + y) * resolution + z;
+}
+
+inline void key2pos(int key, int resolution, float* p) {
+    int x = key / resolution / resolution;
+    int y = key / resolution % resolution;
+    int z = key % resolution;
+    p[0] = ((x + 0.5) / resolution - 0.5) * 2;
+    p[1] = ((y + 0.5) / resolution - 0.5) * 2;
+    p[2] = ((z + 0.5) / resolution - 0.5) * 2;
+}
+
+inline void key2cornerpos(int key, int resolution, float* p) {
+    int x = key / resolution / resolution;
+    int y = key / resolution % resolution;
+    int z = key % resolution;
+    p[0] = ((x + 0.75) / resolution - 0.5) * 2;
+    p[1] = ((y + 0.25) / resolution - 0.5) * 2;
+    p[2] = ((z + 0.75) / resolution - 0.5) * 2;
+}
+
+inline float* pos_ptr(int l, int i, int j, torch::Tensor t) {
+    float* pdata = t.data_ptr<float>();
+    int height = t.size(1);
+    int width = t.size(2);
+    return &pdata[((l * height + i) * width + j) * 4];
+}
+
+struct Grid
+{
+    std::vector<int> seq2oddcorner;
+    std::vector<int> seq2evencorner;
+    std::vector<int> seq2grid;
+    std::vector<int> seq2normal;
+    std::vector<int> seq2neighbor;
+    std::unordered_map<int, int> grid2seq;
+    std::vector<int> downsample_seq;
+    int num_origin_seq;
+    int resolution;
+    int stride;
+};
+
+inline void pos_from_seq(Grid& grid, int seq, float* p) {
+    auto k = grid.seq2grid[seq];
+    key2pos(k, grid.resolution, p);
+}
+
+inline int fetch_seq(Grid& grid, int l, int i, int j, torch::Tensor pdata) {
+    float* p = pos_ptr(l, i, j, pdata);
+    if (p[3] == 0)
+        return -1;
+    auto key = pos2key(p, grid.resolution);
+    int seq = grid.grid2seq[key];
+    return seq;
+}
+
+inline int fetch_last_seq(Grid& grid, int i, int j, torch::Tensor pdata) {
+    int num_layers = pdata.size(0);
+    int l = 0;
+    int idx = fetch_seq(grid, l, i, j, pdata);
+    while (l < num_layers - 1) {
+        l += 1;
+        int new_idx = fetch_seq(grid, l, i, j, pdata);
+        if (new_idx == -1)
+            break;
+        idx = new_idx;
+    }
+    return idx;
+}
+
+inline int fetch_nearest_seq(Grid& grid, int i, int j, int dim, float d, torch::Tensor pdata) {
+    float p[3];
+    float max_dist = 1e10;
+    int best_idx = -1;
+    int num_layers = pdata.size(0);
+    for (int l = 0; l < num_layers; ++l) {
+        int idx = fetch_seq(grid, l, i, j, pdata);
+        if (idx == -1)
+            break;
+        pos_from_seq(grid, idx, p);
+        float dist = std::abs(d - p[(dim + 2) % 3]);
+        if (dist < max_dist) {
+            max_dist = dist;
+            best_idx = idx;
+        }
+    }
+    return best_idx;
+}
+
+inline int fetch_nearest_seq_layer(Grid& grid, int i, int j, int dim, float d, torch::Tensor pdata) {
+    float p[3];
+    float max_dist = 1e10;
+    int best_layer = -1;
+    int num_layers = pdata.size(0);
+    for (int l = 0; l < num_layers; ++l) {
+        int idx = fetch_seq(grid, l, i, j, pdata);
+        if (idx == -1)
+            break;
+        pos_from_seq(grid, idx, p);
+        float dist = std::abs(d - p[(dim + 2) % 3]);
+        if (dist < max_dist) {
+            max_dist = dist;
+            best_layer = l;
+        }
+    }
+    return best_layer;
+}
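// --- Editorial sketch, not part of the patch ---------------------------------
// pos2key/key2pos hash a position in [-1, 1) to a flattened voxel index and
// back to that voxel's center. The round-trip is stable, because re-hashing a
// cell center lands in the same cell (a minimal check, assuming <cassert>):
//
//     const int resolution = 256;
//     float p[3] = {-0.3f, 0.12f, 0.77f};
//     int key = pos2key(p, resolution);    // flattened (x, y, z) cell index
//     float center[3];
//     key2pos(key, resolution, center);    // center of that cell in [-1, 1)
//     assert(pos2key(center, resolution) == key);
// -----------------------------------------------------------------------------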
+
+void FetchNeighbor(Grid& grid, int seq, float* pos, int dim, int boundary_info,
+    std::vector<torch::Tensor>& view_layer_positions, int* output_indices)
+{
+    auto t = view_layer_positions[dim];
+    int height = t.size(1);
+    int width = t.size(2);
+    int top = 0;
+    int ci = 0;
+    int cj = 0;
+    if (dim == 0) {
+        ci = (pos[1] / 2 + 0.5) * height;
+        cj = (pos[0] / 2 + 0.5) * width;
+    }
+    else if (dim == 1) {
+        ci = (pos[1] / 2 + 0.5) * height;
+        cj = (pos[2] / 2 + 0.5) * width;
+    }
+    else {
+        ci = (-pos[2] / 2 + 0.5) * height;
+        cj = (pos[0] / 2 + 0.5) * width;
+    }
+    int stride = grid.stride;
+    for (int ni = ci + stride; ni >= ci - stride; ni -= stride) {
+        for (int nj = cj - stride; nj <= cj + stride; nj += stride) {
+            int idx = -1;
+            if (ni == ci && nj == cj)
+                idx = seq;
+            else if (!(ni < 0 || ni >= height || nj < 0 || nj >= width)) {
+                if (boundary_info == -1)
+                    idx = fetch_seq(grid, 0, ni, nj, t);
+                else if (boundary_info == 1)
+                    idx = fetch_last_seq(grid, ni, nj, t);
+                else
+                    idx = fetch_nearest_seq(grid, ni, nj, dim, pos[(dim + 2) % 3], t);
+            }
+            output_indices[top] = idx;
+            top += 1;
+        }
+    }
+}
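// --- Editorial note, not part of the patch -----------------------------------
// FetchNeighbor writes 9 indices in a fixed order: ni starts at ci + stride
// and decreases, nj starts at cj - stride and increases, so output_indices is
//
//     [0] (ci+s, cj-s)   [1] (ci+s, cj)   [2] (ci+s, cj+s)
//     [3] (ci,   cj-s)   [4] (ci,   cj)   [5] (ci,   cj+s)    // [4] is seq itself
//     [6] (ci-s, cj-s)   [7] (ci-s, cj)   [8] (ci-s, cj+s)
//
// with s = grid.stride. Taps outside the view, or on empty pixels, stay -1.
// -----------------------------------------------------------------------------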
+
+void DownsampleGrid(Grid& src, Grid& tar)
+{
+    src.downsample_seq.resize(src.seq2grid.size(), -1);
+    tar.resolution = src.resolution / 2;
+    tar.stride = src.stride * 2;
+    float pos[3];
+    std::vector<int> seq2normal_count;
+    for (int i = 0; i < src.seq2grid.size(); ++i) {
+        key2pos(src.seq2grid[i], src.resolution, pos);
+        int k = pos2key(pos, tar.resolution);
+        int s = seq2normal_count.size();
+        if (!tar.grid2seq.count(k)) {
+            tar.grid2seq[k] = tar.seq2grid.size();
+            tar.seq2grid.emplace_back(k);
+            seq2normal_count.emplace_back(0);
+            seq2normal_count.emplace_back(0);
+            seq2normal_count.emplace_back(0);
+            //tar.seq2normal.emplace_back(src.seq2normal[i]);
+        } else {
+            s = tar.grid2seq[k] * 3;
+        }
+        seq2normal_count[s + src.seq2normal[i]] += 1;
+        src.downsample_seq[i] = tar.grid2seq[k];
+    }
+    tar.seq2normal.resize(seq2normal_count.size() / 3);
+    for (int i = 0; i < seq2normal_count.size(); i += 3) {
+        int t = 0;
+        for (int j = 1; j < 3; ++j) {
+            if (seq2normal_count[i + j] > seq2normal_count[i + t])
+                t = j;
+        }
+        tar.seq2normal[i / 3] = t;
+    }
+}
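// --- Editorial sketch, not part of the patch ---------------------------------
// DownsampleGrid assigns each coarse cell the axis label (0/1/2) that most of
// its fine cells voted for; seq2normal_count holds three counters per coarse
// cell. The argmax loop above is equivalent to this hypothetical helper, with
// ties resolved toward the lower axis index:
//
//     static inline int majority_axis(const std::vector<int>& counts, int c) {
//         int t = 0;
//         for (int j = 1; j < 3; ++j)
//             if (counts[c * 3 + j] > counts[c * 3 + t]) t = j;
//         return t;   // e.g. counts {2, 5, 1} -> axis 1
//     }
// -----------------------------------------------------------------------------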
+
+void NeighborGrid(Grid& grid, std::vector<torch::Tensor> view_layer_positions, int v)
+{
+    grid.seq2evencorner.resize(grid.seq2grid.size(), 0);
+    grid.seq2oddcorner.resize(grid.seq2grid.size(), 0);
+    std::unordered_set<int> visited_seq;
+    for (int vd = 0; vd < 3; ++vd) {
+        auto t = view_layer_positions[vd];
+        auto t0 = view_layer_positions[v];
+        int height = t.size(1);
+        int width = t.size(2);
+        int num_layers = t.size(0);
+        int num_view_layers = t0.size(0);
+        for (int i = 0; i < height; ++i) {
+            for (int j = 0; j < width; ++j) {
+                for (int l = 0; l < num_layers; ++l) {
+                    int seq = fetch_seq(grid, l, i, j, t);
+                    if (seq == -1)
+                        break;
+                    int dim = grid.seq2normal[seq];
+                    if (dim != v)
+                        continue;
+
+                    float pos[3];
+                    pos_from_seq(grid, seq, pos);
+
+                    int ci = 0;
+                    int cj = 0;
+                    if (dim == 0) {
+                        ci = (pos[1] / 2 + 0.5) * height;
+                        cj = (pos[0] / 2 + 0.5) * width;
+                    }
+                    else if (dim == 1) {
+                        ci = (pos[1] / 2 + 0.5) * height;
+                        cj = (pos[2] / 2 + 0.5) * width;
+                    }
+                    else {
+                        ci = (-pos[2] / 2 + 0.5) * height;
+                        cj = (pos[0] / 2 + 0.5) * width;
+                    }
+
+                    if ((ci % (grid.stride * 2) < grid.stride) && (cj % (grid.stride * 2) >= grid.stride))
+                        grid.seq2evencorner[seq] = 1;
+
+                    if ((ci % (grid.stride * 2) >= grid.stride) && (cj % (grid.stride * 2) < grid.stride))
+                        grid.seq2oddcorner[seq] = 1;
+
+                    bool is_boundary = false;
+                    if (vd == v) {
+                        if (l == 0 || l == num_layers - 1)
+                            is_boundary = true;
+                        else {
+                            int seq_new = fetch_seq(grid, l + 1, i, j, t);
+                            if (seq_new == -1)
+                                is_boundary = true;
+                        }
+                    }
+                    int boundary_info = 0;
+                    if (is_boundary && (l == 0))
+                        boundary_info = -1;
+                    else if (is_boundary)
+                        boundary_info = 1;
+                    if (visited_seq.count(seq))
+                        continue;
+                    visited_seq.insert(seq);
+
+                    FetchNeighbor(grid, seq, pos, dim, boundary_info, view_layer_positions, &grid.seq2neighbor[seq * 9]);
+                }
+            }
+        }
+    }
+}
+
+void PadGrid(Grid& src, Grid& tar, std::vector<torch::Tensor>& view_layer_positions) {
+    auto& downsample_seq = src.downsample_seq;
+    auto& seq2evencorner = src.seq2evencorner;
+    auto& seq2oddcorner = src.seq2oddcorner;
+    int indices[9];
+    std::vector<int> mapped_even_corners(tar.seq2grid.size(), 0);
+    std::vector<int> mapped_odd_corners(tar.seq2grid.size(), 0);
+    for (int i = 0; i < downsample_seq.size(); ++i) {
+        if (seq2evencorner[i] > 0) {
+            mapped_even_corners[downsample_seq[i]] = 1;
+        }
+        if (seq2oddcorner[i] > 0) {
+            mapped_odd_corners[downsample_seq[i]] = 1;
+        }
+    }
+    auto& tar_seq2normal = tar.seq2normal;
+    auto& tar_seq2grid = tar.seq2grid;
+    for (int i = 0; i < tar_seq2grid.size(); ++i) {
+        if (mapped_even_corners[i] == 1 && mapped_odd_corners[i] == 1)
+            continue;
+        auto k = tar_seq2grid[i];
+        float p[3];
+        key2cornerpos(k, tar.resolution, p);
+
+        int src_key = pos2key(p, src.resolution);
+        if (!src.grid2seq.count(src_key)) {
+            int seq = src.seq2grid.size();
+            src.grid2seq[src_key] = seq;
+            src.seq2evencorner.emplace_back((mapped_even_corners[i] == 0));
+            src.seq2oddcorner.emplace_back((mapped_odd_corners[i] == 0));
+            src.seq2grid.emplace_back(src_key);
+            src.seq2normal.emplace_back(tar_seq2normal[i]);
+            FetchNeighbor(src, seq, p, tar_seq2normal[i], 0, view_layer_positions, indices);
+            for (int j = 0; j < 9; ++j) {
+                src.seq2neighbor.emplace_back(indices[j]);
+            }
+            src.downsample_seq.emplace_back(i);
+        } else {
+            int seq = src.grid2seq[src_key];
+            if (mapped_even_corners[i] == 0)
+                src.seq2evencorner[seq] = 1;
+            if (mapped_odd_corners[i] == 0)
+                src.seq2oddcorner[seq] = 1;
+        }
+    }
+}
+
+std::vector<std::vector<torch::Tensor>> build_hierarchy(std::vector<torch::Tensor> view_layer_positions,
+    std::vector<torch::Tensor> view_layer_normals, int num_level, int resolution)
+{
+    if (view_layer_positions.size() != 3 || num_level < 1) {
+        printf("Alert! We require 3 views and at least 1 level! (%zu %d)\n", view_layer_positions.size(), num_level);
+        return {{}, {}, {}, {}, {}};
+    }
+
+    std::vector<Grid> grids;
+    grids.resize(num_level);
+
+    std::vector<float> seq2pos;
+    auto& seq2grid = grids[0].seq2grid;
+    auto& seq2normal = grids[0].seq2normal;
+    auto& grid2seq = grids[0].grid2seq;
+    grids[0].resolution = resolution;
+    grids[0].stride = 1;
+
+    auto int64_options = torch::TensorOptions().dtype(torch::kInt64).requires_grad(false);
+    auto float_options = torch::TensorOptions().dtype(torch::kFloat32).requires_grad(false);
+
+    for (int v = 0; v < 3; ++v) {
+        int num_layers = view_layer_positions[v].size(0);
+        int height = view_layer_positions[v].size(1);
+        int width = view_layer_positions[v].size(2);
+        float* data = view_layer_positions[v].data_ptr<float>();
+        float* data_normal = view_layer_normals[v].data_ptr<float>();
+        for (int l = 0; l < num_layers; ++l) {
+            for (int i = 0; i < height; ++i) {
+                for (int j = 0; j < width; ++j) {
+                    float* p = &data[(i * width + j) * 4];
+                    float* n = &data_normal[(i * width + j) * 3];
+                    if (p[3] == 0)
+                        continue;
+                    auto k = pos2key(p, resolution);
+                    if (!grid2seq.count(k)) {
+                        int dim = 0;
+                        for (int d = 0; d < 3; ++d) {
+                            if (std::abs(n[d]) > std::abs(n[dim]))
+                                dim = d;
+                        }
+                        dim = (dim + 1) % 3;
+                        grid2seq[k] = seq2grid.size();
+                        seq2grid.emplace_back(k);
+                        seq2pos.push_back(p[0]);
+                        seq2pos.push_back(p[1]);
+                        seq2pos.push_back(p[2]);
+                        seq2normal.emplace_back(dim);
+                    }
+                }
+            }
+            data += (height * width * 4);
+            data_normal += (height * width * 3);
+        }
+    }
+
+    for (int i = 0; i < num_level - 1; ++i) {
+        DownsampleGrid(grids[i], grids[i + 1]);
+    }
+
+    for (int l = 0; l < num_level; ++l) {
+        grids[l].seq2neighbor.resize(grids[l].seq2grid.size() * 9, -1);
+        grids[l].num_origin_seq = grids[l].seq2grid.size();
+        for (int d = 0; d < 3; ++d) {
+            NeighborGrid(grids[l], view_layer_positions, d);
+        }
+    }
+
+    for (int i = num_level - 2; i >= 0; --i) {
+        PadGrid(grids[i], grids[i + 1], view_layer_positions);
+    }
+    for (int i = grids[0].num_origin_seq; i < grids[0].seq2grid.size(); ++i) {
+        int k = grids[0].seq2grid[i];
+        float p[3];
+        key2pos(k, grids[0].resolution, p);
+        seq2pos.push_back(p[0]);
+        seq2pos.push_back(p[1]);
+        seq2pos.push_back(p[2]);
+    }
+
+    std::vector<torch::Tensor> texture_positions(2);
+    std::vector<torch::Tensor> grid_neighbors(grids.size());
+    std::vector<torch::Tensor> grid_downsamples(grids.size() - 1);
+    std::vector<torch::Tensor> grid_evencorners(grids.size());
+    std::vector<torch::Tensor> grid_oddcorners(grids.size());
+
+    texture_positions[0] = torch::zeros({static_cast<int64_t>(seq2pos.size() / 3), 3}, float_options);
+    texture_positions[1] = torch::zeros({static_cast<int64_t>(seq2pos.size() / 3)}, float_options);
+    float* positions_out_ptr = texture_positions[0].data_ptr<float>();
+    memcpy(positions_out_ptr, seq2pos.data(), sizeof(float) * seq2pos.size());
+    positions_out_ptr = texture_positions[1].data_ptr<float>();
+    for (int i = 0; i < grids[0].seq2grid.size(); ++i) {
+        positions_out_ptr[i] = (i < grids[0].num_origin_seq);
+    }
+
+    for (int i = 0; i < grids.size(); ++i) {
+        grid_neighbors[i] = torch::zeros({static_cast<int64_t>(grids[i].seq2grid.size()), 9}, int64_options);
+        int64_t* nptr = grid_neighbors[i].data_ptr<int64_t>();
+        for (int j = 0; j < grids[i].seq2neighbor.size(); ++j) {
+            nptr[j] = grids[i].seq2neighbor[j];
+        }
+
+        grid_evencorners[i] = torch::zeros({static_cast<int64_t>(grids[i].seq2evencorner.size())}, int64_options);
+        grid_oddcorners[i] = torch::zeros({static_cast<int64_t>(grids[i].seq2oddcorner.size())}, int64_options); // allocate before the fill below
+        int64_t* dptr = grid_evencorners[i].data_ptr<int64_t>();
+        for (int j = 0; j < grids[i].seq2evencorner.size(); ++j) {
+            dptr[j] = grids[i].seq2evencorner[j];
+        }
+        dptr = grid_oddcorners[i].data_ptr<int64_t>();
+        for (int j = 0; j < grids[i].seq2oddcorner.size(); ++j) {
+            dptr[j] = grids[i].seq2oddcorner[j];
+        }
+        if (i + 1 < grids.size()) {
+            grid_downsamples[i] = torch::zeros({static_cast<int64_t>(grids[i].downsample_seq.size())}, int64_options);
+            int64_t* dptr = grid_downsamples[i].data_ptr<int64_t>();
+            for (int j = 0; j < grids[i].downsample_seq.size(); ++j) {
+                dptr[j] = grids[i].downsample_seq[j];
+            }
+        }
+    }
+    return {texture_positions, grid_neighbors, grid_downsamples, grid_evencorners, grid_oddcorners};
+}
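// --- Editorial sketch, not part of the patch ---------------------------------
// A minimal call to build_hierarchy, assuming three axis-aligned views with
// per-layer positions (x, y, z, mask) and normals. Tensor sizes here are
// illustrative only; the all-zero masks just make the call cheap to run.
//
//     std::vector<torch::Tensor> positions, normals;
//     auto opts = torch::TensorOptions().dtype(torch::kFloat32);
//     for (int v = 0; v < 3; ++v) {
//         positions.push_back(torch::zeros({4, 512, 512, 4}, opts)); // layers, H, W, xyz+mask
//         normals.push_back(torch::zeros({4, 512, 512, 3}, opts));
//     }
//     auto out = build_hierarchy(positions, normals, /*num_level=*/4, /*resolution=*/256);
//     // out[0]: positions + validity mask, out[1]: per-level 9-neighborhoods,
//     // out[2]: per-level downsample maps, out[3]/out[4]: corner flags
// -----------------------------------------------------------------------------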
+
+std::vector<std::vector<torch::Tensor>> build_hierarchy_with_feat(
+    std::vector<torch::Tensor> view_layer_positions,
+    std::vector<torch::Tensor> view_layer_normals,
+    std::vector<torch::Tensor> view_layer_feats,
+    int num_level, int resolution)
+{
+    if (view_layer_positions.size() != 3 || num_level < 1) {
+        printf("Alert! We require 3 views and at least 1 level! (%zu %d)\n", view_layer_positions.size(), num_level);
+        return {{}, {}, {}, {}, {}, {}};
+    }
+
+    std::vector<Grid> grids;
+    grids.resize(num_level);
+
+    std::vector<float> seq2pos;
+    std::vector<float> seq2feat;
+    auto& seq2grid = grids[0].seq2grid;
+    auto& seq2normal = grids[0].seq2normal;
+    auto& grid2seq = grids[0].grid2seq;
+    grids[0].resolution = resolution;
+    grids[0].stride = 1;
+
+    auto int64_options = torch::TensorOptions().dtype(torch::kInt64).requires_grad(false);
+    auto float_options = torch::TensorOptions().dtype(torch::kFloat32).requires_grad(false);
+
+    int feat_channel = 3;
+    for (int v = 0; v < 3; ++v) {
+        int num_layers = view_layer_positions[v].size(0);
+        int height = view_layer_positions[v].size(1);
+        int width = view_layer_positions[v].size(2);
+        float* data = view_layer_positions[v].data_ptr<float>();
+        float* data_normal = view_layer_normals[v].data_ptr<float>();
+        float* data_feat = view_layer_feats[v].data_ptr<float>();
+        feat_channel = view_layer_feats[v].size(3);
+        for (int l = 0; l < num_layers; ++l) {
+            for (int i = 0; i < height; ++i) {
+                for (int j = 0; j < width; ++j) {
+                    float* p = &data[(i * width + j) * 4];
+                    float* n = &data_normal[(i * width + j) * 3];
+                    float* f = &data_feat[(i * width + j) * feat_channel];
+                    if (p[3] == 0)
+                        continue;
+                    auto k = pos2key(p, resolution);
+                    if (!grid2seq.count(k)) {
+                        int dim = 0;
+                        for (int d = 0; d < 3; ++d) {
+                            if (std::abs(n[d]) > std::abs(n[dim]))
+                                dim = d;
+                        }
+                        dim = (dim + 1) % 3;
+                        grid2seq[k] = seq2grid.size();
+                        seq2grid.emplace_back(k);
+                        seq2pos.push_back(p[0]);
+                        seq2pos.push_back(p[1]);
+                        seq2pos.push_back(p[2]);
+                        for (int c = 0; c < feat_channel; ++c) {
+                            seq2feat.emplace_back(f[c]);
+                        }
+                        seq2normal.emplace_back(dim);
+                    }
+                }
+            }
+            data += (height * width * 4);
+            data_normal += (height * width * 3);
+            data_feat += (height * width * feat_channel);
+        }
+    }
+
+    for (int i = 0; i < num_level - 1; ++i) {
+        DownsampleGrid(grids[i], grids[i + 1]);
+    }
+
+    for (int l = 0; l < num_level; ++l) {
+        grids[l].seq2neighbor.resize(grids[l].seq2grid.size() * 9, -1);
+        grids[l].num_origin_seq = grids[l].seq2grid.size();
+        for (int d = 0; d < 3; ++d) {
+            NeighborGrid(grids[l], view_layer_positions, d);
+        }
+    }
+
+    for (int i = num_level - 2; i >= 0; --i) {
+        PadGrid(grids[i], grids[i + 1], view_layer_positions);
+    }
+    for (int i = grids[0].num_origin_seq; i < grids[0].seq2grid.size(); ++i) {
+        int k = grids[0].seq2grid[i];
+        float p[3];
+        key2pos(k, grids[0].resolution, p);
+        seq2pos.push_back(p[0]);
+        seq2pos.push_back(p[1]);
+        seq2pos.push_back(p[2]);
+        for (int c = 0; c < feat_channel; ++c) {
+            seq2feat.emplace_back(0.5);
+        }
+    }
+
+    std::vector<torch::Tensor> texture_positions(2);
+    std::vector<torch::Tensor> texture_feats(1);
+    std::vector<torch::Tensor> grid_neighbors(grids.size());
+    std::vector<torch::Tensor> grid_downsamples(grids.size() - 1);
+    std::vector<torch::Tensor> grid_evencorners(grids.size());
+    std::vector<torch::Tensor> grid_oddcorners(grids.size());
+
+    texture_positions[0] = torch::zeros({static_cast<int64_t>(seq2pos.size() / 3), 3}, float_options);
+    texture_positions[1] = torch::zeros({static_cast<int64_t>(seq2pos.size() / 3)}, float_options);
+    texture_feats[0] = torch::zeros({static_cast<int64_t>(seq2feat.size() / feat_channel), static_cast<int64_t>(feat_channel)}, float_options);
+    float* positions_out_ptr = texture_positions[0].data_ptr<float>();
+    memcpy(positions_out_ptr, seq2pos.data(), sizeof(float) * seq2pos.size());
+    positions_out_ptr = texture_positions[1].data_ptr<float>();
+    for (int i = 0; i < grids[0].seq2grid.size(); ++i) {
+        positions_out_ptr[i] = (i < grids[0].num_origin_seq);
+    }
+    float* feats_out_ptr = texture_feats[0].data_ptr<float>();
+    memcpy(feats_out_ptr, seq2feat.data(), sizeof(float) * seq2feat.size());
+
+    for (int i = 0; i < grids.size(); ++i) {
+        grid_neighbors[i] = torch::zeros({static_cast<int64_t>(grids[i].seq2grid.size()), 9}, int64_options);
+        int64_t* nptr = grid_neighbors[i].data_ptr<int64_t>();
+        for (int j = 0; j < grids[i].seq2neighbor.size(); ++j) {
+            nptr[j] = grids[i].seq2neighbor[j];
+        }
+        grid_evencorners[i] = torch::zeros({static_cast<int64_t>(grids[i].seq2evencorner.size())}, int64_options);
+        grid_oddcorners[i] = torch::zeros({static_cast<int64_t>(grids[i].seq2oddcorner.size())}, int64_options); // allocate before the fill below
+        int64_t* dptr = grid_evencorners[i].data_ptr<int64_t>();
+        for (int j = 0; j < grids[i].seq2evencorner.size(); ++j) {
+            dptr[j] = grids[i].seq2evencorner[j];
+        }
+        dptr = grid_oddcorners[i].data_ptr<int64_t>();
+        for (int j = 0; j < grids[i].seq2oddcorner.size(); ++j) {
+            dptr[j] = grids[i].seq2oddcorner[j];
+        }
+        if (i + 1 < grids.size()) {
+            grid_downsamples[i] = torch::zeros({static_cast<int64_t>(grids[i].downsample_seq.size())}, int64_options);
+            int64_t* dptr = grid_downsamples[i].data_ptr<int64_t>();
+            for (int j = 0; j < grids[i].downsample_seq.size(); ++j) {
+                dptr[j] = grids[i].downsample_seq[j];
+            }
+        }
+    }
+    return {texture_positions, texture_feats, grid_neighbors, grid_downsamples, grid_evencorners, grid_oddcorners};
+}
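// --- Editorial note, not part of the patch -----------------------------------
// Shapes of the bundles returned by build_hierarchy_with_feat, as constructed
// above (N_l = number of cells at level l, original cells first, padded cells
// appended; C = feat_channel):
//
//     ret[0] texture_positions: [0] float [N_0, 3] cell positions
//                               [1] float [N_0]    1.0 for original cells, else 0.0
//     ret[1] texture_feats:     [0] float [N_0, C] per-cell features (0.5 for padding)
//     ret[2] grid_neighbors:    per level, int64 [N_l, 9]; -1 marks a missing tap
//     ret[3] grid_downsamples:  per level below the coarsest, int64 [N_l]
//     ret[4] grid_evencorners, ret[5] grid_oddcorners: per level, int64 [N_l]
// -----------------------------------------------------------------------------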
diff --git a/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel_for_windows/rasterizer.cpp b/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel_for_windows/rasterizer.cpp
new file mode 100644
index 0000000..c02f1f9
--- /dev/null
+++ b/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel_for_windows/rasterizer.cpp
@@ -0,0 +1,139 @@
+#include "rasterizer.h"
+
+void rasterizeTriangleCPU(int idx, float* vt0, float* vt1, float* vt2, int width, int height, int64_t* zbuffer, float* d, float occlusion_truncation) {
+    float x_min = std::min(vt0[0], std::min(vt1[0], vt2[0]));
+    float x_max = std::max(vt0[0], std::max(vt1[0], vt2[0]));
+    float y_min = std::min(vt0[1], std::min(vt1[1], vt2[1]));
+    float y_max = std::max(vt0[1], std::max(vt1[1], vt2[1]));
+
+    for (int px = x_min; px < x_max + 1; ++px) {
+        if (px < 0 || px >= width)
+            continue;
+        for (int py = y_min; py < y_max + 1; ++py) {
+            if (py < 0 || py >= height)
+                continue;
+            float vt[2] = {px + 0.5f, py + 0.5f};
+            float baryCentricCoordinate[3];
+            calculateBarycentricCoordinate(vt0, vt1, vt2, vt, baryCentricCoordinate);
+            if (isBarycentricCoordInBounds(baryCentricCoordinate)) {
+                int pixel = py * width + px;
+                if (zbuffer == 0)
+                    continue; // no z-buffer bound, nothing to write
+
+                float depth = baryCentricCoordinate[0] * vt0[2] + baryCentricCoordinate[1] * vt1[2] + baryCentricCoordinate[2] * vt2[2];
+                float depth_thres = 0;
+                if (d) {
+                    depth_thres = d[pixel] * 0.49999f + 0.5f + occlusion_truncation;
+                }
+
+                int z_quantize = depth * (2 << 17);
+                int64_t token = (int64_t)z_quantize * MAXINT + (int64_t)(idx + 1);
+                if (depth < depth_thres)
+                    continue;
+                zbuffer[pixel] = std::min(zbuffer[pixel], token);
+            }
+        }
+    }
+}
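// --- Editorial sketch, not part of the patch ---------------------------------
// How a z-buffer token packs a quantized depth with a 1-based face id; taking
// the per-pixel minimum therefore keeps the closest face, with the smaller
// face id as tie-breaker within a depth bucket:
//
//     static inline int64_t pack_token(float depth, int face_idx) {
//         int z_quantize = depth * (2 << 17);   // depth in [0, 1] -> 18-bit bucket
//         return (int64_t)z_quantize * MAXINT + (int64_t)(face_idx + 1);
//     }
//     static inline int unpack_face_id(int64_t token) {
//         return (int)(token % MAXINT);          // 1-based; callers subtract 1
//     }
// -----------------------------------------------------------------------------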
+
+void barycentricFromImgcoordCPU(float* V, int* F, int* findices, int64_t* zbuffer, int width, int height, int num_vertices, int num_faces,
+    float* barycentric_map, int pix)
+{
+    int64_t f = zbuffer[pix] % MAXINT;
+    if (f == (MAXINT - 1)) {
+        findices[pix] = 0;
+        barycentric_map[pix * 3] = 0;
+        barycentric_map[pix * 3 + 1] = 0;
+        barycentric_map[pix * 3 + 2] = 0;
+        return;
+    }
+    findices[pix] = f;
+    f -= 1;
+    float barycentric[3] = {0, 0, 0};
+    if (f >= 0) {
+        float vt[2] = {float(pix % width) + 0.5f, float(pix / width) + 0.5f};
+        float* vt0_ptr = V + (F[f * 3] * 4);
+        float* vt1_ptr = V + (F[f * 3 + 1] * 4);
+        float* vt2_ptr = V + (F[f * 3 + 2] * 4);
+
+        float vt0[2] = {(vt0_ptr[0] / vt0_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt0_ptr[1] / vt0_ptr[3]) * (height - 1) + 0.5f};
+        float vt1[2] = {(vt1_ptr[0] / vt1_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt1_ptr[1] / vt1_ptr[3]) * (height - 1) + 0.5f};
+        float vt2[2] = {(vt2_ptr[0] / vt2_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt2_ptr[1] / vt2_ptr[3]) * (height - 1) + 0.5f};
+
+        calculateBarycentricCoordinate(vt0, vt1, vt2, vt, barycentric);
+
+        barycentric[0] = barycentric[0] / vt0_ptr[3];
+        barycentric[1] = barycentric[1] / vt1_ptr[3];
+        barycentric[2] = barycentric[2] / vt2_ptr[3];
+        float w = 1.0f / (barycentric[0] + barycentric[1] + barycentric[2]);
+        barycentric[0] *= w;
+        barycentric[1] *= w;
+        barycentric[2] *= w;
+    }
+    barycentric_map[pix * 3] = barycentric[0];
+    barycentric_map[pix * 3 + 1] = barycentric[1];
+    barycentric_map[pix * 3 + 2] = barycentric[2];
+}
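// --- Editorial note, not part of the patch -----------------------------------
// The divisions by vt*_ptr[3] above perform perspective correction: given
// screen-space barycentrics b_i and clip-space w_i, the corrected weights are
//
//     b_i' = (b_i / w_i) / (b_0/w_0 + b_1/w_1 + b_2/w_2)
//
// so attributes interpolated with b_i' vary linearly on the 3-D triangle
// rather than in screen space.
// -----------------------------------------------------------------------------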
+
+void rasterizeImagecoordsKernelCPU(float* V, int* F, float* d, int64_t* zbuffer, float occlusion_trunc, int width, int height, int num_vertices, int num_faces, int f)
+{
+    float* vt0_ptr = V + (F[f * 3] * 4);
+    float* vt1_ptr = V + (F[f * 3 + 1] * 4);
+    float* vt2_ptr = V + (F[f * 3 + 2] * 4);
+
+    float vt0[3] = {(vt0_ptr[0] / vt0_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt0_ptr[1] / vt0_ptr[3]) * (height - 1) + 0.5f, vt0_ptr[2] / vt0_ptr[3] * 0.49999f + 0.5f};
+    float vt1[3] = {(vt1_ptr[0] / vt1_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt1_ptr[1] / vt1_ptr[3]) * (height - 1) + 0.5f, vt1_ptr[2] / vt1_ptr[3] * 0.49999f + 0.5f};
+    float vt2[3] = {(vt2_ptr[0] / vt2_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt2_ptr[1] / vt2_ptr[3]) * (height - 1) + 0.5f, vt2_ptr[2] / vt2_ptr[3] * 0.49999f + 0.5f};
+
+    rasterizeTriangleCPU(f, vt0, vt1, vt2, width, height, zbuffer, d, occlusion_trunc);
+}
+
+std::vector<torch::Tensor> rasterize_image_cpu(torch::Tensor V, torch::Tensor F, torch::Tensor D,
+    int width, int height, float occlusion_truncation, int use_depth_prior)
+{
+    int num_faces = F.size(0);
+    int num_vertices = V.size(0);
+    auto options = torch::TensorOptions().dtype(torch::kInt32).requires_grad(false);
+    auto INT64_options = torch::TensorOptions().dtype(torch::kInt64).requires_grad(false);
+    auto findices = torch::zeros({height, width}, options);
+    int64_t maxint = (int64_t)MAXINT * (int64_t)MAXINT + (MAXINT - 1);
+    auto z_min = torch::ones({height, width}, INT64_options) * (int64_t)maxint;
+
+    if (!use_depth_prior) {
+        for (int i = 0; i < num_faces; ++i) {
+            rasterizeImagecoordsKernelCPU(V.data_ptr<float>(), F.data_ptr<int>(), 0,
+                (int64_t*)z_min.data_ptr(), occlusion_truncation, width, height, num_vertices, num_faces, i);
+        }
+    } else {
+        for (int i = 0; i < num_faces; ++i)
+            rasterizeImagecoordsKernelCPU(V.data_ptr<float>(), F.data_ptr<int>(), D.data_ptr<float>(),
+                (int64_t*)z_min.data_ptr(), occlusion_truncation, width, height, num_vertices, num_faces, i);
+    }
+
+    auto float_options = torch::TensorOptions().dtype(torch::kFloat32).requires_grad(false);
+    auto barycentric = torch::zeros({height, width, 3}, float_options);
+    for (int i = 0; i < width * height; ++i)
+        barycentricFromImgcoordCPU(V.data_ptr<float>(), F.data_ptr<int>(),
+            findices.data_ptr<int>(), (int64_t*)z_min.data_ptr(), width, height, num_vertices, num_faces, barycentric.data_ptr<float>(), i);
+
+    return {findices, barycentric};
+}
+
+std::vector<torch::Tensor> rasterize_image(torch::Tensor V, torch::Tensor F, torch::Tensor D,
+    int width, int height, float occlusion_truncation, int use_depth_prior)
+{
+    int device_id = V.get_device();
+    if (device_id == -1)
+        return rasterize_image_cpu(V, F, D, width, height, occlusion_truncation, use_depth_prior);
+    else
+        return rasterize_image_gpu(V, F, D, width, height, occlusion_truncation, use_depth_prior);
+}
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+    m.def("rasterize_image", &rasterize_image, "Custom image rasterization");
+    m.def("build_hierarchy", &build_hierarchy, "Build multi-level grid hierarchy");
+    m.def("build_hierarchy_with_feat", &build_hierarchy_with_feat, "Build multi-level grid hierarchy with features");
+}
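// --- Editorial sketch, not part of the patch ---------------------------------
// rasterize_image dispatches on the device of V (get_device() is -1 for CPU
// tensors). A hypothetical C++-side call with random data, purely to show the
// call shape:
//
//     auto V = torch::rand({100, 4}) * 2 - 1;   // clip-space vertices
//     V.select(1, 3).fill_(1.0);                // w = 1 keeps coordinates bounded
//     auto F = torch::randint(0, 100, {32, 3},
//                             torch::TensorOptions().dtype(torch::kInt32));
//     auto D = torch::zeros({512, 512});        // ignored when use_depth_prior = 0
//     auto out = rasterize_image(V, F, D, 512, 512,
//                                /*occlusion_truncation=*/0.0f, /*use_depth_prior=*/0);
//     // out[0]: [H, W] int32, 1-based face index per pixel (0 = background)
//     // out[1]: [H, W, 3] perspective-corrected barycentric coordinates
// -----------------------------------------------------------------------------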
diff --git a/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel_for_windows/rasterizer.h b/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel_for_windows/rasterizer.h
new file mode 100644
index 0000000..8e198b1
--- /dev/null
+++ b/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel_for_windows/rasterizer.h
@@ -0,0 +1,55 @@
+#ifndef RASTERIZER_H_
+#define RASTERIZER_H_
+
+#include <torch/extension.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <ATen/cuda/CUDAContext.h> // For CUDA context
+#include <vector>
+
+#define INT64 int64_t
+#define MAXINT 2147483647
+
+__host__ __device__ inline float calculateSignedArea2(float* a, float* b, float* c) {
+    return ((c[0] - a[0]) * (b[1] - a[1]) - (b[0] - a[0]) * (c[1] - a[1]));
+}
+
+__host__ __device__ inline void calculateBarycentricCoordinate(float* a, float* b, float* c, float* p,
+    float* barycentric)
+{
+    float beta_tri = calculateSignedArea2(a, p, c);
+    float gamma_tri = calculateSignedArea2(a, b, p);
+    float area = calculateSignedArea2(a, b, c);
+    if (area == 0) {
+        barycentric[0] = -1.0;
+        barycentric[1] = -1.0;
+        barycentric[2] = -1.0;
+        return;
+    }
+    float tri_inv = 1.0 / area;
+    float beta = beta_tri * tri_inv;
+    float gamma = gamma_tri * tri_inv;
+    float alpha = 1.0 - beta - gamma;
+    barycentric[0] = alpha;
+    barycentric[1] = beta;
+    barycentric[2] = gamma;
+}
+
+__host__ __device__ inline bool isBarycentricCoordInBounds(float* barycentricCoord) {
+    return barycentricCoord[0] >= 0.0 && barycentricCoord[0] <= 1.0 &&
+           barycentricCoord[1] >= 0.0 && barycentricCoord[1] <= 1.0 &&
+           barycentricCoord[2] >= 0.0 && barycentricCoord[2] <= 1.0;
+}
+
+std::vector<torch::Tensor> rasterize_image_gpu(torch::Tensor V, torch::Tensor F, torch::Tensor D,
+    int width, int height, float occlusion_truncation, int use_depth_prior);
+
+std::vector<std::vector<torch::Tensor>> build_hierarchy(std::vector<torch::Tensor> view_layer_positions, std::vector<torch::Tensor> view_layer_normals, int num_level, int resolution);
+
+std::vector<std::vector<torch::Tensor>> build_hierarchy_with_feat(
+    std::vector<torch::Tensor> view_layer_positions,
+    std::vector<torch::Tensor> view_layer_normals,
+    std::vector<torch::Tensor> view_layer_feats,
+    int num_level, int resolution);
+
+#endif
\ No newline at end of file
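// --- Editorial note, not part of the patch -----------------------------------
// calculateSignedArea2(a, b, c) returns twice the signed area of the triangle
// (a, b, c): the z-component of the 2-D cross product (c - a) x (b - a).
// Barycentric weights are ratios of sub-triangle areas,
//
//     beta  = area2(a, p, c) / area2(a, b, c)
//     gamma = area2(a, b, p) / area2(a, b, c)
//     alpha = 1 - beta - gamma
//
// and a degenerate (zero-area) triangle is flagged with (-1, -1, -1).
// -----------------------------------------------------------------------------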
diff --git a/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel_for_windows/rasterizer_gpu.cu b/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel_for_windows/rasterizer_gpu.cu
new file mode 100644
index 0000000..ba6ab91
--- /dev/null
+++ b/hy3dpaint/custom_rasterizer/lib/custom_rasterizer_kernel_for_windows/rasterizer_gpu.cu
@@ -0,0 +1,127 @@
+#include "rasterizer.h"
+
+__device__ void rasterizeTriangleGPU(int idx, float* vt0, float* vt1, float* vt2, int width, int height, uint64_t* zbuffer, float* d, float occlusion_truncation) {
+    float x_min = std::min(vt0[0], std::min(vt1[0], vt2[0]));
+    float x_max = std::max(vt0[0], std::max(vt1[0], vt2[0]));
+    float y_min = std::min(vt0[1], std::min(vt1[1], vt2[1]));
+    float y_max = std::max(vt0[1], std::max(vt1[1], vt2[1]));
+
+    for (int px = x_min; px < x_max + 1; ++px) {
+        if (px < 0 || px >= width)
+            continue;
+        for (int py = y_min; py < y_max + 1; ++py) {
+            if (py < 0 || py >= height)
+                continue;
+            float vt[2] = {px + 0.5f, py + 0.5f};
+            float baryCentricCoordinate[3];
+            calculateBarycentricCoordinate(vt0, vt1, vt2, vt, baryCentricCoordinate);
+            if (isBarycentricCoordInBounds(baryCentricCoordinate)) {
+                int pixel = py * width + px;
+                if (zbuffer == 0)
+                    continue; // no z-buffer bound, nothing to write
+                float depth = baryCentricCoordinate[0] * vt0[2] + baryCentricCoordinate[1] * vt1[2] + baryCentricCoordinate[2] * vt2[2];
+                float depth_thres = 0;
+                if (d) {
+                    depth_thres = d[pixel] * 0.49999f + 0.5f + occlusion_truncation;
+                }
+
+                int z_quantize = depth * (2 << 17);
+                uint64_t token = (uint64_t)z_quantize * MAXINT + (uint64_t)(idx + 1);
+                if (depth < depth_thres)
+                    continue;
+                atomicMin(&zbuffer[pixel], token);
+            }
+        }
+    }
+}
+
+__global__ void barycentricFromImgcoordGPU(float* V, int* F, int* findices, uint64_t* zbuffer, int width, int height, int num_vertices, int num_faces,
+    float* barycentric_map)
+{
+    int pix = blockIdx.x * blockDim.x + threadIdx.x;
+    if (pix >= width * height)
+        return;
+    int64_t f = (int64_t)(zbuffer[pix] % MAXINT); // signed, so the f >= 0 guard below is meaningful
+    if (f == (MAXINT - 1)) {
+        findices[pix] = 0;
+        barycentric_map[pix * 3] = 0;
+        barycentric_map[pix * 3 + 1] = 0;
+        barycentric_map[pix * 3 + 2] = 0;
+        return;
+    }
+    findices[pix] = f;
+    f -= 1;
+    float barycentric[3] = {0, 0, 0};
+    if (f >= 0) {
+        float vt[2] = {float(pix % width) + 0.5f, float(pix / width) + 0.5f};
+        float* vt0_ptr = V + (F[f * 3] * 4);
+        float* vt1_ptr = V + (F[f * 3 + 1] * 4);
+        float* vt2_ptr = V + (F[f * 3 + 2] * 4);
+
+        float vt0[2] = {(vt0_ptr[0] / vt0_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt0_ptr[1] / vt0_ptr[3]) * (height - 1) + 0.5f};
+        float vt1[2] = {(vt1_ptr[0] / vt1_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt1_ptr[1] / vt1_ptr[3]) * (height - 1) + 0.5f};
+        float vt2[2] = {(vt2_ptr[0] / vt2_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt2_ptr[1] / vt2_ptr[3]) * (height - 1) + 0.5f};
+
+        calculateBarycentricCoordinate(vt0, vt1, vt2, vt, barycentric);
+
+        barycentric[0] = barycentric[0] / vt0_ptr[3];
+        barycentric[1] = barycentric[1] / vt1_ptr[3];
+        barycentric[2] = barycentric[2] / vt2_ptr[3];
+        float w = 1.0f / (barycentric[0] + barycentric[1] + barycentric[2]);
+        barycentric[0] *= w;
+        barycentric[1] *= w;
+        barycentric[2] *= w;
+    }
+    barycentric_map[pix * 3] = barycentric[0];
+    barycentric_map[pix * 3 + 1] = barycentric[1];
+    barycentric_map[pix * 3 + 2] = barycentric[2];
+}
+
+__global__ void rasterizeImagecoordsKernelGPU(float* V, int* F, float* d, uint64_t* zbuffer, float occlusion_trunc, int width, int height, int num_vertices, int num_faces)
+{
+    int f = blockIdx.x * blockDim.x + threadIdx.x;
+    if (f >= num_faces)
+        return;
+
+    float* vt0_ptr = V + (F[f * 3] * 4);
+    float* vt1_ptr = V + (F[f * 3 + 1] * 4);
+    float* vt2_ptr = V + (F[f * 3 + 2] * 4);
+
+    float vt0[3] = {(vt0_ptr[0] / vt0_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt0_ptr[1] / vt0_ptr[3]) * (height - 1) + 0.5f, vt0_ptr[2] / vt0_ptr[3] * 0.49999f + 0.5f};
+    float vt1[3] = {(vt1_ptr[0] / vt1_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt1_ptr[1] / vt1_ptr[3]) * (height - 1) + 0.5f, vt1_ptr[2] / vt1_ptr[3] * 0.49999f + 0.5f};
+    float vt2[3] = {(vt2_ptr[0] / vt2_ptr[3] * 0.5f + 0.5f) * (width - 1) + 0.5f, (0.5f + 0.5f * vt2_ptr[1] / vt2_ptr[3]) * (height - 1) + 0.5f, vt2_ptr[2] / vt2_ptr[3] * 0.49999f + 0.5f};
+
+    rasterizeTriangleGPU(f, vt0, vt1, vt2, width, height, zbuffer, d, occlusion_trunc);
+}
+
+std::vector<torch::Tensor> rasterize_image_gpu(torch::Tensor V, torch::Tensor F, torch::Tensor D,
+    int width, int height, float occlusion_truncation, int use_depth_prior)
+{
+    int device_id = V.get_device();
+    cudaSetDevice(device_id);
+    int num_faces = F.size(0);
+    int num_vertices = V.size(0);
+    auto options = torch::TensorOptions().dtype(torch::kInt32).device(torch::kCUDA, device_id).requires_grad(false);
+    auto INT64_options = torch::TensorOptions().dtype(torch::kInt64).device(torch::kCUDA, device_id).requires_grad(false);
+    auto findices = torch::zeros({height, width}, options);
+    uint64_t maxint = (uint64_t)MAXINT * (uint64_t)MAXINT + (MAXINT - 1);
+    auto z_min = torch::ones({height, width}, INT64_options) * (int64_t)maxint;
+
+    if (!use_depth_prior) {
+        rasterizeImagecoordsKernelGPU<<<(num_faces + 255) / 256, 256, 0, at::cuda::getCurrentCUDAStream()>>>(V.data_ptr<float>(), F.data_ptr<int>(), 0,
+            (uint64_t*)z_min.data_ptr(), occlusion_truncation, width, height, num_vertices, num_faces);
+    } else {
+        rasterizeImagecoordsKernelGPU<<<(num_faces + 255) / 256, 256, 0, at::cuda::getCurrentCUDAStream()>>>(V.data_ptr<float>(), F.data_ptr<int>(), D.data_ptr<float>(),
+            (uint64_t*)z_min.data_ptr(), occlusion_truncation, width, height, num_vertices, num_faces);
+    }
+
+    auto float_options = torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCUDA, device_id).requires_grad(false);
+    auto barycentric = torch::zeros({height, width, 3}, float_options);
+    barycentricFromImgcoordGPU<<<(width * height + 255) / 256, 256, 0, at::cuda::getCurrentCUDAStream()>>>(V.data_ptr<float>(), F.data_ptr<int>(),
+        findices.data_ptr<int>(), (uint64_t*)z_min.data_ptr(), width, height, num_vertices, num_faces, barycentric.data_ptr<float>());
+
+    return {findices, barycentric};
+}
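// --- Editorial note, not part of the patch -----------------------------------
// The empty-pixel sentinel is maxint = MAXINT*MAXINT + (MAXINT - 1). Decoding
// takes token % MAXINT: a real token z*MAXINT + (idx + 1) yields a 1-based
// face id, while the sentinel yields MAXINT - 1, which both
// barycentricFromImgcoord variants treat as "no face" (findices = 0 and zero
// barycentrics). This is why findices uses 1-based face indices.
// -----------------------------------------------------------------------------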