Skip to content

Commit 891d858

Browse files
🐛 take 'dataset scale' into account when using orthographic camera
1 parent f956e61 commit 891d858

File tree

4 files changed

+34
-15
lines changed

4 files changed

+34
-15
lines changed

include/neural-graphics-primitives/common_device.cuh

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,8 @@ inline __host__ __device__ Ray pixel_to_ray(
264264
const ECameraMode camera_mode = ECameraMode::Perspective,
265265
const CameraDistortion& camera_distortion = {},
266266
const float* __restrict__ distortion_data = nullptr,
267-
const Eigen::Vector2i distortion_resolution = Eigen::Vector2i::Zero()
267+
const Eigen::Vector2i distortion_resolution = Eigen::Vector2i::Zero(),
268+
const float dataset_scale = 1.f
268269
) {
269270
Eigen::Vector2f offset = ld_random_pixel_offset(snap_to_pixel_centers ? 0 : spp);
270271
Eigen::Vector2f uv = (pixel.cast<float>() + offset).cwiseQuotient(resolution.cast<float>());
@@ -273,12 +274,16 @@ inline __host__ __device__ Ray pixel_to_ray(
273274
Eigen::Vector3f dir;
274275
Eigen::Vector3f head_pos;
275276
if(camera_mode == ECameraMode::Orthographic){
277+
// 'dataset_scale' argument is only required by the orthographic camera.
278+
// The focal length of Environment and Perspective cameras isn't affected by the change of dataset_scale,
279+
// because all rays originate from the same point
276280
dir = {0.f, 0.f, 1.f}; // Camera forward
277281
head_pos = {
278282
(uv.x() - screen_center.x()) * (float)resolution.x() / focal_length.x(),
279283
(uv.y() - screen_center.y()) * (float)resolution.y() / focal_length.y(),
280284
0.0f
281285
};
286+
head_pos *= dataset_scale;
282287
head_pos += shift;
283288
dir -= shift / parallax_shift.z(); // we could use focus_z here in the denominator. for now, we pack m_scale in here.
284289
}
@@ -342,7 +347,8 @@ inline __host__ __device__ Eigen::Vector2f pos_to_pixel(
342347
const Eigen::Vector2f& screen_center,
343348
const Eigen::Vector3f& parallax_shift,
344349
const ECameraMode camera_mode,
345-
const CameraDistortion& camera_distortion = {}
350+
const CameraDistortion& camera_distortion = {},
351+
const float dataset_scale = 1.f
346352
) {
347353
// We get 'pos' as an input. We have pos = origin + alpha*dir, with unknown alpha
348354
// tmp_dir = R^-1*(pos-t)
@@ -356,7 +362,8 @@ inline __host__ __device__ Eigen::Vector2f pos_to_pixel(
356362
// origin = R*(head_pos+shift) + t
357363
tmp_dir -= shift;
358364
const Eigen::Vector3f head_dir_minus_shift = Eigen::Vector3f(0.f, 0.f, 1.f) - shift/parallax_shift.z();
359-
const Eigen::Vector3f head_pos = tmp_dir - tmp_dir.z() * head_dir_minus_shift; // Gives head_pos.z=0 since head_dir_minus_shift.z=1
365+
Eigen::Vector3f head_pos = tmp_dir - tmp_dir.z() * head_dir_minus_shift; // Gives head_pos.z=0 since head_dir_minus_shift.z=1
366+
head_pos /= dataset_scale;
360367
return {
361368
head_pos.x() * focal_length.x() + screen_center.x() * resolution.x(),
362369
head_pos.y() * focal_length.y() + screen_center.y() * resolution.y(),
@@ -412,7 +419,8 @@ inline __host__ __device__ Eigen::Vector2f motion_vector_3d(
412419
const bool snap_to_pixel_centers,
413420
const float depth,
414421
const ECameraMode camera_mode,
415-
const CameraDistortion& camera_distortion = {}
422+
const CameraDistortion& camera_distortion = {},
423+
const float dataset_scale = 1.f
416424
) {
417425
Ray ray = pixel_to_ray(
418426
sample_index,
@@ -428,7 +436,8 @@ inline __host__ __device__ Eigen::Vector2f motion_vector_3d(
428436
camera_mode,
429437
camera_distortion,
430438
nullptr,
431-
Eigen::Vector2i::Zero()
439+
Eigen::Vector2i::Zero(),
440+
dataset_scale
432441
);
433442

434443
Eigen::Vector2f prev_pixel = pos_to_pixel(
@@ -439,7 +448,8 @@ inline __host__ __device__ Eigen::Vector2f motion_vector_3d(
439448
screen_center,
440449
parallax_shift,
441450
camera_mode,
442-
camera_distortion
451+
camera_distortion,
452+
dataset_scale
443453
);
444454

445455
return prev_pixel - (pixel.cast<float>() + ld_random_pixel_offset(sample_index));

include/neural-graphics-primitives/testbed.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,8 @@ class Testbed {
157157
float cone_angle_constant,
158158
ERenderMode render_mode,
159159
ECameraMode camera_mode,
160-
cudaStream_t stream
160+
cudaStream_t stream,
161+
float dataset_scale
161162
);
162163

163164
uint32_t trace(

src/testbed.cu

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2338,7 +2338,8 @@ __global__ void dlss_prep_kernel(
23382338
const Vector2f image_pos,
23392339
const Vector2f prev_image_pos,
23402340
const Vector2i image_resolution,
2341-
const ECameraMode camera_mode
2341+
const ECameraMode camera_mode,
2342+
const float dataset_scale = 1.f
23422343
) {
23432344
uint32_t x = threadIdx.x + blockDim.x * blockIdx.x;
23442345
uint32_t y = threadIdx.y + blockDim.y * blockIdx.y;
@@ -2377,7 +2378,8 @@ __global__ void dlss_prep_kernel(
23772378
snap_to_pixel_centers,
23782379
depth,
23792380
camera_mode,
2380-
camera_distortion
2381+
camera_distortion,
2382+
dataset_scale
23812383
);
23822384

23832385
surf2Dwrite(make_float2(mvec.x(), mvec.y()), mvec_surface, x_orig * sizeof(float2), y_orig);
@@ -2540,7 +2542,8 @@ void Testbed::render_frame(const Matrix<float, 3, 4>& camera_matrix0, const Matr
25402542
m_image.pos,
25412543
m_image.prev_pos,
25422544
m_image.resolution,
2543-
m_camera_mode
2545+
m_camera_mode,
2546+
m_nerf.training.dataset.scale
25442547
);
25452548

25462549
render_buffer.set_dlss_sharpening(m_dlss_sharpening);

src/testbed_nerf.cu

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1781,7 +1781,8 @@ __global__ void init_rays_with_payload_kernel_nerf(
17811781
const float* __restrict__ distortion_data,
17821782
const Vector2i distortion_resolution,
17831783
ERenderMode render_mode,
1784-
ECameraMode camera_mode
1784+
ECameraMode camera_mode,
1785+
float dataset_scale
17851786
) {
17861787
uint32_t x = threadIdx.x + blockDim.x * blockIdx.x;
17871788
uint32_t y = threadIdx.y + blockDim.y * blockIdx.y;
@@ -1815,7 +1816,8 @@ __global__ void init_rays_with_payload_kernel_nerf(
18151816
camera_mode,
18161817
camera_distortion,
18171818
distortion_data,
1818-
distortion_resolution
1819+
distortion_resolution,
1820+
dataset_scale
18191821
);
18201822

18211823
NerfPayload& payload = payloads[idx];
@@ -1963,7 +1965,8 @@ void Testbed::NerfTracer::init_rays_from_camera(
19631965
float cone_angle_constant,
19641966
ERenderMode render_mode,
19651967
ECameraMode camera_mode,
1966-
cudaStream_t stream
1968+
cudaStream_t stream,
1969+
float dataset_scale
19671970
) {
19681971
// Make sure we have enough memory reserved to render at the requested resolution
19691972
size_t n_pixels = (size_t)resolution.x() * resolution.y();
@@ -1994,7 +1997,8 @@ void Testbed::NerfTracer::init_rays_from_camera(
19941997
distortion_data,
19951998
distortion_resolution,
19961999
render_mode,
1997-
camera_mode
2000+
camera_mode,
2001+
dataset_scale
19982002
);
19992003

20002004
m_n_rays_initialized = resolution.x() * resolution.y();
@@ -2258,7 +2262,8 @@ void Testbed::render_nerf(CudaRenderBuffer& render_buffer, const Vector2i& max_r
22582262
m_nerf.cone_angle_constant,
22592263
render_mode,
22602264
m_camera_mode,
2261-
stream
2265+
stream,
2266+
m_nerf.training.dataset.scale
22622267
);
22632268

22642269
uint32_t n_hit;

0 commit comments

Comments
 (0)