🐛 Take 'dataset scale' into account when using the orthographic camera

ThomasParistech · ThomasParistech · commit 891d858a63b9 · 2022-06-23T14:59:24.000+02:00
diff --git a/include/neural-graphics-primitives/common_device.cuh b/include/neural-graphics-primitives/common_device.cuh
@@ -264,7 +264,8 @@ inline __host__ __device__ Ray pixel_to_ray(
 	const ECameraMode camera_mode = ECameraMode::Perspective,
 	const CameraDistortion& camera_distortion = {},
 	const float* __restrict__ distortion_data = nullptr,
-	const Eigen::Vector2i distortion_resolution = Eigen::Vector2i::Zero()
+	const Eigen::Vector2i distortion_resolution = Eigen::Vector2i::Zero(),
+	const float dataset_scale = 1.f
 ) {
 	Eigen::Vector2f offset = ld_random_pixel_offset(snap_to_pixel_centers ? 0 : spp);
 	Eigen::Vector2f uv = (pixel.cast<float>() + offset).cwiseQuotient(resolution.cast<float>());
@@ -273,12 +274,16 @@ inline __host__ __device__ Ray pixel_to_ray(
 	Eigen::Vector3f dir;
 	Eigen::Vector3f head_pos;
 	if(camera_mode == ECameraMode::Orthographic){
+		// The 'dataset_scale' argument is only needed by the orthographic camera.
+		// The focal length of the Environment and Perspective cameras is unaffected by a change of dataset_scale,
+		// because all of their rays originate from the same point.
 		dir = {0.f, 0.f, 1.f}; // Camera forward
 		head_pos = {
 			(uv.x() - screen_center.x()) * (float)resolution.x() / focal_length.x(),
 			(uv.y() - screen_center.y()) * (float)resolution.y() / focal_length.y(),
 			0.0f
 		};
+		head_pos *= dataset_scale;
 		head_pos += shift;
    		dir -= shift / parallax_shift.z(); // we could use focus_z here in the denominator. for now, we pack m_scale in here.
 	}
@@ -342,7 +347,8 @@ inline __host__ __device__ Eigen::Vector2f pos_to_pixel(
 	const Eigen::Vector2f& screen_center,
 	const Eigen::Vector3f& parallax_shift,
 	const ECameraMode camera_mode,
-	const CameraDistortion& camera_distortion = {}
+	const CameraDistortion& camera_distortion = {},
+	const float dataset_scale = 1.f
 ) {
 	// We get 'pos' as an input. We have pos = origin + alpha*dir, with unknown alpha
 	// tmp_dir = R^-1*(pos-t)
@@ -356,7 +362,8 @@ inline __host__ __device__ Eigen::Vector2f pos_to_pixel(
 		// origin = R*(head_pos+shift) + t
 		tmp_dir -= shift;
 		const Eigen::Vector3f head_dir_minus_shift = Eigen::Vector3f(0.f, 0.f, 1.f) - shift/parallax_shift.z();
-		const Eigen::Vector3f head_pos = tmp_dir - tmp_dir.z() * head_dir_minus_shift; // Gives head_pos.z=0 since head_dir_minus_shift.z=1
+		Eigen::Vector3f head_pos = tmp_dir - tmp_dir.z() * head_dir_minus_shift; // Gives head_pos.z=0 since head_dir_minus_shift.z=1
+		head_pos /= dataset_scale;
 		return {
 			head_pos.x() * focal_length.x() + screen_center.x() * resolution.x(),
 			head_pos.y() * focal_length.y() + screen_center.y() * resolution.y(),
@@ -412,7 +419,8 @@ inline __host__ __device__ Eigen::Vector2f motion_vector_3d(
 	const bool snap_to_pixel_centers,
 	const float depth,
 	const ECameraMode camera_mode,
-	const CameraDistortion& camera_distortion = {}
+	const CameraDistortion& camera_distortion = {},
+	const float dataset_scale = 1.f
 ) {
 	Ray ray = pixel_to_ray(
 		sample_index,
@@ -428,7 +436,8 @@ inline __host__ __device__ Eigen::Vector2f motion_vector_3d(
 		camera_mode,
 		camera_distortion,
 		nullptr,
-		Eigen::Vector2i::Zero()
+		Eigen::Vector2i::Zero(),
+		dataset_scale
 	);
 
 	Eigen::Vector2f prev_pixel = pos_to_pixel(
@@ -439,7 +448,8 @@ inline __host__ __device__ Eigen::Vector2f motion_vector_3d(
 		screen_center,
 		parallax_shift,
 		camera_mode,
-		camera_distortion
+		camera_distortion,
+		dataset_scale
 	);
 
 	return prev_pixel - (pixel.cast<float>() + ld_random_pixel_offset(sample_index));
diff --git a/include/neural-graphics-primitives/testbed.h b/include/neural-graphics-primitives/testbed.h
@@ -157,7 +157,8 @@ class Testbed {
 			float cone_angle_constant,
 			ERenderMode render_mode,
 			ECameraMode camera_mode,
-			cudaStream_t stream
+			cudaStream_t stream,
+			float dataset_scale
 		);
 
 		uint32_t trace(
diff --git a/src/testbed.cu b/src/testbed.cu
@@ -2338,7 +2338,8 @@ __global__ void dlss_prep_kernel(
 	const Vector2f image_pos,
 	const Vector2f prev_image_pos,
 	const Vector2i image_resolution,
-	const ECameraMode camera_mode
+	const ECameraMode camera_mode,
+	const float dataset_scale = 1.f
 ) {
 	uint32_t x = threadIdx.x + blockDim.x * blockIdx.x;
 	uint32_t y = threadIdx.y + blockDim.y * blockIdx.y;
@@ -2377,7 +2378,8 @@ __global__ void dlss_prep_kernel(
 		snap_to_pixel_centers,
 		depth,
 		camera_mode,
-		camera_distortion
+		camera_distortion,
+		dataset_scale
 	);
 
 	surf2Dwrite(make_float2(mvec.x(), mvec.y()), mvec_surface, x_orig * sizeof(float2), y_orig);
@@ -2540,7 +2542,8 @@ void Testbed::render_frame(const Matrix<float, 3, 4>& camera_matrix0, const Matr
 			m_image.pos,
 			m_image.prev_pos,
 			m_image.resolution,
-			m_camera_mode
+			m_camera_mode,
+			m_nerf.training.dataset.scale
 		);
 
 		render_buffer.set_dlss_sharpening(m_dlss_sharpening);
diff --git a/src/testbed_nerf.cu b/src/testbed_nerf.cu
@@ -1781,7 +1781,8 @@ __global__ void init_rays_with_payload_kernel_nerf(
 	const float* __restrict__ distortion_data,
 	const Vector2i distortion_resolution,
 	ERenderMode render_mode,
-	ECameraMode camera_mode
+	ECameraMode camera_mode,
+	float dataset_scale
 ) {
 	uint32_t x = threadIdx.x + blockDim.x * blockIdx.x;
 	uint32_t y = threadIdx.y + blockDim.y * blockIdx.y;
@@ -1815,7 +1816,8 @@ __global__ void init_rays_with_payload_kernel_nerf(
 		camera_mode,
 		camera_distortion,
 		distortion_data,
-		distortion_resolution
+		distortion_resolution,
+		dataset_scale
 	);
 
 	NerfPayload& payload = payloads[idx];
@@ -1963,7 +1965,8 @@ void Testbed::NerfTracer::init_rays_from_camera(
 	float cone_angle_constant,
 	ERenderMode render_mode,
 	ECameraMode camera_mode,
-	cudaStream_t stream
+	cudaStream_t stream,
+	float dataset_scale
 ) {
 	// Make sure we have enough memory reserved to render at the requested resolution
 	size_t n_pixels = (size_t)resolution.x() * resolution.y();
@@ -1994,7 +1997,8 @@ void Testbed::NerfTracer::init_rays_from_camera(
 		distortion_data,
 		distortion_resolution,
 		render_mode,
-		camera_mode
+		camera_mode,
+		dataset_scale
 	);
 
 	m_n_rays_initialized = resolution.x() * resolution.y();
@@ -2258,7 +2262,8 @@ void Testbed::render_nerf(CudaRenderBuffer& render_buffer, const Vector2i& max_r
 		m_nerf.cone_angle_constant,
 		render_mode,
 		m_camera_mode,
-		stream
+		stream,
+		m_nerf.training.dataset.scale
 	);
 
 	uint32_t n_hit;