Source code for pyetsimul.core.camera

"""Pinhole camera model for eye tracking simulation.

Implements camera projection, pan-tilt, and image capture for synthetic eye tracking experiments.
Supports both simple pinhole cameras and realistic cameras with distortion from OpenCV calibration.
"""

from dataclasses import dataclass, field
from typing import TYPE_CHECKING

import cv2
import numpy as np

from pyetsimul.log import info, table

from ..camera_noise import GlintNoiseConfig, apply_glint_noise
from ..optics.glint_size import compute_glint_diameter
from ..types import (
    CameraImage,
    CameraMatrix,
    Point2D,
    Position3D,
    ProjectionResult,
    RotationMatrix,
    TransformationMatrix,
)
from .default_configs import CameraDefaults
from .light import Light

if TYPE_CHECKING:
    from .eye import Eye


@dataclass
class Camera:
    """Pinhole camera model for eye tracking.

    Key components:
        - trans: Camera to world transformation matrix
        - rest_trans: Rest position transformation (for pan-tilt cameras)
        - camera_matrix: CameraMatrix with focal_length and resolution properties
        - dist_coeffs: OpenCV distortion coefficients (default: no distortion)
        - err: Random error amount (default: CameraDefaults.MEASUREMENT_ERROR)
        - err_type: Error distribution type ('gaussian' or 'uniform')
        - glint_noise_config: GlintNoiseConfig for corneal reflection detection noise (default: None)

    Usage:
        - Default pinhole: Camera()
        - Custom pinhole: c = Camera(); c.camera_matrix.focal_length = 1000
        - Realistic camera: Camera(camera_matrix=CameraMatrix(matrix), dist_coeffs=coeffs)
    """

    camera_matrix: CameraMatrix = field(default_factory=CameraMatrix)
    dist_coeffs: np.ndarray | None = None
    err: float = CameraDefaults.MEASUREMENT_ERROR
    err_type: str = "gaussian"
    glint_noise_config: GlintNoiseConfig | None = None
    name: str | None = None

    trans: TransformationMatrix = field(default_factory=TransformationMatrix.identity)
    rest_trans: TransformationMatrix = field(init=False)

    # Internal field to track where camera is pointing (set by point_at method)
    _pointing_at: Position3D | None = field(default=None, init=False)

    def __post_init__(self) -> None:
        """Record the rest pose and normalize the distortion coefficients."""
        # Store rest position for pan-tilt operations
        self.rest_trans = self.trans.copy()

        # A missing coefficient vector means "no distortion" (five zeros).
        if self.dist_coeffs is None:
            self.dist_coeffs = np.zeros(5)
        else:
            self.dist_coeffs = np.asarray(self.dist_coeffs, dtype=np.float64)

    @property
    def orientation(self) -> RotationMatrix:
        """Get/set the camera's orientation (3x3 rotation matrix)."""
        return self.trans.get_rotation()

    @orientation.setter
    def orientation(self, value: RotationMatrix) -> None:
        """Set the camera's orientation matrix.

        The value is written into the upper-left 3x3 block of ``trans``.
        No check is performed here: the setter relies on the RotationMatrix
        type having validated the matrix when it was constructed.

        Args:
            value: 3x3 rotation matrix (expected to be right-handed, det = +1)
        """
        self.trans[:3, :3] = value

    @property
    def position(self) -> Position3D:
        """Get/set the camera's position (3D vector)."""
        return Position3D.from_array(self.trans[:3, 3])

    @position.setter
    def position(self, value: Position3D) -> None:
        # Keep only x, y, z; any trailing homogeneous component is dropped.
        self.trans[:3, 3] = np.array(value)[:3]

    @property
    def pointing_at(self) -> Position3D | None:
        """Get the position that the camera is currently pointing at.

        Returns None if point_at() has never been called.
        """
        return self._pointing_at

    def project(self, pos: Position3D | list[Position3D]) -> ProjectionResult:
        """Project points in space onto the camera's image plane.

        Transforms the points into camera coordinates, projects them with
        ``cv2.projectPoints`` (using ``camera_matrix`` and ``dist_coeffs``),
        shifts the result to a center-origin image coordinate system, adds
        random measurement error and finally masks points that fall outside
        the image or lie behind the camera.

        Args:
            pos: 3D positions to project. Can be:
                - Single Position3D object
                - list of Position3D objects

        Returns:
            ProjectionResult containing:
                - image_points: 2xn matrix of image coordinates (NaN for invalid points)
                - distances: length-n array of distances from camera along optical axis
                - valid_mask: length-n boolean array indicating valid points

        Raises:
            ValueError: If ``pos`` has an unsupported type or ``err_type`` is unknown.
        """
        # Convert input to a matrix with one homogeneous point per column
        if isinstance(pos, Position3D):
            pos_homogeneous = np.array(pos).reshape(-1, 1)
        elif isinstance(pos, list) and all(isinstance(p, Position3D) for p in pos):
            pos_homogeneous = np.column_stack([np.array(p) for p in pos])
        else:
            raise ValueError(f"Position must be Position3D or list of Position3D objects, got: {type(pos)}")

        # World -> camera coordinates (solve instead of forming the inverse)
        pos_camera = np.linalg.solve(self.trans, pos_homogeneous)
        n_points = pos_camera.shape[1]

        # The camera looks along -Z, so depth is the negated z coordinate
        dist = -pos_camera[2, :]

        # OpenCV expects points in front of the camera to have positive z
        points_3d_opencv = pos_camera[:3, :].T.copy()
        points_3d_opencv[:, 2] = -points_3d_opencv[:, 2]
        points_3d_opencv = points_3d_opencv.reshape(-1, 1, 3).astype(np.float64)

        # Zero rvec/tvec: the points are already expressed in the camera frame
        points_2d, _ = cv2.projectPoints(
            points_3d_opencv, np.zeros(3), np.zeros(3), self.camera_matrix.matrix, self.dist_coeffs
        )

        # Convert to center-origin coordinate system
        cx = self.camera_matrix.matrix[0, 2]
        cy = self.camera_matrix.matrix[1, 2]
        x = points_2d.reshape(-1, 2).T
        x[0, :] -= cx
        x[1, :] -= cy

        # Add error based on error type
        rng = np.random.default_rng()
        if self.err_type == "uniform":
            x += self.err * (2 * rng.random((2, n_points)) - 1)
        elif self.err_type == "gaussian":
            x += self.err * rng.normal(0, 1, (n_points, 2)).T
        else:
            raise ValueError(f"Unknown error type: {self.err_type}")

        # Check which points are within image bounds and in front of camera.
        # Small tolerance for floating-point boundary comparisons.
        resolution = self.camera_matrix.resolution
        boundary_tol = 1e-10
        half_w = resolution.x / 2
        half_h = resolution.y / 2
        condition = (
            (x[0, :] >= -half_w - boundary_tol)
            & (x[0, :] <= half_w + boundary_tol)
            & (x[1, :] >= -half_h - boundary_tol)
            & (x[1, :] <= half_h + boundary_tol)
            & (dist > 0)  # Points must be in front of camera
        )

        # Set out-of-bounds points to NaN
        x[:, ~condition] = np.nan

        return ProjectionResult(image_points=x, distances=dist, valid_mask=condition)

    def unproject(
        self, image_points: Point2D | list[Point2D], distance: float | np.ndarray
    ) -> Position3D | list[Position3D]:
        """Unproject points on the image plane back into 3D space.

        Undistorts and normalizes the image points with ``cv2.undistortPoints``,
        scales the normalized rays to the requested depth and transforms the
        result from camera to world coordinates.

        Args:
            image_points: 2D image points (center-origin coordinates). Can be:
                - Single Point2D object
                - list of Point2D objects
            distance: Distance from camera along optical axis. Either one value
                shared by all points (Python or NumPy scalar) or one value per point.

        Returns:
            Position3D object(s) in world coordinates. A single Point2D yields a
            single Position3D; a list yields a list.

        Raises:
            ValueError: If ``image_points`` has an unsupported type, or ``distance``
                cannot be broadcast to the number of points.
        """
        # Convert input to a 2xn array
        if isinstance(image_points, Point2D):
            x = np.array([[image_points.x], [image_points.y]])
            single_point = True
        elif isinstance(image_points, list) and all(isinstance(p, Point2D) for p in image_points):
            x = np.array([[p.x for p in image_points], [p.y for p in image_points]])
            single_point = False
        else:
            raise ValueError(f"Image points must be Point2D or list of Point2D objects, got: {type(image_points)}")

        n = x.shape[1]

        # Accept any scalar (including NumPy scalars and 0-d arrays) or a
        # per-point sequence; the result is always a flat length-n vector.
        d = np.broadcast_to(np.asarray(distance, dtype=np.float64).reshape(-1), (n,))

        # Convert from center-origin to top-left-origin coordinate system
        cx = self.camera_matrix.matrix[0, 2]
        cy = self.camera_matrix.matrix[1, 2]
        x_opencv = x.astype(np.float64)
        x_opencv[0, :] += cx
        x_opencv[1, :] += cy

        points_2d_normalized = cv2.undistortPoints(
            x_opencv.T.reshape(-1, 1, 2), self.camera_matrix.matrix, self.dist_coeffs
        )

        # Scale the normalized (z = 1) rays to the requested depth
        points_3d = points_2d_normalized.reshape(-1, 2) * d.reshape(-1, 1)

        camera_coords = np.column_stack([
            points_3d[:, 0],
            points_3d[:, 1],
            -d,  # Camera faces -Z axis
            np.ones(n),
        ]).T

        # Transform to world coordinates
        world_coords = self.trans @ camera_coords

        if single_point:
            return Position3D.from_array(world_coords[:, 0])
        return [Position3D.from_array(world_coords[:, i]) for i in range(n)]

    def pan_tilt(self, look_at: Position3D, world_frame: RotationMatrix | None = None) -> None:
        """Pan and tilt the camera towards a certain location.

        Starting from the rest pose (``rest_trans``), rotates the camera about
        its own Y axis (pan) and then its own X axis (tilt) so that its -Z axis
        points at ``look_at``. Only ``trans`` is modified; ``rest_trans`` is kept.

        Args:
            look_at: Point to look at in world coordinates
            world_frame: Optional world coordinate frame for camera orientation

        Raises:
            ValueError: If ``look_at`` coincides with the camera position, so no
                viewing direction can be derived.
        """
        # Homogeneous representation of the target point
        look_at_homogeneous = np.array(look_at)

        # Transform look_at point to camera's rest coordinate system
        axis_homogeneous = np.linalg.solve(self.rest_trans, look_at_homogeneous)

        # Extract and normalize the 3D direction vector
        direction = axis_homogeneous[:3]
        length = np.linalg.norm(direction)
        if length < 1e-12:
            raise ValueError("Cannot pan/tilt towards the camera's own position: viewing direction is undefined.")
        axis = direction / length

        # Pan angle. When the target lies on the camera's Y axis the pan is
        # arbitrary, so it is fixed to zero.
        if abs(axis[2]) < 1e-10 and abs(axis[0]) < 1e-10:
            alpha = 0.0
        else:
            alpha = np.pi / 2 - np.arctan2(-axis[2], axis[0])

        # Tilt angle. Clip guards against round-off pushing the component
        # marginally outside [-1, 1], which would make arcsin return NaN.
        beta = np.arcsin(np.clip(axis[1], -1.0, 1.0))

        # Construct pan matrix (rotation around Y axis)
        pan_matrix = np.array([
            [np.cos(alpha), 0, -np.sin(alpha), 0],
            [0, 1, 0, 0],
            [np.sin(alpha), 0, np.cos(alpha), 0],
            [0, 0, 0, 1],
        ])

        # Construct tilt matrix (rotation around X axis)
        tilt_matrix = np.array([
            [1, 0, 0, 0],
            [0, np.cos(beta), -np.sin(beta), 0],
            [0, np.sin(beta), np.cos(beta), 0],
            [0, 0, 0, 1],
        ])

        # Apply pan and tilt transformations
        self.trans = TransformationMatrix(self.rest_trans @ pan_matrix @ tilt_matrix)

        # If world_frame is specified, align camera with world coordinate frame
        if world_frame is not None:
            self._align_with_world_frame(world_frame)

    def _align_with_world_frame(self, world_frame: RotationMatrix) -> None:
        """Align camera orientation with world coordinate frame while preserving viewing direction."""
        # Current viewing direction: the camera looks along its -Z axis
        current_rotation = self.trans[:3, :3]
        viewing_direction = -current_rotation[:, 2]
        viewing_direction /= np.linalg.norm(viewing_direction)

        # The negated third column of world_frame serves as the "up" reference
        world_up = -world_frame[:, 2]

        # Camera x-axis: perpendicular to viewing direction and world up
        camera_x = np.cross(viewing_direction, world_up)

        # If viewing direction is parallel to world up, use world x-axis as fallback
        if np.linalg.norm(camera_x) < 1e-6:
            world_x = world_frame[:, 0]
            camera_x = np.cross(viewing_direction, world_x)

        camera_x /= np.linalg.norm(camera_x)

        # Camera y-axis: orthogonal to x-axis and viewing direction
        camera_y = np.cross(camera_x, viewing_direction)

        # Construct orthogonal rotation matrix: [x, y, -viewing] as columns
        camera_rotation = np.column_stack((camera_x, camera_y, -viewing_direction))

        # Update camera's orientation
        self.trans[:3, :3] = camera_rotation

    def point_at(self, target_point: Position3D, world_frame: RotationMatrix | None = None) -> None:
        """Point the camera towards a certain location.

        Unlike pan_tilt(), this changes the camera's rest position: the current
        pose becomes the rest pose, the camera is panned/tilted to the target,
        and the resulting pose is stored as the new rest pose.

        Args:
            target_point: Point to point at in world coordinates
            world_frame: Optional world coordinate frame for camera orientation
        """
        # Store the target point for later reference
        self._pointing_at = target_point

        # Store current transformation as rest position
        self.rest_trans = self.trans.copy()

        # Pan and tilt towards the target point
        self.pan_tilt(target_point, world_frame)

        # Update rest position to the new orientation
        self.rest_trans = self.trans.copy()

    def point_at_binocular(self, left_eye_pos: Position3D, right_eye_pos: Position3D) -> None:
        """Point camera at the midpoint between two eyes, rolled so the inter-eye axis is horizontal.

        Builds a world frame from the inter-eye vector and the viewing direction
        and delegates to point_at() with that frame.

        Args:
            left_eye_pos: Position of the left eye in world coordinates.
            right_eye_pos: Position of the right eye in world coordinates.

        Raises:
            ValueError: If the two eye positions coincide, or if the camera lies
                on the inter-eye line (roll is undefined).
        """
        midpoint = left_eye_pos + (right_eye_pos - left_eye_pos) * 0.5

        # Inter-eye vector defines the camera's horizontal axis
        inter_eye = np.array(right_eye_pos)[:3] - np.array(left_eye_pos)[:3]
        inter_eye_norm = np.linalg.norm(inter_eye)
        if inter_eye_norm < 1e-10:
            raise ValueError("Left and right eye positions coincide; the inter-eye axis is undefined.")
        inter_eye = inter_eye / inter_eye_norm

        # Viewing direction: camera to midpoint
        view_dir = np.array(midpoint)[:3] - np.array(self.position)[:3]
        view_norm = np.linalg.norm(view_dir)
        # A camera sitting exactly on the midpoint is on the inter-eye line as
        # well; leave view_dir at zero so the check below reports it.
        if view_norm >= 1e-10:
            view_dir = view_dir / view_norm

        # Camera "up" is perpendicular to both inter-eye axis and viewing direction
        up = np.cross(inter_eye, view_dir)
        up_norm = np.linalg.norm(up)
        if up_norm < 1e-10:
            msg = "Camera lies on the inter-eye line — roll is undefined for point_at_binocular."
            raise ValueError(msg)
        up /= up_norm

        # Build world frame: inter-eye as x, derived forward as y, -up as z
        forward = np.cross(-up, inter_eye)
        forward /= np.linalg.norm(forward)
        world_frame = RotationMatrix(np.column_stack([inter_eye, forward, -up]))

        self.point_at(midpoint, world_frame)

    def take_image(
        self,
        eye: "Eye",
        lights: list[Light] | None = None,
        use_refraction: bool = True,
        center_method: str = "ellipse",
    ) -> CameraImage:
        """Compute the image of an eye seen by this camera.

        For every light the corneal reflection (CR) is located on the eye,
        projected into the image and optionally perturbed with glint noise.
        The pupil boundary and center are obtained from the eye itself.

        Args:
            eye: Eye object
            lights: list of light source objects (optional, if None no CRs are computed)
            use_refraction: Whether to use refraction model for pupil (default True)
            center_method: Method to use for pupil center detection (default "ellipse").
                Options: "ellipse", "center_of_mass"

        Returns:
            CameraImage object containing corneal reflections, pupil boundary,
            and pupil center. ``glint_sizes_px`` is None unless at least one
            visible glint came from a light with a physical diameter.
        """
        corneal_reflections: list[Point2D | None] = []
        glint_sizes_px: list[float | None] = []
        has_any_glint_size = False

        for light in lights if lights is not None else []:
            # Find 3D corneal reflection position
            cr_3d = eye.find_cr(light, self)
            if cr_3d is None:
                corneal_reflections.append(None)
                glint_sizes_px.append(None)
                continue

            # Project to camera image coordinates; NaN marks an invisible CR
            projection_result = self.project(cr_3d)
            if np.any(np.isnan(projection_result.image_points)):
                corneal_reflections.append(None)
                glint_sizes_px.append(None)
                continue

            cr_2d = Point2D(
                x=float(projection_result.image_points[0, 0]),
                y=float(projection_result.image_points[1, 0]),
            )

            # Add noise to glint position
            if self.glint_noise_config is not None:
                cr_2d = apply_glint_noise(cr_2d, self.glint_noise_config)
            corneal_reflections.append(cr_2d)

            # Compute glint size if the light has a physical diameter
            if light.diameter is None:
                glint_sizes_px.append(None)
                continue

            glint_diameter_3d = compute_glint_diameter(
                light.position, cr_3d, eye.cornea, eye.trans, light.diameter
            )
            distance_to_cr = float(projection_result.distances[0])
            # Pinhole scaling: image size = focal length * object size / depth
            glint_sizes_px.append(self.camera_matrix.focal_length * glint_diameter_3d / distance_to_cr)
            has_any_glint_size = True

        # Get pupil boundary and center
        pupil_boundary, pupil_center = eye.get_pupil_in_camera_image(
            self, use_refraction=use_refraction, center_method=center_method
        )

        return CameraImage(
            corneal_reflections=corneal_reflections,
            pupil_boundary=pupil_boundary,
            pupil_center=pupil_center,
            resolution=self.camera_matrix.resolution,
            glint_sizes_px=glint_sizes_px if has_any_glint_size else None,
        )

    def __str__(self) -> str:
        """Basic string representation of the camera."""
        res = self.camera_matrix.resolution
        pos = self.position
        return f"Camera(pos=({pos.x:.1f}, {pos.y:.1f}, {pos.z:.1f})mm, f={self.camera_matrix.focal_length:.0f}px, {res.x}x{res.y})"

    def pprint(self) -> None:
        """Print detailed camera parameters in a formatted table."""
        pos = self.position
        matrix = self.camera_matrix.matrix
        res = self.camera_matrix.resolution

        # Flatten so that (1, N) / (N, 1) arrays, as returned by OpenCV
        # calibration, are handled like a plain vector.
        coeffs = (
            np.asarray(self.dist_coeffs, dtype=np.float64).ravel()
            if self.dist_coeffs is not None
            else np.empty(0)
        )

        # OpenCV coefficient order is (k1, k2, p1, p2, k3, ...)
        if coeffs.size == 0:
            dist_info = "None (pinhole)"
        else:
            parts = [f"k1={coeffs[0]:.3f}"]
            if coeffs.size > 1:
                parts.append(f"k2={coeffs[1]:.3f}")
            if coeffs.size > 3:
                parts.append(f"p1={coeffs[2]:.3f}")
                parts.append(f"p2={coeffs[3]:.3f}")
            if coeffs.size > 4:
                parts.append(f"k3={coeffs[4]:.3f}")
            dist_info = ", ".join(parts)

        data = [
            ["Focal length (px)", f"{self.camera_matrix.focal_length:.1f}"],
            ["Resolution", f"{res.x} x {res.y}"],
            ["Principal point (px)", f"({matrix[0, 2]:.1f}, {matrix[1, 2]:.1f})"],
            ["Position (x,y,z) mm", f"({pos.x:.1f}, {pos.y:.1f}, {pos.z:.1f})"],
            ["Distortion coefficients", dist_info],
            ["Measurement error", f"{self.err:.4f}"],
            ["Error type", self.err_type],
            ["Glint noise", "Enabled" if self.glint_noise_config is not None else "Disabled"],
        ]

        headers = ["Parameter", "Value"]
        info("Camera Parameters:")
        table(data, headers=headers, tablefmt="grid")

    def serialize(self) -> dict:
        """Serialize to dictionary representation."""
        # Optional members are tested with "is not None", matching take_image(),
        # so the result does not depend on how those types define truthiness.
        pointing_at = self._pointing_at.serialize() if self._pointing_at is not None else None
        glint_noise = self.glint_noise_config.serialize() if self.glint_noise_config is not None else None
        dist_coeffs = self.dist_coeffs.tolist() if self.dist_coeffs is not None else None

        return {
            "position": self.position.serialize(),
            "orientation": self.orientation.tolist(),
            "focal_length": float(self.camera_matrix.focal_length),
            "resolution": self.camera_matrix.resolution.serialize(),
            "camera_matrix": self.camera_matrix.matrix.tolist(),
            "distortion_coefficients": dist_coeffs,
            "measurement_error": float(self.err),
            "error_type": self.err_type,
            "name": self.name,
            "pointing_at": pointing_at,
            "rest_transformation": self.rest_trans.tolist(),
            "glint_noise_config": glint_noise,
        }

    @classmethod
    def deserialize(cls, data: dict) -> "Camera":
        """Deserialize from dictionary representation.

        Args:
            data: Dictionary as produced by serialize(). The keys "name",
                "distortion_coefficients", "pointing_at" and "glint_noise_config"
                may be missing or None.

        Returns:
            Camera restored from ``data``.
        """
        # Create camera with basic parameters
        camera = cls(err=data["measurement_error"], err_type=data["error_type"], name=data.get("name"))

        # Restore position and orientation
        camera.position = Position3D.deserialize(data["position"])
        camera.orientation = RotationMatrix(np.array(data["orientation"]))

        # Restore camera matrix
        camera.camera_matrix = CameraMatrix(np.array(data["camera_matrix"]))

        # Restore distortion coefficients; serialize() may have written None,
        # in which case the zero vector set up by __post_init__ is kept.
        dist_coeffs = data.get("distortion_coefficients")
        if dist_coeffs is not None:
            camera.dist_coeffs = np.asarray(dist_coeffs, dtype=np.float64)

        # Restore rest transformation
        camera.rest_trans = TransformationMatrix(np.array(data["rest_transformation"]))

        # Restore pointing direction
        pointing_at = data.get("pointing_at")
        if pointing_at:
            camera._pointing_at = Position3D.deserialize(pointing_at)

        # Restore glint noise config
        glint_noise = data.get("glint_noise_config")
        if glint_noise:
            camera.glint_noise_config = GlintNoiseConfig.deserialize(glint_noise)

        return camera