Source code for pyetsimul.core.camera

"""Pinhole camera model for eye tracking simulation.

Implements camera projection, pan-tilt, and image capture for synthetic eye tracking experiments.
Supports both simple pinhole cameras and realistic cameras with distortion from OpenCV calibration.
"""

from dataclasses import dataclass, field
from typing import TYPE_CHECKING

import cv2
import numpy as np

from pyetsimul.log import info, table

from ..camera_noise import GlintNoiseConfig, apply_glint_noise
from ..optics.glint_size import compute_glint_diameter
from ..types import (
    CameraImage,
    CameraMatrix,
    Point2D,
    Position3D,
    ProjectionResult,
    RotationMatrix,
    TransformationMatrix,
)
from .default_configs import CameraDefaults
from .light import Light

if TYPE_CHECKING:
    from .eye import Eye



[docs]
@dataclass
class Camera:
    """Pinhole camera model for eye tracking.

    Key components:
    - trans: Camera to world transformation matrix (default: identity)
    - rest_trans: Rest position transformation (for pan-tilt cameras); not an
      init argument, it is copied from trans right after construction
    - camera_matrix: CameraMatrix with focal_length and resolution properties
    - dist_coeffs: OpenCV distortion coefficients (default: None, which is
      replaced by five zeros after construction, i.e. no distortion)
    - err: Random error amount (default: CameraDefaults.MEASUREMENT_ERROR)
    - err_type: Error distribution type ('gaussian' or 'uniform')
    - glint_noise_config: GlintNoiseConfig for corneal reflection detection noise (default: None)
    - name: Optional camera name (default: None)

    Usage:
    - Default pinhole: Camera()
    - Custom pinhole: c = Camera(); c.camera_matrix.focal_length = 1000
    - Realistic camera: Camera(camera_matrix=CameraMatrix(matrix), dist_coeffs=coeffs)
    """

    camera_matrix: CameraMatrix = field(default_factory=CameraMatrix)
    dist_coeffs: np.ndarray | None = None
    err: float = CameraDefaults.MEASUREMENT_ERROR
    err_type: str = "gaussian"
    glint_noise_config: GlintNoiseConfig | None = None
    name: str | None = None
    trans: TransformationMatrix = field(default_factory=TransformationMatrix.identity)
    # Excluded from __init__; assigned in __post_init__ as a copy of trans
    rest_trans: TransformationMatrix = field(init=False)

    # Internal field to track where camera is pointing (set by point_at method)
    _pointing_at: Position3D | None = field(default=None, init=False)

    def __post_init__(self) -> None:
        """Finish construction: normalise distortion coefficients and record the rest pose."""
        coeffs = self.dist_coeffs
        # A missing coefficient vector means "no distortion"; anything else is coerced to float64
        self.dist_coeffs = np.zeros(5) if coeffs is None else np.asarray(coeffs, dtype=np.float64)

        # The pose at construction time is the reference used by pan-tilt operations
        self.rest_trans = self.trans.copy()

    @property
    def orientation(self) -> RotationMatrix:
        """Rotation of the camera transform, as returned by trans.get_rotation(); assignable."""
        rotation = self.trans.get_rotation()
        return rotation

    @orientation.setter
    def orientation(self, value: RotationMatrix) -> None:
        """Overwrite the upper-left 3x3 block of the camera transform.

        Args:
            value: 3x3 rotation matrix. No check is performed here; validity
                (right-handed, determinant = +1) is expected to have been
                enforced when the RotationMatrix was constructed.

        """
        self.trans[:3, :3] = value

    @property
    def position(self) -> Position3D:
        """Translation column of the camera transform as a Position3D; assignable."""
        translation = self.trans[:3, 3]
        return Position3D.from_array(translation)

    @position.setter
    def position(self, value: Position3D) -> None:
        """Write the first three components of value into the transform's translation column."""
        xyz = np.array(value)[:3]  # Keep x, y, z; drop any homogeneous component
        self.trans[:3, 3] = xyz

    @property
    def pointing_at(self) -> Position3D | None:
        """Get the position that the camera is currently pointing at.

        This is the target stored by point_at() (or restored by deserialize());
        the property is read-only.

        Returns None if point_at() has never been called.
        """
        return self._pointing_at


[docs]
    def project(self, pos: Position3D | list[Position3D]) -> ProjectionResult:
        """Project world-space points onto this camera's image plane.

        Points are moved into camera coordinates, projected with
        cv2.projectPoints using the camera matrix and distortion coefficients,
        shifted to a centre-origin pixel frame, perturbed by the configured
        measurement error, and finally masked against the image bounds.

        Args:
            pos: 3D positions to project. Either a single Position3D or a
                list of Position3D objects.

        Returns:
            ProjectionResult containing:
            - image_points: 2xn matrix of image coordinates (NaN for invalid points)
            - distances: length-n array of distances from camera along optical axis
            - valid_mask: length-n boolean array, True for points inside the
              image bounds and in front of the camera

        Raises:
            ValueError: If pos is neither a Position3D nor a list of them, or
                if err_type is not 'uniform' or 'gaussian'.

        """
        # Assemble one homogeneous column per input point
        if isinstance(pos, Position3D):
            world_points = np.array(pos).reshape(-1, 1)
        elif isinstance(pos, list) and all(isinstance(p, Position3D) for p in pos):
            world_points = np.column_stack([np.array(p) for p in pos])
        else:
            raise ValueError(f"Position must be Position3D or list of Position3D objects, got: {type(pos)}")

        # World -> camera coordinates (solves trans @ p_cam = p_world)
        cam_points = np.linalg.solve(self.trans, world_points)
        n_points = cam_points.shape[1]

        # The camera looks along -Z, so depth along the optical axis is -z
        depth = -cam_points[2, :]

        # OpenCV expects the camera to look along +Z: mirror the Z component
        cv_points = cam_points[:3, :].T.copy()
        cv_points[:, 2] = -cv_points[:, 2]
        cv_points = cv_points.reshape(-1, 1, 3).astype(np.float64)

        intrinsics = self.camera_matrix.matrix
        pixels, _ = cv2.projectPoints(cv_points, np.zeros(3), np.zeros(3), intrinsics, self.dist_coeffs)

        # Re-express pixel coordinates relative to the principal point
        x = pixels.reshape(-1, 2).T
        x[0, :] -= intrinsics[0, 2]
        x[1, :] -= intrinsics[1, 2]

        # Measurement noise, scaled by err
        rng = np.random.default_rng()
        if self.err_type == "gaussian":
            noise = rng.normal(0, 1, (n_points, 2)).T
        elif self.err_type == "uniform":
            noise = 2 * rng.random((2, n_points)) - 1
        else:
            raise ValueError(f"Unknown error type: {self.err_type}")
        x += self.err * noise

        # A point is valid when it lies within half the resolution of the centre
        # (with a small floating-point tolerance) and is in front of the camera
        resolution = self.camera_matrix.resolution
        boundary_tol = 1e-10
        half_width = resolution.x / 2 + boundary_tol
        half_height = resolution.y / 2 + boundary_tol
        valid = (np.abs(x[0, :]) <= half_width) & (np.abs(x[1, :]) <= half_height) & (depth > 0)

        # Blank out everything that failed the check
        x[:, ~valid] = np.nan

        return ProjectionResult(image_points=x, distances=depth, valid_mask=valid)



[docs]
    def unproject(
        self, image_points: Point2D | list[Point2D], distance: float | np.ndarray
    ) -> Position3D | list[Position3D]:
        """Unprojects points on the image plane back into 3D space.

        Reconstructs 3D positions from 2D image points at specified distance.
        Image points are shifted from the centre-origin frame to OpenCV pixel
        coordinates, undistorted and normalised with cv2.undistortPoints,
        scaled by the distance, and transformed to world coordinates.

        Args:
            image_points: 2D image points. Can be:
                - Single Point2D object
                - list of Point2D objects
            distance: Distance from camera along optical axis. Either a scalar
                (Python or NumPy, applied to every point) or an array-like
                with one value per image point.

        Returns:
            Position3D object(s) in world coordinates: a single Position3D for
            a single Point2D input, otherwise a list.

        Raises:
            ValueError: If image_points has an unsupported type, or if distance
                is neither a scalar nor one value per image point.

        """
        # Convert input to numpy array format
        if isinstance(image_points, Point2D):
            # Single point
            x = np.array([[image_points.x], [image_points.y]])
            single_point = True
        elif isinstance(image_points, list) and all(isinstance(p, Point2D) for p in image_points):
            # list of points
            x = np.array([[p.x for p in image_points], [p.y for p in image_points]])
            single_point = False
        else:
            raise ValueError(f"Image points must be Point2D or list of Point2D objects, got: {type(image_points)}")

        n = x.shape[1]

        # Normalise distance to a flat float vector with one entry per point.
        # Broadcasting (rather than an isinstance check on int/float) also covers
        # NumPy scalars, 0-d arrays and column/row vectors.
        try:
            d = np.broadcast_to(np.asarray(distance, dtype=np.float64).reshape(-1), (n,))
        except ValueError as exc:
            raise ValueError(
                f"distance must be a scalar or contain {n} values, got shape {np.shape(distance)}"
            ) from exc

        # Convert from center-origin to top-left-origin coordinate system
        cx = self.camera_matrix.matrix[0, 2]
        cy = self.camera_matrix.matrix[1, 2]
        x_opencv = x.copy().astype(np.float64)
        x_opencv[0, :] += cx
        x_opencv[1, :] += cy

        points_2d_normalized = cv2.undistortPoints(
            x_opencv.T.reshape(-1, 1, 2).astype(np.float64), self.camera_matrix.matrix, self.dist_coeffs
        )

        # Normalised coordinates are x/z and y/z; scale by depth to recover x and y
        points_3d = points_2d_normalized.reshape(-1, 2) * d.reshape(-1, 1)

        camera_coords = np.column_stack([
            points_3d[:, 0],
            points_3d[:, 1],
            -d,  # Camera faces -Z axis
            np.ones(len(points_3d)),
        ]).T

        # Transform to world coordinates
        world_coords = self.trans @ camera_coords

        # Convert result back to Position3D objects
        if single_point:
            return Position3D.from_array(world_coords[:, 0])
        return [Position3D.from_array(world_coords[:, i]) for i in range(n)]



[docs]
    def pan_tilt(self, look_at: Position3D, world_frame: RotationMatrix | None = None) -> None:
        """Pans and tilts a camera towards a certain location.

        Orients camera to look directly at specified point in world coordinates.
        The new transform is the rest transform followed by a pan (about Y)
        and a tilt (about X); rest_trans itself is not modified.

        Args:
            look_at: Point to look at in world coordinates
            world_frame: Optional world coordinate frame for camera orientation

        Raises:
            ValueError: If look_at coincides with the camera's rest position,
                so that no viewing direction can be derived.

        """
        # Convert to homogeneous coordinates for transformation
        look_at_homogeneous = np.array(look_at)

        # Transform look_at point to camera's rest coordinate system
        axis_homogeneous = np.linalg.solve(self.rest_trans, look_at_homogeneous)

        # Extract and normalize the 3D direction vector; a zero-length direction
        # would otherwise turn the whole transform into NaNs
        direction = axis_homogeneous[:3]
        direction_norm = np.linalg.norm(direction)
        if direction_norm < 1e-12:
            msg = "Cannot pan/tilt towards the camera's own position: viewing direction is undefined."
            raise ValueError(msg)
        axis = direction / direction_norm

        # Calculate pan and tilt angles
        # Handle special case where both axis[2] and axis[0] are zero
        alpha = 0.0 if abs(axis[2]) < 1e-10 and abs(axis[0]) < 1e-10 else np.pi / 2 - np.arctan2(-axis[2], axis[0])
        # Clip guards arcsin against a component that rounds marginally outside [-1, 1]
        beta = np.arcsin(np.clip(axis[1], -1.0, 1.0))

        # Construct pan matrix (rotation around Y axis)
        pan_matrix = np.array([
            [np.cos(alpha), 0, -np.sin(alpha), 0],
            [0, 1, 0, 0],
            [np.sin(alpha), 0, np.cos(alpha), 0],
            [0, 0, 0, 1],
        ])

        # Construct tilt matrix (rotation around X axis)
        tilt_matrix = np.array([
            [1, 0, 0, 0],
            [0, np.cos(beta), -np.sin(beta), 0],
            [0, np.sin(beta), np.cos(beta), 0],
            [0, 0, 0, 1],
        ])

        # Apply pan and tilt transformations
        self.trans = TransformationMatrix(self.rest_trans @ pan_matrix @ tilt_matrix)

        # If world_frame is specified, align camera with world coordinate frame
        if world_frame is not None:
            self._align_with_world_frame(world_frame)


    def _align_with_world_frame(self, world_frame: RotationMatrix) -> None:
        """Rebuild the camera rotation from its viewing direction and a world frame.

        The viewing direction (camera -Z) is kept. The camera x-axis is taken
        perpendicular to it and to the negated third column of world_frame,
        and the y-axis completes the orthogonal triple. Only the rotation
        block of trans is overwritten.
        """
        # Camera looks along -Z, so the viewing direction is the negated third column
        rotation = self.trans[:3, :3]
        look_dir = -rotation[:, 2]
        look_dir = look_dir / np.linalg.norm(look_dir)

        # Reference "up" comes from the world frame's negated third column
        up_reference = -world_frame[:, 2]
        right = np.cross(look_dir, up_reference)

        # Looking straight along the up reference: fall back to the world x-axis
        if np.linalg.norm(right) < 1e-6:
            right = np.cross(look_dir, world_frame[:, 0])
        right = right / np.linalg.norm(right)

        # y-axis is orthogonal to both the x-axis and the viewing direction
        up = np.cross(right, look_dir)

        # Columns are [x, y, -viewing]
        self.trans[:3, :3] = np.column_stack((right, up, -look_dir))


[docs]
    def point_at(self, target_point: Position3D, world_frame: RotationMatrix | None = None) -> None:
        """Points camera towards a certain location.

        Changes camera's rest position to point at specified target.
        Updates both rest_trans and trans matrices accordingly.
        Differs from pan_tilt() by modifying the rest position.

        The target is also remembered and exposed through the pointing_at
        property. Any exception raised by pan_tilt() propagates to the caller.

        Args:
            target_point: Point to point at in world coordinates
            world_frame: Optional world coordinate frame for camera orientation

        """
        # Store the target point for later reference
        self._pointing_at = target_point

        # Store current transformation as rest position
        # (pan_tilt() rotates relative to rest_trans, so it must match the current pose first)
        self.rest_trans = self.trans.copy()

        # Pan and tilt towards the target point
        self.pan_tilt(target_point, world_frame)

        # Update rest position to the new orientation
        self.rest_trans = self.trans.copy()



[docs]
    def point_at_binocular(self, left_eye_pos: Position3D, right_eye_pos: Position3D) -> None:
        """Point camera at the midpoint between two eyes, rolled so the inter-eye axis is horizontal.

        Computes the correct camera roll from the inter-eye vector so that both eyes
        appear horizontally aligned in the camera image. Delegates to point_at().

        Args:
            left_eye_pos: Position of the left eye in world coordinates.
            right_eye_pos: Position of the right eye in world coordinates.

        Raises:
            ValueError: If the two eye positions coincide (no inter-eye axis), or
                if the camera lies on the inter-eye line, including exactly at
                the midpoint (roll is undefined).

        """
        midpoint = left_eye_pos + (right_eye_pos - left_eye_pos) * 0.5

        # Inter-eye vector defines the camera's horizontal axis.
        # Norms are checked before dividing: a zero norm would yield NaN vectors,
        # and NaN compares False against any threshold, bypassing later guards.
        inter_eye = np.array(right_eye_pos)[:3] - np.array(left_eye_pos)[:3]
        inter_eye_norm = np.linalg.norm(inter_eye)
        if inter_eye_norm < 1e-10:
            msg = "Left and right eye positions coincide — inter-eye axis is undefined for point_at_binocular."
            raise ValueError(msg)
        inter_eye = inter_eye / inter_eye_norm

        degenerate_msg = "Camera lies on the inter-eye line — roll is undefined for point_at_binocular."

        # Viewing direction: camera to midpoint
        view_dir = np.array(midpoint)[:3] - np.array(self.position)[:3]
        view_norm = np.linalg.norm(view_dir)
        if view_norm < 1e-10:
            # Camera sits exactly at the midpoint, which is on the inter-eye line
            raise ValueError(degenerate_msg)
        view_dir = view_dir / view_norm

        # Camera "up" is perpendicular to both inter-eye axis and viewing direction
        up = np.cross(inter_eye, view_dir)
        up_norm = np.linalg.norm(up)
        if up_norm < 1e-10:
            raise ValueError(degenerate_msg)
        up = up / up_norm

        # Build world frame: inter-eye as x, derived forward as y, -up as z
        forward = np.cross(-up, inter_eye)
        forward = forward / np.linalg.norm(forward)
        world_frame = RotationMatrix(np.column_stack([inter_eye, forward, -up]))

        self.point_at(midpoint, world_frame)



[docs]
    def take_image(
        self,
        eye: "Eye",
        lights: list[Light] | None = None,
        use_refraction: bool = True,
        center_method: str = "ellipse",
    ) -> CameraImage:
        """Compute the image of an eye as seen by this camera.

        For every light, the corneal reflection (CR) is located on the eye,
        projected into the image, optionally perturbed by glint noise, and,
        when the light has a physical diameter, given a glint size in pixels.
        The pupil boundary and centre are obtained from the eye itself.

        Args:
            eye: Eye object
            lights: list of light source objects (optional, if None no CRs are computed)
            use_refraction: Whether to use refraction model for pupil (default True)
            center_method: Method to use for pupil center detection (default "ellipse")
                          Options: "ellipse", "center_of_mass"

        Returns:
            CameraImage holding one corneal-reflection entry per light (None
            where no reflection was found or it projected outside the image),
            the pupil boundary and centre, the camera resolution, and the glint
            sizes in pixels (None when no light produced a glint size).

        """
        corneal_reflections: list[Point2D | None] = []
        glint_sizes_px: list[float | None] = []

        for light in lights if lights is not None else ():
            # 3D corneal reflection position, then its projection (if any)
            cr_3d = eye.find_cr(light, self)
            projection = None if cr_3d is None else self.project(cr_3d)

            # No reflection, or one that does not land inside the image
            if projection is None or np.any(np.isnan(projection.image_points)):
                corneal_reflections.append(None)
                glint_sizes_px.append(None)
                continue

            glint = Point2D(
                x=float(projection.image_points[0, 0]),
                y=float(projection.image_points[1, 0]),
            )
            # Perturb the glint position only when noise is configured
            if self.glint_noise_config is not None:
                glint = apply_glint_noise(glint, self.glint_noise_config)
            corneal_reflections.append(glint)

            # A glint size needs a light with a physical diameter
            if light.diameter is None:
                glint_sizes_px.append(None)
                continue

            glint_diameter_3d = compute_glint_diameter(light.position, cr_3d, eye.cornea, eye.trans, light.diameter)
            distance_to_cr = float(projection.distances[0])
            # Scale the 3D diameter to pixels: focal length times size over distance
            glint_sizes_px.append(self.camera_matrix.focal_length * glint_diameter_3d / distance_to_cr)

        # Pupil boundary and centre come from the eye model
        pupil_boundary, pupil_center = eye.get_pupil_in_camera_image(
            self, use_refraction=use_refraction, center_method=center_method
        )

        # Report glint sizes only if at least one was actually computed
        has_any_glint_size = any(size is not None for size in glint_sizes_px)

        return CameraImage(
            corneal_reflections=corneal_reflections,
            pupil_boundary=pupil_boundary,
            pupil_center=pupil_center,
            resolution=self.camera_matrix.resolution,
            glint_sizes_px=glint_sizes_px if has_any_glint_size else None,
        )


    def __str__(self) -> str:
        """One-line summary: position, focal length and resolution."""
        cam = self.camera_matrix
        where = self.position
        size = cam.resolution
        location = f"({where.x:.1f}, {where.y:.1f}, {where.z:.1f})mm"
        return f"Camera(pos={location}, f={cam.focal_length:.0f}px, {size.x}x{size.y})"


[docs]
    def pprint(self) -> None:
        """Print detailed camera parameters in a formatted table.

        Distortion coefficients are labelled in OpenCV's order
        (k1, k2, p1, p2, k3); coefficients beyond the first five are not shown.
        """
        pos = self.position
        matrix = self.camera_matrix.matrix
        res = self.camera_matrix.resolution

        # Prepare distortion coefficients display
        if self.dist_coeffs is None:
            dist_info = "None (pinhole)"
        else:
            # OpenCV stores the vector as (k1, k2, p1, p2, k3, ...): tangential
            # terms sit at indices 2 and 3, the third radial term at index 4.
            labels = ("k1", "k2", "p1", "p2", "k3")
            # ravel() tolerates row/column vectors as returned by calibration routines
            coeffs = np.ravel(self.dist_coeffs)
            dist_info = ", ".join(f"{label}={value:.3f}" for label, value in zip(labels, coeffs))
            if not dist_info:
                dist_info = "None (pinhole)"

        data = [
            ["Focal length (px)", f"{self.camera_matrix.focal_length:.1f}"],
            ["Resolution", f"{res.x} x {res.y}"],
            ["Principal point (px)", f"({matrix[0, 2]:.1f}, {matrix[1, 2]:.1f})"],
            ["Position (x,y,z) mm", f"({pos.x:.1f}, {pos.y:.1f}, {pos.z:.1f})"],
            ["Distortion coefficients", dist_info],
            ["Measurement error", f"{self.err:.4f}"],
            ["Error type", self.err_type],
            # Same test take_image() uses to decide whether noise is applied
            ["Glint noise", "Enabled" if self.glint_noise_config is not None else "Disabled"],
        ]

        headers = ["Parameter", "Value"]
        info("Camera Parameters:")
        table(data, headers=headers, tablefmt="grid")



[docs]
    def serialize(self) -> dict:
        """Serialize to dictionary representation.

        Returns:
            Dictionary with the camera pose (position, orientation, rest
            transformation), intrinsics (focal length, resolution, camera
            matrix, distortion coefficients), error settings, name, pointing
            target and glint noise configuration. Optional values that are
            unset are stored as None.

        """
        # Optional members are tested with "is not None" so that a present but
        # falsy object is still serialized.
        pointing_at = self._pointing_at.serialize() if self._pointing_at is not None else None
        glint_noise = self.glint_noise_config.serialize() if self.glint_noise_config is not None else None

        return {
            "position": self.position.serialize(),
            "orientation": self.orientation.tolist(),
            "focal_length": float(self.camera_matrix.focal_length),
            "resolution": self.camera_matrix.resolution.serialize(),
            "camera_matrix": self.camera_matrix.matrix.tolist(),
            "distortion_coefficients": self.dist_coeffs.tolist() if self.dist_coeffs is not None else None,
            "measurement_error": float(self.err),
            "error_type": self.err_type,
            "name": self.name,
            "pointing_at": pointing_at,
            "rest_transformation": self.rest_trans.tolist(),
            "glint_noise_config": glint_noise,
        }



[docs]
    @classmethod
    def deserialize(cls, data: dict) -> "Camera":
        """Deserialize from dictionary representation.

        Args:
            data: Dictionary in the layout produced by serialize(). The keys
                "name", "distortion_coefficients", "pointing_at" and
                "glint_noise_config" may be absent or None.

        Returns:
            Camera restored from data.

        Raises:
            KeyError: If a required key (pose, camera matrix, error settings,
                rest transformation) is missing.

        """
        # Create camera with basic parameters
        camera = cls(err=data["measurement_error"], err_type=data["error_type"], name=data.get("name"))

        # Restore position and orientation
        camera.position = Position3D.deserialize(data["position"])
        camera.orientation = RotationMatrix(np.array(data["orientation"]))

        # Restore camera matrix
        camera.camera_matrix = CameraMatrix(np.array(data["camera_matrix"]))

        # Restore distortion coefficients with the same normalisation as construction:
        # None means "no distortion" (five zeros), anything else becomes float64.
        dist_coeffs = data.get("distortion_coefficients")
        camera.dist_coeffs = np.zeros(5) if dist_coeffs is None else np.asarray(dist_coeffs, dtype=np.float64)

        # Restore rest transformation
        camera.rest_trans = TransformationMatrix(np.array(data["rest_transformation"]))

        # Restore pointing direction
        pointing_at = data.get("pointing_at")
        if pointing_at is not None:
            camera._pointing_at = Position3D.deserialize(pointing_at)

        # Restore glint noise config
        glint_noise_config = data.get("glint_noise_config")
        if glint_noise_config is not None:
            camera.glint_noise_config = GlintNoiseConfig.deserialize(glint_noise_config)

        return camera