Source code for sksurgerycalibration.algorithms.triangulate

#  -*- coding: utf-8 -*-

""" Functions for Triangulation using Harley method """

import cv2
# import time
import numpy as np
import sksurgerycore.transforms.matrix as stm


def _triangulate_point_using_svd(p1_array,
                                 p2_array,
                                 u1_array,
                                 u2_array,
                                 w1_const,
                                 w2_const):
    """
    Function for Internal Triangulate Point Using SVD

    (u1_array, p1_array) is the reference pair containing normalized image coordinates (x, y) and the corresponding camera matrix.
    (u2_array, p2_array) is the second pair.

    :param p1_array: [3x4] ndarray
    :param p2_array: [3x4] ndarray
    :param u1_array: [3x1] ndarray
    :param u2_array: [3x1] ndarray
    :param w1_const: constant value
    :param w2_const: constant value

    :return x_array:  [4x1] ndarray
    """

    # Build matrix A for homogeneous equation system Ax = 0
    # Assume X = (x,y,z,1), for Linear-LS method
    # Which turns it into a AX = B system, where A is 4x3, X is 3x1 and B is 4x1

    a_array = np.zeros((4, 3), dtype=np.double)
    a_array[0, 0] = (u1_array[0] * p1_array[2, 0] - p1_array[0, 0]) / w1_const
    a_array[0, 1] = (u1_array[0] * p1_array[2, 1] - p1_array[0, 1]) / w1_const
    a_array[0, 2] = (u1_array[0] * p1_array[2, 2] - p1_array[0, 2]) / w1_const
    a_array[1, 0] = (u1_array[1] * p1_array[2, 0] - p1_array[1, 0]) / w1_const
    a_array[1, 1] = (u1_array[1] * p1_array[2, 1] - p1_array[1, 1]) / w1_const
    a_array[1, 2] = (u1_array[1] * p1_array[2, 2] - p1_array[1, 2]) / w1_const
    a_array[2, 0] = (u2_array[0] * p2_array[2, 0] - p2_array[0, 0]) / w2_const
    a_array[2, 1] = (u2_array[0] * p2_array[2, 1] - p2_array[0, 1]) / w2_const
    a_array[2, 2] = (u2_array[0] * p2_array[2, 2] - p2_array[0, 2]) / w2_const
    a_array[3, 0] = (u2_array[1] * p2_array[2, 0] - p2_array[1, 0]) / w2_const
    a_array[3, 1] = (u2_array[1] * p2_array[2, 1] - p2_array[1, 1]) / w2_const
    a_array[3, 2] = (u2_array[1] * p2_array[2, 2] - p2_array[1, 2]) / w2_const

    b_array = np.zeros((4, 1), dtype=np.double)
    b_array[0] = -(u1_array[0] * p1_array[2, 3] - p1_array[0, 3]) / w1_const
    b_array[1] = -(u1_array[1] * p1_array[2, 3] - p1_array[1, 3]) / w1_const
    b_array[2] = -(u2_array[0] * p2_array[2, 3] - p2_array[0, 3]) / w2_const
    b_array[3] = -(u2_array[1] * p2_array[2, 3] - p2_array[1, 3]) / w2_const

    # start = time.time_ns()
    x_array = cv2.solve(a_array, b_array, flags=cv2.DECOMP_SVD)
    # x_array, _, _, _ = np.linalg.lstsq(a_array, b_array, rcond=-1) #Alternatively
    # print(f'{ time.time_ns() - start } nsecs')

    return x_array[1]  # for cv2.solve #x_array#for np.linalg.lstsq


def _iter_triangulate_point_w_svd(p1_array,
                                  p2_array,
                                  u1_array,
                                  u2_array):
    """
    Function for Iterative Triangulate Point Using SVD

    (u1_array, p1_array) is the reference pair containing normalized image coordinates (x, y) and the corresponding camera matrix.
    (u2_array, p2_array) is the second pair.

    :param p1_array: [3x4] ndarray
    :param p2_array: [3x4] ndarray
    :param u1_array: [3x1] ndarray
    :param u2_array: [3x1] ndarray

    :return result:
    """

    epsilon = 0.00000000001
    w1_const = 1
    w2_const = 1
    x_array = np.zeros((4, 1), dtype=np.double)
    result = np.zeros((3, 1), dtype=np.double)

    # Hartley suggests 10 iterations at most
    for dummy_idx in range(10):
        x_array_ = _triangulate_point_using_svd(p1_array, p2_array, u1_array, u2_array, w1_const, w2_const)
        x_array[0] = x_array_[0]
        x_array[1] = x_array_[1]
        x_array[2] = x_array_[2]
        x_array[3] = 1.0

        p2x1 = p1_array[2, :].dot(x_array)
        p2x2 = p2_array[2, :].dot(x_array)
        # p2x1 and p2x2 should not be zero to avoid RuntimeWarning: invalid value encountered in true_divide
        if (abs(w1_const - p2x1) <= epsilon and abs(w2_const - p2x2) <= epsilon):
            break

        w1_const = p2x1
        w2_const = p2x2

    result[0] = x_array[0]
    result[1] = x_array[1]
    result[2] = x_array[2]

    return result


[docs]def new_e2_array(e2_array, r2_array, left_to_right_trans_vector):
    """

    Function to create a new_e2_array which concatenate left_to_right_trans_vector into the last column of e2_array.
    Notes. new_e2_array() is used in triangulate_points_hartley() to avoid too many variables in one method (see R0914)

    :param e2_array: [4x4] narray
    :param r2_array: = left_to_right_rotation_matrix: [3x3] narray
    :param left_to_right_trans_vector: [3x1] narray

    :return e2_array: [4x4] narray
    """
    for row_idx in range(0, 3):
        for col_idx in range(0, 3):
            e2_array[row_idx, col_idx] = r2_array[row_idx, col_idx]
        e2_array[row_idx, 3] = left_to_right_trans_vector[row_idx]

    return e2_array


[docs]def l2r_to_p2d(p2d, l2r):
    """

    Function to convert l2r array to p2d array, which removes last row of l2r to create p2d.
    Notes. l2r_to_p2d() is used in triangulate_points_hartley() to avoid too many variables in one method (see R0914).

    :param p2d: [3x4] narray
    :param l2r: [4x4] narray

    :return p2d: [3x4] narray
    """

    for dummy_row_index in range(0, 3):
        for dummy_col_index in range(0, 4):
            p2d[dummy_row_index, dummy_col_index] = l2r[dummy_row_index, dummy_col_index]

    return p2d


[docs]def triangulate_points_hartley(input_undistorted_points,
                               left_camera_intrinsic_params,
                               right_camera_intrinsic_params,
                               left_to_right_rotation_matrix,
                               left_to_right_trans_vector
                               ):
    """
    Function to compute triangulation of points using Harley

    :param input_undistorted_points: [4x4] narray
    :param left_camera_intrinsic_params: [3x3] narray
    :param right_camera_intrinsic_params: [3x3] narray
    :param left_to_right_rotation_matrix: [3x3] narray
    :param left_to_right_trans_vector: [3x1] narray

    :return outputPoints: [4x3] narray

    References
    ----------
    * Hartley, Richard I., and Peter Sturm. "Triangulation." Computer vision and image understanding 68, no. 2 (1997): 146-157.
    * Prince, Simon JD. Computer vision: models, learning, and inference. Cambridge University Press, 2012.

    :return outputPoints:
    """
    number_of_points = input_undistorted_points.shape[0]
    output_points = np.zeros((number_of_points, 3), dtype=np.double)
    k1_array = left_camera_intrinsic_params
    k2_array = right_camera_intrinsic_params
    _r1_array = np.eye(3, dtype=np.double)  # (unused-variable)
    r2_array = left_to_right_rotation_matrix
    e1_array = np.eye(4, dtype=np.double)
    e2_array = np.eye(4, dtype=np.double)
    p1d = np.zeros((3, 4), dtype=np.double)
    p2d = np.zeros((3, 4), dtype=np.double)

    e2_array = new_e2_array(e2_array, r2_array, left_to_right_trans_vector)
    # Inverting intrinsic params to convert from pixels to normalised image coordinates.
    k1inv = np.linalg.inv(k1_array)
    k2inv = np.linalg.inv(k2_array)

    # Computing coordinates relative to left camera.
    e1inv = np.linalg.inv(e1_array)
    l2r = np.matmul(e2_array, e1inv)

    # The projection matrix, is just the extrinsic parameters, as our coordinates will be in a normalised camera space.
    # P1 should be identity, so that reconstructed coordinates are in Left Camera Space, to P2 should reflect
    # a right to left transform.
    # Prince, Simon JD. Computer vision: models, learning, and inference. Cambridge University Press, 2012.
    p1d[0, 0] = 1
    p1d[0, 1] = 0
    p1d[0, 2] = 0
    p1d[0, 3] = 0
    p1d[1, 0] = 0
    p1d[1, 1] = 1
    p1d[1, 2] = 0
    p1d[1, 3] = 0
    p1d[2, 0] = 0
    p1d[2, 1] = 0
    p1d[2, 2] = 1
    p1d[2, 3] = 0

    p2d = l2r_to_p2d(p2d, l2r)

    u1_array = np.zeros((3, 1), dtype=np.double)
    u2_array = np.zeros((3, 1), dtype=np.double)

    for dummy_index in range(0, number_of_points):
        u1_array[0, 0] = input_undistorted_points[dummy_index, 0]
        u1_array[1, 0] = input_undistorted_points[dummy_index, 1]
        u1_array[2, 0] = 1

        u2_array[0, 0] = input_undistorted_points[dummy_index, 2]
        u2_array[1, 0] = input_undistorted_points[dummy_index, 3]
        u2_array[2, 0] = 1

        # Converting to normalised image points
        u1t = np.matmul(k1inv, u1_array)
        u2t = np.matmul(k2inv, u2_array)

        # array shapes for input args _iter_triangulate_point_w_svd( [3, 4]; [3, 4]; [3, 1]; [3, 1] )
        reconstructed_point = _iter_triangulate_point_w_svd(p1d, p2d, u1t, u2t)

        output_points[dummy_index, 0] = reconstructed_point[0]
        output_points[dummy_index, 1] = reconstructed_point[1]
        output_points[dummy_index, 2] = reconstructed_point[2]

    return output_points


[docs]def triangulate_points_opencv(input_undistorted_points,
                              left_camera_intrinsic_params,
                              right_camera_intrinsic_params,
                              left_to_right_rotation_matrix,
                              left_to_right_trans_vector):
    """
    Function to compute triangulation of points using Harley with cv2.triangulatePoints

    :param input_undistorted_points: [4x4] narray

    :param left_camera_intrinsic_params: [3x3] narray
    :param right_camera_intrinsic_params: [3x3] narray
    :param left_to_right_rotation_matrix: [3x3] narray
    :param left_to_right_trans_vector: [3x1] narray

    :return output_points: [4x3] narray

    Other related variables:
        left_undistorted, right_undistorted: point image positions in 2 cameras
        left_undistorted[4x2], right_undistorted[4x2] from input_undistorted_points [4x4]

    References
    ----------
    Hartley, Richard I., and Peter Sturm. "Triangulation." Computer vision and image understanding 68, no. 2 (1997): 146-157.

    """

    l2r_mat = stm.construct_rigid_transformation(left_to_right_rotation_matrix, left_to_right_trans_vector)

    # The projection matrix, is just the extrinsic parameters, as our coordinates will be in a normalised camera space.
    # P1 should be identity, so that reconstructed coordinates are in Left Camera Space, to P2 should reflect
    # a right to left transform.
    # Prince, Simon JD. Computer vision: models, learning, and inference. Cambridge University Press, 2012.
    p1mat = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0]], dtype=np.double)

    p2mat = np.zeros((3, 4), dtype=np.double)
    p2mat = l2r_to_p2d(p2mat, l2r_mat)

    number_of_points = input_undistorted_points.shape[0]
    output_points = np.zeros((number_of_points, 3), dtype=np.double)

    # Inverting intrinsic params to convert from pixels to normalised image coordinates.
    k1inv = np.linalg.inv(left_camera_intrinsic_params)
    k2inv = np.linalg.inv(right_camera_intrinsic_params)

    u1_array = np.zeros((3, 1), dtype=np.double)
    u2_array = np.zeros((3, 1), dtype=np.double)

    for dummy_index in range(0, number_of_points):
        u1_array[0, 0] = input_undistorted_points[dummy_index, 0]
        u1_array[1, 0] = input_undistorted_points[dummy_index, 1]
        u1_array[2, 0] = 1

        u2_array[0, 0] = input_undistorted_points[dummy_index, 2]
        u2_array[1, 0] = input_undistorted_points[dummy_index, 3]
        u2_array[2, 0] = 1

        # Converting to normalised image points
        u1t = np.matmul(k1inv, u1_array)
        u2t = np.matmul(k2inv, u2_array)

        # array shapes for input args cv2.triangulatePoints( [3, 4]; [3, 4]; [2, 1]; [2, 1] )
        reconstructed_point = cv2.triangulatePoints(p1mat, p2mat, u1t[:2], u2t[:2])
        reconstructed_point /= reconstructed_point[3]  # Homogenize

        output_points[dummy_index, 0] = reconstructed_point[0]
        output_points[dummy_index, 1] = reconstructed_point[1]
        output_points[dummy_index, 2] = reconstructed_point[2]

    return output_points