# kinectToPly.py

#!/usr/bin/env python
import math

from optparse import OptionParser


def getColors(rgb):
    '''Return a (num pixels, 3) float32 matrix where each row is the
    (r, g, b) color of one pixel, listed in row-major image order.

    The input array is left untouched; the result is a new array.

    * rgb -- The RGB array from the kinect, shaped (rows, columns, 3)

    Raises ValueError if rgb is not three-dimensional or if its last
    axis does not hold exactly three channels.

    '''
    # shape[0] counts image rows and shape[1] counts image columns
    rows, columns, _ = rgb.shape

    # Convert the colors to a float array (astype always copies)
    pixels = rgb.astype(numpy.float32)

    # reshape (unlike the in-place ndarray.resize) follows the logical
    # element order for any memory layout and refuses a size mismatch
    # instead of silently truncating or zero-padding the data
    return pixels.reshape((rows * columns, 3))


def getDepthMatrix():
    '''Build the 1x2 row matrix [slope, offset] applied to kinect depth data.

    Multiplying it with a 2xN stack of (raw depth, 1) columns evaluates
    slope * raw + offset for every sample at once.

    '''
    slope = -0.0030711016
    offset = 3.3309495161
    coefficients = [[slope, offset]]

    return numpy.array(coefficients)

def getXyzMatrix():
    '''Build the 4x4 matrix that maps each point in the kinect image
    towards world coordinates.

    Applied to a column (x, y, d, 1) it yields
    ((x - cx) / fx, (y - cy) / fy, d, 0).  The last row is all zeros, so
    the fourth output component carries no information.

    '''
    # Presumably depth-camera calibration values (focal lengths and image
    # center, in pixels) -- confirm against the calibration source.
    invFx = 1.0 / 5.9421434211923247e+02
    invFy = 1.0 / 5.9104053696870778e+02
    centerX = 3.3930780975300314e+02
    centerY = 2.4273913761751615e+02

    # Start from zeros and fill in only the non-zero entries
    matrix = numpy.zeros((4, 4))
    matrix[0, 0] = invFx
    matrix[0, 3] = -(centerX * invFx)
    matrix[1, 1] = invFy
    matrix[1, 3] = -(centerY * invFy)
    matrix[2, 2] = 1

    return matrix


def allDepthToWorld(depth, RxT):
    '''Convert the kinect depth data to world coordinates and apply the
    given transformation to rotate and translate the point cloud to the
    kinect's current position and orientation.

    * depth -- The kinect depth data; a 2-D (rows, columns) array sets the
               pixel grid size, a flat array is taken to be a 480x640 frame
    * RxT -- The 4x4 rotation and translation matrix (only its first three
             output rows are used)

    Returns a (num pixels, 3) array of (X, Y, Z) rows in row-major pixel
    order.  Z is negated, and samples whose raw depth is >= 2047 get a
    depth of zero before the transformation is applied.

    '''
    depth = numpy.asarray(depth)

    # Size the pixel grid from the data instead of hard-coding the frame
    if depth.ndim >= 2:
        rows, columns = depth.shape[:2]
    else:
        # A flat array carries no layout; assume the kinect's 480x640 frame
        rows, columns = 480, 640

    # x is the row index and y the column index of every pixel
    # NOTE(review): getXyzMatrix pairs x with cx_d (~339) and y with cy_d
    # (~243), which looks swapped for a 480-row by 640-column frame --
    # behavior is left unchanged here; confirm the intended axis order.
    x, y = numpy.mgrid[:rows, :columns]
    x = x.flatten()
    y = y.flatten()
    ones = numpy.ones_like(x)  # homogeneous coordinate for every pixel
    raw = depth.flatten()

    # Threshold the depth data
    overIndices = raw >= 2047

    # First, convert the depth values: D = 1 / (dm * raw + do)
    D = 1.0 / numpy.dot(getDepthMatrix(), numpy.vstack((raw, ones)))[0]

    # Set depth of all indices over the limit to zero
    D[overIndices] = 0.0

    # Build a 4xN matrix of the x, y, d, w data
    C = numpy.vstack((x, y, D, ones))
    X, Y, D, _ = numpy.dot(getXyzMatrix(), C)
    X *= D
    Y *= D

    # Translate and rotate the coordinates according to the
    # kinect's position and orientation
    C = numpy.vstack((X, Y, D, ones))
    X, Y, D, _ = numpy.dot(RxT, C)

    D *= -1  # Invert depth for better viewing in MeshLab

    return numpy.vstack((X, Y, D)).transpose()


class Ply:
    '''The Ply class provides the ability to write a point cloud represented
    by two arrays: an array of points (num points, 3), and an array of colors
    (num points, 3) to an ASCII PLY file.

    '''

    def __init__(self, points, colors):
        '''
        * points -- The matrix of points (num points, 3)
        * colors -- The matrix of colors (num points, 3)

        '''
        self.__points = points
        self.__colors = colors

    def write(self, filename):
        '''Write the point cloud data to a PLY file of the given name.

        An existing file of that name is overwritten.

        * filename -- The PLY file

        '''
        lines = self.__getLinesForHeader()

        # The context manager closes the file even if writing the points
        # raises (e.g. when points and colors differ in length)
        with open(filename, "w") as fd:
            # Write the headers
            fd.write("".join("%s\n" % line for line in lines))

            # Write the points
            self.__writePoints(fd, self.__points, self.__colors)

    def __getLinesForHeader(self):
        '''Get the list of lines for the PLY header.

        The vertex count is taken from the number of stored points.

        '''
        lines = [
            "ply",
            "format ascii 1.0",
            "comment generated by: kinectToPly",
            "element vertex %s" % len(self.__points),
            "property float x",
            "property float y",
            "property float z",
            "property uchar red",
            "property uchar green",
            "property uchar blue",
            "end_header",
            ]

        return lines

    def __writePoints(self, fd, points, colors):
        '''Write the point cloud points to a file, one "x y z r g b"
        line per point.

        * fd -- The open, writable file object
        * points -- The matrix of points (num points, 3)
        * colors -- The matrix of colors (num points, 3)

        '''
        # Stack the two arrays together so each row is x, y, z, r, g, b
        stacked = numpy.column_stack((points, colors))

        # Write the array to the file.  fmt is a multi-format string that
        # formats a whole row, so savetxt needs (and would ignore) no
        # delimiter; rows are separated by its default newline.
        numpy.savetxt(
            fd,
            stacked,
            fmt="%f %f %f %d %d %d")


if __name__ == '__main__':
    # Script entry point: capture one depth + video frame from the kinect
    # and write it as a colored point cloud to the given PLY file.
    # Exit codes: 1 for a usage error, 2 when the kinect cannot be read.

    # Handle command line arguments
    usage = "Syntax: %prog [output PLY filename] (options)"
    parser = OptionParser(usage)
    parser.add_option(
        "--bilateral", dest="bilateral", default=False, action="store_true",
        help="apply a bilateral filter to the depth data")
    parser.add_option(
        "--x", dest="x", default=0.0, type="float",
        help="x position (meters)")
    parser.add_option(
        "--y", dest="y", default=0.0, type="float",
        help="y position (meters)")
    parser.add_option(
        "--theta", dest="theta", default=0.0, type="float",
        help="rotation value (degrees)")

    (options, args) = parser.parse_args()

    # Make sure the required arguments are given
    if len(args) != 1:
        parser.print_help()
        # SystemExit is what exit() raises, without relying on the
        # site-provided exit helper being installed
        raise SystemExit(1)

    # Delay imports until after arguments/options have been validated.
    # These must stay at module scope: the functions and the Ply class
    # above look up `numpy` as a module-level name.
    import numpy
    import freenect

    # Grab command line arguments
    outputFilename = args[0]
    x, y, theta = options.x, options.y, math.radians(options.theta)

    # Mesh lab axes:
    #    X is vertical
    #    Y is right
    #    Z is into the screen

    # Kinect's rotation is around the X axis for the point cloud
    RxT = numpy.array([
            [1, 0, 0, 0],  # Kinect's z pos
            [0, math.cos(theta), -math.sin(theta), -y],  # Kinect's y pos
            [0, math.sin(theta), math.cos(theta), x],  # Kinect's x pos
            [0, 0, 0, 0],
            ])

    # Grab kinect data
    try:
        (depth, _) = freenect.sync_get_depth()
        (rgb, _) = freenect.sync_get_video()
    except Exception:
        # Handle a missing kinect: any failure here, including a failed
        # tuple unpack, is reported the same way.  Catching Exception
        # rather than everything lets Ctrl-C and SystemExit propagate.
        print("Error: Unable to retrieve kinect data")
        raise SystemExit(2)

    # Convert the RGB image into a matrix of pixel values
    colors = getColors(rgb)

    # Apply a bilateral filter to the depth image
    if options.bilateral:
        import cv2  # Only import when needed

        depth = numpy.array(depth, dtype=numpy.float32)
        depth = cv2.bilateralFilter(depth, 5, 150, 150)
        # The video frame is deliberately not filtered: the colors were
        # already extracted above and rgb is not read again, so filtering
        # it here (as was done previously) had no effect on the output.

    # Convert the depth data to world data
    data = allDepthToWorld(depth, RxT)

    # Output the point cloud to a PLY file
    ply = Ply(data, colors)
    ply.write(outputFilename)