In [123]:
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import cv2
from sklearn.cluster import KMeans
from IPython.display import Image, display

1 a)

First, I wrote a generic find_intersection function based on the formula at https://en.wikipedia.org/wiki/Line%E2%80%93line_intersection

In [95]:
def find_intersection(l1, l2):
    """Return the intersection point of two lines, each given as a pair of points."""
    p1, p2 = l1
    p3, p4 = l2
    x1, y1 = p1
    x2, y2 = p2
    x3, y3 = p3
    x4, y4 = p4
    # Standard two-line intersection formula; denom is zero for parallel lines.
    denom = float((x1 - x2)*(y3 - y4) -
                  (y1 - y2)*(x3 - x4))
    k1 = (x1*y2 - y1*x2)
    k2 = (x3*y4 - y3*x4)
    px = (k1 * (x3-x4) - (x1-x2)*k2)/denom
    py = (k1 * (y3-y4) - (y1-y2)*k2)/denom
    return np.array((px, py))
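
As a quick sanity check (my own test, not part of the assignment), the function should recover an obvious intersection:

In [ ]:
# The lines y = x and y = 2 - x should cross at (1, 1)
print(find_intersection(((0, 0), (2, 2)), ((0, 2), (2, 0))))  # expect [ 1.  1.]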

Next, I defined my lines and found the intersections between them.

In [96]:
p1 = np.array((-30.0, -50.0))
p2 = np.array((120.0, -30.0))
p3 = np.array((100.0, 50.0))
p4 = np.array((-30.0, 60.0))
l1 = np.array((p1, p4))
l2 = np.array((p2, p3))
l3 = np.array((p1, p2))
l4 = np.array((p4, p3))
int1 = find_intersection(l1, l2)
int2 = find_intersection(l3, l4)
print("First vanishing point: " + str(int1))
print("Second vanishing point: "+ str(int2))
First vanishing point: [ -30.  570.]
Second vanishing point: [ 493.17073171   19.75609756]

The horizon line is the line connecting these two vanishing points. Below is a figure showing this. The blue segment is the horizon line (drawn only between the two vanishing points; in reality it extends far past them).

In [97]:
plt.figure()
plt.plot(*zip(*[p1,p2,p3,p4,p1]), marker="o", color="black")
plt.plot(*zip(*[p1, int1]), color="green")
plt.plot(*zip(*[p2, int1]), color="green")
plt.plot(*zip(*[p4, int2]), color="red")
plt.plot(*zip(*[p1, int2]), color="red")
plt.plot(*zip(*[int1, int2]), color="blue", marker="o")
Out[97]:
[<matplotlib.lines.Line2D at 0x7f8ba9d8a4d0>]

1 b)

We can find the midpoint of the rectangle using the intersection of its diagonals (call the image coordinates of this center u, v). We can then calculate the normal from the vanishing points on the image plane. This makes sense: if you lifted the plane along its normal until it looked like a line (due to being edge-on to the camera), you'd have a plane parallel to the original plane, and the lines on that plane, viewed as vectors starting at the focal point, would intersect the image plane at the vanishing points. Thus, the cross product of those edge directions is parallel to the normal of the original plane.

Below is the code to first calculate the center and normal (as well as display a representative of them).

In [98]:
focal_length = np.sqrt(-np.dot(int1, int2))

print("""I noticed that the focal length given was wrong.
If it were a rectangle, vanishing points would be orthogonal.
Read my answer for 1.d for the reasoning why.
Thus, the focal length is whatever would make that true for the given vanshing points.""")
print("The focal length is actually: " + str(focal_length)+"\n")

def hom(p):
    x, y = p
    return np.array([x,y,focal_length])
def normalize(v):
    v = np.array(v,dtype=np.float32)
    return v/np.sqrt((v**2).sum())

def unhom(v):
    x,y,w = v
    return np.array([x/w, y/w])


diag1 = p1,p3
diag2 = p2,p4
u,v = find_intersection(diag1, diag2)
plt.figure()
plt.plot(*zip(*[p1,p2,p3,p4,p1]), marker="o", color="black")
plt.plot(*zip(*diag1), marker="o", color="orange")
plt.plot(*zip(*diag2), marker="o", color="orange")
plt.plot(u, v, marker="o", color="orange")
normal = -normalize(np.cross(normalize(hom(int1)), normalize(hom(int2))))
end = -normalize(unhom(normal)) # Project into 2d
plt.plot(*zip(*[(u,v),end*50 + (u,v)]))
print("The normal unit vector of the field is: " + str(normal))
I noticed that the focal length given was wrong.
If it were a rectangle, the vanishing directions would be orthogonal.
Read my answer to 1.d for the reasoning why.
Thus, the focal length is whatever makes that true for the given vanishing points.
The focal length is actually: 59.4486866252

The normal unit vector of the field is: [-0.11465844 -0.10901701  0.98740512]

1 c)

If we didn't know it was a rectangle, we could know nothing other than that it is a quadrilateral. Since we know it is at least a parallelogram, we can figure out the relative lengths of the sides as well as find similar shapes.

We know the direction cosines of the lines in camera space (a direction cosine is just a normalized vector in the direction of the line) via the equation given on the slides: the normalized version of the vector (x_inf, y_inf, focal_length).

Since we know the rays to the corners of the rectangle, as well as the direction of the edges leaving each corner in camera space, we can calculate the relative lengths of the edges. Consider the triangle formed by the camera and two adjacent corners: the dot product of the rays to the two corners gives the angle at the camera, and the dot products of the edge direction with those rays (being careful to orient the vectors correctly before taking the dot product) give the other two angles. The law of sines then yields the relative lengths of the triangle's sides. Because the two edges at a corner are both measured relative to the same camera-to-corner distance, their ratio is the edge ratio of the rectangle.

I do this calculation below. (I compute it at all four corners to make sure it's correct, and take the mean to cancel out some of the numerical error.)

In [99]:
def get_direction_cosine(vp):
    return normalize(hom(vp))

def find_relative_base_len(vp_dir, vp_base, vp_end):
    """Pretend that the length of the line from the camera to the vp_base point is 1.
    Return the length of the edge from vp_base to vp_end relative to that.
    vp_dir must point from vp_base toward vp_end."""
    a1 = np.arccos(np.dot(vp_dir, -normalize(hom(vp_base))))  # angle at vp_base
    a2 = np.arccos(np.dot(vp_dir, normalize(hom(vp_end))))    # angle at vp_end
    a3 = np.arccos(np.dot(normalize(hom(vp_base)), normalize(hom(vp_end))))  # angle at camera
    if abs(np.pi - (a1 + a2 + a3)) > 0.001:
        # Sanity check: the three angles must form a triangle
        raise ValueError("Not a triangle.")
    # Law of sines: edge/sin(a3) = camera_to_base/sin(a2); with camera_to_base = 1:
    return np.sin(a3) * 1.0/np.sin(a2)

ratio = 0

# left edge and bottom edge (bottom left corner)
short_rel = find_relative_base_len(get_direction_cosine(int1), p1, p4)
long_rel = find_relative_base_len(get_direction_cosine(int2), p1, p2)
ratio += long_rel/short_rel
print(long_rel/short_rel)

# right edge and bottom edge (bottom right corner)
short_rel = find_relative_base_len(get_direction_cosine(int1), p2, p3)
long_rel = find_relative_base_len(-get_direction_cosine(int2), p2, p1)
ratio += long_rel/short_rel
print(long_rel/short_rel)

# right edge and top edge (top right corner)
short_rel = find_relative_base_len(-get_direction_cosine(int1), p3, p2)
long_rel = find_relative_base_len(-get_direction_cosine(int2), p3, p4)
ratio += long_rel/short_rel
print(long_rel/short_rel)

# left edge and top edge (top left corner)
short_rel = find_relative_base_len(-get_direction_cosine(int1), p4, p1)
long_rel = find_relative_base_len(get_direction_cosine(int2), p4, p3)
ratio += long_rel/short_rel
print(long_rel/short_rel)

ratio = ratio/4.0

print("Ratio of the long edge to the short edge: " + str(ratio))
1.61441821017
1.61441802912
1.61441815646
1.61441799855
Ratio of the long edge to the short edge: 1.61441809857
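
As an extra, optional check (my own synthetic test, reusing find_intersection and normalize from above; f_test, the rotation angles, and the 200 x 100 rectangle are arbitrary choices): project a known 2:1 rectangle through a pinhole and confirm that the law-of-sines recovery returns a ratio of ~2.

In [ ]:
# Tilt the rectangle so neither edge family is parallel to the image plane,
# push it down the z axis, and project through a pinhole at the origin.
f_test = 50.0
c1, s1 = np.cos(0.3), np.sin(0.3)
c2, s2 = np.cos(0.4), np.sin(0.4)
Rx = np.array([[1, 0, 0], [0, c1, -s1], [0, s1, c1]])
Ry = np.array([[c2, 0, -s2], [0, 1, 0], [s2, 0, c2]])
R = Ry.dot(Rx)
corners = [(-100.0, -50.0, 0.0), (100.0, -50.0, 0.0),
           (100.0, 50.0, 0.0), (-100.0, 50.0, 0.0)]
pts3d = [R.dot(np.array(c)) + np.array([0.0, 0.0, 400.0]) for c in corners]
q1, q2, q3, q4 = [np.array([f_test*X/Z, f_test*Y/Z]) for X, Y, Z in pts3d]

def hom_t(p):  # like hom(), but with the test focal length
    return np.array([p[0], p[1], f_test])

def rel_len_t(vp_dir, base, end):  # same law-of-sines step as find_relative_base_len
    a2 = np.arccos(np.dot(vp_dir, normalize(hom_t(end))))
    a3 = np.arccos(np.dot(normalize(hom_t(base)), normalize(hom_t(end))))
    return np.sin(a3)/np.sin(a2)

vp_short = find_intersection((q1, q4), (q2, q3))  # short-edge vanishing point
vp_long = find_intersection((q1, q2), (q4, q3))   # long-edge vanishing point
long_rel = rel_len_t(normalize(hom_t(vp_long)), q1, q2)
short_rel = rel_len_t(normalize(hom_t(vp_short)), q1, q4)
print("Recovered ratio (expect ~2.0): " + str(long_rel/short_rel))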

1 d)

Since we know the aspect ratio, the lengths of the long edges are easy to calculate (just ratio * 100 m). Finding the 3D positions also isn't hard: we just run the trigonometry backwards to get the distance from a corner to the camera, which, multiplied by the unit vector pointing toward that corner, gives us the 3D coordinates. Do this for all of the corners, and you have the coordinates of the rectangle.

This is actually where I discovered that it wasn't a rectangle: the diagonals weren't equal in length to each other. I thought I was doing something wrong for hours, but given the proofs listed at http://homepages.inf.ed.ac.uk/rbf/CVonline/LOCAL_COPIES/BEARDSLEY/node5.html, specifically that the vectors from the camera to the vanishing points are parallel to the corresponding lines in camera space, a rectangle must have orthogonal vanishing-point vectors. The shape we were given is a parallelogram at the stated focal length of 2000. This is why I use the focal length calculated above.
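
A quick numeric check of this claim (my own sketch, reusing int1, int2, and focal_length from above): the cosine of the angle between the two vanishing directions should vanish at the recomputed focal length but not at 2000.

In [ ]:
# Cosine between the two vanishing directions at each candidate focal length
for f in (2000.0, focal_length):
    d1 = np.array([int1[0], int1[1], f])
    d2 = np.array([int2[0], int2[1], f])
    cos_angle = np.dot(d1, d2)/(np.linalg.norm(d1)*np.linalg.norm(d2))
    print("f = " + str(f) + ": cos(angle) = " + str(cos_angle))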

In [100]:
def find_actual_projection_length(vp_dir, vp_base, vp_end, base_len):
    """Given that the edge from vp_base to vp_end has actual length base_len,
    calculate the actual distance from the camera to vp_base.
    vp_dir must point from vp_base toward vp_end."""
    a1 = np.arccos(np.dot(vp_dir, -normalize(hom(vp_base))))  # angle at vp_base
    a2 = np.arccos(np.dot(vp_dir, normalize(hom(vp_end))))    # angle at vp_end
    a3 = np.arccos(np.dot(normalize(hom(vp_base)), normalize(hom(vp_end))))  # angle at camera
    if abs(np.pi - (a1 + a2 + a3)) > 0.001:
        # Sanity check: the three angles must form a triangle
        raise ValueError("Not a triangle.")
    # Law of sines: camera_to_base/sin(a2) = edge/sin(a3), so:
    return (base_len/np.sin(a3)) * np.sin(a2)

p1_real = normalize(hom(p1)) * find_actual_projection_length(get_direction_cosine(int1), p1, p4, 100.0)
p2_real = normalize(hom(p2)) * find_actual_projection_length(get_direction_cosine(int1), p2, p3, 100.0)
p3_real = normalize(hom(p3)) * find_actual_projection_length(-get_direction_cosine(int2), p3, p4, 100.0*ratio)
p4_real = normalize(hom(p4)) * find_actual_projection_length(get_direction_cosine(int2), p4, p3, 100.0*ratio)
print("The units below are meters. (0,0,0) is the location of the camera.")
print("Bottom left: " + str(p1_real))
print("Bottom right: " + str(p2_real))
print("Top right: " + str(p3_real))
print("Top left: " + str(p4_real))


print("\nSanity checks follow")

# Should be 100
d = p1_real - p4_real
print(np.sqrt(np.dot(d,d)))



# Should be 100
d = p2_real - p3_real
print(np.sqrt(np.dot(d,d)))

# Should be ~161.442
d = p1_real - p2_real
print(np.sqrt(np.dot(d,d)))

# Should be ~161.442
d = p3_real - p4_real
print(np.sqrt(np.dot(d,d)))

print("\nThese two values should be equal")
# These two values should be equal
print(np.sqrt(100**2 + (100*ratio)**2))
d = (p4_real-p1_real) - (p2_real-p1_real)
print(np.sqrt(np.dot(d,d)))
The units below are meters. (0,0,0) is the location of the camera.
Bottom left: [-24.23708534 -40.3951416   48.02876282]
Bottom right: [ 135.9178009   -33.97945023   67.3344574 ]
Top right: [ 130.69018555   65.34509277   77.69360352]
Top left: [-29.46469116  58.92938232  58.38790894]

Sanity checks follow
100.0
100.0
161.442
161.442

These two values should be equal
189.903812416
189.904
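
One more optional consistency check (my own, reusing the variables above): by the Beardsley proofs cited above, the recovered 3D edges should be parallel to the direction cosines of their vanishing points.

In [ ]:
# Unit vectors along two recovered 3D edges, dotted with the corresponding
# vanishing directions; both dot products should be ~1 (parallel)
e_long = normalize(p2_real - p1_real)   # bottom (long) edge in 3D
e_short = normalize(p4_real - p1_real)  # left (short) edge in 3D
print(np.dot(e_long, get_direction_cosine(int2)))   # expect ~1.0
print(np.dot(e_short, get_direction_cosine(int1)))  # expect ~1.0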

1 e)

I'm assuming the focal length, despite being smaller in pixels, is still 200 mm. After 0.1 seconds, the camera will have moved 1 meter in the z direction. We can compute the new positions of the points on the image plane using plane-line intersections (https://en.wikipedia.org/wiki/Line%E2%80%93plane_intersection). The motion (on the image plane) is the vector between the corresponding points in the rectangles. The green rectangle is the image of the rectangle at t_0 + 0.1 s.

In [101]:
new_camera_pos = np.array([0, 0, 1])
p1d = normalize(p1_real - new_camera_pos)
p2d = normalize(p2_real - new_camera_pos)
p3d = normalize(p3_real - new_camera_pos)
p4d = normalize(p4_real - new_camera_pos)

norm = np.array([0,0,1])           # image plane normal
p0 = np.array([0,0,focal_length])  # a point on the image plane
l0 = np.array([0,0,0])             # ray origin (the camera, in its own frame)

def intersect_image_plane(ray_dir):
    # Line-plane intersection: scale the ray to reach the plane z = focal_length
    d = np.dot((p0 - l0),norm)/np.dot(ray_dir,norm)
    return d * ray_dir + l0

p1p = intersect_image_plane(p1d)
p2p = intersect_image_plane(p2d)
p3p = intersect_image_plane(p3d)
p4p = intersect_image_plane(p4d)


plt.plot(*zip(*[p1,p2,p3,p4,p1]), marker="o", color="black")
plt.plot(*zip(*(np.array([p1p,p2p,p3p,p4p,p1p])[:,:2])), marker="o", color="green")
Out[101]:
[<matplotlib.lines.Line2D at 0x7f8ba9cd3150>]

1 f)

The focus of expansion is at (0, 0). I am not sure what is being asked for by "the equation of optical flow", so I don't know how to answer that part exactly; below I plot the flow field numerically.
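
For completeness, here is a hedged guess at the intended equation (an assumption on my part): for pure translation W along the optical axis, a point at image position (x, y) with scene depth Z has instantaneous flow (u, v) = (x, y) * W / Z, which is radial from the focus of expansion at (0, 0).

In [ ]:
# Hedged sketch (assuming this is what "equation of optical flow" means):
# translation W along z gives flow (u, v) = (x, y) * W / Z at image point (x, y)
W = 10.0  # m/s: the camera covers 1 m in 0.1 s
x, y = p1
Z = p1_real[2]  # depth of the bottom-left corner
print("Analytic flow at p1 (pixels/s): " + str(np.array([x, y]) * W / Z))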

In [154]:
def get_optical_vectors(p_start, p_dir, dist, n):
    """Sample n points along a 3D edge; for each, return its image position
    and its image-plane motion after the camera moves to new_camera_pos."""
    res = []
    for i in range(n):
        r = i/float(n-1) * dist
        op = p_dir * r + p_start                 # 3D sample point on the edge
        d = np.dot((p0 - l0),norm)/np.dot(normalize(op),norm)
        opt = d * normalize(op) + l0             # image position at t_0

        ne = normalize(op - new_camera_pos)
        d = np.dot((p0 - l0),norm)/np.dot(ne,norm)
        net = d * ne + l0                        # image position at t_0 + 0.1 s
        di = net[:2] - opt[:2]                   # flow vector on the image plane

        res.append((opt[0],opt[1],di[0],di[1]))
    return res

arrows = get_optical_vectors(p1_real, get_direction_cosine(int1), 100, 10)
X,Y,U,V = zip(*arrows)
plt.quiver(X,Y,U,V, scale=30)
arrows = get_optical_vectors(p1_real, get_direction_cosine(int2), 100*ratio, 10)
X,Y,U,V = zip(*arrows)
plt.quiver(X,Y,U,V, scale=30)
arrows = get_optical_vectors(p3_real, -get_direction_cosine(int1), 100, 10)
X,Y,U,V = zip(*arrows)
plt.quiver(X,Y,U,V, scale=30)
arrows = get_optical_vectors(p3_real, -get_direction_cosine(int2), 100*ratio, 10)
X,Y,U,V = zip(*arrows)
plt.quiver(X,Y,U,V, scale=30)
plt.plot(*zip(*[p1,p2,p3,p4,p1]), marker="o", color="black")
plt.plot(*zip(*(np.array([p1p,p2p,p3p,p4p,p1p])[:,:2])), marker="o", color="green")
Out[154]:
[<matplotlib.lines.Line2D at 0x7f8ba8337610>]
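
As a sanity check on the focus of expansion (my own sketch, reusing get_optical_vectors and the variables above): each flow vector should be parallel to the image position it starts from, i.e. radial from (0, 0), so the 2D cross product below should be ~0 up to float32 round-off.

In [ ]:
# 2D cross product of each arrow's base position and its flow direction
arrows = get_optical_vectors(p1_real, get_direction_cosine(int1), 100, 10)
for x, y, u, v in arrows:
    print(x*v - y*u)  # ~0 means the flow is radial from (0, 0)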

2

The parameter needed is the number of clusters to search for. Below is working code to do the clustering. I color each cluster with its average color.

In [74]:
img = cv2.imread("face.png")
display(Image(filename="face.png"))
def get_colors_only(img1):
    """Return one RGB feature (scaled to [0, 1]) per pixel, plus its (i, j) index."""
    out_arr = []
    inds = []
    for i in range(img1.shape[0]):
        for j in range(img1.shape[1]):
            seg = (img1[i,j].astype(np.float32))/255.0
            out_arr.append(seg)
            inds.append((i,j))
    return out_arr,inds

def get_averages(color_list, inds, cluster_labels, img1):
    """Paint every pixel with the average color of its cluster."""
    outimg = img1.copy()
    colors = np.array(color_list)
    inds = np.array(inds)
    mind = max(cluster_labels)
    for i in range(mind+1):
        avg = (colors[cluster_labels == i].mean(axis=0)*255.0)[0:3]
        for ind in inds[cluster_labels==i]:
            outimg[ind[0],ind[1]] = avg
    return outimg

def k_means_color(clusters, img1, img_segmenter):
    """Cluster per-pixel features with k-means and recolor by cluster average."""
    color_arr, inds = img_segmenter(img1)
    labels = KMeans(n_clusters=clusters).fit_predict(color_arr)
    disp = get_averages(color_arr, inds, labels, img1)
    return disp
    
quantized = k_means_color(14, img, get_colors_only)  # keep img intact for part 2b
cv2.imwrite("face1.png", quantized)
Image(filename="face1.png")
Out[74]:

It's not connected because the clustering algorithm only takes colors into account, not locations; clusters of similarly colored pixels are chosen independent of position. The way to group the pixels by location is to include the pixel coordinates in the feature vectors, so that position enters the distance function.

In [75]:
def get_colors_and_coords(img1):
    """Like get_colors_only, but append the normalized (i, j) coordinates
    to each color feature so position enters the distance function."""
    out_arr = []
    inds = []
    for i in range(img1.shape[0]):
        for j in range(img1.shape[1]):
            seg = list((img1[i,j].astype(np.float32))/255.0)
            seg = seg+[i/float(img1.shape[0]), j/float(img1.shape[1])]
            out_arr.append(np.array(seg, dtype=np.float32))
            inds.append((i,j))
    return out_arr,inds

img2 = k_means_color(14, img, get_colors_and_coords)
cv2.imwrite("face2.png", img2)
Image(filename="face2.png")
Out[75]:
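
One optional refinement (my own suggestion, not part of the assignment): the scale of the coordinate features controls the trade-off between spatially compact clusters and color fidelity, so it is worth exposing as a weight. The feature function and face3.png below are hypothetical.

In [ ]:
# Hypothetical variant of get_colors_and_coords with a spatial weight:
# spatial_weight > 1 favors compact regions, < 1 favors color similarity.
def get_weighted_features(img1, spatial_weight=2.0):
    out_arr = []
    inds = []
    for i in range(img1.shape[0]):
        for j in range(img1.shape[1]):
            seg = list((img1[i,j].astype(np.float32))/255.0)
            seg = seg + [spatial_weight*i/float(img1.shape[0]),
                         spatial_weight*j/float(img1.shape[1])]
            out_arr.append(np.array(seg, dtype=np.float32))
            inds.append((i,j))
    return out_arr, inds

img3 = k_means_color(14, img, get_weighted_features)
cv2.imwrite("face3.png", img3)
Image(filename="face3.png")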