Facebook Interview Question
Software Engineer InternsCountry: United States
Interview Type: Phone Interview
Time Complexity: O(nlogk); Space Complexity: O(k)
public class FindNClosestPoints {
private class Point implements Comparable<Point>{
int value1;
int value2;
double distance;
Point(int value1, int value2) {
super();
this.value1 = value1;
this.value2 = value2;
}
@Override
public int compareTo(Point that) {
if(that.distance > this.distance){
return 1;
} else if(that.distance < this.distance){
return -1;
}
return 0;
}
}
public Point[] findNClosePoint(Point points[], Point from, int k){
Queue<Point> maxHeap = new PriorityQueue<Point>(k);
int index = 0;
for(; index < k; index++){
points[index].distance = euclideanDistance(points[index], from);
maxHeap.offer(points[index]);
}
for(; index < points.length; index++){
points[index].distance = euclideanDistance(points[index], from);
if(maxHeap.peek().distance > points[index].distance){
maxHeap.poll();
maxHeap.offer(points[index]);
}
}
return maxHeap.toArray(new Point[k]);
}
private double euclideanDistance(Point point1, Point point2){
return Math.sqrt(Math.pow(point2.value1 - point1.value1, 2) + Math.pow(point2.value2 - point1.value2, 2));
}
}
This solution works for any point not just (5,5)
import java.util.Arrays;
/**
* @author upen on 10/24/15.
*/
public class ClosestPoints {
public static void main(String[] args) {
Solution sl = new Solution();
Point[] points = {new Point(-2, -4), new Point(0, 0), new Point(10, 15),
new Point(5, 6), new Point(7, 8), new Point(-10, -30)};
Point[] output = sl.findClosest(points, new Point(5, 5), 2);
for (int i=0; i<2; i++){
System.out.println(output[i].toString());
}
}
}
class Solution {
public Point[] findClosest(Point[] points, Point point, int k) {
int len = points.length;
for (int i = 0; i < len; i++) {
Point currPoint = points[i];
currPoint.setDistance(findDistance(point, currPoint));
}
Arrays.sort(points);
return Arrays.copyOf(points, k);
}
public double findDistance(Point p1, Point p2) {
int squareOfX = (p2.getX() - p1.getX()) * (p2.getX() - p1.getX());
int squareOfY = (p2.getY() - p1.getY()) * (p2.getY() - p1.getY());
return Math.sqrt(squareOfX + squareOfY);
}
}
class Point implements Comparable<Point> {
/**
* The x position of the point
*/
private int mX;
/**
* The y position of the point
*/
private int mY;
/**
* distance form the (5,5)
*/
private double mDistance;
public Point(int x, int y) {
this.mX = x;
this.mY = y;
}
public int getX() {
return mX;
}
public void setX(int mX) {
this.mX = mX;
}
public int getY() {
return mY;
}
public void setY(int mY) {
this.mY = mY;
}
public double getDistance() {
return mDistance;
}
public void setDistance(double mDistance) {
this.mDistance = mDistance;
}
@Override
public int compareTo(Point other) {
return (int) (mDistance - other.mDistance);
}
@Override
public String toString() {
return "(" + mX + ", " + mY +")";
}
}
O(n log k) complexity and O(k) memory.
class PointDist{
int dist;
int[] point;
}
public static int[][] findKClosest(int[][] points, int k){
PriorityQueue<PointDist> closest = new PriorityQueue<PointDist>(new Comparator<PointDist>(){
public int compare(PointDist p1, PointDist p2){
return p2.dist - p1.dist;
}
});
for(int[] point : points){
int dist = Math.sqrt(Math.abs(point[0] - 5]) + Math.abs(point[1] -5));
PointDist pointDist = new PointDist();
pointDist.dist = dist;
pointDist.point = point;
closest.add(pointDist);
if(closest.size() > k){
closest.poll();
}
}
int[][] results = new int[closest.size()][];
for(int i = 0; i < results.length; i++){
results[i] = closest.poll().point;
}
return results;
}
Edit: small changes based on Asaf's comments.
your solution suggest that point (6,6) is of same distance as (5,7) which is not true.
other than that the time complexity is O (n * lg n) as each operation in the priority queue takes O (lg n) time.
@Asaf
You are absolutely right about the distance thing. That's what I get for trying to rush.
As for the Overall complexity, I think we are both partially correct- since the PriorityQueue will never have more than k+1 things in it, the complexity incurred by the queue is O(k log k). Therefore, the overall complexity of the solution would be O(n + k log k) and the memory cost would be O(k)
Edited the comment accordingly.
PHP Solution
class Point {
public $x, $y;
function __construct($x, $y) {
$this->x = $x;
$this->y = $y;
}
}
class Points extends SplPriorityQueue {
/** @var Point */
private $zeroPoint;
public function __construct(Point $zeroPoint) {
$this->zeroPoint = $zeroPoint;
}
public function addPoint(Point $point) {
$distance = sqrt(abs($point->x - $this->zeroPoint->x) + abs($point->y - $this->zeroPoint->y));
parent::insert($point, $distance);
}
/** ASC sorting */
public function compare($priority1, $priority2) {
if ($priority1 == $priority2) {
return 0;
}
return $priority1 > $priority2 ? -1 : 1;
}
}
$k = 2;
$points = new Points(new Point(5, 5));
$points->addPoint(new Point(-2, -4));
$points->addPoint(new Point(-2, -4));
$points->addPoint(new Point(0, 0));
$points->addPoint(new Point(10, 15));
$points->addPoint(new Point(5, 6));
$points->addPoint(new Point(7, 8));
$points->addPoint(new Point(-10, -30));
while ($k > 0) {
$point = $points->current();
echo "x:$point->x, y:$point->y\n";
$points->next();
$k--;
}
exit;
import java.util.*;
import java.lang.*;
import java.io.*;
class Closest {
public static void main (String[] args) throws java.lang.Exception {
List<Point> points = new ArrayList<Point>();
points.add(new Point(-2, -4));
points.add(new Point(0, 0));
points.add(new Point(10, 15));
points.add(new Point(5, 6));
points.add(new Point(7, 8));
points.add(new Point(-10, -30));
System.out.println(closest(new Point(5, 5), points, 2));
}
private static List<Point> closest(Point x, List<Point> points, int n) {
List<Point> closest = new ArrayList<Point>();
Map<Double, Point> set = new TreeMap<Double, Point>();
for (Point p : points) {
double dist = getDistance(x, p);
System.out.println(p + "\t=>" + dist);
set.put(dist, p);
}
int counter = 0;
for (Map.Entry<Double, Point> entry : set.entrySet()) {
if (counter == n) {
break;
}
closest.add(entry.getValue());
counter++;
}
return closest;
}
private static double getDistance(Point a, Point b) {
return Math.sqrt(Math.pow((a.x - b.x), 2) + Math.pow((a.y - b.y), 2));
}
static class Point {
public int x;
public int y;
public Point(int x, int y) {
this.x = x;
this.y = y;
}
public String toString() {
return "("+x+","+y+")";
}
}
}
http: // ideone.com/3bNBga
Compute the distance from each Point in the input List of Points to the Point (5,5).
Then add each Point with its distance from (5,5) to an Heap that will always have the element with the minimum distance at the root of the Heap. Remove the root of the heap (for k times) and add it to the list of the closest Points.
You can use PriorityQueue structure as heap; you have to implement Comparable and define the compareTo method in the class Point in order to make the Points comparable by their distance from (5,5). Otherwise you can use an external Comparator. Time complexity is O(nlogn+klogk), that is: add n times (one for each Point in the input list) to the heap (O(logn) insertion complexity in the heap) plus O(klogk) to add the k elements closets to (5,5) to the final list of closest point, because you need to repeat k times the remove operation from the heap (remove the min element from the heap takes O(logk) as we need to rearrange the heap).
import java.util.*;
class Point implements Comparable<Point> {
int x;
int y;
double distance;
public Point(int x, int y) {
this.x=x;
this.y=y;
}
public int compareTo(Point p) {
if(this.distance>p.distance) {
return 1;
}
else if(this.distance<p.distance) {
return -1;
}
else {
return 0;
}
}
public String toString() {
return "("+this.x+","+this.y+"):"+this.distance;
}
}
public class KClosestPoints {
public static double computeDistance(Point p1, Point p2) {
return (Math.sqrt(Math.pow(p1.x-p2.x, 2)+Math.pow(p1.y-p2.y, 2)));
}
public static List<Point> kClosest(List<Point> l, int k) {
if(k<0) {
System.out.println("K must be a nonnegative number");
return null;
}
if(l.size()==0) {
System.out.println("Empty input list");
return new ArrayList<Point>();
}
Point p5 = new Point(5,5);
List<Point> closest = new ArrayList<Point>();
PriorityQueue<Point> heap = new PriorityQueue<Point>();
for(Point p: l) {
p.distance=computeDistance(p,p5);
heap.add(p);
}
for(int i=0;i<k;i++) {
closest.add(heap.remove());
}
return closest;
}
public static void main(String[] args) {
Point p1 = new Point(-2,4);
Point p2 = new Point(0,0);
Point p3 = new Point(10,15);
Point p4 = new Point(5,6);
Point p5 = new Point(7,8);
Point p6 = new Point(-10,30);
List<Point> input = new LinkedList<Point>();
input.add(p1);
input.add(p2);
input.add(p3);
input.add(p4);
input.add(p5);
input.add(p6);
//System.out.println(input);
List<Point> closest = kClosest(input,2);
System.out.println(closest);
}
}
Well I can do it in a "single query line of code" which gives a n*Log(n) but there might be a better solution to do a linear selection for the top k if K < Log(n).
Si this algorithm worst running time is either:
{ n*log(n) for k > log(n)
{ n*k for k < log(n)
Note: I don't need to calculate square root nor the power just the Absolute value for comparisons but I did it anyway.
Point
{
public int x {get;set}
public int y {get;set;}
public Point(int ix, int iy)
{
this.x = ix;
this.y = iy;
}
static IEnumerable<Point> ClosestKPoints(IEnumerable<Point> point, int k)
{
Point target = new Point(5,5);
// Calculate which method is better for performance as it depends on the input
// Sorting n * log(n) if k > log(n)
// Linear selection n * k if k < log(n)
if(k > Math.Log(point.Count, 2))
{
// Cheated here a little bit by using Linq instead of sorting myself but
// I think is ok as other people used PriorityQueues and I did not
// Other ways could be using a heap which is what the PriortyQueues uses
return (from p in points
order by Math.Sqrt(
Math.Power(p.x-target.x, 2) +
Math.Power(p.y-target.y,2))
select p).Take(k)
}
else
{
var tuplet = (from p in points
select
new Tuple<Point, int> {
item1 = p,
item2 = Math.Sqrt(
Math.Power(p.x-target.x, 2) +
Math.Power(p.y-target.y,2))
}
List<Point> result = new List<Point>();
while(k > 0)
{
Tuple<Point, int> max = new Tuple<Point, int>(){item1=null, item2= int.MinValue};
foreach(var t in tuplet)
{
if(t.item2 > max.item2)
max = t;
}
result.Add(max);
k--;
}
return result;
}
}
/*
Write a function that takes the following inputs and gives the following outputs.
Input: A list of points in 2-dimensional space, and an integer k
Output: The k input points closest to (5, 5), using Euclidean distance
Example:
Input: {(-2, -4), (0, 0), (10, 15), (5, 6), (7, 8), (-10, -30)}, k = 2
Output: {(5, 6), (7, 8)}
*/
import java.util.*;
class Solution {
public static void main(String[] args) {
Point target = new Point(5, 5);
Point[] points = {new Point(-2, -4), new Point(0, 0), new Point(10, 15), new Point(5, 6), new Point(7, 8), new Point(-10, -30)};
TreeSet<Point> pointSet = new TreeSet<Point>(new PointComparator(target));
for (Point p : points) {
pointSet.add(p);
}
int i = 0;
while (i < 2) {
System.out.println(pointSet.pollFirst());
i++;
}
}
}
class PointComparator implements Comparator<Point> {
public final Point target;
public PointComparator(Point target) {
this.target = target;
}
@Override
public int compare(Point p1, Point p2) {
return p1.distanceTo(target) - p2.distanceTo(target) < 0 ? -1 : 1;
}
}
class Point {
int x;
int y;
public Point(int x, int y) {
this.x = x;
this.y = y;
}
public double distanceTo(Point other) {
double a = (double) other.x - (double) this.x;
double b = (double) other.y - (double) this.y;
a *= a;
b *= b;
return Math.abs(Math.sqrt(a) + Math.sqrt(b));
}
public String toString() {
return "("+x+", "+y+")";
}
}
#include <vector>
struct Point
{
int x;
int y;
Point(int _x, int _y) : x(_x), y(_y) {}
};
struct PointDist
{
float distsq;
Point p;
PointDist(float _distsq, Point _p) : distsq(_distsq), p(_p) {}
};
const Point REF_POINT(5, 5);
void printclosestpoints(Point input[], int inputsize, int k);
int main()
{
Point input[] = { Point(-2, -4), Point(0, 0), Point(10, 15), Point(5, 6), Point(7, 8), Point(-10, -30) };
printclosestpoints(input, sizeof(input) / sizeof(input[0]), 2);
}
void printclosestpoints(Point input[], int inputsize, int k)
{
std::vector<PointDist> results;
std::vector<PointDist>::const_iterator it;
for (int i = 0; i < inputsize; ++i)
{
int dx = input[i].x - REF_POINT.x;
int dy = input[i].y - REF_POINT.y;
float distsq = dx * dx + dy * dy;
it = results.begin();
while (it != results.end() && distsq > it->distsq)
{
++it;
}
if (it != results.end() || results.size() < k)
{
results.insert(it, PointDist(distsq, input[i]));
if (results.size() > k)
{
results.erase(results.begin() + k);
}
}
}
for (it = results.begin(); it != results.end(); ++it)
{
printf("(%d, %d)\n", it->p.x, it->p.y, it->distsq);
}
}
#include<iostream>
#include<vector>
#include<math.h>
using namespace std;
double distance(pair<int,int> p) {
return sqrt((p.first - 5) * (p.first - 5) + (p.second - 5) * (p.second - 5));
}
class Compare {
public :
bool operator()(pair<int,int> p1, pair<int,int> p2) {
return distance(p1) > distance(p2);
}
};
int main() {
vector<pair<int,int> > v;
v.push_back(make_pair(-10,10));
v.push_back(make_pair(-2,-4));
v.push_back(make_pair(0,0));
v.push_back(make_pair(10,15));
v.push_back(make_pair(5,6));
v.push_back(make_pair(7,8));
priority_queue<pair<int,int>, vector<pair<int,int> >, Compare> q(v.begin(), v.end());
int k = 2;
for(int i = 0; i < k; i++) {
pair<int,int> p = q.top();
q.pop();
cout<<"("<<p.first<<", "<<p.second<<")"<<endl;
}
}
None of this is new information to the thread. I'm simply presenting the solutions in Python with minimal boilerplate.
First, the naive O(n log n) time, O(n) space solution, assuming .sort is O(n log n).
def dist(x,y):
return sqrt((x-5)**2 + (y-5)**2)
def solution(points, k):
points_dists = map(lambda (x,y): (dist(x,y),x,y), points)
points_dists.sort(lambda a,b: a[0]<b[0])
return map(lambda i: (i[1],i[2]), points_dists[:k])
You can improve the time from O(n log n) to O(n log k) by using a binary __max__ heap.
# given: binary_heap class
def cmp_max_heap(a,b): return a[0] > b[0] # [0] selects d in (d,x,y)
def solution(points,k):
points_dists = map(lambda (x,y): (dist(x,y),x,y), points)
max_heap = binary_heap(points_dists[:k], cmp_max_heap)
for (d,x,y) in points_dists[k:]:
if max_heap.dominant() > d:
max_heap.delete_dominant()
max_heap.add((d,x,y))
return map(lambda (_d,x,y): (x,y), max_heap)
You can improve on this further towards O(n) with a rank selection algorithm (like quickselect mentioned above), but you trade off by introducing the risk of worst case O(n^2) complexity.
solution with C++11
#include <iostream>
#include <vector>
#include <algorithm>
using namespace std;
void solve( vector< pair<int, int> > v, int k)
{
auto sqr = [] (int x) {return x*x;} ;
sort( v.begin(), v.end(), [sqr] ( const pair<int, int>& x, const pair<int, int>& y) { return sqr(x.first -5) + sqr(x.second-5) < sqr(y.first-5) + sqr(y.second -5); } );
for (int i=0; i < k && i < v.size(); i++)
cout << "(" << v[i].first << "," << v[i].second << ") ";
cout << endl;
}
int main()
{
solve( {{-2, -4}, {0, 0}, {10, 15}, {5, 6}, {7, 8}, {-10, -30}}, 2);
}
public class Solution {
Pair point = new Pair(5, 5);
int dist(Pair p1, Pair p2) {
return (int)Math.sqrt(Math.pow((p2.x - p1.x), 2) + Math.pow((p2.y - p1.y), 2));
}
class Obj {
int dist;
int index;
Obj(int d, int i) {
dist = d;
index = i;
}
}
int departion(LinkedList<Obj> dilist, int l, int h) {
Obj pivot = dilist.get(l);
while (l < h) {
while (l < h && dilist.get(h).dist >= pivot.dist) {
h--;
}
dilist.set(l, dilist.get(h));
while (l < h && dilist.get(l).dist <= pivot.dist) {
l++;
}
dilist.set(h, dilist.get(l));
}
dilist.set(l, pivot);
return l;
}
void qsort(LinkedList<Obj> dilist, int l, int h) {
if (l < h) {
int m = departion(dilist, l, h);
qsort(dilist, l, m - 1);
qsort(dilist, m + 1, h);
}
}
void sort(LinkedList<Obj> dilist) {
qsort(dilist, 0, dilist.size() - 1);
}
LinkedList<Pair> find(LinkedList<Pair> list, int k) {
LinkedList<Obj> dilist = new LinkedList<Obj>();
for (int i = 0; i < list.size(); i++) {
int d = dist(point, list.get(i));
Obj o = new Obj(d, i);
dilist.add(o);
}
sort(dilist);
LinkedList<Pair> ret = new LinkedList<Pair>();
for (int i = 0; i < k; i++) {
ret.add(list.get(dilist.get(i).index));
}
return ret;
}
}
Here is the C++ solution using quick sort to sort.
Running time is O( N log N). I wish there is a faster way.
double eucidean(int x, int y)
{
return sqrt(pow((double)(x -5), 2.0)+pow((double)(y - 5), 2.0));
}
vector<point> find_closest(int** input, int len, int limit)
{
vector<point> result;
vector<point> points;
for (int i = 0; i <len; i++)
points.push_back(point(input[i][0], input[i][1], eucidean(input[i][0], input[i][1])));
quick_sort(points, 0, len-1);
for (int j = 0; j < limit; j++)
result.push_back(points[j]);
return result;
}
Calculate the distance and put the result in a Max Heap in the end take K elemets from the Heap.
C# Implementation Use a SortedSet to keep the point sorted according to its Distance
The code complicates a little because C# doesn't have a native Heap
public class Point
{
public int X { get; private set; }
public int Y { get; private set; }
public Point(int x, int y)
{
this.X = x;
this.Y = y;
}
public int Distance(Point p)
{
if (p == null)
throw new Exception();
double diffX = p.X - this.X;
double diffY = p.Y - this.Y;
return (int)((diffX * diffX) + (diffY * diffY));
}
public override string ToString()
{
return string.Format("({0}, {1})", this.X, this.Y);
}
}
private class PointSortHelper
{
public Point Point;
public int Distance;
public int Index;
}
private class PointSortComparer : IComparer<PointSortHelper>
{
public int Compare(PointSortHelper x, PointSortHelper y)
{
int n = x.Distance.CompareTo(y.Distance);
if (n != 0)
return n;
return x.Index.CompareTo(y.Index);
}
}
public static IEnumerable<Point> FindClosePoints(List<Point> points, Point p, int k)
{
var sortedList = new SortedSet<PointSortHelper>(new PointSortComparer());
int index = 0;
foreach (var point in points)
{
int distance = p.Distance(point);
var helper = new PointSortHelper() { Point = point, Distance = distance, Index = index};
index ++;
sortedList.Add(helper);
}
var result = new List<Point>();
foreach (var item in sortedList)
{
result.Add(item.Point);
k--;
if (k == 0)
break;
}
return result;
}
// this codes runs in nlogn using o(n) space
#include "stdio.h"
#include <iostream>
#include <set>
#include <string>
#include <algorithm>
#include <map>
#include <list>
#include <vector>
#include "math.h"
using namespace std;
typedef pair<int,int> ii;
typedef pair <ii,double> iid;
bool comp(iid a,iid b){
if (a.second <b.second) return true;
else return false;
}
double dist(int a,int b )
{
return sqrt((a-5)*(a-5)+(b-5)*(b-5));
}
int main() {
// this codes runs in nlogn using o(n) space
int a,b;
vector<iid> vec;
int n,k;
scanf ("%d %d\n",&n,&k);
for (int i=0;i<n;i++){
scanf ("%d %d\n",&a,&b);
vec.push_back(iid(ii(a,b),dist(a,b)));
}
//nlogn
sort(vec.begin(),vec.end(),comp);
for (int i = 0; i < k; ++i)
{
printf("(%d , %d)\n",vec[i].first.first,vec[i].first.second );
}
return 0;
}
This can be done in O(n) space and O(n) time:
- sthndr January 03, 20151) calculate the Euclidean distances from (5, 5): O(n) space and time
2) Use Quickselect to find the k smallest values: O(n) time. The key is Quickselect - check out the wikipedia page on it for algorithm details and proof that it is O(n) time.