Google Interview Question
Software Engineer / DevelopersCountry: United States
This algorithm is O(n log n) where n is the number of intervals. We first sort the array on the end points. Call this array C. Then we build two arrays (with size equal to C.size() ) A and B where A[i] shows how many intervals gets opened in from C[A.size()-1] down to C[i+1] and B[i] shows how many intervals got closed from C[0] up to C[i]. Trivially we can build both A and B in O(n). Now for each interval I with left point in C[i] and right point in C[j], the number of intervals intersecting interval I would be (n - A[j] -B[i]).
If nothing has been said about the order of the algorithm and none of the above solutions seems to work, here is one solution (with O(n^2) :) )
n = number of the intervals
1. create a n x n matrix with all zeros
0 0 0
0 0 0
0 0 0
2. starting with the first interval and checking its intersection with the next intervals we have:
0 1 1
1 0 0
1 0 0
3. going on the second interval and checking with the next intervals and so on in this case we have:
0 1 1
1 0 0
1 0 0
4. summing up all the rows (or columns) we have
1 | 0 1 1 | = 2
2 | 1 0 0 | = 1
3 | 1 0 0 | = 1
so the first interval is the interval that has the most intersections = (1,6)
My solution is O(n * log(n)) with the help of sorting and binary indexed tree.
1. sort the interval array by 'start' first, and 'end' later: (1, 6), (2, 3), (4, 11).
2. for the ith interval, find the first interval v[j] in v[i + 1 : n - 1], such that v[i] and v[j] doesn't overlap. it's binary search.
3. that means all intervals from v[i + 1] to v[j - 1] overlap with v[i].
4. use an extra integer array to record how many intervals overlap with each v[i], as count[i].
5. add count[i] by (j - 1) - (i + 1) + 1, this operation is O(log(n)).
6. from i + 1 to j - 1, add each count[...] by 1, this operation is O(log(n)) too.
7. binary indexed tree makes it possible for step 5 and 6 to be done efficiently.
8. time complexity is O(n * log(n) + n * log(n) + n * log(n)) = O(n * log(n)). both in sorting, searching and counting.
9. space complexity is O(n), from the BIT.
#include <algorithm>
#include <iostream>
#include <vector>
using namespace std;
class BinaryIndexedTree {
public:
BinaryIndexedTree(int _n = 1): n(_n) {
v.resize(n + 1);
};
// add val to all elements from v[1] to v[x]
void addAll(int x, int val) {
while (x >= 1) {
v[x] += val;
x -= lowBit(x);
}
};
// add val to all elements from v[x] to v[y]
void addInterval(int x, int y, int val) {
if (x > y) {
addInterval(y, x, val);
return;
}
addAll(x - 1, -val);
addAll(y, val);
};
// return v[x]
int sum(int x) {
int res = 0;
while (x <= n) {
res += v[x];
x += lowBit(x);
}
return res;
};
~BinaryIndexedTree() {
v.clear();
};
private:
int n;
vector<int> v;
int lowBit(int x) {
return x & -x;
};
};
struct Interval {
int start;
int end;
Interval(int _start = 0, int _end = 0): start(_start), end(_end) {};
bool operator < (const Interval &other) {
if (start != other.start) {
return start < other.start;
} else {
return end < other.end;
}
};
friend ostream& operator << (ostream &cout, const Interval &i) {
cout << '(' << i.start << ',' << i.end << ')';
return cout;
};
};
Interval solve(vector<Interval> &v)
{
int n = (int)v.size();
if (n == 0) {
return Interval(0, 0);
} else if (n == 1) {
return v[0];
}
sort(v.begin(), v.end());
BinaryIndexedTree bit(n);
int i, j;
int ll, rr, mm;
for (i = 0; i < n - 1; ++i) {
if (v[i + 1].start >= v[i].end) {
// no overlapping
continue;
}
if (v[n - 1].start < v[i].end) {
// all overlapped
j = n - 1;
} else {
ll = i + 1;
rr = n - 1;
while (rr - ll > 1) {
mm = (ll + rr) / 2;
if (v[mm].start < v[i].end) {
ll = mm;
} else {
rr = mm;
}
}
j = ll;
}
// from [i + 1, j], they all overlap with v[i].
bit.addInterval(i + 2, j + 1, 1);
bit.addInterval(i + 1, i + 1, j - i);
}
int ri;
int res, mres;
ri = 0;
mres = bit.sum(1);
for (i = 1; i < n; ++i) {
res = bit.sum(i + 1);
ri = res > mres ? i : ri;
}
return v[ri];
}
int main()
{
int i;
int n;
vector<Interval> v;
Interval res;
while (cin >> n && n > 0) {
v.resize(n);
for (i = 0; i < n; ++i) {
cin >> v[i].start >> v[i].end;
}
res = solve(v);
cout << res << endl;
v.clear();
}
return 0;
}
/*
// A simple test for the BIT above.
int main()
{
string cmd;
int n;
BinaryIndexedTree *bit = nullptr;
int x, y, val;
int i;
while (cin >> n && n > 0) {
bit = new BinaryIndexedTree(n);
while (true) {
for (i = 1; i <= n; ++i) {
cout << bit->sum(i) << ' ';
}
cout << endl;
cin >> cmd;
if (cmd == "e") {
break;
} else if (cmd == "a") {
cin >> x >> val;
bit->addAll(x, val);
} else if (cmd == "ai") {
cin >> x >> y >> val;
bit->addInterval(x, y, val);
}
}
delete bit;
bit = nullptr;
}
return 0;
}
*/
public class IntervalOverlap {
class Interval {
int min;
int max;
public Interval(int min, int max){
this.min = min;
this.max = max;
}
@Override
public boolean equals(Object o){
if(o == null) return false;
if(o instanceof Interval){
Interval interval = (Interval) o;
if(interval.max == this.max && interval.min == this.min) return true;
}
return false;
}
@Override
public int hashCode(){
return min + ",".hashCode() + max;
}
public boolean isOverlap(Interval interval){
if(this.min > interval.max ) return false;
if(this.max < interval.min) return false;
return true;
}
@Override
public String toString(){
return "[" + min + ", " + max + "]";
}
}
// O(n * n) not that good
public HashMap<Interval, List<Interval>> matchOverlappedInterval(List<Interval> intervals){
HashMap<Interval, List<Interval>> mapIntervals = new HashMap<Interval, List<Interval>>();
for(int i = 0 ; i < intervals.size()-1; i++){
mapIntervals.put(intervals.get(i), new ArrayList<Interval>());
for(int j = i+1; j < intervals.size(); j++){
if(intervals.get(i).isOverlap(intervals.get(j))){
System.out.println(intervals.get(i) + " overlaps " + intervals.get(j));
mapIntervals.get(intervals.get(i)).add(intervals.get(j));
}
}
}
return mapIntervals;
}
public void printMapIntervals(HashMap<Interval, List<Interval>> mapIntervals){
for(Interval interval: mapIntervals.keySet()){
for(Interval i : mapIntervals.get(interval)){
System.out.println(interval + ", " + i);
System.out.println(i + ", " + interval);
}
}
}
public static void main(String[] args) {
IntervalOverlap io = new IntervalOverlap();
List<Interval> intervals = new ArrayList<Interval>();
intervals.add(io.new Interval(1,3));
intervals.add(io.new Interval(2,4));
intervals.add(io.new Interval(3,6));
intervals.add(io.new Interval(7,8));
io.printMapIntervals(io.matchOverlappedInterval(intervals));
}
}
Simple O(N) algorithm that works good in ranges are not too long. Create an array of integers and for each input range increment value at the start range index and decrement at the end range index. For example, for (1, 6), (2, 3), (4, 11), (1, 6) we will have array of 11 integers with following values: [2, 1, -1, 1, 0, -2, 0, 0, 0, 0, -1]. Then we go through all elements of this array and calculate sum of all elements before current (sum[x] = sum[x - 1] + arr[x]): [2, 3, 2, 3, 3, 1, 1, 1, 1, 1, 0]. Max value here is the maximum number of intersected ranges. Of course, we don't need to create sum array, we can just store last sum value and max sum value, putting first into second if it becomes greater.
package google;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
public class MaxIntervalCount {
@Test
public void test() {
IntervalCounter ic = new IntervalCounter(-5, 3);
ic.addRange(-2, 3);
ic.addRange(-1, 2);
ic.addRange(1, 2);
ic.addRange(3, 3);
ic.addRange(-5, -4);
ic.addRange(-3, 0);
ic.addRange(-1, 0);
assertEquals(4, ic.maxRangesCount());
}
private class IntervalCounter {
int[] changes;
int min, max;
private IntervalCounter(int min, int max) {
if (min > max) {
throw new IllegalArgumentException("Min range value is greater than max.");
}
changes = new int[max - min + 1];
this.min = min;
this.max = max;
}
public void addRange(int s, int e) {
if (s > e) {
throw new IllegalArgumentException("Range start is greater than end.");
}
if (s < min || e > max) {
throw new IllegalArgumentException("Range is not within general range.");
}
changes[s - min]++;
changes[e - min]--;
}
public int maxRangesCount() {
int max = 0;
int sum = 0;
for (int ch : changes) {
sum += ch;
if (max < sum) {
max = sum;
}
}
return max;
}
}
}
Solution with sorting of start and end indexes separately is much better that solution with array of open / closed ranges I proposed earlier. It doesn't require predefined min/max range values and works well with large ranges.
package google;
import org.junit.Test;
import java.util.*;
import static org.junit.Assert.assertEquals;
public class MaxIntervalCount {
@Test
public void test() {
IntervalCounter2 ic = new IntervalCounter2();
ic.addRange(-2, 3);
ic.addRange(-1, 2);
ic.addRange(1, 2);
ic.addRange(3, 3);
ic.addRange(-5, -4);
ic.addRange(-3, 0);
ic.addRange(-1, 0);
assertEquals(4, ic.maxRangesCount());
}
private static class IntervalCounter2 {
List<Integer> starts = new ArrayList<>();
List<Integer> ends = new ArrayList<>();
public void addRange(int s, int e) {
if (s > e) {
throw new IllegalArgumentException("Range start is greater than end.");
}
starts.add(s);
ends.add(e);
}
public int maxRangesCount() {
int max = 0;
int sum = 0;
Collections.sort(starts);
Collections.sort(ends);
int ranges = starts.size();
int s = 0;
int e = 0;
while (s < ranges) {
int startIdx = starts.get(s);
int endIdx = ends.get(e);
if (startIdx <= endIdx) {
sum++;
s++;
} else {
sum--;
e++;
}
if (sum > max) {
max = sum;
}
}
return max;
}
}
}
I know I'm late to the party, but I have a O(nlogn) working algorithm in C++.
I put this here, because I think my code is easy to read.
1) I create a calendar of events (start/end of interval)
2) Sort events by time of occurence
3) Browse calendar and note how many intervals are opened at any time
#include <iostream>
#include <vector>
#include <map>
#include <unordered_set>
#include <queue>
#include <tuple>
using namespace std;
class Interval {
public:
int start, end, intersections = 0;
Interval(const int & s, const int & e) : start(s), end(e) {}
};
typedef tuple<Interval *, int, bool> event;
typedef pair<int, int> ii;
// Comparer can be adjusted to sort start events first (using the 3rd item in e1/e2)
// The effect would be, that:
// When one interval starts where the other ends,
// it will count as an intersection too
struct calendarComparer {
bool operator()(const event & e1, const event & e2) {
return get<1>(e1) > get<1>(e2);
}
};
ii getInterval(const vector<pair<int, int>> & intervals) {
// true = start of an interval
// false = end
priority_queue<event, vector<event>, calendarComparer> calendar;
// I create calendar, which is being sorted as things are inserted
for(auto i : intervals) {
Interval * newInterval = new Interval(i.first, i.second);
calendar.push(event(newInterval, i.first, true)); // start
calendar.push(event(newInterval, i.second, false)); // end
}
// Set of currently opened intervals
unordered_set<Interval *> opened;
int max = INT_MIN;
ii best;
// how many intervals are there opened at the moment?
int counter = 0;
// For every event...
while(!calendar.empty()) {
event e = calendar.top();
calendar.pop();
Interval * interval = get<0>(e);
if(get<2>(e)) { // start interval event
++counter;
interval->intersections = counter;
// for every opened interval, increase the intersection counter
for(auto i : opened)
++i->intersections;
opened.insert(interval);
} else { // end interval event
--counter;
opened.erase(opened.find(interval));
// note the best result?
if(interval->intersections > max) {
max = interval->intersections;
best = ii(interval->start, interval->end);
}
delete interval;
}
}
return best;
}
int main() {
pair<int, int> result = getInterval({
pair<int, int>(2, 3),
pair<int, int>(4, 11),
pair<int, int>(1, 6)
});
cout << result.first << ", " << result.second << endl;
}
O(NlogN) solution
1) sort the pairs based on their start time : complexity O(NlogN)
2) iterate over start times: s=startTime, e=corresponding end time
for each (s,e) find s' such that s'<e (using binary search)
number of overlapped timeranges with (s,e) N = index of s' - index is s + 1
return maximum N encountered
O(nlgn) solution with O(n) extra space.
This solution probably has some trouble with duplicate value end/start points but it can be improved to support these as well.
template <typename K, typename V>
using map = boost::unordered_map<K, V>;
struct interval_t
{
int s;
int e;
};
struct point_t
{
int v;
bool s;
int iid;
};
interval_t get_max_intersect(const std::vector<interval_t>& intervals)
{
if (intervals.empty()) return { 0, 0 };
map<int, interval_t> iid_map;
std::vector<point_t> order;
int id = 0;
for (const auto& i : intervals)
{
iid_map[id] = i;
order.push_back({ i.s, true, id });
order.push_back({ i.e, false, id });
}
std::sort(order.begin(), order.end(),
[](const point_t& p1, const point_t& p2) { return p1.v < p2.v; });
int count = 0, max_count = 0, max_id = 0;
for (const auto& p : order)
{
if (p.s)
{
++count;
}
else
{
if (count > max_count)
{
max_count = count;
max_id = p.iid;
}
--count;
}
}
return iid_map[max_id];
}
void test_get_max_intersect()
{
std::vector<interval_t> intervals;
intervals.push_back({ 2, 3 });
intervals.push_back({ 4, 11 });
interval_t expected = { 1, 6 };
intervals.push_back(expected);
interval_t result = get_max_intersect(intervals);
assert(result.s == expected.s && result.e == expected.e);
intervals.push_back({ -3, 1 });
intervals.push_back({ -5, 8 });
intervals.push_back({ 9, 11 });
intervals.push_back({ 11, 13 });
expected = { 4, 12 };
intervals.push_back(expected);
result = get_max_intersect(intervals);
assert(result.s == expected.s && result.e == expected.e);
}
x, y: interval.
x is not intersected with y:
x.second (end) <= y.first (start)
or x.first >= y.second.
the idea is so simple.
so an interval x. find the number of interval y where y.second <= x.first or y.first >= x.second.
y.second <= x.first : O(logN) if we have a vector of sorted start
y.first >= x.second: O(logN) if we have a vector of sorted end.
int maxint1(const std::vector< std::pair<int, int> > &its) //N^2
{
std::vector<int> starts;
std::vector<int> ends;
for (int i = 0; i != (int) its.size(); ++i) starts.push_back(its[i].first);
for (int i = 0; i != (int) its.size(); ++i) ends.push_back(its[i].second);
std::sort(starts.begin(), starts.end());
std::sort(ends.begin(), ends.end());
int maxnc = 0;
int maxid = 0;
for (int i = 0; i != (int) its.size(); ++i)
{
int nleft = std::lower_bound(ends.begin(), ends.end(), its[i].first) - ends.begin();
int nright = std::upper_bound(starts.begin(), starts.end(), its[i].second) - starts.begin();
// std::cout << nleft << " " << nright << " " << (int) its.size() - nright << "\n";
int nc1 = its.size() - nleft - (its.size() - nright);
int nc = 0;
for (int j = 0; j != (int) its.size(); ++j)
if (match(its[i], its[j])) nc ++;
if (nc > maxnc) { maxnc = nc; maxid = i;}
if (nc != nc1) {
std::cout << "error " << nc << " " << nc1 << "\n";
}
}
std::cout << maxnc << " " << maxid << "\n";
return maxnc;
}
No need for interval trees.
You can sort all endpoints into a single array , or use separate sorted arrays as below:
// sort start points into s[ ]
// sort end points into e[ ]
int sidx=0, eidx=0, numint=0, max=0;
while(eidx < N)
{
if( s[ sidx ]< e[ eidx ] )
{
numint++;
sidx++;
if( numint > max ) max = numint;
}
else if( e[ eidx ] < s[ sidx ] )
{
eidx++, numint--;
}
else
sidx++, eidx++;
}
It is the same, yes.
You can sort and put them in one array (but mark them as start or end) OR you can sort and put them into two different arrays (no need to mark start vs. end).
And the algorithm is O(nlogn) because sorting is nlogn then final sweep is n.
How could you do the final sweep in o(n)? I can't think of any way to do it in O(n). Could you please elaborate more?
The sorting is nlogn ( if you sort the endpoints all together it is (2n)log(2n) which is still O(nlogn).
Now, you have 2 arrays of n endpoints OR you have 1 array of 2n endpoints.
So you have to look at 2n endpoints in the final part of the algorithm, right?
For each endpoint you look at, there is a constant upperlimit of operations you have to do... O(1) for each endpoint.
So in total it is O(n*logn) + O(1)*O(n) => O(nlogn)
You are supposed to not just find the maximum number, but find the overlapping intervals that result in the particular max. How does your algorithm do that?
My algorithm is O(nlogn + m*n), where m is the maximum number of intersections with any single interval.
Sort the interval by both start points and end points. Keep track of the active segments. When reaching a start point, increment the intersection counts of all active intervals, and set the new interval's intersection count to the number of active intervals (excluding itself). When reaching an end point, remove the interval from the active intervals.
So worst case would be O(n^2). Can we do better than this?
I just don't want to tell because you seem to be willing to get it, Just couple of hints:
1. Consider that the whole space is boken to intervals of adjusten point, does not matter if it was begin or end of some interval
2. What heuristics you need to gather for each such interval (simple DS?)
3. Consider second pass (just n) where you do certain operations on those small intervals to build final heuristic for each input interval
Think about an active set of Intervals between any two consecutive points in the input (not intervals).
For the interval, make a union of all active sets cooresponding to the subintervals it consists from.
Mona, pretty clever, but what is the worst case number of counts would you have to update per interval
@CT I am not sure what you mean by count. After sorting C, we pass it from its ending to its beginning to build A, then from its beginning to its ending to build B. Now we should go through each interval's end points (one way to know the end points for each interval is to store them in a hash table:we build the hash table by passing C for one more time, mapping the interval's id to (i, j) where i is the index of its left point in C and j is the index of its right end point in C. now for the left point i we can retrieve its right point j in O(1)) and calculate n - A[j]-B[i]. The maximum of this number among all intervals would be the answer.
Adding up on your method. Create a data structure, end point { int value, int associatedLineID} .If we just sort all the end points into one single array (nlogn). And then use your approach to check who is active (based on associatedLineID), and increment the count of overlapped lines of the associated lines.
package amazon;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
public class Lab2 {
public static void main(String[] args) {
line maxLine = null;
int maxCount = 0;
ArrayList<line> ar = new ArrayList<line>();
ar.add(new line(1,6));
ar.add(new line(2,3));
ar.add(new line(3,4));
ar.add(new line(4,11));
System.out.println(ar.get(0));
System.out.println(ar.get(1));
System.out.println(ar.get(2));
int len = ar.size();
HashMap hs = new HashMap();
for(int i = 0; i < (len - 1) ; i++)
{
line l = ar.get(i);
int count = 0;
for(int j = i + 1; j < (len - 1); j ++)
{
line l2 = ar.get(j);
if((l.start < l2.start) && (l.last > l2.last))
{
count = count + 1;
}
if(count > maxCount)
{
maxLine = l;
maxCount = count;
}
}
}
System.out.println("The line is " + maxLine + maxCount);
}
}
class line
{
int start;
int last;
public line(int start, int last) {
super();
this.start = start;
this.last = last;
}
@Override
public String toString() {
return "(" + start + "," + last + ")";
}
}
Say point records are as {X,Y}.
- vishwaraj.anand00 February 23, 20141. We sort all Xi's and all Yi's on the number line [Some DS] and maintain entry and exit counts for each such Xi and Yi, all initialized to zeros.
2. For each record {Xi, Yi}, we add 1 in entry count of Xi and add 1 in exit count of Yi.
3. Now for the number line, we have at each interval a count of active number of intersections.
4. The max number of that is the answer. While finding the max, if we track the intervals where we are getting that value, we could also get the range of the required intersection.