Google Interview Question
Software EngineersCountry: United States
public static void main(String[] args) {
// set up data
ArrayList<Tuple<String, Integer>> MoviesAndFrequency = new ArrayList(
Arrays.asList(new Tuple("abc", 10), new Tuple("def", 15), new Tuple("ghi", 10),
new Tuple("abc", 12), new Tuple("xyz", 100)));
for (Tuple T : MoviesAndFrequency)
T.displayTuple();
HashMap<String, Integer> MovieHash = new HashMap<>();
for (Tuple T : MoviesAndFrequency) {
if (MovieHash.containsKey(T.getX()))
MovieHash.put(T.getX().toString(), MovieHash.get(T.getX().toString()) + (Integer) T.getY());
else
MovieHash.put(T.getX().toString(), (Integer) T.getY());
}
System.out.println("\n" + MovieHash.toString() + "\n");
Map<String, Integer> sorted = MovieHash.entrySet()
.stream()
.sorted(Collections.reverseOrder(Map.Entry.comparingByValue()))
.collect(toMap(Map.Entry::getKey, Map.Entry::getValue, (e1, e2) -> e2,
LinkedHashMap::new));
System.out.println(sorted.toString());
}
public static void main(String[] args) {
// set up data
ArrayList<Tuple<String, Integer>> MoviesAndFrequency = new ArrayList(
Arrays.asList(new Tuple("abc", 10), new Tuple("def", 15), new Tuple("ghi", 10),
new Tuple("abc", 12), new Tuple("xyz", 100)));
for (Tuple T : MoviesAndFrequency)
T.displayTuple();
HashMap<String, Integer> MovieHash = new HashMap<>();
for (Tuple T : MoviesAndFrequency) {
if (MovieHash.containsKey(T.getX()))
MovieHash.put(T.getX().toString(), MovieHash.get(T.getX().toString()) + (Integer) T.getY());
else
MovieHash.put(T.getX().toString(), (Integer) T.getY());
}
System.out.println("\n" + MovieHash.toString() + "\n");
Map<String, Integer> sorted = MovieHash.entrySet()
.stream()
.sorted(Collections.reverseOrder(Map.Entry.comparingByValue()))
.collect(toMap(Map.Entry::getKey, Map.Entry::getValue, (e1, e2) -> e2,
LinkedHashMap::new));
System.out.println(sorted.toString());
}
public static void main(String[] args) {
// set up data
ArrayList<Tuple<String, Integer>> MoviesAndFrequency = new ArrayList(
Arrays.asList(new Tuple("abc", 10), new Tuple("def", 15), new Tuple("ghi", 10),
new Tuple("abc", 12), new Tuple("xyz", 100)));
for (Tuple T : MoviesAndFrequency)
T.displayTuple();
HashMap<String, Integer> MovieHash = new HashMap<>();
for (Tuple T : MoviesAndFrequency) {
if (MovieHash.containsKey(T.getX()))
MovieHash.put(T.getX().toString(), MovieHash.get(T.getX().toString()) + (Integer) T.getY());
else
MovieHash.put(T.getX().toString(), (Integer) T.getY());
}
System.out.println("\n" + MovieHash.toString() + "\n");
Map<String, Integer> sorted = MovieHash.entrySet()
.stream()
.sorted(Collections.reverseOrder(Map.Entry.comparingByValue()))
.collect(toMap(Map.Entry::getKey, Map.Entry::getValue, (e1, e2) -> e2,
LinkedHashMap::new));
System.out.println(sorted.toString());
}
→ traverse through the videos
→ add or update the video you have come across in the results array and add or update the value in a hashtable
→ Initially have an array of size 10, and add the updated value of the video according to insertion sort.
def mostRatedVideos(videos):
result = [None]*10
hashtable = {}
for video in videos:
# On first entry add directly, if it doesnt exist in the hashtable update the rate
if(video[0] in hashtable):
hashtable[video[0]] += video[1]
else:
hashtable[video[0]] = video[1]
index = 0
swapping = False
indexOfVideo = -1
while(index < 10):
if(result[index] == None):
result[index] = video[0]
break
if(hashtable[result[index]] > hashtable[video[0]] ):
index += 1
continue
else:
swapping = True
if (video[0] in result):
indexOfVideo = result.index(video[0])
break
index += 1
if(swapping and indexOfVideo == -1):
result[index+1:10] = result[index:9]
result[index] = video[0]
elif(swapping):
result[index+1:indexOfVideo+1] = result[index:indexOfVideo]
result[index] = video[0]
return result
ratedVids = mostRatedVideos([('abc', 10), ('apple', 9), ("banana", 45), ("fanugreek", 1), ("abc", 100), ("dragonfruit", 6), ('frank', 12), ('apricote', 7), ("cherry", 4), ("grape", 17), ("apple", 11), ("dragonfruit", 6)])
print(ratedVids)
L = [['abc', 10], ['def', 15], ['ghi', 10], ['abc', 12], ['xyz', 100]]
g = group( L ) as { $.o[0] } as { sum( $.value ) as { $.o[1] } }
l = list(g) as { [ $.key, $.value ] }
sortd( l ) where { sign( $.l[1] - $.r[1] ) }
println( str(l , "\n") as { str($.o, "->") } )
Produces:
xyz->100
abc->22
def->15
ghi->10
Use Min Heap, min heap allows you to check with O(1), is the new entry in top 10 rates at the given time t.
For solving this problem, we could use min heap and hashmap. HashMap for locating existing video entry in heap in O(1) time. Asymptotically, O(10), linear search in heap of size 10 (implemented using array) is O(1). But for a considerable size of heap, we could use hashmap.
1. Use Hashmap [<Key>Clip, <Value>WatchRate] to store each entry, and append the watch rate as and when key matches the existing collection.
2. Iterate over the HashMap and build MaxHeap with <Value>WatchRate.
3. Pop the max heap and fill a vector & return.
4. Complexity = (Step 1)N + (Step 2) N log N + (Step 3) N => N Log N
No need to sort full array. We can do in O(k*n) time, where k =10
def bubbleSort(arr,k):
n = len(arr)
# Traverse through all array elements
for i in range(k):
# Last i elements are already in place
for j in range(0, n-i-1):
# traverse the array from 0 to n-i-1
# Swap if the element found is greater
# than the next element
if arr[j][1] > arr[j+1][1] :
arr[j], arr[j+1] = arr[j+1], arr[j]
arr = [('abc', 100),('abc', 105), ('def', 15), ('ghi', 10), ('abc', 12),
('xyz', 9),('xyz', 19),('xyz', 25),('xyz', 1),('ghi', 11),('xyz', 200),('xyz', 9)]
k =10
bubbleSort(arr,k)
print( arr[::-1][:k] )
One way to doing is using Bubble sort O(n*K). Using min heap, we can do it in O(nlogK)
#Bucket Sort
def bubbleSort(arr,k):
n = len(arr)
# Traverse through all array elements
for i in range(k):
# Last i elements are already in place
for j in range(0, n-i-1):
# traverse the array from 0 to n-i-1
# Swap if the element found is greater
# than the next element
if arr[j][1] > arr[j+1][1] :
arr[j], arr[j+1] = arr[j+1], arr[j]
arr = [('abc', 100),('abc', 105), ('def', 15), ('ghi', 10), ('abc', 12),
('xyz', 9),('xyz', 19),('xyz', 25),('xyz', 1),('ghi', 11),('xyz', 200),('xyz', 9)]
k =10
bubbleSort(arr,k)
print( arr[::-1][:k] )
import collections
def findTopTen (aList):
d = collections.defaultdict(list)
final_d = {}
for element in aList:
movie, rate = element[0],element[1]
d[movie].append (rate)
print (d)
for movie, rate in d.items():
final_d[movie] = max(rate)
res =[movie for movie, rate in sorted(final_d.items(),reverse = True, key=lambda x:x[1])]
return res
import collections
def findTopTen (aList):
d = collections.defaultdict(list)
final_d = {}
for element in aList:
movie, rate = element[0],element[1]
d[movie].append (rate)
print (d)
for movie, rate in d.items():
final_d[movie] = max(rate)
res =[movie for movie, rate in sorted(final_d.items(),reverse = True, key=lambda x:x[1])]
return res
Here's my solution using python
L = [("abc", 10), ("def", 15), ("ghi", 10), ("abc", 12), ("xyz", 100)]
# get the top 10 video ratings
def top_video_ratings(L):
# create a dict to sum all the values, O(n)
video_ratings = {}
for (name, rates) in L:
if not name in video_ratings:
video_ratings[name] = 0
video_ratings[name] += rates
# create a list with tuple, that's because we can't sort a dict, O(n)
video_ratings_list = []
for name in video_ratings:
video_ratings_list.append((name, video_ratings[name]))
bubble_sort_desc(video_ratings_list) # sort descending, O(n^2) <-- we can do better with quick sort
# return the top 10
return video_ratings_list[0:10]
def bubble_sort_desc(array):
n = len(array)
for i in range(n):
for j in range(n - i - 1):
# swap values
if array[j][1] < array[j+1][1]:
array[j], array[j+1] = array[j+1], array[j]
print(top_video_ratings(L))
N: Number of the show - rating pairs
Time complexity: O(NlogN)
Space complexity: O(N)
def findTopTen(aListOfShows):
if aListOfShows is None: return None
if len(aListOfShows) == 0: return aListOfShows
if len(aListOfShows) == 1: return aListOfShows
sortedList = sorted(aListOfShows, key=lambda x: x[1], reverse=True)
i = 0
seen = set()
res = []
while i < len(sortedList):
show, rate = sortedList[i]
if show not in seen:
seen.add(show)
res.append(sortedList[i])
i+=1
return res
Time complexity: log(N), If insertion to BST is not counted otherwise O(N)
class Node():
def __init__(self, show, rate):
self.show = show
self.rate = rate
self.left = None
self.right = None
class BST():
def __init__(self):
self.root = None
def createBST(self, show, rate):
if self.root is None:
self.root = Node(show, rate)
return
else:
current = self.root
while True:
if current.show == show:
if current.rate < rate:
current.rate = rate
break
if rate < current.rate:
if current.left:
current = current.left
else:
current.left = Node(show, rate)
break
elif rate > current.rate:
if current.right:
current = current.right
else:
current.right = Node(show, rate)
break
return
def inOrderTraversal(self, limit):
res = []
def traverse(root, res):
if root:
traverse(root.right, res)
res.append([root.show, root.rate])
traverse(root.left, res)
return
traverse(self.root, res)
return res[:limit]
values = [("abc", 13), ("xyz", 19), ("efr", 89), ("afaf", 23), ("abc", 7), ("afeogh", 5)]
bst = BST()
for val in values:
show, rate = val
bst.createBST(show, rate)
print(bst.inOrderTraversal(3))
expected output
(for the < 10 input data items in the prompt, works for > 10 or with top K for K < 10...)
--
$ javac TopKVideos.java
$ java TopKVideos
[xyz,abc,def,ghi]
--
where TopKVideos.java contains
import java.util.Map;
import java.util.HashMap;
import java.util.List;
import java.util.ArrayList;
public class TopKVideos {
public static void main(String args[]) {
List<VideoStatistic> sampleVideoStats = getSampleVideoStatisticsList();
TopKVideoQueue topKVideoQueue = new TopKVideoQueue(10);
for (VideoStatistic videoStat : sampleVideoStats) {
topKVideoQueue.add(videoStat);
}
List<String> topVideoNames = topKVideoQueue.getTopKVideoNames();
String outputString = "[";
for (String topVideoName : topVideoNames) {
if (!outputString.equals("[")) { outputString += ","; }
outputString += topVideoName;
}
outputString += "]";
System.out.println(outputString);
}
private static class TopKVideoQueue {
int k = 10;
List<VideoStatistic> topVideos = new ArrayList<VideoStatistic>(k);
Map<String, VideoStatistic> allVideoCounts = new HashMap<String, VideoStatistic>();
public TopKVideoQueue(int k) {
this.k = k;
}
public void add(VideoStatistic videoStats) {
// remove any existing entries (after combining new video view data with old will add top K again)
for (int idx = 0; idx < topVideos.size(); idx++) {
VideoStatistic videoInTopK = topVideos.get(idx);
if (videoInTopK.videoName.equals(videoStats.videoName)) {
topVideos.remove(idx);
}
}
VideoStatistic cumulativeVideoStats = allVideoCounts.get(videoStats.videoName);
if (cumulativeVideoStats == null) {
cumulativeVideoStats = new VideoStatistic(videoStats.videoName, 0);
}
cumulativeVideoStats.watchRate += videoStats.watchRate;
allVideoCounts.put(videoStats.videoName, cumulativeVideoStats);
int topVideosSize = topVideos.size();
int lowestTopKWatchRate;
if (topVideosSize == k && ((lowestTopKWatchRate = topVideos.get(topVideosSize - 1).watchRate) >= cumulativeVideoStats.watchRate)) {
System.out.println("skipping " + videoStats.videoName + ": top K priority queue is full and the watch rate of this video (" + cumulativeVideoStats.watchRate + ") is not higher than the lowest watch rate (" + lowestTopKWatchRate + ").");
return;
}
// we know the video will be inserted into the top K. now we determine the index.
int insertIdx;
for (insertIdx = topVideosSize; insertIdx > 0 && topVideos.get(insertIdx - 1).watchRate < cumulativeVideoStats.watchRate; insertIdx--);
// insert the updated video statistic at the determined index
topVideos.add(insertIdx, cumulativeVideoStats);
// make sure we don't exceed max size
while(topVideos.size() > k) {
System.out.println("size: " + topVideos.size() + ", max: " + k + "; removing last item");
topVideos.remove(topVideos.size() - 1);
}
}
public List<String> getTopKVideoNames() {
List<String> output = new ArrayList<String>();
for (VideoStatistic videoStats : topVideos) {
output.add(videoStats.videoName);
}
return output;
}
}
private static class VideoStatistic {
public String videoName;
public int watchRate;
public VideoStatistic(String videoName, int watchRate) {
this.videoName = videoName;
this.watchRate = watchRate;
}
}
private static List<VideoStatistic> getSampleVideoStatisticsList() {
List<VideoStatistic> sampleVideoStats = new ArrayList<VideoStatistic>();
sampleVideoStats.add(new VideoStatistic("abc", 10));
sampleVideoStats.add(new VideoStatistic("def", 15));
sampleVideoStats.add(new VideoStatistic("ghi", 10));
sampleVideoStats.add(new VideoStatistic("abc", 12));
// ... add some more elements here to really test this. but for submission just including the (<10) entries originally provided in the prompt
sampleVideoStats.add(new VideoStatistic("xyz", 100));
return sampleVideoStats;
}
}
public class Top10Movie{
private static Node[] topNodes;
public static void main(String args[]){
Node node1 = new Node("abc",10);
Node node2 = new Node("def",10);
Node node3 = new Node("ghi",15);
Node node4 = new Node("abc",12);
Node node5 = new Node("xyz",100);
Node node6 = new Node("xyz",33);
Node node7 = new Node("xyz",22);
Node nodes[] = {node1,node2,node3,node4,node5,node6,node7};
accumulateRatings(nodes, 3);
}
private static void accumulateRatings(Node[] nodes, int topK){
HashMap<String, Node> rateMap = new HashMap<>();
for(int i=0;i<nodes.length;i++){
Node node = nodes[i];
if(!rateMap.containsKey(node.getName())){
rateMap.put(node.getName(),node);
}
else{
rateMap.get(node.getName()).incRate(node.getRate());
}
}
//build minheap of size k to find top k movie
topNodes = new Node[topK];
int count = 0;
Iterator<Entry<String,Node>> it = rateMap.entrySet().iterator();
while(count<topK && it.hasNext()){
Entry<String, Node> entry = it.next();
topNodes[count++] = entry.getValue();
}
heapify(0);
while(it.hasNext()){
Entry<String, Node> entry = it.next();
Node node = entry.getValue();
if(node.getRate()>getMin().getRate()){
replaceMin(node);
}
}
for(int i=0;i<topK;i++)
{
Node node = extractMin();
System.out.println(node.getName());
}
}
public static void heapify(int pos){
int smallest = pos;
int l = pos*2+1;
int r = pos*2 +2;
if(l<topNodes.length &&
topNodes[l].getRate()<topNodes[pos].getRate())
smallest = l;
if(r<topNodes.length &&
topNodes[r].getRate()<topNodes[pos].getRate())
smallest = r;
if(smallest != pos){
swap(smallest,pos);
heapify(smallest);
}
}
public static void swap(int x, int y){
Node tmp = topNodes[x];
topNodes[x]= topNodes[y];
topNodes[y]=tmp;
}
public static Node getMin(){
return topNodes[0];
}
public static void replaceMin(Node node){
Node tmp = topNodes[0];
topNodes[0] = node;
heapify(0);
}
public static Node extractMin(){
Node tmp = topNodes[0];
Node dummy = new Node("dummy", Integer.MAX_VALUE);
topNodes[0] = topNodes[topNodes.length-1];
topNodes[topNodes.length-1]=dummy;
heapify(0);
return tmp;
}
private static class Node{
private String movieName;
private int rate;
public Node(String movieName, int rate){
this.movieName = movieName;
this.rate = rate;
}
public void incRate(int rate){
this.rate = this.rate +rate;
}
public int getRate(){
return this.rate;
}
public String getName(){
return this.movieName;
}
public String toString() {
return "movie: "+movieName+",rate: "+rate;
}
}
}
O(n + k log(n)) where k = 10
import heapq
def find_top_k(L, k):
heap = []
dictionary = {}
for video, rate in L:
if video in dictionary:
dictionary[video] += rate
else:
dictionary[video] = rate
for video, rate in dictionary.items():
heapq.heappush(heap, (-rate, video))
result = []
for _ in range(min(k, len(dictionary))):
rate, video = heapq.heappop(heap)
result.append(video)
return result
sorted_list = sorted(L, key=lambda x:x[1], reverse=True)[:10]
- Anonymous July 03, 2019print [v[0] for v in sorted_list]