Amazon Interview Question
SDE1sCountry: India
This is a problem of finding maximum occurring longest substrings from a bunch of strings. This I think can be solved with suffix tree. Prepare a suffix tree with all the strings. At each end point increment the number of finished strings. Then do a level wise traversal and get the largest number in the deepest height.
Suffix tree is a great thing and would work however I think you should notice two things:
- it's quite complex to implement it within interview time
- there is a number k given.
It should lead you to other solution - why just not to browse the sequences of size k from left to right? I.e having a->b->c->d, and k = 3 we got a->b->c, then remove first element, and add the next from the sequence to the end what results in b->c->d. This has a linear time. Moreover, we maintain a map (hash table) that matches the sequence with the counter. This leads to O(n log n) (c++ map), or O(n) solution (c++ unordered_map).
Util: Histogram of pages, sorted dynamicaly by frequency. (If count becomes greater than higher item, bubble up). Also, the pages are indexed by hashset for fast lookup. Lets call this util histogram.
Trie of depth k. The histogram is all of parent children.
Insert all sequences of size k. As walk trie, update histogram of each page at i level.
For sorted frequencies walk leaves in order (determined by histogram)
Thanks for your directions
Seems to be working using Trie of depth k and priority queue
{{
import java.util.Arrays;
import java.util.Comparator;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.AbstractMap.SimpleEntry;
import java.util.TreeMap;
public class KPage {
public static void main(String[] args) {
int k = 3; int n = 3;
Trie t = new Trie(k);
t.addPages(new String[]{"x", "y", "z", "a", "b", "c", "d", "e", "f"});
t.addPages(new String[]{"z", "a", "b", "c", "d"});
t.addPages(new String[]{"x", "y", "a", "b", "c", "d"});
t.addPages(new String[]{"a", "b", "c", "d"});
String [][] nKPages = t.getKPages(n);
for(String[] kPage :nKPages) System.out.println(Arrays.toString(kPage));
}
}
class Trie {
int k;
Node root = new Node("");
Trie(int k){this.k = k;}
void addPages(String[] pages) {
for (int i = 0; i < pages.length - k + 1; i++) {
Node node = root;
for (int j = 0; j < k; j++) node = node.add(pages[i+j]);
}
}
String [][] getKPages(int n) {
String [][] nKPages = new String[n][];
String [] kPage = new String[k];
int i = 0;
Comparator<Map.Entry<Integer,String[]>> c = new Comparator<Map.Entry<Integer,String[]>>(){
public int compare(Map.Entry<Integer,String[]> a, Map.Entry<Integer,String[]> b){ return a.getKey()<b.getKey() ? 1 : (a.getKey()>b.getKey() ? -1 : 0);}};
PriorityQueue<Map.Entry<Integer,String[]>> kPagesQ = new PriorityQueue<Map.Entry<Integer,String[]>>(11,c);
for (Node node : root.childs.values()) node.getKPages(0, kPage, kPagesQ);
while(i < n && !kPagesQ.isEmpty()) nKPages[i++] = kPagesQ.poll().getValue();
return nKPages;
}
}
class Node {
String page;
Map<String, Node> childs = new TreeMap<String, Node>();
int count;
Node(String page) {this.page = page;}
public Node add(String page) {
Node node = childs.get(page);
if (node == null) childs.put(page, node = new Node(page));
node.count++;
return node;
}
public void getKPages(int l, String [] kPage, PriorityQueue<Map.Entry<Integer,String[]>> kPagesQ){
kPage[l++] = page;
if(childs.isEmpty()) kPagesQ.add(new SimpleEntry<Integer,String[]>(count, kPage.clone()));
else for (Node node : childs.values()) node.getKPages(l, kPage, kPagesQ);
}
}
}}}
#include <iostream>
#include <queue>
#include <vector>
#include <unordered_map>
using namespace std;
#define topElemetsN 3
typedef string pattern;
typedef int patternCount;
typedef pair<pattern, patternCount> lookUp;
// Using > operator we can achieve min heap
// so we can pop minimum element so that we are left with
// Max n elements
bool compareLookUp(lookUp lhs , lookUp rhs)
{
return lhs.second > rhs.second;
}
void topElmentsPatternLengthK(vector<string> ip , int k)
{
unordered_map<pattern, patternCount> lookUpTable;
for(int i = 0 ; i < ip.size(); ++i)
{
for(int ss = 0 ; ss + k <= ip[i].size() ; ++ss)
{
// if there is not key than key is inserted and count is set as 0
++lookUpTable[ip[i].substr(ss,k)];
}
}
priority_queue<lookUp , vector<lookUp> , decltype(&compareLookUp) > topElements(&compareLookUp);
//Working out topElements
for(auto aPair : lookUpTable)
{
topElements.push(aPair);
//Keeping size not more than topElemetsN
if(topElements.size() > topElemetsN)
{
topElements.pop();
}
}
//Printing pairs
while(!topElements.empty())
{
cout<<topElements.top().first<<" ";
topElements.pop();
}
}
int main()
{
vector<string> strVec( { "xyzabcdef" ,
"zabcd",
"yzabcd",
"abcd"});
topElmentsPatternLengthK(strVec , 3);
return 0;
}
static void Main(string[] args)
{
List<string> seq = new List<string> { "xyzabcdef", "zabcd", "yzabcd", "abcd" };
var result = FindTop3_K_PageSequence(3, seq);
}
public static List<string> FindTop3_K_PageSequence(int k, List<string> seq)
{
List<string> result = new List<string>();
Dictionary<string, int> freq = new Dictionary<string, int>();
foreach (string s in seq)
{
int sLength = s.Length;
for (int i = 0; i < sLength; i++)
{
if (i + k > sLength)
break;
string sSubstr = s.Substring(i, k);
if (!freq.ContainsKey(sSubstr))
freq.Add(sSubstr, 1);
else
freq[sSubstr] += 1;
if (result.Count < 3 && freq[sSubstr] == 1)
result.Add(sSubstr);
else
{
int j;
for (j = 0; j < 3; j++)
if (freq[result[j]] < freq[sSubstr])
break;
if (j < 3)
{
bool found = false;
foreach (string r in result)
if (r.Equals(sSubstr))
{
found = true;
break;
}
if (!found)
{
result.RemoveAt(j);
result.Add(sSubstr);
}
}
}
}
}
return result;
}
static List<string> FindTopNVisitedKPageSequence(List<string> data, int N, int K)
{
string sequence;
Dictionary<string, long> pages = new Dictionary<string, long>();
data.ForEach(d =>
{
if (d.Length >= K)
for (int i = 0; i < d.Length; i++)
if (i + K <= d.Length)
{
sequence = d.Substring(i, K);
if (!pages.ContainsKey(sequence))
pages.Add(sequence, 1);
else
pages[sequence]++;
}
});
List<IGrouping<long, KeyValuePair<string, long>>> groups = (from item in pages orderby item.Value descending select item).GroupBy(i => i.Value).ToList();
List<string> result = new List<string>();
int count = 0;
foreach (IGrouping<long, KeyValuePair<string, long>> item in groups)
{
foreach (KeyValuePair<string, long> pageSequence in item)
{
Console.WriteLine("Top {0} visited page sequence: {1} ({2})", count, pageSequence.Key, pageSequence.Value);
result.Add(pageSequence.Key);
if (++count > (N - 1))
return result;
}
}
return result;
}
cant we do this the map reduce way??emit the 3 patterened navigation known from the file
- Saurabh July 30, 2014x->y->z->a->b->c->d->e->f
would result in
x->y->z
y->z->a
z->a->b etc etc for user 1
simply maintain a hashtable and boom we have the count..dont we? what am I missing here?