Amazon Interview Question
SDE-2sCountry: United States
Interview Type: In-Person
std::vector<std::vector<std::string>>
groupByAnagrams(const std::vector<std::string>& input)
{
std::unordered_map<std::string, std::vector<std::string> > repToChildren;
for(const auto & str_raw : input){
std::string str(str_raw.size() );
std::partial_sort_copy(str_raw.begin(), str_raw.end(), str.begin() );
repToChildren[str].push_back(str_raw);
}
std::vector<std::vector<std::string> > result;
for(auto it = repToChildren.begin(); it != repToChildren.end(); ++it){
const auto& children_raw = it->second;
if(children_raw.size() > 1){
std::vector<std::string> children(children_raw);
std::partial_sort_copy(children_raw.begin(), children_raw.end(), children.begin());
result.push_back(std::move(children));
}
}
using vecOfStr = std::vector<std::string>
auto lambda
= [](const vecOfStr&a, const vecOfStr&b)->bool
{
return lexicographical_compare(a.begin(), a.end(), b.begin(), b.end() );
};
std::sort(result.begin(), result.end(), lambda);
return result;
}
public List<List<String>> groupAnagrams(String[] strs) {
List<List<String>> resultList = new ArrayList<>();
Map<String, List<String>> map = new HashMap<>();
for (String str : strs) {
char[] chArray = str.toCharArray();
Arrays.sort(chArray);
String tempString = String.valueOf(chArray);
if (!map.containsKey(tempString)) {
List<String> newList = new ArrayList<>();
map.put(tempString, newList);
}
map.get(tempString).add(str);
}
for(String key:map.keySet()){
resultList.add(map.get(key));
}
return resultList;
}
public List<List<String>> groupAnagrams(String[] strs) {
List<List<String>> resultList = new ArrayList<>();
Map<String, List<String>> map = new HashMap<>();
for (String str : strs) {
char[] chArray = str.toCharArray();
Arrays.sort(chArray);
String tempString = String.valueOf(chArray);
if (!map.containsKey(tempString)) {
List<String> newList = new ArrayList<>();
map.put(tempString, newList);
}
map.get(tempString).add(str);
}
for(String key:map.keySet()){
resultList.add(map.get(key));
}
return resultList;
}
Javascript solutions.
let set = ["cat", "tac", "act", "xyt", "tea", "eat"];
let ns = {}
//brute force
function subSet() {
for(let i =0; i < set.length; i++) {
let sub = []
sub.push(set[i]);
let a = sort(set[i]);
for( let j =0; j < set.length; j++) {
let b = sort(set[j]);
if(( set[i] !== set[j] && a === b )) {
sub.push(set[j]);
}
}
ns[a]= sub;;
}
}
function sort(s) {
var str = s.split("").sort().join("");;
return str;
}
//subSet();
// optimized (O(n))
let map = {};
for(let i =0; i < set.length; i++) {
var s = sort(set[i]);
if(!map[s]) {
map[s] = [set[i]];
} else {
map[s].push(set[i]);
}
}
var s = Object.keys(map).map(data => map[data])
console.log(s)
How about this C++ implementation? (Based on azambhrgn May 12, 2017 algorithm.)
#include <iostream>
#include <algorithm>
#include <string>
#include <vector>
#include <sstream>
#include <map>
using namespace std;
class AnagramDict {
multimap<int,int> hashes;
vector<string> anagrams;
const int primeHashNumber = 31;
int hashCode(string& s)
{
int h = 1;
for (auto& ch : s)
{
h *= primeHashNumber;
h += ch;
}
return h;
}
public:
AnagramDict(vector<string> strings)
{
int i = 0;
for (auto& s : strings)
{
string sorted(s);
partial_sort_copy(s.begin(), s.end(), sorted.begin(), sorted.end());
hashes.emplace(hashCode(sorted), i);
anagrams.push_back(s);
++i;
}
}
void show()
{
int newKey = -1;
for (auto& h: hashes)
{
if (h.first != newKey)
{
cout << endl;
newKey = h.first;
}
cout << anagrams[h.second] << " ";
}
}
};
int main()
{
AnagramDict anagrams = { { "act", "dog", "god", "cat", "tac", "fgo", "ogf" } };
anagrams.show();
return 0;
}
Take a hans multi set of strings, (use hashfuncion which supress the characters positions, equal uses the sort string matching.
iterate over the input string again, find the equal range if greater than 1, then these are all anagrams, print, and erase the entries from the hash_set
class equal_fun {
public:
bool operator()(const string &a, const string &b) const{
string s1(a); string s2(b);
sort(s1.begin(), s1.end());
sort(s2.begin(), s2.end());
if(!s1.compare(s2)) {
cout <<a << " equals " << b <<"\n";
return true;
}
return false;
}
};
class hash_fun {
public:
size_t operator()(const string &a) const{
size_t asum = a[0];
for(int i = 1; i <a.length(); i++) {
asum ^= a[i];
}
return asum;
}
};
void anagram_set() {
vector<string> input = {"dog", "ogd", "kak", "akk", "abg", "gdo", "alo"};
unordered_multiset<string, hash_fun, equal_fun> hms;
typedef unordered_multiset<string, std::hash<string>, equal_fun>::iterator hit;
for(int i = 0; i < input.size(); i++) {
hms.insert(input[i]);
}
for(hit h = hms.begin(); h != hms.end(); ++h) {
cout <<"Added " << *h <<"\t";
}
cout <<"\n";
for(int i = 0; i < input.size(); i++) {
pair<hit, hit> phit = hms.equal_range(input[i]);
if (phit.first != hms.end() && phit.first != phit.second) {
hit tempiter = phit.first;
while(tempiter != phit.second) {
cout <<"Anagram " << *tempiter <<"\t";
++tempiter;
}
}
cout << "\n";
hms.erase(phit.first, phit.second);
}
}
private int HashFunction(string str)
{
var chars = str.ToCharArray();
int result = 0;
foreach (var chr in chars)
result += chr.GetHashCode();
return result;
}
private Hashtable HashCol(String[] col)
{
Hashtable table = new Hashtable();
foreach (var str in col)
{
int key = HashFunction(str);
if (!table.ContainsKey(key))
{
table.Add(key, new List<string> { str });
}
else
{
(table[key] as List<string>).Add(str);
}
}
return table;
}
private String[] GetAnagrams(string str, Hashtable table)
{
int key = HashFunction(str);
return (table[key] as List<string>).ToArray();
}
public static int main(string arg)
{
Hashtable table = HashCol(new String[] { "cat", "dog", "ogd", "act", "tca", "ofg" });
String[] strs = GetAnagrams("dog", table);
}
The simple implementation of the above program is to use an unordered set in c++ and first check if the length of the two subset strings from the array has the same length or not if no then return false. If yes, then push all the characters of the string to the set and then compare each character from the other string and if there is any character which does not match with the character in the set return false, else return true.
Implementation:
#include <bits/stdc++.h>
using namespace std;
bool areAnagram(string str1, string str2){
unordered_set<char> s;
int i;
int n = str1.length();
int m = str2.length();
if (n != m)
return false;
for (i = 0; i < n; i++)
s.insert(str1[i]);
for(i = 0; i < m; i++){
if(s.find(str2[i]) == s.end())
return false;
}
return true;
}
void findAllAnagrams(string arr[], int n)
{
for (int i = 0; i < n; i++)
for (int j = i+1; j < n; j++)
if (areAnagram(arr[i], arr[j]))
cout << arr[i] << " is anagram of " << arr[j] << endl;
}
int main()
{
string arr[] = {"geeksquiz", "geeksforgeeks", "abcd",
"forgeeksgeeks", "zuiqkeegs"};
int n = sizeof(arr)/sizeof(arr[0]);
findAllAnagrams(arr, n);
return 0;
}
If i understand correctly -
String[] str = {"cat","dog","ogd","act","tca","ofg"}; is given
and the o/p -
dog ogd
cat act tca
ofg
There could be two ways to solve this question-
1- Create an auxilary array of same strings.
2- Sort the words inside the auxaliry array, Hence in our example - auxilary_array after sorting will be like {"act","dog","dog","act","act","fgo"}
3- create a hashmap with <string,Arraylist>, and put auxilary_array values as key and i/p array value as values.
Hence new result hashmap will be like -
act -> cat,act,tca
dog -> dog,ogd
fgo- > ofg
print all the values from hashmap
Problem with this approach are-
1- Extra space for the auxilary array
2- Time complxety will be n*klogk , where n is size of array of string and k is the maximum character word in array
Not a good approach
Second approach -
Why not we create a hashmap of the each words and then compare and add to the map for final result, But the problem is that what will you take as key of map.
words ? Not good , act and cat will be different keys
ASCII addition/multiplication of character in words ? There might be possible cases when you can get same values for different words. Not good
So if we write our own hasvalue function that will provide always unique values for words, but same for the anagrams , Then our problem can be solved.
Here is the way to write such type of hashfunction -
1- Create an array of size equals to unique character in i/p string, and assign a prime number for all chars.
We use english alphabates then there will be 26 char.
public static int[] hash = { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101}; // 26 prime numbers
'a' will be mapped with first element of the array and 'z' with last. No uppercase
Below is working code-
We need extra space for creation of hash array , But the time complexity will be reduced to O(n*k) ~= O(n)
- azambhrgn May 12, 2017