Facebook Interview Question
Software Engineer / DevelopersCountry: United States
Interview Type: Phone Interview
Solution done in golang. Converts each word to an array of character counts in one pass. O(n).
This array is the key, the word list is the value.
So O(n+m) where n is the word length and m is the number of words. Also uses stores a 256 length byte array for each anagram (regardless of length of each word and frequency of each anagram)
Sorting at best is O(n log n) so be careful of sorting based solutions. Also since strings are usually immutable across most languages be careful of solutions that use string split and string append as they add (and hide) complexity.
package main
import (
"fmt"
)
func main() {
//can be done with a list rather than array to improve performance
solution := make(map[string][]string)
words := []string{"car", "arc", "rat", "tar", "st"}
for _, word := range words {
l := string(countMap(word)) //string is just byte array under covers in golang O(1)
if solution[l] == nil {
solution[l] = make([]string, 1)
}
solution[l] = append(solution[l], word)
}
fmt.Println(solution)
}
func countMap(word string) []byte {
m := make([]byte, 256)
for _, c := range word {
if m[c] == 0 {
m[c] = 1
} else {
m[c] = m[c] + 1
}
}
return m
}
Why sort? Just add the ASCII values, and you have a premitive hash. Use that as your key, and value is a list of words with the same sum
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class PermutationFinder {
//Assumption: Only lowercase characters are present in a string.
// we can handle uppercase similarly. Instead of 26 unique characters then we
// can have 52 unique characters.
private final Logger logger = LoggerFactory.getLogger(PermutationFinder.class);
private static class MyString {
private final String str;
private final int hashCode;
public MyString(String str) {
this.str = str;
//Optimization. Computing hashCode once.
long hash = 1;
for(char c:this.str.toCharArray()) {
hash = hash * getPrime(c);
}
hashCode = (int)hash%Integer.MAX_VALUE;
}
@Override
public int hashCode() {
return hashCode;
}
@Override
public boolean equals(Object obj) {
if(obj != null && obj instanceof MyString) {
MyString sobj = (MyString)obj;
if(sobj.str.length() == str.length()) {
//It doesnt matter even if there are million characters as there are just 26 unique characters.
//So maximum number of keys can be 26(x2 for uppercase and lowercase)
Map<Character,Integer> mapChars1 = new HashMap<Character,Integer>(sobj.str.length());
for(Character c: sobj.str.toCharArray()) {
Integer count = mapChars1.get(c);
if(count == null) {
mapChars1.put(c, new Integer(1));
} else {
mapChars1.put(c, count + 1);
}
}
Map<Character,Integer> mapChars2 = new HashMap<Character,Integer>(str.length());
for(Character c: str.toCharArray()) {
Integer count = mapChars2.get(c);
if(count == null) {
mapChars2.put(c, new Integer(1));
} else {
mapChars2.put(c, count + 1);
}
}
//Compare the two maps
return isEqual(mapChars1, mapChars2);
}
}
return false;
}
private static boolean isEqual(Map<Character,Integer> map1, Map<Character,Integer> map2) {
if(map1.size() == map2.size()) {
for(Entry<Character,Integer> entry1:map1.entrySet()) {
Integer value2 = map2.get(entry1.getKey());
if(value2 ==null || !value2.equals(entry1.getValue())) {
return false;
}
}
return true;
}
return false;
}
//getPrime(char c) will return corresponding prime number of a char(first 26 prime numbers)
private static final int[] primes = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101};
private static final int getPrime(Character c) {
return primes[c-'a'];
}
}
public static List<List<String>> getPermutations(List<String> input) {
Map<MyString,List<String>> map = new HashMap<MyString,List<String>>();
for(String inputStr:input) {
MyString key = new MyString(inputStr);
List<String> value = map.get(key);
if(value == null) {
value = new LinkedList<String>();
map.put(key,value);
}
value.add(inputStr);
}
List<List<String>> output = new LinkedList<List<String>>();
for(List<String> val: map.values()) {
output.add(val);
}
return output;
}
public static void main(String[] args) {
List<String> input = new LinkedList<String>();
input.add("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzztzztzz");
input.add("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzztt");
input.add("star");
input.add("david");
input.add("jean");
input.add("sumo");
input.add("brian");
input.add("rats");
input.add("jane");
input.add("tars");
input.add("brain");
input.add("jaen");
List<List<String>> output = PermutationFinder.getPermutations(input);
System.out.println("Output is:"+output);
}
}
public ArrayList<ArrayList<String>> findAnagrams(String[] arr){
Hashtable<String , ArrayList<String>> ht = new Hashtable<String , ArrayList<String>>();
for(String str : arr){
String de_str= decode(str);
if(ht.containsKey(de_str )){
ht.get(de_str).add(str);
}
else{
ArrayList<String> tem = new ArrayList<String>();
tem.add(str);
ht.put(de_str,tem);
}
}
ArrayList<ArrayList<String>> res = new ArrayList<ArrayList<String>>(ht.values());
}
public String decode(String str){
int[] arr = new int[26];
for(int i = 0 ; i < str.length() ; i++){
arr[str.charAt(i)-'a'+1]++;
}
StringBuffer sb = new StringBuffer();
for(int k : arr){
sb.append(k);
sb.append(',');
}
return sb.toString();
}
Using scala, the solution is pretty simple. In this first version, I have not optimized anything:
val strs = List("star", "astr", "car", "rac", "st")
def anagram(s1:String, s2:String): Boolean = {
s1.toList.sortWith(_ < _) == s2.toList.sortWith(_ < _)
}
type slist = List[String]
type result = List[slist]
def search(f:String, r:slist, unmatched:slist, sofar:result):result = {
val used = ((f::r) filter {a => anagram(f,a)})
val um = (r diff used) ::: unmatched
gA(um, Nil, used :: sofar)
}
def gA(ma:slist, unmatched:slist, sofar:result):result = {
ma match {
case h::t => search(h, t, unmatched, sofar)
case List() => unmatched match {
case h::t => search(h, t, Nil, sofar)
case _ => sofar
}
}
}
Execute as follows with the included result:
scala> gA(strs, Nil, Nil)
res0: result = List(List(st), List(car, rac), List(star, astr))
scala>
A trivial optimization is to create a map from string to sorted list
Generate the first 26 prime numbers and use it to calculate a product for each anagram, these anagram product can be saved in a hash set. The mapping is like a-3, b-5, c-7...
To group words, we can use a Map<int, List<HashSet>> data structure. All words with the same length will be saved in a same list of hash set, anagrams will be saved in the same hash set.
GroupAnagrams(String[] s){
if( s== null || s.size() == 0)
return;
int[] prime = GenerateNPrimeNumbers(26);
Map<int, List<HashSet<String>> map = new HashMap<int, LinkedList<HashSet<String>>();
HashSet<String, int> product = new HashSet<String, int>();
for(String str: s){
int key = PrimeProduct(s);
List l = map.get(str.length);
if(l == null){
l = new LinkedList<HashSet<String>>();
AddNewString(l, s);
product.add(key);
}
for(HashSet h: l){
if(key == product.get(h.iterator().next())){
h.add(s);
}else{
AddNewHash(l, s);
product.add(key);
}
}
}
reportResult(map);
}
AddNewHash(List l, String s){
HashSet<String> hh = new HashSet<String>();
hh.add(s);
l.add(hh);
}
Use prime numbers to calculate a hash value for each word, anagrams will have the same hash value, save the results in a Map<int, HashSet<String>>. This is easier than the version I posted earlier.
GroupAnagrams(String[] s){
if( s== null || s.size() == 0)
return;
int[] prime = GenerateNPrimeNumbers(26);
Map<int, HashSet<String>> map = new HashMap<int, LinkedList<HashSet<String>>();
for(String str: s){
int key = PrimeProduct(s);
HashSet<String> h = map.get(key);
if(h != null){
h.add(s);
}else{
HashSet<String> hh = new HashSet<String>();
hh.add(s);
map.add(key, hh);
}
reportResult(map);
}
// implement this
ReportResult(Map<int, HashSet<String>> map){
print("[");
for(HashSet h: map.values()){
print("[");
Iterator ite = h.iterator();
print("\"" + ite.next() + "\"");
while(ite.hasNext()){
print(", ");
print("\"" + ite.next() + "\"");
}
print("]");
}
print("]");
}
PrimeProduct(String s, int[] prime){
int diff = 97;
int product = 1;
for(int i= 1; i< s.length(); i++){
product *= prime[(int)s.charAt[i] - 97]
}
return product;
}
Wow, someone down voted this answer without the explanation. I'm wondering what is wrong with this solution. :D
Your solution requires a sort O(nlogn) for each item in the input. A faster solution would be to implement a frequency histogram O(n) as is shown in this ruby code:
mylist = ["car","rac","star","astr","st"]
#generate a frequency histogram for a string
def frequency(string)
p = Hash.new(0)
chars = string.split(//)
chars.each{ |v| p[v] += 1 }
p
end
def anagrams(list)
map = Hash.new(false)
list.each do |e|
histogram = frequency(e)
#check if the histogram of your string is present in the hash map
if map[histogram] == false
map.merge!(histogram => e)
#otherwise add the string to your list of anagrams for that histogram
else
anagrams = [map[histogram]]
anagrams << e
map[histogram] = anagrams
end
end
return map
end
anagrams(mylist).values.each do |e|
puts e
puts '-'
end
A very easy problem in Python.
- nilkn January 16, 2014