Google Interview Question
Software Engineer / DevelopersCountry: United States
Interview Type: Phone Interview
Very neat indeed. But I was wondering what would be the complexity of this approach? Suppose the count of 'words' is huge, then this would have very high complexity. Maybe a TRIE can be implemented ?
Though the code is very neat, the time complexity is unsatisfying.
Besides the whole word list scan, this line can also be costly.:
tempList.remove((Object) z);
It's especially inappropriate when you have a fixed huge word list and variable given characters.
Instead of using an arraylist of letters, use a 26-element array to track the count for each letter. This would make the time complexity of "remove" operation O(1).
package careercup;
public class Scribble {
public static List<String> findDictionaryWords3rd(char[] letters, String[] words) {
List<String> _result = new ArrayList<String>();
final int[] indexedAlphabet = indexAlphabet(letters);
for (String word : words) if (testWord(word, indexedAlphabet)) _result.add(word);
return _result;
}
private static boolean testWord(String word, int[] indexedAlphabet) {
final int[] alphabet = indexedAlphabet.clone();
for (char z : word.toCharArray()) {
if (--alphabet[(z - 'a')] < 0) return false;
}
return true;
}
private static int[] indexAlphabet(char[] letters) {
final int[] result = new int[26];
for (char z : letters) result[z - 'a'] += 1;
return result;
}
}
A little optimized example, but the idea the same.
(1)count the times each character can be used.
(2)check each word's character use, if within limit, the word can be formed.
C++ Code:
#include <string>
#include <vector>
#include <iostream>
using namespace std;
void getWords(vector<string>& res, const vector<char>& vc, const vector<string>& vs)
{
res.clear();
if(vc.empty() || vs.empty()) return;
//get the count of all given characters
int count[128] = {0}, i, size;
for(i = 0, size = vc.size(); i < size; ++i) ++count[vc[i]];
//check each word
int times[128] = {0}, j, len;
for(i = 0, size = vs.size(); i < size; ++i){
const string& word = vs[i];
//check the count of each character
for(j = 0, len = word.size(); j < len; ++j){
if(++times[word[j]] > count[word[j]]) break;
}
//if can form this word
if(j == len) res.push_back(word);
//clear this word's count
for(; j >= 0; --j){
--times[word[j]];
}
}
}
test case:
int main()
{
vector<string> words;
words.push_back("cat");
words.push_back("act");
words.push_back("ac");
words.push_back("stop");
words.push_back("cac");
vector<char> characters;
characters.push_back('a');
characters.push_back('c');
characters.push_back('t');
vector<string> res;
getWords(res, characters, words);
for(int i = 0, size = res.size(); i < size; ++i){
cout << res[i] << "\n";
}
return 0;
}
import java.lang.*;
import java.util.*;
import java.io.*;
public class Main {
public static void main(String args[]){
Scanner inp = new Scanner(System.in);
char dict[] = {'a','c','t'};
String words[] = {"cat","act","ac","stop","cac"};
for(String str:words){
Hashtable<Character,Integer> counter = new Hashtable<Character,Integer>();
for(char c:dict){
if(counter.contains(c)){
counter.put(c, counter.get(c) +1);
}
else{
counter.put(c, 1);
}
}
boolean cannotbe=false;
for(char c:str.toCharArray()){
if(counter.get(c)==null){
cannotbe = true;
break;
}
else{
if(counter.get(c)==0){
cannotbe = true;
break;
}
else{
counter.put(c, counter.get(c)-1);
}
}
}
if(!cannotbe){
System.out.println(str);
}
}
}
}
public static ArrayList<String> getValidWords(String[]sIn, char[] chars ){
//set up a boolean array and init all to false
Boolean[] charList = new Boolean[128];
for (int i = 0; i < charList.length; i++) {
charList[i]= false;
}
//set valid char to true
for(char c: chars){
charList[c]= true;
}
ArrayList<String> sOut = new ArrayList<>();
// Check each char in each word with the valid chars
for(String word: sIn){
Boolean validWord = true;
for(char x: word.toCharArray()){
if(charList[x] == false){
//found InValid char break the inner for loop
validWord = false;
break;
}
}
//if all valid char add the word to output
if(validWord)
sOut.add(word);
}
//System.out.println(sOut);
return sOut;
}
void Test6486723970203648()
{
char sym[] = {'a', 'c' , 't'};
std::set<char> chrs(sym, sym + sizeof(sym));
char* arr[] = {"cat", "act", "ac", "stop" , "cac" };
int result = 0;
for( size_t i = 0; i < _countof(arr); i++)
{
size_t length = ::strlen(arr[ i ]);
if( sizeof(sym) >= length )
{
char tst[255] = {'\0'};
for( size_t j = 0; j < length; j++ )
{
char& chr = arr[ i ][j];
if( chrs.find( chr ) == chrs.end()
|| '\0' != tst[ chr ] ) // word should not use symbol twice
{
length = 0x00;
break;
}
tst[ chr ] = chr;
}
if( 0x00 != length )
{
std::cout << arr[i] << std::endl;
result++;
}
}
}
CPPUNIT_ASSERT_EQUAL_MESSAGE( "successed", 3, result );
}
function scribble($realwords = array(), $characters = array()) {
$output = array();
foreach ($realwords as $realword) {
$l = strlen($realword);
for ($i = 0; $i <= $l-1; $i++) {
if (!in_array($realword{$i}, $characters)) {
break(2);
}
}
$output[] = $realword;
}
return $output;
}
print_r(scribble(array('cat', 'act', 'ac', 'stop', 'cac'), array('a', 'c', 't')));
/*
Array
(
[0] => cat
[1] => act
[2] => ac
)
*/
Yet another solution in Java:
public static List<String> findWords(String[] givenWords, List<Character> givenChars) {
List<String> wrds = new ArrayList<String>();
for(String word : givenWords) {
List<Character> t = new ArrayList<Character>();
t.addAll(givenChars);
if(word.length() > t.size()) {
continue;
}
boolean wordCanBeFormed = true;
for(char c : word.toCharArray()) {
if(t.size() > 0 && t.contains(c)) {
t.remove(new Character(c));
}
else {
wordCanBeFormed = false;
break;
}
}
if(wordCanBeFormed) {
wrds.add(word);
}
}
return wrds;
}
Solution in C :
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef char bool;
#define TRUE 1
#define FALSE 0
void swap(char *a, char *b) {
char tmp = *a;
*a = *b;
*b = tmp;
}
bool word_include(char *word, char letter) {
int len = strlen(word);
for (int i = 0; i < len; i++) {
if (word[i] == letter) {
word[i] = '\0';
swap(&word[i], &word[len - 1]);
return TRUE;
}
}
return FALSE;
}
void scribble(char *letter_set, char **words, size_t words_len) {
for (int i = 0; i < words_len; i++) {
char *letter_set_cpy = strdup(letter_set);
char *current_word = words[i];
size_t len = strlen(current_word);
size_t j = 0;
for (j = 0; j < len; j++) {
if (word_include(letter_set_cpy, current_word[j]) == FALSE) {
break;
}
}
if (len == j) {
printf("%s\n", current_word);
}
free(letter_set_cpy);
}
}
public static void getWords(String [] words , char [] chs ){
boolean [] dp = new boolean [256];
boolean flag = true;
for (int i =0 ;i<chs.length;++i){
dp[chs[i]]=true;
}
for (String word : words){
for (int j =0; j<word.length() ;++j){
if (dp[word.charAt(j)]){
dp[word.charAt(j)] = false;
}else{
flag = false ;
break;
}
}
if (flag){
System.out.println(word);
}
//reset the dp
for (int j =0 ;j<word.length() ;++j){
dp[word.charAt(j)] = true;
}
}
}
def scribble(words, letters):
from itertools import ifilter, imap
from itertools import permutations, combinations, chain
# Generate all substrings
sub = (imap(lambda x: "".join(x), combinations(letters, i))
for i in xrange(1, len(letters)+1))
substring_list = chain(*sub)
def legit_words(letters):
""" Return the set of words that can be expressed using 'letters'"""
all_possible_words = imap(lambda x: "".join(x), permutations(set(letters)))
matches = set(ifilter(lambda x: x in words, all_possible_words))
return matches
return reduce(set.union, imap(legit_words, substring_list))
words = set(['cat', 'act', 'ac', 'stop' , 'cac'])
letters ="actactastop"
assert scribble(words, letters) == set(['cat', 'act', 'ac', 'stop'])
What about sorting ?
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
public class SelectWordsOfChars {
public static void main(String[] args){
Collection<String> words = Arrays.asList(new String[]{"cat","tac","caat","dcaat"});
Collection<Character> chars = Arrays.asList(new Character[]{'c','a','t','a'});
System.out.println( getWordsOfChars(words,chars));
}
public static Collection<String> getWordsOfChars(Collection<String> words, Collection<Character> chars){
Set<String> filteredWords = new HashSet<String>();
Character[] sortedChars = chars.toArray(new Character[]{});
Arrays.sort(sortedChars);
for (String currentWord : words) {
char [] sortedCurrentWord = currentWord.toCharArray();
Arrays.sort(sortedCurrentWord);
int iterThroughChars = 0;
for(int iterThroughWord = 0; iterThroughWord < sortedCurrentWord.length && iterThroughChars < sortedChars.length && sortedCurrentWord[iterThroughWord] >= sortedChars[iterThroughChars] ; iterThroughChars ++){
if( sortedCurrentWord[iterThroughWord] == sortedChars[iterThroughChars] ){
iterThroughWord++;
}
}
if( iterThroughChars == sortedChars.length ){
filteredWords.add(currentWord);
}
}
return filteredWords;
}
}
There was a code error.. This version works and is optimized:
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
public class SelectWordsOfChars {
public static void main(String[] args) {
Collection<String> words = Arrays.asList(new String[] { "cat", "act",
"ac", "stop", "cac" });// acud
Collection<Character> chars = Arrays.asList(new Character[] { 'a', 'c',
't' });
System.out.println(getWordsOfChars(words, chars));
}
public static Collection<String> getWordsOfChars(Collection<String> words,
Collection<Character> chars) {
Set<String> filteredWords = new HashSet<String>();
Character[] sortedChars = chars.toArray(new Character[] {});
Arrays.sort(sortedChars);
for (String word : words) {
if (word.length() <= chars.size()) {
char[] sortedWord = word.toCharArray();
Arrays.sort(sortedWord);
int iterWord = 0;
for (int iterThroughChars = 0; iterWord < sortedWord.length
&& iterThroughChars < sortedChars.length
&& sortedWord[iterWord] >= sortedChars[iterThroughChars]; iterThroughChars++) {
if (sortedWord[iterWord] == sortedChars[iterThroughChars]) {
iterWord++;
}
}
if (iterWord == sortedWord.length) {
filteredWords.add(word);
}
}
}
return filteredWords;
}
}
How about a prefix tree?
We can convert a list of words into a prefix tree: each node we'll be a sorted list of letters, and have reference to one or more indexes of a corresponding words:
def build_prefix_tree(words):
tree = PrefixTree()
for i, word in enumerate(words):
tree.insert(value=sorted(word), index=i)
return tree
So each tree node can have multiple indexes:
Prefix tree with "node (indexes)":
acc (4)
/
ac (2) - act (0, 1)
\
psto (4)
In the tree above each word is sorted alphabetically and has index or indexes of the original word position in a `words` array.
Now, we can find all the matches in the prefix tree:
def scribble(chars, words):
chars = ''.join(sorted(chars))
prefix_tree = build_prefix_tree(words)
results
nodes = prefix_tree.root_nodes
while nodes:
node = nodes.pop()
if node == chars[:len(node)]:
results.extend(words[i] for i in node.indexes)
nodes.extend(node.children)
return sorted(results) # For consistent results.
//
// main.cpp
// PossibleWords
//
// Created by Vinod Gupta on 3/12/15.
// Copyright (c) 2015 vinodg. All rights reserved.
//
#include <iostream>
#include <vector>
#include <set>
#include <queue>
#include <sstream>
using namespace std;
string InsertAt(string str,int pos,char c)
{
stringstream ss;
ss<< str.substr(0,pos);
ss<<c;
ss<<str.substr(pos);
return ss.str();
}
vector<string> findWords(const char *letters,int len, set<string> &dict)
{
vector<string> out;
queue<string> qs;
for(int i=0;i<len;++i)
{
char c=letters[i];
size_t qsize = qs.size();
if(!qsize)
{
stringstream ss;
ss<<c;
qs.push(ss.str());
continue;
}
for(int j=0;j<qsize;++j)
{
string top = qs.front();
qs.pop();
for(int k=0;k<=top.length();++k)
{
string perm=InsertAt(top,k,c);
qs.push(perm);
if(dict.find(perm) != dict.end())
{
out.push_back(perm);
}
}
}
}
return out;
}
int main(int argc, const char * argv[]) {
set<string> dict;
dict.insert("cat");
dict.insert("act");
dict.insert("ac");
dict.insert("stop");
dict.insert("cac");
char letters[] = {'c','a','t'};
vector<string> out = findWords(letters, 3, dict);
for(vector<string>::iterator itr = out.begin(); itr!= out.end();++itr)
{
cout<<itr->c_str()<<" ";
}
cout<<endl;
return 0;
}
//
// main.cpp
// PossibleWords
//
// Created by Vinod Gupta on 3/12/15.
// Copyright (c) 2015 vinodg. All rights reserved.
//
#include <iostream>
#include <vector>
#include <set>
#include <queue>
#include <sstream>
using namespace std;
string InsertAt(string str,int pos,char c)
{
stringstream ss;
ss<< str.substr(0,pos);
ss<<c;
ss<<str.substr(pos);
return ss.str();
}
vector<string> findWords(const char *letters,int len, set<string> &dict)
{
vector<string> out;
queue<string> qs;
for(int i=0;i<len;++i)
{
char c=letters[i];
size_t qsize = qs.size();
if(!qsize)
{
stringstream ss;
ss<<c;
qs.push(ss.str());
continue;
}
for(int j=0;j<qsize;++j)
{
string top = qs.front();
qs.pop();
for(int k=0;k<=top.length();++k)
{
string perm=InsertAt(top,k,c);
qs.push(perm);
if(dict.find(perm) != dict.end())
{
out.push_back(perm);
}
}
}
}
return out;
}
int main(int argc, const char * argv[]) {
set<string> dict;
dict.insert("cat");
dict.insert("act");
dict.insert("ac");
dict.insert("stop");
dict.insert("cac");
char letters[] = {'c','a','t'};
vector<string> out = findWords(letters, 3, dict);
for(vector<string>::iterator itr = out.begin(); itr!= out.end();++itr)
{
cout<<itr->c_str()<<" ";
}
cout<<endl;
return 0;
}
If the dictionary contains many words and it is extenssively searched, a tria based solution may be the most efficient solution.
The search function should get a tria node and a set of possible letters ("act"). It should check if there is a sub node for "a", "c" and "t". Suppose we found a sub node for "c", then the sub node should be searched for the ramaining letters "at".
The efficiency of such implementation depends only on the number of letters and does not depend on the number of words in the dictionary.
Here is a sample main:
TriaNode tree;
tree.addWord("cat");
tree.addWord("act");
tree.addWord("ac");
tree.addWord("stop");
tree.addWord("cac");
tree.findScribble((string)"act", (string)"");
And the implementation:
#define NUM_LETTERS 256
class TriaNode
{
public:
TriaNode();
void addWord(string word);
void findScribble(string possibleLetters, string wordSoFar);
protected:
// a pointer to the sub node of each letter
TriaNode *m_pLet[NUM_LETTERS];
// indication if this node terminates an existing word in the dictionary
bool m_isTerminating;
};
void TriaNode::findScribble(string possibleLetters, string wordSoFar)
{
if (m_isTerminating)
{
cout << wordSoFar << endl;
return;
}
for (size_t i=0; i<possibleLetters.size(); i++)
{
char curLetter = possibleLetters[i];
if (m_pLet[curLetter])
{
string remainingLetters = "";
if (i > 0)
remainingLetters = letters.substr(0, i);
if (i < letters.size()-1)
remainingLetters += letters.substr(i+1);
m_pLet[curLetter]->findScribble(remainingLetters, wordSoFar + (char)curLetter);
}
}
}
- Akash Bhartiya December 19, 2013