Google Interview Question
Software DevelopersCountry: United States
1) Approach with HT (not so good)
One approach could be to have the dictionary in a hashtable and then create varions of the original string that miss one character, two characters etc.
if k is numbers of characters to be deleted and n length of the search string, there are n choose k possibilities (= n!/((n-k)! * k!):
1 possibilities to remove 0
n possibilities to remove 1
etc...
as an example, lets assume the input text is 20 characters and all possibilities up to 19 removed characters are tried:
this is ~2^n possibillities: ~1 Mio
but, if a word is expected to be found by removing at most
5 characters, "only" 20 choose 0 + 20 choose 1
+ ... + 20 choose 5 = ~22K possibilities need to be tried
2) Approach with trie (better in practice)
it can be improved by doing a kind of a breath dept search
in a trie. This is essentially equal to the hash-table approach but the number of options will not grow as fast becasue only mutations of the original input are followed that actually are prefexis of a real world. How much better this is should be verified by a real dictionary and examples to collect some statistical information. How ever, it will have an impact due to the redundancy of the language. The approach can be improved by limitting max number of characters that are allowed to be removed.
It works as follows, consider this example dictionary:
-fellow
-hello
-one
-hell
-fhel (a fictionary word)
--
and the searchstring input = "fhellowne"
initialize a min priority queue with the root trie-item, index 0, and priority 0 (note the priority is # of characters deleted):
- this gives a queue (prio:pointer to trie,next index):
0:root-item,0
- while the trie is not empty
- take index, trie-item from the prio queue's top
- if input[index] exists in the trie-item,
get the next node of the trie and store it with the
same priority and index + 1.
- if index == input.length -> terminate
- if input[index+j] match a next character in the trie-item:
- create a branch for all following single letters with
lower priority+j (assuming skiping/deleting characters)
- after first iteration queue now looks something like:
0:f(ellow),1
f(hel)
1:h(ell),2
h(ello)
5:o(ne),6
- at the next step:
0:fh(el),2
1:f(ellow),2
etc...
string findWordWithMinimalDeleting(const string& input, TrieItem* dict)
{
priority_queue<PrioQItem, vector<PrioQItem>, PQItemLess> q;
int n = input.length();
// initialize priority queue
q.push(PrioQItem(0, 0, dict));
while(q.size() > 0)
{
TrieItem* cti = q.top().tItem; // current trie item
int i = q.top().index; // index in input string
int p = q.top().prio; // prio = delete char counter
q.pop(); // remove item
if(i == n) return cti->word(); // if we matched all and it's word
TrieItem* ntim = cti->getNext(input[i]);
if(ntim != nullptr)
{ // next trie itme if match
q.push(PrioQItem(p, i + 1, ntim));
if(ntim->isWord())
{
q.push(PrioQItem(p + n - i - 1, n, ntim));
}
}
while(i < n - 1)
{ // try by deleting up to the end and create branch if
// there is continuation in the trie
TrieItem* ntid = cti->getNext(input[i + 1]);
if(ntid != nullptr)
{
q.push(PrioQItem(p + 1, i + 2, ntid));
}
i++;
p++;
}
}
return string("");
}
I assume the delete "in order" is a hint that tells you
if your search string is "Hellzuo" and you decide to
delete the "z" for your second delete only "u" and "o"
are options...
complete code:
#include <iostream>
#include <string>
#include <unordered_map>
#include <queue>
#include <vector>
using namespace std;
// primitive, unoptimized trie with mem. leak etc.
class TrieItem
{
private:
unordered_map<char, TrieItem*> next_;
string word_ = ""; // simplification, should be only a flag
char letter_;
public:
TrieItem() : TrieItem('\0') { }
TrieItem(char letter) : letter_(letter) { }
void addWord(const string& word) { addWord(word, 0); }
TrieItem* getNext(char letter)
{
auto it = next_.find(letter);
if(it != next_.end()) return it->second;
return nullptr;
}
bool isWord() const { return word_.length() > 0; }
string word() const { return word_; }
char letter() const { return letter_; }
private:
void addWord(const string& word, int index)
{
if(index == word.length())
{
word_ = word.substr(0, index + 1);
}
else
{
auto it = next_.find(word[index]);
TrieItem *item;
if(it != next_.end())
{
item = it->second;
}
else
{
item = new TrieItem(word[index]);
next_[word[index]] = item;
}
item->addWord(word, index + 1);
}
}
};
// priority queue item
struct PrioQItem
{
int prio;
int index;
TrieItem* tItem;
PrioQItem(int p, int i, TrieItem* ti)
: prio(p), index(i), tItem(ti) {}
};
// priority queue less predicate
struct PQItemLess
{
bool operator() (const PrioQItem& a, const PrioQItem& b) const
{
return a.prio > b.prio;
}
};
string findWordWithMinimalDeleting(const string& input, TrieItem* dict)
{
priority_queue<PrioQItem, vector<PrioQItem>, PQItemLess> q;
int n = input.length();
// initialize priority queue
q.push(PrioQItem(0, 0, dict));
while(q.size() > 0)
{
TrieItem* cti = q.top().tItem; // current trie item
int i = q.top().index; // index in input string
int p = q.top().prio; // prio = delete char counter
q.pop(); // remove item
if(i == n) return cti->word(); // if we matched all and it's word
TrieItem* ntim = cti->getNext(input[i]);
if(ntim != nullptr)
{ // next trie itme if match
q.push(PrioQItem(p, i + 1, ntim));
if(ntim->isWord())
{
q.push(PrioQItem(p + n - i - 1, n, ntim));
}
}
while(i < n - 1)
{ // try by deleting up to the end and create branch if
// there is continuation in the trie
TrieItem* ntid = cti->getNext(input[i + 1]);
if(ntid != nullptr)
{
q.push(PrioQItem(p + 1, i + 2, ntid));
}
i++;
p++;
}
}
return string("");
}
int main()
{
TrieItem trie;
trie.addWord("fellow");
trie.addWord("hello");
trie.addWord("one");
trie.addWord("hell");
trie.addWord("fhel");
cout << "the word is \"" << findWordWithMinimalDeleting("fhellowne", &trie) << "\"" << endl;
}
It sounds like you can treat this like a graph problem and use BFS; you're guaranteed to find the minimum number of characters to remove that way. But I don't get what remove in order means. What does it matter if you remove the characters in order or not?
Hellowz can become Hello if you remove either the w or the z first; the two approaches are completely equivalent.
/*
One way, as Chris said,
for n chars, 2^n for sure,
but then sort these no.s such that
the comparison is how many 1's the binary string has.
Now, iterate and find the binary string with 1 means deletion of char
where the new word after deletion is into dictionary
*/
def find_word( word , dictionary ){
n = #|word|
l = list([1: 2 ** n ]) -> { s = str($.o, 2) ; '0' ** ( n - #|s|) + s }
sorta( l ) :: {
ls = sum( $.left.value ) -> { $.o == _'1' ? 1:0 }
rs = sum( $.right.value ) -> { $.o == _'1' ? 1:0 }
ls - rs
}
fold( l , null ) :: {
s = select ( $.o.value ) :: { $.o == _'0' } -> { word[$.i] }
w = str(s,'')
break ( w @ dictionary ){ w }
}
}
dictionary = set ( 'fellow' , 'hello' , 'one' , 'hell' , 'fhel' )
word = 'fhellowne'
println ( find_word( word, dictionary ) )
//Time Complexity: O(NL) where N is the number of words in the dictionary and L is the length of the longest word. Space: O(NL)
private static class Node{
private Node[] child;
boolean isWord;
private Node(){
child = new Node[26];
isWord = false;
}
}
public static List<String> words(String str, String[] dict){
if(str == null || str.length() == 0 || dict == null || dict.length == 0){
throw new IllegalArgumentException();
}
Set<String> result = new HashSet<String>();
Node rt = new Node();
makeTree(dict,rt);
search(0,str,new List<Character>(),rt,results);
}
private static void search(int i, String str, List<Character> wrd, Node rt,Set<String> result){
if(i == str.length()){
if(rt.isWord){
result.add(buildString(wrd));
}
return;
}
int curr = (int)str.charAt(i);
for(int j = i; j < str.length(); j++){
if(rt.child[curr] != null){
wrd.add(str.charAt(i));
search(i + 1,str,wrd,rt.child[curr],result);
wrd.remove(str.charAt(i));
}
}
}
private static void makeTree(String[] dict, Node rt){
for(String s: dict){
addWrd(s,rt);
}
}
private static void addWrd(String str, Node rt){
Node v = rt;
for(int i = 0; i < str.length(); i++){
int idx = (int)str.charAt(i) - 'a';
if(v.child[idx] == null){
v.child[idx] = new Node();
}
v = v.child[idx];
}
v.isWord = true;
}
public class ReducedWordInDictionary {
private static TreeSet<String> dictionary = new TreeSet<String>(LengthComparator.INSTANCE);
static {
dictionary.add("a");
dictionary.add("an");
dictionary.add("be");
dictionary.add("cat");
dictionary.add("car");
dictionary.add("dear");
dictionary.add("down");
dictionary.add("fire");
dictionary.add("hire");
dictionary.add("product");
dictionary.add("row");
dictionary.add("report");
}
public static void main(String[] args) {
System.out.println(find(null, dictionary));
System.out.println(find("", dictionary));
System.out.println(find("a", dictionary));
System.out.println(find("b", dictionary));
System.out.println(find("catalysis", dictionary));
System.out.println(find("preowned", dictionary));
System.out.println(find("preregistered-owned", dictionary));
System.out.println(find("product", dictionary));
System.out.println(find("fibre", dictionary));
}
private static String find(String word, TreeSet<String> dictionary) {
if (word == null || word.isEmpty()) return null;
// otherwise
int length = word.length();
// otherwise
Iterator<String> dit = dictionary.descendingIterator();
boolean isCandidate = false;
while(dit.hasNext()) {
String s = dit.next();
if (!isCandidate && s.length() > length) continue;
// otherwise
isCandidate = true;
// otherwise
if (s == null || s.isEmpty()) break;
// otherwise
if (match(s, word)) return s;
}
return null;
}
private static boolean match(String word, String originalWord) {
int currentIndex = 0;
for (int i = 0; i < word.length(); i++) {
char c1 = word.charAt(i);
int j = currentIndex;
for (; j < originalWord.length(); j++) {
char c2 = originalWord.charAt(j);
if (c1 == c2) {
currentIndex = j+1;
break;
}
// otherwise, continue
}
if (j == originalWord.length()) return false;
}
return true;
}
private static class LengthComparator implements Comparator<String> {
private static Comparator<String> INSTANCE = new LengthComparator();
@Override
public int compare(String s1, String s2) {
if (s1 == null) return s2 == null? 0: -1;
if (s2 == null) return 1;
int l1 = s1.length();
int l2 = s2.length();
if (l1 == l2) return s1.compareTo(s2);
return l1 > l2? 1: -1;
}
}
}
This solution is `O(M(n+k))` where `M` is the length of the dictionary, `n` is the average length of a word in the dictionary (in English language that would be 5.1) and `k` is the length of the input string. More efficient irl, as the length of `k` grows.
/**
* Given a string and dictionary of words, form a word by removing
* minimum number of characters. Characters can be removed `in-order`
* only.
*/
module.exports = function (dictionary, S) {
var map = {}
for (var i = 0; i < dictionary.length; ++i) {
var l = dictionary[i].length
map[l] = map[l] || []
map[l].push(dictionary[i])
}
l = S.length
while (l > 0) {
var spelled = []
var offset = S.length - l
if (map[l]) {
var words = map[l]
for (i = 0; i < words.length; ++i) {
var bool = true
var chars = {}
for (j = 0 + offset; j < S.length; ++j) {
chars[S.charAt(j)] = ++chars[S.charAt(j)] || 1
}
for (var j = 0; j < words[i].length; ++j) {
var word = words[i]
var c = word.charAt(j)
if (chars[c]) {
chars[c]--
} else {
bool = false
break
}
}
if (bool) {
spelled.push(words[i])
}
}
}
--l
if (spelled.length) {
l = 0
}
}
return spelled.length ? spelled : -1
}
var dictionary = [
'bit',
'tab',
'cat',
'dog',
'rabbit',
'hat',
'car',
'harm',
'giraffe'
]
var Z = module.exports(dictionary, 'abbitcar')
console.log(Z) // => 'car'
Z = module.exports(dictionary, 'dtbarbi')
console.log(Z) // => 'rabbit'
Z = module.exports(dictionary, 'farigfe')
console.log(Z) // => 'giraffe'
Z = module.exports(dictionary, 'ablonol')
console.log(Z) // => -1
Solution to what ChisK mentioned earlier. Build Trie for dictionary of words and than iterate input string for each character. There are 2 possibilities
A. current char was found in Trie.
B. current char is not found
public class FindWordMinDeletionMatchingDictionary {
public static void main(String args[]) {
Trie trie = new Trie();
List<String> dictionary = Arrays.asList("fellow", "hello", "whatsapp", "zukam");
String inputstring = "zwhatufellkamow";
for(String dic: dictionary) {
trie.insert(dic, trie.root);
}
List<Integer> delpos = new ArrayList<Integer>();
List<Integer> result = lookup(inputstring, trie.root, 0, delpos);
System.out.println(result.size());
for(Integer pos: result) {
System.out.println(inputstring.charAt(pos));
}
}
private static List<Integer> lookup(String inputstring, TrieNode parentNode, int pos, List<Integer> delpos) {
if(inputstring.length() == pos) return delpos;
TrieNode childNode = parentNode.children.get(inputstring.charAt(pos));
if(childNode != null) {
List<Integer> resultA = lookup(inputstring, childNode, pos + 1, delpos);
List<Integer> temp = new ArrayList<>(delpos);
temp.add(pos);
List<Integer> resultB = lookup(inputstring, parentNode, pos + 1, temp);
return resultA.size() < resultB.size() ? resultA : resultB;
} else {
List<Integer> temp = new ArrayList<>(delpos);
temp.add(pos);
List<Integer> resultB = lookup(inputstring, parentNode, pos + 1, temp);
return resultB;
}
}
}
apple
dimple
triple
staple
given string atdjfpstjpwietle..
word, index position, string length of the words.. now start taking characters from string match the char with word @ index position. after first character we increment the index position.
apple 0 5
dimple 0 6
triple 0 6
staple 0 6
a-tdjfpstjpwietle
apple 1 5
dimple 0 6
triple 0 6
staple 0 6
at-djfpstjpwietle
apple 1 5
dimple 0 6
triple 1 6
staple 0 6
atd-jfpstjpwietle
apple 1 5
dimple 1 6
triple 1 6
staple 0 6
atdjf-pstjpwietle
apple 1 5
dimple 1 6
triple 1 6
staple 0 6
atdjfp-stjpwietle
apple 2 5
dimple 1 6
triple 1 6
staple 0 6
atdjfpst-jpwietle
apple 2 5
dimple 1 6
triple 1 6
staple 2 6
atdjfpstjp-wietle
apple 3 5
dimple 1 6
triple 1 6
staple 2 6
atdjfpstjp-wietle
apple 3 5
dimple 1 6
triple 1 6
staple 2 6
atdjfpstjpwietle
apple 5 5
dimple 1 6
triple 1 6
staple 2 6
apple
dimple
triple
staple
given string atdjfpstjpwietle..
word, index position, string length of the words.. now start taking characters from string match the char with word @ index position. after first character we increment the index position.
apple 0 5
dimple 0 6
triple 0 6
staple 0 6
a-tdjfpstjpwietle
apple 1 5
dimple 0 6
triple 0 6
staple 0 6
at-djfpstjpwietle
apple 1 5
dimple 0 6
triple 1 6
staple 0 6
atd-jfpstjpwietle
apple 1 5
dimple 1 6
triple 1 6
staple 0 6
atdjf-pstjpwietle
apple 1 5
dimple 1 6
triple 1 6
staple 0 6
atdjfp-stjpwietle
apple 2 5
dimple 1 6
triple 1 6
staple 0 6
atdjfpst-jpwietle
apple 2 5
dimple 1 6
triple 1 6
staple 2 6
atdjfpstjp-wietle
apple 3 5
dimple 1 6
triple 1 6
staple 2 6
atdjfpstjp-wietle
apple 3 5
dimple 1 6
triple 1 6
staple 2 6
atdjfpstjpwietle
apple 5 5
dimple 1 6
triple 1 6
staple 2 6
import java.util.*;
public class DictionaryMapping {
static int max = Integer.MIN_VALUE;
static String s = "abbeq";
static Map<String, Boolean> map = new HashMap<String, Boolean>();
static Set<String> set = new HashSet<String>();
public static void main(String[] args) {
set.add("ab");
set.add("abde");
set.add("abbe");
call(0, "");
System.out.println(s.length() - max);
}
private static void call(int m, String str) {
if (map.get(m + "_" + str) != null) {
return;
}
System.out.println(m + " " + str);
if (m >= s.length()) {
return;
}
for (int i = m; i < s.length(); i++) {
if (set.contains(str + s.charAt(i)) && max < (str + s.charAt(i)).length()) {
max = (str + s.charAt(i)).length();
}
map.put(m + "_" + str + s.charAt(i), set.contains(str + s.charAt(i)));
call(i + 1, str + s.charAt(i));
}
}
}
Complexity=len(inputString)*len(dict)*len(wordsinDict) ==> n*m*k O(nmk)
import java.util.HashSet;
import java.util.Set;
import commons.Utils;
public class DictionarymappingDynamic {
static Set<String> dict = new HashSet<String>();
static {
dict.add("ab");
dict.add("abde");
dict.add("abbe");
}
static int getMinimum(String s) {
if (s == null || s.isEmpty()) {
return -1;
}
int max = Integer.MIN_VALUE;
for (String a : dict) {
if (s.length() < a.length()) {
continue;
}
int[][] arr = new int[a.length() + 1][s.length() + 1];
for (int i = 1; i <= a.length(); i++) {
for (int j = 1; j <= s.length(); j++) {
if (a.charAt(i - 1) == s.charAt(j - 1)) {
arr[i][j] = Math.max(arr[i - 1][j - 1] + 1, arr[i - 1][j]);
} else {
arr[i][j] = Math.max(arr[i][j - 1], arr[i - 1][j]);
}
}
}
if (max < arr[a.length()][s.length()]) {
max = arr[a.length()][s.length()];
}
}
return s.length() - max<0?-1:s.length() - max<0;
}
public static void main(String[] args) {
// example string
System.out.println(getMinimum("abbefgj"));
System.out.println(getMinimum(""));
System.out.println(getMinimum("ab"));
System.out.println(getMinimum("a"));
System.out.println(getMinimum("pqrst"));
}
}
Output:
3
-1
0
-1
5
1. build a Trie from words of dictionary
2. test if string is present in Trie (without removing any chars from string). if so return string.
3. if not traverse string from left to right removing one char at a time and testing if resulting substring is present in Trie, repeat this loop removing from end to start (reverse order).
4. If still not found for 1 char, now increase number of chars to 2, and keep removing 2 at a time, and so on checking if word is present in Trie, do the same from end to start (reverse order).
5. keep increasing the number of chars to removed doing this until there's no chars left in String.
What does removing in-order mean ? Does it mean all characters before current character to be removed in the string should be removed as well ?
- turukmakto1990 December 14, 2016