Amazon Interview Question
SDE-2sTeam: Bangalore
Country: India
Interview Type: In-Person
dictionary = ["hi","winter"]
def insertSpace(givenString):
value = False
end = 0
last = len(givenString)
while (value is False) and (end < last):
end += 1
newString = givenString[:end]
if newString in dictionary:
value = insertSpace(givenString[end:])
elif newString == "":
value = []
if (value is False):
if last <= 0:
return []
return False
else:
fl = []
fl.extend(value)
if newString == "":
return fl
fl.insert(0,newString)
return fl
inputString = raw_input("Enter:")
l = insertSpace(inputString)
if l is not False:
finalString = ""
for i in range(len(l)): finalString += " "+l[i]
print finalString[1:]
else:
print l
a recursive solution.....
Easy, considering dictionary is implemented using TRIE.
keep a string word Initialized with "" (empty string)
Keep a string sentence initialized with "" (empty string)
String input, is the input sentence without any spaces.
scan input from left to right.
for i=0 to i = input.length
{
//Add alphabets to word.
word += input[i]
//check if word forms any word in dictionary?
// If yes add word to sentence followed by a space.
if(dictionary.isWord(word))
sentence += word + " ";
// If word forms prefix of any word in dictionary then continue;
else if(dictionary.isPrefix(word))
continue;
else
return invalid sentence.
}
cout<<sentence;
In trie you have for every node 26 pointers that account for the 26 different alphabets. Instead use a ternary search tree which has only three pointers for a particluar node. One if the node value is less than the current value . The middle one if its value is equal to the current value and the third one if its value is more than the current value. In every node you have 4 spaces . One for the three pointers that i mentioned above and one that is 0 or 1 to denote whether we have reached the end of word (a valid one) or not.
a. So put this sentence in a ternary search tree and wherever we find a 1 that is a valid one or the end of a word is reached stop there and print it.
b. In this way continue down until you get 1 in any of the node.
c. But if you found that you have reached the end of a word and the value of the last node is 0 then return false as there is not a valid word else return true
Suppose there is a word called dog in the dictionary and and a substring called dea it will return 1 according to your tree but that not a valid word
#include <iostream>
#include <stdio.h>
#include <string>
#include <string.h>
using namespace std;
string d[]{"they","are","here","and","there","is","no","turning","back"};
int hit(char str[])
{
for(int i=0;i<9;i++)
{
if(!strcmp((d[i].c_str()),(str)))//found
return 1;
}
return 0;
}
int spaces[256];
int x=0;
int spacify(char arr[])
{
char *p,*s;
p=arr;
s=NULL;
while(p!=NULL)
{
char temp[50];
int k=0;
while(*p!='\0')
{
temp[k++]=*p;
temp[k]='\0';
if(hit(temp))
{
std::cout<<"found \""<<temp<<"\" in dictionary"<<"\n";
s=p;
std::cout<<"s points to "<<*s<<"\n";
}
p++;
}
if(s==NULL)
{
return 0;
}
p=s;
s=NULL;
k=0;
spaces[x++]=(p-arr)+1;
std::cout<<"insert space at "<<(p-arr)+1<<"\n";
p++;
//std::cout<<"p points to "<<*p<<"\n";
}
return 1;
}
int main()
{
//char a[]={'t','h','e','y','a','r','e','h','e','r','e'};
int j=0,z=0;
char *a="theyarehereandtheresnoturningback";
int b=spacify(a);
char out[(x+(strlen(a)))];
for(int i=0;i<(strlen (a));i++)
{
if(i==spaces[z])
{
z++;
out[j++]=' ';
}
out[j++]=a[i];
}out[j]='\0';
for(int i=0;i<strlen(out);i++)
std::cout<<out[i];
std::cout<<"\n";
return 0;
}
Hi,
below code (with out using any tree data structure) works fine.
If anyone has clean and working code using tree data structure, please post.
static int isWord(String word) {
List<String> list = new ArrayList<String>();
list.add("this");
list.add("sentence");
list.add("is");
list.add("sentenceisseparated");
if (list.contains(word))
return 1;
else
return 0;
}
public static void main(String[] args) {
String a = "thissentenceisseparated";
char[] array = a.toCharArray();
int k = 0;
ArrayList<String> list = new ArrayList<String>();
for (int i = 1; i < a.length(); i++) {
String word = "";
int j = k;
while (j <= i) {
word += array[j];
j++;
}
if (isWord(word) == 1) {
list.add(word);
k = i + 1;
i++;
} else {
if (i + 1 >= a.length()) {
boolean flag = true;
while (flag) {
String lastWord = list.get(list.size() - 1);
word = lastWord + word;
list.remove(list.size()-1);
if (isWord(word) == 1) {
list.add(word);
flag = false;
} else {
if(list.isEmpty()){
flag=false;
}
}
}
}
}
}
if(list.isEmpty()){
System.out.println("can not be separated");
}else{
System.out.println(list); //use list of words to make string separated by space
}
import java.util.ArrayList;
import java.util.List;
public class JavaProgram1 {
private static List<String> worddictionary = new ArrayList<String>();
public JavaProgram1()
{
worddictionary.add("this");
worddictionary.add("sentence");
worddictionary.add("is");
worddictionary.add("space");
worddictionary.add("seperated");
}
public static void main(String[] args) {
JavaProgram1 jp = new JavaProgram1();
String input = "isspace";
String output = "";
String word = new String();
int start = 0;
int end = 1;
while (start < input.length()) {
if(end > input.length())
{
break;
}
// fetch word from start till end
word = jp.fetchWordFromStart2End(input, start, end);
// is valid word in dictionary
boolean flag = jp.isValidWordInDictionary(word);
if (flag == true) {
output = output.concat(word).concat(" ");
start = end;
}
end = end + 1;
}
System.out.println("Final Output::" + output);
}
private boolean isValidWordInDictionary(String word) {
return worddictionary.contains(word);
}
private String fetchWordFromStart2End(String input, int start,
int end) {
String word = "";
word = input.substring(start, end);
return word;
}
}
public void breakWithSpaces(String word, int start){
if(start == word.length()) System.out.println("The words are: " + Arrays.asList(sentence));
for(int i = start; i < word.length(); i++){
if(dictionaryContains(word.substring(start, i+1))){
sentence.add(word.substring(start, i+1));
// System.out.println("Adding " + word.substring(start, i+1));
breakWithSpaces(word, i+1);
sentence.remove(sentence.size()-1);
// System.out.println("Removing " + word.substring(start, i+1));
}
}
}
#include<iostream>
#include<map>
#include<string>
using namespace std;
std::map<string,string> dict;
int print(const char *p,int i)
{
char l[50];
if(*(p + i) == '\0')
{
return 0;
}
strncpy(l,p,i+1);
l[i+1] = '\0';
if(dict.find(l) == dict.end())
{
return(print(p,i+1));
}
else
{
if(*(p+i+1) == '\0')
{
fwrite(p,i+1,1,stdout);
cout << endl;
return 1;
}
const char *q = p + i + 1;
if(print(q,0))
{
fwrite(p,i+1,1,stdout);
cout << endl;
}
else
{
return(print(p,i+1));
}
}
}
int main()
{
dict["despodent"] = "dejected";
dict["corpulent"] = "fat";
dict["reverence"] = "respect";
dict["corp"] = "area";
dict["hi"] = "hello";
string str1 = "corpulenthi";
print(str1.c_str(),0);
return 0;
}
suppose we have dictionary ready with us, stored as trie data structure; start at the beginning of given input and try to find word in trie, if found pass remaining input to the program as well as continue searching long length word in trie and then input remaining string to program.. as suggested by @math.matt perform recursion..
Start reading the input string, navigate pointer to the trie according to the character you have read. If found word print it and/or continue reading.
Please find the pseudo code for that.
String temp, Result;
triePtr; // pointer to root of trie
while(input is not NULL)
{
temp <- temp + Read a character from input;
Navigate "triePtr" according to the read character in trie;
if(Fount Valid word in trie)
{
Result =Result +" " + temp;
clear temp;
Reset triePtr to root of trie;
}
}
if temp is not NULL
return false;
else
return Result;
I thought on the problem and that's what I want to offer an approach.
1. We must not do the analysis of given line, and in the beginning itself dictionary to find patterns in it and opportunities for optimization.
A simple example - in the English language has many words ends in -er (or -ing). So be wise to analyze not beginning of line, and the end of line.
2. Very important should be applied statistics. We need to find the frequency of occurrence of words for each letter of the alphabet. And when looking for an elementary search on this list, not because the words are arranged in the alphabet.
3.The third idea is to look, not words but syllables. After all, virtually every word begins with a consonant. So the priority should be given to the analysis of consonants. The truth is there is a danger of finding subword contained in the general word.
4. In general, you need to take into account the ambiguity of interpretation of the separation of fused sentences into words.
lets try an example
doingwhat
as per the logic given
it will no give any proper result
as it ll try to separate "do" first.
than it will not be able to get any proper word
but actaully it should give as "doing what"
int dictionaryContains(string word)
{
string dictionary[] = {"mobile","samsung","sam","sung","man","mango",
"icecream","and","go","i","like","ice","cream"};
int size = sizeof(dictionary)/sizeof(dictionary[0]);
for (int i = 0; i < size; i++)
if (dictionary[i].compare(word) == 0)
return true;
return false;
}
// Returns true if string can be segmented into space separated
// words, otherwise returns false
bool wordBreak(string str)
{
int size = str.size();
if (size == 0) return true;
// Create the DP table to store results of subroblems. The value wb[i]
// will be true if str[0..i-1] can be segmented into dictionary words,
// otherwise false.
bool wb[size+1];
memset(wb, 0, sizeof(wb)); // Initialize all values as false.
for (int i=1; i<=size; i++)
{
// if wb[i] is false, then check if current prefix can make it true.
// Current prefix is "str.substr(0, i)"
if (wb[i] == false && dictionaryContains( str.substr(0, i) ))
wb[i] = true;
// wb[i] is true, then check for all substrings starting from
// (i+1)th character and store their results.
if (wb[i] == true)
{
// If we reached the last prefix
if (i == size)
return true;
for (int j = i+1; j <= size; j++)
{
// Update wb[j] if it is false and can be updated
// Note the parameter passed to dictionaryContains() is
// substring starting from index 'i' and length 'j-i'
if (wb[j] == false && dictionaryContains( str.substr(i, j-i) ))
wb[j] = true;
// If we reached the last character
if (j == size && wb[j] == true)
return true;
}
}
}
/* Uncomment these lines to print DP table "wb[]"
for (int i = 1; i <= size; i++)
cout << " " << wb[i]; */
// If we have tried all prefixes and none of them worked
return false;
}
// Driver program to test above functions
int main()
{
wordBreak("ilikesamsung")? cout <<"Yes\n": cout << "No\n";
wordBreak("iiiiiiii")? cout <<"Yes\n": cout << "No\n";
wordBreak("")? cout <<"Yes\n": cout << "No\n";
wordBreak("ilikelikeimangoiii")? cout <<"Yes\n": cout << "No\n";
wordBreak("samsungandmango")? cout <<"Yes\n": cout << "No\n";
wordBreak("samsungandmangok")? cout <<"Yes\n": cout << "No\n";
return 0;
}
Backtracking with prefix tree will result on an exponential algorithm, take for example this case:
dictionary=["a","aa"]
string = "aaaaaaaaaaaaaaaaaaaaaaaa....b" (N 'a's and a b at the end). Using backtracking will require roughly 2^N steps to realize that it is not possible to get a valid solution.
www .geeksforgeeks.org/dynamic-programming-set-32-word-break-problem/
- Amit July 16, 2013good explanation