Uber Interview Question
Software EngineersCountry: United States
Interview Type: Phone Interview
Even, I got O(mn) solution. I would like to see if some one have better solution than this.
public static void replace() {
String[] symbols = { "I", "Am", "cro", "Na", "le", "abc" };
String[] arr = { "Amazon", "Microsoft", "Google" };
for (int i = 0; i < arr.length; i++) {
String name = arr[i];
String selectedSymbol = "";
for (String symbol : symbols) {
if (name.contains(symbol)) {
if (symbol.length() > selectedSymbol.length())
selectedSymbol = symbol;
}
}
if (selectedSymbol.length() > 0) {
arr[i] = name.replace(selectedSymbol, "[" + selectedSymbol + "]");
}
}
System.out.println(Arrays.toString(arr));
}
public class SymbolMatch {
public static void main(String[] args) {
String str[] ={"Amazon","Microsoft","Google"," "};
String sym[]={"i", "Am", "zon" ,"cro", "Na", "le", "soft"};
int l1 = str.length;
int l2 = sym.length;
String temp="";
int i,j,len=0;
for(i=0;i<l1;i++)
{
for(j=0;j<l2;j++)
{
if((str[i].contains(sym[j]))&&(len<sym[j].length()))
{
len = sym[j].length();
temp = sym[j];
}
}
if(len>0)
{
int k = str[i].indexOf(temp);
str[i] = str[i].substring(0,k)+"["+str[i].substring(k, k+len)+"]"+str[i].substring(k+len);
temp="";
len=0;
}
}
for(i=0;i<l1;i++)
System.out.println(str[i]);
}
}
Well I'd start by asking if I can exploit the fact that chemical symbols are quite short, at least in the example. If we can limit the length of chemical symbol to, say, 5 characters, it's trivial to create O(n) solution. Otherwise, we could do nlogn solution by binary searching for the longest chemical symbol from each position and using a hash table, however this would require using rolling hash to calculate the hashes in O(1), otherwise it becomes n^2 solution, so it'd be a pretty hardcore solution to expect in an interview. Aho-corasick is n^2 at worst as there might a quadratic number of matches so that's out. Suffix tree based solution might work too, but who can code that in an interview? All in all, I think they were expecting you to exploit the fact that the symbols are quite short so you can jsut keep them in hash table and look for the symbols at each position.
Theoretically, you could reimplement Aho-Corasick algorithm for searching multiple strings (let's assume total length of "m" characters) in a single, longer string (let's say "n" characters) at once. This way it would be an O(m+n) algorithm, however, this is quite tedious and probably not-doable in 30 mins or so...
#include <string>
#include <algorithm> // std::sort
#include <vector> // std::vector
#include <stdio.h>
#include <stdlib.h>
using namespace std;
bool sort_by_length(const string &str1, const string &str2)
{
return str1.length() >= str2.length();
}
void sort_vector_by_len(vector<string> &vec)
{
sort(vec.begin(), vec.end(), sort_by_length);
}
void display_new_string(const string &str1, const string &str2)
{
string nStr;
int start = str1.find(str2), end = start + str2.length();
for(int i = 0; i < str1.length(); i++)
{
if(start == i)
{
nStr += "[";
}
nStr += str1[i];
if(end == i + 1)
{
nStr += "]";
}
}
printf("NEW STRING: %s\n", nStr.c_str());
}
int main(int argc, char** argv)
{
vector<string> names, symbols;
names.push_back("Amazon");
names.push_back("Microsoft");
names.push_back("Google");
symbols.push_back("I");
symbols.push_back("Am");
symbols.push_back("cro");
symbols.push_back("Na");
symbols.push_back("le");
symbols.push_back("abc");
vector<string>::iterator it1, it2;
sort_vector_by_len(symbols);
for(it1 = names.begin(); it1 != names.end(); it1++)
{
for(it2 = symbols.begin(); it2 != symbols.end(); it2++)
{
int start = (*it1).find((*it2)), end;
if(start != string::npos)
{
display_new_string(*it1, *it2);
break;
}
}
}
printf("\n");
return 0;
}
package com.eb.corealgo;
import java.util.ArrayList;
public class ChemicalStringSymbol {
public String[] replaceBrackets(String []chemical,String []symbol){
int length=-1;
int currentFoundIndex=-1;
ArrayList<String> returnList=new ArrayList<String>();
for(String che:chemical){
//iterate through symbols
for(int i=0;i<symbol.length;i++){
if(che.contains(symbol[i])){
//found so take length
if(length<symbol[i].length()){
length=symbol[i].length();
currentFoundIndex=i;
}
}
}
//replace highest with bracket
returnList.add(getWithBrackets(che,symbol[currentFoundIndex]));
currentFoundIndex=-1;
length=-1;
}
return returnList.toArray(new String[returnList.size()]);
}
private String getWithBrackets(String che, String tempString) {
//get substring
int subStringLength=che.indexOf(tempString);
if(subStringLength==-1){
return null;
}
String subString=che.substring(subStringLength,tempString.length()+subStringLength);
String appender=subString.replace(subString, "["+subString+"]");
//replace with actual string
return che.replace(subString, appender);
}
//test
public static void main(String args[]){
ChemicalStringSymbol c=new ChemicalStringSymbol();
String s[]=c.replaceBrackets(new String[]{ "Amazon", "Microsoft", "Google" }, new String[]{ "I", "Am", "cro", "Na", "le", "abc" });
for(String ss:s){
System.out.println(ss);
}
}
}
One approach is to use a Trie for the dictionary (the symbols), and then match brute force. The complexity will depend on the dictionary; if all are suffixes of the other, it will be n*m (where m is the size of the dictionary). For example, in Python:
from functools import reduce
class TrieNode:
def __init__(self):
self.c = dict()
self.sym = None
def bracket(words, symbols):
root = TrieNode()
for s in symbols:
t = root
for char in s:
if char not in t.c:
t.c[char] = TrieNode()
t = t.c[char]
t.sym = s
result = dict()
for word in words:
i = 0
symlist = list()
while i < len(word):
j, t = i, root
while j < len(word) and word[j] in t.c:
t = t.c[word[j]]
if t.sym is not None:
symlist.append((j+1-len(t.sym), j+1, t.sym))
j += 1
i += 1
if len(symlist) > 0:
sym = reduce(lambda x, y: x if x[1]-x[0] >= y[1]-y[0] else y, symlist)
result[word] = "{}[{}]{}".format(word[:sym[0]], sym[2], word[sym[1]:])
return tuple(word if word not in result else result[word] for word in words)
One option is to use a Trie to store the dictionary in, then loop over the input words and save all matches. Note that matches may overlap, even though they don't in the example. Time complexity is n*m, where m is the length of the dictionary. The intuition is that every character in the input list may occur in each given symbol. In the example below I've assumed lowercase input.
from functools import reduce
class TrieNode:
def __init__(self):
self.c = dict()
self.sym = None
def bracket(words, symbols):
root = TrieNode()
for s in symbols:
t = root
for char in s:
if char not in t.c:
t.c[char] = TrieNode()
t = t.c[char]
t.sym = s
result = dict()
for word in words:
i = 0
symlist = list()
while i < len(word):
j, t = i, root
while j < len(word) and word[j] in t.c:
t = t.c[word[j]]
if t.sym is not None:
symlist.append((j+1-len(t.sym), j+1, t.sym))
j += 1
i += 1
if len(symlist) > 0:
sym = reduce(lambda x, y: x if x[1]-x[0] >= y[1]-y[0] else y, symlist)
result[word] = "{}[{}]{}".format(word[:sym[0]], sym[2], word[sym[1]:])
return tuple(word if word not in result else result[word] for word in words)
bracket(['amazon', 'microsoft', 'google'], ['i', 'am', 'cro', 'na', 'le', 'abc'])
>>> ('[am]azon', 'mi[cro]soft', 'goog[le]')
This seems to work. C# solution.
1. Sort symbols based on length
2. For each name, look if it contains any symbols (starting with longest)
3. If it does, manipulate name to fit output and break out of the inner loop
4. Otherwise keep going.
public static string ChemSymbolFound(string[] names, string[] symbols)
{
var output = "";
// sort in place, largest to smallest
Array.Sort(symbols, (x,y) => y.Length.CompareTo(x.Length));
foreach (var name in names)
{
foreach (var sy in symbols)
{
if (name.Contains(sy))
{
var index = name.IndexOf(sy);
output += name.Remove(index, sy.Length).Insert(index, "[" + sy + "]") + ", ";
break;
}
}
}
return output.TrimEnd(',', ' ');
}
public class Chemicals {
public static void main(String[] args) {
// TODO Auto-generated method stub
String[] chem = {"Amazon", "Microsoft", "Google"};
String[] symbols = {"I", "Am", "cro", "Na", "le", "abc"};
Chemicals c =new Chemicals();
c.findMatch(chem, symbols);
}
private void findMatch(String [] chem, String [] sym){
for(int i=0; i<chem.length;i++){
String chemical = chem[i];
for(int j=0; j<sym.length; j++){
if(chemical.contains(sym[j]))
System.out.println(chemical.replace(sym[j],"["+sym[j]+"]"));
}
}
}
}
public List<String> permute(List<String> input, List<String> rep){
List<String> newStrings = new ArrayList<String>;
for(String in : input){
String cur =""; int maxlen =0;
for(String re : rep){
if(in.contains(rep)){
int startIndex = in.indexOf(re);
int lastIndex = re.length();
len = re.length();
if(len > maxlen){
cur = re.substring(0,startIndex) + '[' +re + ']' + re.substring(lastIndex);
maxlen = len;
}
}
}
newStrings.add(cur);
}
}
I have written the below code without using String.Contains. String.Contains might be unacceptable in an interview.
Language used C#. You can run this code in rextester
public static void Main(string[] args)
{
string[] chemicals = {"amazon", "microsoft", "google"};
string[] symbols = {"i", "am", "cro", "na", "le", "abc"};
Dictionary<string, int> symbol_dict = new Dictionary<string, int>();
int max_symbol_length = 0; //Stores the length of longest symbol
//Add all the symbols into the dictionary
for(int i = 0; i< symbols.Length; i++) {
if (max_symbol_length < symbols[i].Length)
max_symbol_length = symbols[i].Length;
symbol_dict.Add(symbols[i], 0);
}
Dictionary<string, string> result = new Dictionary<string, string>();
//Loop through each chemical name
foreach (string chemical in chemicals) {
int max_length_so_far = 0; //Store the max length of symbol matched with the chemical
//Loop through each character in the chemical.
for (int i=0; i < chemical.Length ; i++) {
int length = 0;
//condition to make sure our length doesn't exceed chemical length
if ((i + max_symbol_length) <= chemical.Length)
length = max_symbol_length;
else
length = chemical.Length - i;
//Once you have mapped a symbol, we should not check for symbols less than that length
while(length >= 0 && length > max_length_so_far) {
Console.WriteLine(chemical.Substring(i, length)); //Use this to understand what is actually getting compared with the symbol dictionary
if (symbol_dict.ContainsKey(chemical.Substring(i, length)) && length > max_length_so_far){
max_length_so_far = length;
result[chemical] = chemical.Substring(0, i) + "[" + chemical.Substring(i, length) + "]" + chemical.Substring(i+length);
}
length--;
}
}
}
foreach(KeyValuePair<string, string> output in result) {
Console.WriteLine(output.Key + " : " + output.Value);
}
}
Maybe faster...?
def chemical_sym(chem_arr, sym_arr):
# build sym dict
# O(n)
s_dict = {}
for s in sym_arr:
s_dict[s[0]] = s[1:]
ret = []
for chem in chem_arr:
ret.append(surround(chem, s_dict))
return ret
def surround(chem, s_dict):
chem_arr = list(chem)
idx = -1
sym = ''
for i, c in enumerate(chem_arr):
#O(m)
if c in s_dict:
#O(1)
suffix = s_dict[c] if s_dict[c] else ''
if len(chem)-i-1 >= len(suffix) and \
chem[i+1:i+1+len(suffix)] == suffix and \
len(suffix) + 1 > len(sym):
sym = c+suffix
idx = i
chem_arr.insert(idx, '[')
chem_arr.insert(idx+len(sym)+1, ']')
return ''.join(chem_arr)
if (symbol.length() > selectedSymbol.length() && name.contains(symbol))
Would save the "contains" call for same or smaller size strings, like when you compare Amazon with {A B C D MA zz,} It only compares for first and later single letters will not be compared at all. and After "MA" all of that size will be omitted.
- Nate May 26, 2016