Amazon Interview Question
SDE-2sCountry: United States
This does not answer the question so much as I can see.
I think this a very good solution for another problem: "smallest window size which contains all the characters of the given string" rather than "smallest window size with highest number of distinct characters".
If you want to solve the latter, than this algorithm will fail for example for: "aabcbcd" (suffix"bca" omitted from the original example) and will retrieve "abcbcd" which clearly has two 'b's and two 'c's in it - so not all characters are distinct within the retrieved window.
private static void smallwindow()
{
string s = Console.ReadLine();
Dictionary<char, int> d = new Dictionary<char, int>();
int wincount = 0;
for ( int i = 0; i < s.Length; i++ )
{
if ( !d.ContainsKey( s[i] ) )
{
wincount++;
d.Add( s[i], i );
}
else
{
int delind = d[s[i]];
List<char> remov = new List<char>();
foreach ( var kv in d )
{
if ( kv.Value <= delind )
remov.Add( kv.Key );
}
foreach ( char c in remov )
{
d.Remove( c );
wincount--;
}
}
}
Console.WriteLine( wincount );
}
Here's my O(nlogn) solution:
Main Idea: Binary search on answer.
Basically, we are looking for the smallest substring which contains all type of characters of original string S. let's call it T.
Now, We need to notice to the following fact:
If there is a substring with size X, that has T different characters, for sure there is substring with size more than X, that has T different characters.In other words, we have a non-decreasing function in term of size of substring, X. So we can safely apply binary search for finding the optimal point (here, minimum size).
Here's my code:
I used sliding window idea for counting different characters.
//MehrdadAP
#include <iostream>
#include <string>
using namespace std;
pair<string,int> findSmallest(string s,int size)
{
int frq[26]={0};
int unique=0,ansUnique;
int ansInd=0;
for (int i=0;i<size;i++){
frq[s[i]-'a']++;
if (frq[s[i]-'a']==1)
unique++;
}
ansUnique=unique;
for (int i=size;i<s.size();i++){
frq[s[i]-'a']++;
if (frq[s[i]-'a']==1)
unique++;
int tp = s[i-size]-'a';
frq[tp]--;
if (frq[tp]==0)
unique--;
if (unique>ansUnique){
ansInd=i-size+1;
ansUnique=unique;
}
}
//cout << size <<": " << s.substr(ansInd,size) << " " << ansUnique << endl;
return {s.substr(ansInd,size),ansUnique};
}
string smallestSlidingWindow(string s)
{
int N=s.size();
int lo=0,hi=N;
int unique = findSmallest(s,N).second;
string ans=s;
while (lo<=hi){
int mid= (lo+hi) >> 1;
auto tmp = findSmallest(s,mid);
if (tmp.second < unique){
lo=mid+1;
}
else{
ans = tmp.first;
hi=mid-1;
}
}
return ans;
}
int main()
{
string s;
while (cin >> s){
cout << smallestSlidingWindow(s) << endl;
}
return 0;
}
Here's my O(n^2) solution. But I think this problem can be solved in O(n).
private static String findUniqueSubstring(String string) {
List<Character> uniqueCharacters = new ArrayList<Character>();
StringBuilder uniqueString = new StringBuilder();
String bestUniqueStringSoFar = "";
for(int i = 0; i < string.length(); i++) {
uniqueCharacters.clear();
uniqueString.delete(0, uniqueString.length());
for(int j = i; j < string.length(); j++) {
if(!uniqueCharacters.contains(string.charAt(j))) {
uniqueCharacters.add(string.charAt(j));
uniqueString.append(string.charAt(j));
} else {
break;
}
}
if(uniqueString.length() > bestUniqueStringSoFar.length()) {
bestUniqueStringSoFar = uniqueString.toString();
}
}
return bestUniqueStringSoFar;
}
I optimized this solution so that it becomes O(n). Though the solution has nested for loops, I'm using a variable called 'currentPosition' which makes the input string traversal almost linear.
Your inputs are welcome :)
private static String findUniqueSubstring(String string) {
List<Character> uniqueCharacters = new ArrayList<Character>();
StringBuilder uniqueString = new StringBuilder();
String bestUniqueStringSoFar = "";
int currentPosition = 0;
for(int i = currentPosition; i < string.length(); i++) {
uniqueCharacters.clear();
uniqueString.delete(0, uniqueString.length());
for(int j = i; j < string.length(); j++) {
if(!uniqueCharacters.contains(string.charAt(j))) {
uniqueCharacters.add(string.charAt(j));
uniqueString.append(string.charAt(j));
} else {
currentPosition = j;
break;
}
}
if(uniqueString.length() > bestUniqueStringSoFar.length()) {
bestUniqueStringSoFar = uniqueString.toString();
}
}
return bestUniqueStringSoFar;
}
Here is my c++ solution
#include<cstdio>
#include<cstdlib>
#include<cmath>
#include<cstring>
#include<string>
#include<iostream>
#include<algorithm>
#include<vector>
#include<stack>
#include<deque>
#include<queue>
#include<map>
using namespace std;
int main(){
string s;
cin>>s;
int beg,end,l,len,max,ht[26];
beg = end = len = 0;
max= -9999;
memset(ht,0,sizeof(ht));
l = s.length();
while(end<l){
if(!ht[s[end] - 'a']){
ht[s[end] - 'a'] = 1;
len = end - beg + 1;
if(max<len){
max = len;
}
end++;
}
else{
while(s[beg]!=s[end]){
ht[s[beg]-'a'] = 0;
beg++;
}
ht[s[beg] - 'a'] = 0;
beg++;
}
}
cout<<max<<endl;
}
public String substring(String str) {
int start=0, end=0; //current window
int maxStart=0, maxEnd = 0; //max
Map<Character, Integer> charIndex = new LinkedHashMap<>();
charIndex.put(str.charAt(0), 0);
for(int i=1; i<str.length(); i++) {
char c = str.charAt(i);
if(charIndex.containsKey(c)) {
if((end-start) > (maxEnd-maxStart)) {
maxStart = start;
maxEnd = end;
}
Iterator<Entry<Character, Integer>> iterator = charIndex.entrySet().iterator();
while(iterator.hasNext()) {
Entry<Character, Integer> entry = iterator.next();
if(!entry.getKey().equals(c)) {
iterator.remove();
} else {
start = entry.getValue() + 1;
iterator.remove();
break;
}
}
}
charIndex.put(str.charAt(i), i);
end++;
}
if((end-start) > (maxEnd-maxStart)) {
maxStart = start;
maxEnd = end;
}
return str.substring(maxStart, maxEnd+1);
}
char word[] = in.next().toCharArray();
Set<Character> set = new HashSet<>();
for(int i=0;i<word.length;i++) {
set.add(word[i]);
}
int count = set.size();
set.clear();
TreeSet<Integer> treeSet = new TreeSet<>();
Map<Character, Integer> map = new HashMap<>();
int left = 0, right = Integer.MAX_VALUE;
for(int i=0;i<word.length;i++) {
if(!map.containsKey(word[i])) {
treeSet.add(i);
} else {
int index = map.get(word[i]);
treeSet.remove(index);
treeSet.add(i);
}
map.put(word[i], i);
if(treeSet.size() == count) {
int l = treeSet.first();
int r = treeSet.last();
if(r-l < right - left) {
left = l;
right = r;
}
}
}
if(right != Integer.MAX_VALUE) {
for(int i=left;i<=right;i++) {
out.print(word[i]);
}
}
char word[] = in.next().toCharArray();
Set<Character> set = new HashSet<>();
for(int i=0;i<word.length;i++) {
set.add(word[i]);
}
int count = set.size();
set.clear();
TreeSet<Integer> treeSet = new TreeSet<>();
Map<Character, Integer> map = new HashMap<>();
int left = 0, right = Integer.MAX_VALUE;
for(int i=0;i<word.length;i++) {
if(!map.containsKey(word[i])) {
treeSet.add(i);
} else {
int index = map.get(word[i]);
treeSet.remove(index);
treeSet.add(i);
}
map.put(word[i], i);
if(treeSet.size() == count) {
int l = treeSet.first();
int r = treeSet.last();
if(r-l < right - left) {
left = l;
right = r;
}
}
}
if(right != Integer.MAX_VALUE) {
for(int i=left;i<=right;i++) {
out.print(word[i]);
}
}
String A="aabcbcdbca";
char[] arr=new char[A.length()];
for(int i=0;i<A.length();i++){
arr[i]=A.charAt(i);
}
String temp="";
for(int i=0;i<arr.length;i++){
StringBuffer sb=new StringBuffer();
sb.append(arr[i]);
for(int j=i+1;j<arr.length;j++){
int t=i;
int r=j;
int flag=0;
while(t<r){
if(arr[t]!=arr[r]){
t++;
}
else if(arr[t]==arr[r]){
flag=1;
break;
}
}
if(flag==0){
sb.append(arr[r]);
}
else if(flag==1){
break;
}
}
if(sb.length()>temp.length()){
temp=sb.toString();
}
}
System.out.println("String is "+ temp +" and length is "+temp.length());
}
String A="aabcbcdbca";
char[] arr=new char[A.length()];
for(int i=0;i<A.length();i++){
arr[i]=A.charAt(i);
}
String temp="";
for(int i=0;i<arr.length;i++){
StringBuffer sb=new StringBuffer();
sb.append(arr[i]);
for(int j=i+1;j<arr.length;j++){
int t=i;
int r=j;
int flag=0;
while(t<r){
if(arr[t]!=arr[r]){
t++;
}
else if(arr[t]==arr[r]){
flag=1;
break;
}
}
if(flag==0){
sb.append(arr[r]);
}
else if(flag==1){
break;
}
}
if(sb.length()>temp.length()){
temp=sb.toString();
}
}
System.out.println("String is "+ temp +" and length is "+temp.length());
}
import java.util.HashSet;
import java.util.Set;
public class SmallestWindowLengthCompact {
static int smallestWindowLength(String s) {
int distinctCharacters = distinctCharactersInString(s);
for (int n = distinctCharacters; n > 0; n--) {
for (int w = n; w < s.length(); w++) {
Interval substringInterval = firstSubstringOfLengthWWithNDistinctCharacters(s, w, n);
if (substringInterval != null) {
String solutionString = s.substring(substringInterval.getStart(), substringInterval.getFinish());
return solutionString.length();
}
}
}
return -1;
}
private static Interval firstSubstringOfLengthWWithNDistinctCharacters(String s, int w, int n) {
int max = s.length() - n;
for (int start = 0; start <= max; start++) {
int finish = start + w;
String substring = s.substring(start, finish);
if (distinctCharactersInString(substring) == n) {
Interval result = new Interval(start, finish);
return result;
}
}
return null;
}
private static int distinctCharactersInString(String s) {
Set<Character> inventory = new HashSet<Character>();
for (int i = 0; i < s.length(); i++) {
char c = s.charAt(i);
inventory.add(c);
}
return inventory.size();
}
}
class Interval {
int start;
int finish;
public Interval(int start, int finish) {
super();
this.start = start;
this.finish = finish;
}
public int getStart() {
return start;
}
public int getFinish() {
return finish;
}
}
Here is my c++ O(n) solution using a hash to keep the position of the last time that letter appeared, and then keeping track after each letter the position of the last substr of that length, so that the longest L will be the position of the last character in my distinct substring.
It's a play off of an O(nlogn) algorithm used to solve an LIS except the hash check for next position to update makes the O(logn) from the LIS binary search to just O(1). Also since we don't have to keep track of each element of the sequence with this unique case, we can just go back (L-1) characters and take the substring after the loop.
#include <iostream>
#include <unordered_map>
using namespace std;
string getMaxDistinctSubstr(const string &letterString) {
string sub;
if (letterString.empty()) return sub; // if empty, nothing to do
unordered_map<char,int> letterMap;
int pos[letterString.size()+1];
int L = 0;
int newL = 0;
for (int i = 0; i < letterString.size(); i++) {
char c = letterString[i];
if (letterMap[c]) {
// letter exists, lets see if it is withing the range of L before
if ((i - letterMap[c]) <= newL) {
newL = i - letterMap[c];
} else {
newL++;
}
} else {
// this is a disinct letter
// take current substr and increase by one
newL++;
}
letterMap[c] = i; // update count of last time this letter appeared
pos[newL] = i; // update position of new substr to this element
if (newL > L) {
L = newL;
}
}
int start = pos[L]-(L-1); // pos[L] is the last position, so if we keep L letters, go back (L-1)
sub = letterString.substr(start,L);
return sub;
}
int main () {
// write test here
string A = "aabcbcdbca";
cout << getMaxDistinctSubstr(A) << endl;
string AA = "aabcbcdbcafkdkabcdefj";
cout << getMaxDistinctSubstr(AA) << endl;
string B = "bccccf";
cout << getMaxDistinctSubstr(B) << endl;
string C = "ab";
cout << getMaxDistinctSubstr(C) << endl;
string D = "a";
cout << getMaxDistinctSubstr(D) << endl;
string E = "";
cout << getMaxDistinctSubstr(E) << endl;
}
Details: cpluspluslearning-petert.blogspot.co.uk/2015/09/data-structure-and-algorithm-find.html
O(n) solution.
void TestCasesOfFindLongestUniqueCharacterSerial()
{
std::string testString;
assert(FindLongestUniqueCharacterSerial(testString).empty());
testString = "aaaaaaaaaaaaaaaaaaaa";
assert(FindLongestUniqueCharacterSerial(testString) == "a");
testString = "abcdefghijklmn";
assert(FindLongestUniqueCharacterSerial(testString) == testString);
testString = "aabcbcdbca";
assert(FindLongestUniqueCharacterSerial(testString) == "dbca");
testString = "bcccf";
assert(FindLongestUniqueCharacterSerial(testString) == "bc");
}
System.out.println("Longest unique substring: " + Strings.returnLongestStringOfConsecutiveUniqueCharacters("aabcbcdbca"));
Output: Longest unique substring: dbca
public static String returnLongestStringOfConsecutiveUniqueCharacters(String input) {
if (input == null || input.isEmpty()) {
return null;
}
String runningLongestSubstring = "";
String currentSubstring = "";
for (char c : input.toCharArray()) {
if (currentSubstring.contains(c + "")) {
if (currentSubstring.length() > runningLongestSubstring.length()) {
runningLongestSubstring = currentSubstring;
}
currentSubstring = currentSubstring.substring(currentSubstring.indexOf(c) + 1) + c;
} else {
currentSubstring += c;
}
}
if (currentSubstring.length() > runningLongestSubstring.length()) {
runningLongestSubstring = currentSubstring;
}
return runningLongestSubstring;
}
O(n) solution
int findMaxWindowWithDistinctElems(string& s){
int sqStart = 0, sqEnd = 0, numElems = 1;
int resStart = sqStart, resEnd = sqEnd, res = numElems;
int ascii[256];
for(int i=0; i<256; i++){
ascii[i] = 0;
}
ascii[s[0]] = 1;
for(int i=1; i<s.length(); i++){
if(ascii[s[i]] == 0){
ascii[s[i]] = 1;
sqEnd = i;
numElems++;
}else{
if(numElems > res){
res = numElems;
resStart = sqStart;
resEnd = sqEnd;
}
for(int j=0; j<256; j++){
ascii[j] = 0;
}
ascii[s[i]] = 1;
sqStart = i;
sqEnd = i;
numElems = 1;
}
}
if (numElems > res){
resStart = sqStart;
resEnd = sqEnd;
res = numElems;
}
return res;
}
O(n) solution, with comments:
- Irit September 13, 2015