Google Interview Question
Software EngineersCountry: United States
Java solution, the complexity is O(n):
private String encode(String s) {
StringBuilder builder = new StringBuilder();
char[] A = s.toCharArray();
int i = 0;
while (i < A.length) {
char c = A[i];
int counter = 0;
while (i < A.length && A[i] == c) {
i++;
counter++;
}
if (counter > 1) {
builder.append(counter).append('x').append(c);
} else {
builder.append(c);
}
}
return builder.toString();
}
In python:
def encode_str(in_str):
result = ''
ndx = 0
while ndx < len(in_str):
if ndx + 1 > len(in_str):
result += in_str[ndx]
else:
count = 1
while len(in_str) > (ndx + 1) and in_str[ndx + 1] == in_str[ndx]:
count +=1
ndx += 1
if count > 1:
result += '{}x{}'.format(count, in_str[ndx])
else:
if in_str[ndx] == 'x':
result += '1xx'
else:
result += in_str[ndx]
ndx += 1
return result
Testing:
str1 = 'p14akkkkkkkkpq'
exp1 = 'p14a8xkpq'
str2 = 'x'
exp2 = '1xx'
str3 = '8x'
exp3 = '81xx'
str4 = '8'
exp4 = '8'
tests = [(str1, exp1), (str2, exp2), (str3, exp3), (str4, exp4)]
for s,e in tests:
r = encode_str(s)
assert e == r, '{} -> {} != {}'.format(s, r, e)
print r
public static String encodeStr(String str) {
char[] c = str.toCharArray();
char cur;
char prev;
int j = 0;
StringBuilder sb = new StringBuilder(new Character(str.charAt(0)).toString());
for (int i = 1; i < c.length; i++) {
cur = c[i];
prev = c[i-1];
j = 0;
if (cur == prev) {
while (cur == prev) {
j++;
i++;
cur = c[i];
prev = c[i-1];
}
}
if (j > 1) {
sb.deleteCharAt(sb.length()-1);
sb.append(j + 1).append("x").append(prev);
i--;
} else {
sb.append(cur);
}
}
return sb.toString();
}
public static StringBuilder appendToString(StringBuilder string, Character x, int num){
if(num==0){
string.append(x);
}
for(int i=1;i<=num;i++){
string.append(x);
}
return string;
}
public static String decompressString(String string){
Character s=null;
int num=0;
StringBuilder sb=new StringBuilder();
for(int i=0;i<string.length();i++){
Character p=string.charAt(i);
if(s==null){
s=p;
}else{
String x=String.valueOf(p);
if(p>='0' && p<='9'){
Integer y= Integer.valueOf(x);
num=num*10+y;
}else{
sb= appendToString(sb,s,num);
s=p;
num=0;
}
}
}
sb.append(s);
return sb.toString();
}
public static StringBuilder appendToString(StringBuilder string, Character x, int num){
if(num==0){
string.append(x);
}
for(int i=1;i<=num;i++){
string.append(x);
}
return string;
}
public static String decompressString(String string){
Character s=null;
int num=0;
StringBuilder sb=new StringBuilder();
for(int i=0;i<string.length();i++){
Character p=string.charAt(i);
if(s==null){
s=p;
}else{
String x=String.valueOf(p);
if(p>='0' && p<='9'){
Integer y= Integer.valueOf(x);
num=num*10+y;
}else{
sb= appendToString(sb,s,num);
s=p;
num=0;
}
}
}
sb.append(s);
return sb.toString();
}
function encode(str) {
var counter = 1;
var encodedStr = '';
for(var i = 0; i < str.length; i++) {
if(i < str.length-1 && str.charAt(i) === str.charAt(i+1) {
counter++;
} else {
encodedStr += counter > 1 ? encodeChar(str.charAt(i), counter) : str.charAt(i);
counter = 1;
}
}
return encodedStr;
}
function encodeChar(c, n) {
return c + 'x' + n;
}
I don't think that the encoding function should use 'x' to represent the repeating appearance of a character because the problem states that an input can be any ASCII char.
For example, if an input String is "18xa", how can it be encoded? "18xa", or "1x11x81xx1xa"?
If we don't encode the char with count < 4, the code will be "18xa" and the decoder will decode it as "1aaaaaaaa" if the maximum count is set to 9.
My idea is to use a char to store the counts for each char. i.e. ['a']['b']['b']['b'] -> [1]['a'][3]['b']
Therefore, we don't need to worry about 'x' and the maximum count can be 255.
public static char[] encode(char[] arr){
int dupLocalCount=0,arrayLength=0;
for(int i=1;i<arr.length;i++){
if(arr[i]==arr[i-1]){
dupLocalCount++;
if(i==arr.length-1){
arrayLength+=digitlength(dupLocalCount)+2;
dupLocalCount=0;
}
}else{
if(dupLocalCount>0){
arrayLength+=digitlength(dupLocalCount)+2;
dupLocalCount=0;
}else{
arrayLength+=1;
}
}
}
char[] encodedArray=new char[arrayLength];
int k=0;
for(int i=1;i<arr.length;i++){
if(arr[i]==arr[i-1]){
dupLocalCount++;
if(i==arr.length-1){
char[] temp=digitToChar(dupLocalCount);
for(int j=0;j<temp.length;j++){
encodedArray[k++]=temp[j];
}
encodedArray[k++]='x';
encodedArray[k++]=arr[i-1];
}
}else{
if(dupLocalCount>0){
char[] temp=digitToChar(dupLocalCount);
for(int j=0;j<temp.length;j++){
encodedArray[k++]=temp[j];
}
encodedArray[k++]='x';
encodedArray[k++]=arr[i-1];
dupLocalCount=0;
}else{
encodedArray[k++]=arr[i-1];
}
}
}
return encodedArray;
}
public static int digitlength(int count){
int digitlen=0;
while(count>0){
count/=10;
digitlen++;
}
return digitlen;
}
public static char[] digitToChar(int count){
char[] digitInChar = new char[digitlength(count)];
int i=0;
while(count>0){
digitInChar[i]=(char)('0'+count%10);
count/=10;
i++;
}
return digitInChar;
}
Since the encoder was provided by many, here is the decoder in C++, running time: O(N).
void
decoder(std::string A) {
int x = 0;
for(int i = 0; i<A.size(); ++i) {
while(A[i] >= '0' && A[i] <= '9') {
x = x*10 + (A[i] - '0');
i++;
}
if(x>0 && A[i] == 'x') {
i++;
for(int j = 0; j < x; ++j) {
std::cout<< A[i];
}
x = 0;
} else if(x>0 && A[i] != 'x') {
std::cout<< x << A[i];
x = 0;
} else {
std::cout<< A[i];
}
}
std::cout<<std::endl;
}
Output:
p14a8xkpq => p14akkkkkkkkpq
abc11kresf10xAddd => abc11kresfAAAAAAAAAAddd
If the goal of the problem is to produce the same starting string to the encoder after it has been encoded and then decoded (a very common practice), then the problem can not be solved.
The problem states that "the string can have any possible ASCII character". This means that the following input string to the encoder could be used:
5XABBBB
When encoded, it should produce:
5XA4XB
When this string is then decoded, it would then produce:
AAAAABBBB
This is not the same starting string used as input to the encoder.
Either the problem needs to be restated to clarify that the multiplier character can't be used as input to the encoder or the problem is a test to see if the job applicant can recognize the issues involved and propose a reasonable workaround.
If the goal of the problem is to produce the same starting string to the encoder after it has been encoded and then decoded (a very common practice), then the problem can not be solved.
The problem states that "the string can have any possible ASCII character". This means that the following input string to the encoder could be used:
5XABBBB
When encoded, it should produce:
5XA4XB
When this string is then decoded, it would then produce:
AAAAABBBB
This is not the same starting string used as input to the encoder.
Either the problem needs to be restated to clarify that the multiplier character can't be used as input to the encoder or the problem is a test to see if the job applicant can recognize the issues involved and propose a reasonable workaround.
public class Solution {
public static void main(String [] args) {
System.out.println(decode("p14akkkkkkkkpq"));
}
public static String decode(String string) {
StringBuilder buf = new StringBuilder();
int count = 1;
char prev = string.charAt(0);
for (int i = 1; i < string.length(); i++) {
if (string.charAt(i) == prev) {
count++;
} else {
if (count > 3) {
buf.append(count).append('x').append(prev);
} else {
for (int j = 0; j < count; j++) {
buf.append(prev);
}
}
count = 1;
prev = string.charAt(i);
}
}
if (count > 3) {
buf.append(count).append('x').append(prev);
} else {
for (int j = 0; j < count; j++) {
buf.append(prev);
}
}
return buf.toString();
}
}
One case for the encoder if the length of the substring to be encoded is less or equals that 3 don't make sense to apply, example
aa => 2xa in this case the info is expanded is counterproductive
aaa => 3xa is the same length so no improve in the compress so no make sense.
There are some considerations about the decoder that are not clarified, for example if we have a string like this: 24xa there are 2 cases
case 1 => aaaaaaaaaaaaaaaaaaaaaaaa
case 2 => 2aaaa
- For the first case we need to handle an input like 5aaaaaa => 56xa after the decoded this would give us a huge expanded string like aaaaaaaaaa...aaaaa so we need to encode something like: 5a5xa; in this case substrings that has length of 4 or less don't make sense to compress
- For the second case we need to manage chunks, example dgeeeeeeeeeeeeeeee => dg9xe7xe
This is my solution for the first case
Output:
- hnatsu May 23, 2016abc => abc
abbccc => abbccc
abbcccdddd => abbccc4xd
p14akkkkkkkkpq => p14a8xkpq
p14xxxxxx1pq => p141xx5xx1pq
p14xxxxxx => p141xx5xx
qp45aaaaaaaaaaaadf => qp45a11xadf