Facebook Interview Question
Software EngineersCountry: United States
Interview Type: Phone Interview
This works perfect with a small change. Add below
Point 5: If you find the pattern string already consumed (patternIndex < 0) and there are already existing characters in the source string then retrun false.
if (sourceIndex < 0 && patternIndex < 0) {
return true;
} else if (sourceIndex < 0) {
//Check if the remaining parts of the pattern are not optional
while (patternIndex >= 0) {
if (patternStr.charAt(patternIndex) == '*') {
patternIndex = patternIndex - 2;
} else {
return false;
}
}
return true;
}
// point 5 added here: Source string present but pattern string is consumed
else if(patternIndex < 0)
{
return false;
}
It seems this fails for the case where "aa" needs to get matched to "aa*". Should return true. The issue is that a* exhausts all the a's in the input string but does not allow one of the input string's a's to satisfy the first a in the regex
@Good Replier
If it is allowed to change pattern while iterating, we could "eat up" equal characters like this:
if (sourceStr.charAt(sourceIndex) == patternStr.charAt(patternIndex - 1)) {
//NEW CODE
if (patternIndex > 1 && patternStr.charAt(patternIndex - 1) == patternStr.charAt(patternIndex - 2)) {
patternStr = patternStr.substring(0, patternIndex - 1) + "*";
patternIndex--;
}
//END NEW CODE
return isMatching(sourceStr, patternStr, --sourceIndex, patternIndex);
}
Had @Good Maker used char[] for the pattern instead of String, could have avoided subString and concat. Could just replace current char with "*" after characters that remained after "eating up" equals.
isMatch(“b*a”, “a”) → true
isMatch(“a*a”, “a”) → true
isMatch(“aab”, “c*a*b”) → true
Why should the above 3 return true ?
def ismatch(str1,str2):
if "*" not in str1 and "*" not in str2:
if str2==str1:
return True
else:
return False
else:
if str2=="":
if len(str1)/2.==str1.count("*"):
return True
else:
return False
elif str1=="":
if len(str2)/2.==str2.count("*"):
return True
else:
return False
if str2=="*" or str1=="*":
return True
else:
if str1[0]==str2[0]:
if len(str1)!=1 and len(str2)!=1:
if str1[1]=="*":
if len(str1)==2:
return True
else:
print str1[len(str1)-1::],str2[2::]
return ismatch(str1[2::],str2[len(str2)-1::])
elif str2[1]=="*":
if len(str2)==2:
return True
else:
return ismatch(str1[len(str1)-1::],str2[2::])
else:
if len(str1)==1:
return ismatch("",str2[1::])
else:
return ismatch(str1[1::],"")
else:
if str1[1]=="*":
return ismatch(str1[2::],str2)
elif str2[1]=="*":
return ismatch(str1,str2[2::])
#return False
Here is my brute force approach in C++;
Running time is O(N+M) where N is length of target word, and M is length of pattern.
/*
Implement the IsMatch producing the following results given the string and the pattern
IsMatched('aa', 'a') = false
IsMatched('aa', 'aa') = true
IsMatched('aaa', 'aa') = false
IsMatched('aa', 'a*') = true;
IsMatched('aa', '*') = true;
IsMatched('ab', '*') = true
IsMatched('b*a', 'a') = true;
IsMatched('aab', 'c*a*b') = true;
took longer than 20 minutes and had to fix the bug while walking through the test cases.
I wish I could come up with the initial algorithm quicker but understanding the question took a while.
Also made silly mistakes like typos. Shame on you !!!.
Still could not get the thing right after rewrite. Need to keep an eye on this problem.
*/
#include <string>
#include <vector>
#include <iostream>
using namespace std;
struct token {
char letter;
int minLen;
bool hasStar;
token(char c, int l, bool s):letter(c), minLen(l), hasStar(s){}
};
vector<token> parse(string s)
{
vector<token> result;
bool bStar;
char l;
int i = 0;
while (i < s.length())
{
l = 0;
bStar = false;
char cur = s[i];
while (i< s.length())
{
if (s[i] == '*')
{
if (l == 0){
cout<<"invalid string" <<endl;
result.clear();
return result;
}
l--;
bStar = true;
} else if (cur != s[i])
{
result.push_back(token(cur, l, bStar));
break;
} else {
l++;
}
if (i == s.length()-1)
{
result.push_back(token(cur, l, bStar));
i++;
break;
}
i++;
}
}
return result;
}
bool IsMatched(string target, string pattern)
{
if (target =="*" || pattern == "*")
return true;
vector<token> ttoken = parse(target);
vector<token> ptoken = parse(pattern);
int i = 0, j = 0;
while (i < ttoken.size() && j < ptoken.size())
{
if (ttoken[i].letter == ptoken[j].letter)
{
if (ttoken[i].minLen == ptoken[j].minLen || (ptoken[j].hasStar && ttoken[i].minLen > ptoken[j].minLen) || (ttoken[i].hasStar && ttoken[i].minLen < ptoken[i].minLen) )
{
i++;
j++;
}
else
return false;
} else {
if (ttoken[i].hasStar && ptoken[j].hasStar)
{
if (ttoken[i].minLen == 0)
i++;
else if (ptoken[i].minLen == 0)
j++;
else
return false;
} else if (ttoken[i].minLen == 0)
{
i++;
} else if (ptoken[i].minLen == 0)
{
j++;
} else
return false;
}
}
if (j < ptoken.size())
{
while (j < ptoken.size())
{
if (ptoken[j].minLen == 0)
j++;
else
break;
}
}
return (i == ttoken.size() && j == ptoken.size());
}
int main()
{
cout<<"IsMatched('aa', 'a') = " << IsMatched("aa", "a") <<endl;
cout<<"IsMatched('aa', 'aa') = " << IsMatched("aa", "aa") <<endl;
cout<<"IsMatched('aaa', 'aa') = " << IsMatched("aaa", "aa") <<endl;
cout<<"IsMatched('aa', 'a*') = " << IsMatched("aa", "a*") <<endl;
cout<<"IsMatched('aa', '*') = " << IsMatched("aa", "*") <<endl;
cout<<"IsMatched('ab', '*') = " << IsMatched("ab", "*") <<endl;
cout<<"IsMatched('b*a', 'a') = " << IsMatched("b*a", "a") <<endl;
cout<<"IsMatched('b*', 'a*') = " << IsMatched("b*", "a*") <<endl;
cout<<"IsMatched('b*', 'a*a') = " << IsMatched("b*", "a*a") <<endl;
cout<<"IsMatched('aab', 'c*a*b') = " << IsMatched("aab", "c*a*b") <<endl;
return 1;
}
#define forless(i, s, e) for(int i = (s); i < (e); i++)
#define forto(i, s, e) for(int i = (s); i <= (e); i++)
using namespace std;
bool dfs(const string& str, int i, const string& patter, int j)
{
if(i == str.length() && j == patter.length())
{
return true;
}
if(j == patter.length())
{
return false;
}
// * after
if(j + 1 < patter.length() && patter[j + 1] == '*')
{
//not use patter[j]
if(dfs(str, i, patter, j + 2))
{
return true;
}
//use patter[j]
if(i < str.length() && str[i] == patter[j])
{
return dfs(str, i + 1, patter, j);
}
return false;
}
//not * after
else
{
if(i < str.length() && str[i] == patter[j])
{
return dfs(str, i + 1, patter, j + 1);
}
return false;
}
}
bool isMatch(const string& str, const string& patter)
{
return dfs(str, 0, patter, 0);
}
#define forless(i, s, e) for(int i = (s); i < (e); i++)
#define forto(i, s, e) for(int i = (s); i <= (e); i++)
using namespace std;
bool dfs(const string& str, int i, const string& patter, int j)
{
if(i == str.length() && j == patter.length())
{
return true;
}
if(j == patter.length())
{
return false;
}
// * after
if(j + 1 < patter.length() && patter[j + 1] == '*')
{
//not use patter[j]
if(dfs(str, i, patter, j + 2))
{
return true;
}
//use patter[j]
if(i < str.length() && str[i] == patter[j])
{
return dfs(str, i + 1, patter, j);
}
return false;
}
//not * after
else
{
if(i < str.length() && str[i] == patter[j])
{
return dfs(str, i + 1, patter, j + 1);
}
return false;
}
}
bool isMatch(const string& str, const string& patter)
{
return dfs(str, 0, patter, 0);
}
#define forless(i, s, e) for(int i = (s); i < (e); i++)
#define forto(i, s, e) for(int i = (s); i <= (e); i++)
using namespace std;
bool dfs(const string& str, int i, const string& patter, int j)
{
if(i == str.length() && j == patter.length())
{
return true;
}
if(j == patter.length())
{
return false;
}
// * after
if(j + 1 < patter.length() && patter[j + 1] == '*')
{
//not use patter[j]
if(dfs(str, i, patter, j + 2))
{
return true;
}
//use patter[j]
if(i < str.length() && str[i] == patter[j])
{
return dfs(str, i + 1, patter, j);
}
return false;
}
//not * after
else
{
if(i < str.length() && str[i] == patter[j])
{
return dfs(str, i + 1, patter, j + 1);
}
return false;
}
}
bool isMatch(const string& str, const string& patter)
{
return dfs(str, 0, patter, 0);
}
def matchRegEx(pattern,string):
if len(pattern) == 1 and pattern[0]=='*':
return True
elif pattern[0]=='*':
for i,c in enumerate(string):
if matchRegEx(pattern[1:],string[i:]):
return True
return False
else:
p,s = 0, 0
while p < len(pattern) and s < len(string):
if pattern[p] == '*':
restp, rests = pattern[p:], string[s:]
return matchRegEx(restp[::-1],rests[::-1])
else:
if pattern[p] != string[s]:
return False
p, s = p + 1, s + 1
return True
print matchRegEx('a***ca**u','abaccccaouu')
def matchRegEx(pattern,string):
if len(pattern) == 1 and pattern[0]=='*':
return True
elif pattern[0]=='*':
for i,c in enumerate(string):
if matchRegEx(pattern[1:],string[i:]):
return True
return False
else:
p,s = 0, 0
while p < len(pattern) and s < len(string):
if pattern[p] == '*':
restp, rests = pattern[p:], string[s:]
return matchRegEx(restp[::-1],rests[::-1])
else:
if pattern[p] != string[s]:
return False
p, s = p + 1, s + 1
return True
print matchRegEx('a***ca**u','abaccccaouu')
bool _isMatch(std::string s, int pos_s, std::string regex, int pos_r) {
if (pos_s == -1 && pos_r == -1) {
return 1;
} else if (pos_s == -1 || pos_r == -1) {
return 0;
}
if (regex[pos_r] == '*') {
while (s[pos_s] == regex[pos_r-1]) {
--pos_s;
}
return _isMatch(s, pos_s, regex, pos_r-2);
} else {
if (regex[pos_r--] == s[pos_s--]) {
return _isMatch(s, pos_s, regex, pos_r);
} else {
return 0;
}
}
}
bool isMatch(std::string s, std::string regex) {
return _isMatch(s, s.length()-1, regex, regex.length()-1);
}
bool isMatch(char * in, char *pat)
{
char recur;
uint pi = 0; uint ii = 0; // pattern and input indices
if (pat == NULL || input == NULL) {return false};
if (pat[0] == '*') return (pat[1] == '\0'); // only a single "*" is a valid beginner *
while (pat[pi] != '\0' && in[ii] != '\0')
{
if (pat[pi] == in[ii]) // no brainer - continue;
{++pi ; ++ii; continue; }
else if (pat[pi] == '*' || pat[pi + 1] == '*')
{
if (pat[pi] == *) {recur = pat[pi-1];}
if (pat[pi+1] == *) {recur = pat[pi+1]; ++pi}
while (in[ii] != '\0' && recur == in[ii]) // this may happen 0 or more times :-)
{++ii;}
++pi;
}
else // ha ! this shouldn't happen if they match.
break;
}
return (pat[pi] == '\0' && in[ii] == '\0');
}
Dynamic programming solution, comments inline :
public boolean regexMatch(String word, String pattern) {
int wordLength = word.length(), patternLength = pattern.length();
boolean [][] dp = new boolean [wordLength+1][patternLength+1];
dp[0][0] = true;
for(int i=1; i<=patternLength; i++) {
// assuming '*' will never be the first letter
if(i < patternLength && pattern.charAt(i) == '*') {
dp[0][i] = dp[0][i + 1] = dp[0][i - 1];
i += 1;
}
else break;
}
for(int i=1; i<=wordLength; i++) {
for(int j=1; j<=patternLength; j++) {
// if the next letter in pattern is '*'
if(j < patternLength && pattern.charAt(j) == '*') {
dp[i][j] = dp[i][j+1] = (dp[i][j-1]) || // current word substring already matched in previous pattern
(dp[i-1][j-1] && (word.charAt(i-1) == pattern.charAt(j-1))) || // prev word substring already matched in prev pattern
(dp[i-1][j] && (word.charAt(i-1) == pattern.charAt(j-1))); // prev word substring matched with current pattern
j += 1; // increment by one more
} else {
// current letter of word must match with pattern's current letter
// and also last substring of word should also match with last substring of pattern
dp[i][j] = dp[i-1][j-1] && (word.charAt(i-1) == pattern.charAt(j-1));
}
}
}
return dp[wordLength][patternLength];
}
Untest. But I believe it is correct, (but no error handling)
bool isMatch(char* str1, char* str2)
{
while ( *str !=‘\0’ && str != ‘\0’ ){
if( *str == *str2 && *(str+1) != ‘*’ && *(str2+1) != ‘*’){
str++;
str2++;
}
else if ( *str == *str2 && *(str+1) == ‘*’ ){
while(*str2 == *str){
++str2;
}
str += 2;
}
else if ( *str == *str2 && *(str2+1) == ‘*’ ){
while(*str2 == *str){
++str;
}
str2 += 2;
}
else if (*str != *str2 && *(str+1) == ‘*’ )
str++;
else if (*str != *str2 && *(str2+1) == ‘*’ )
str2++;
else
return false;
}
if (*str != ‘\0’ || *str2 != ‘\0’)
return false;
else
return true;
}
def is_match(string,regex):
if regex == '*':
return True
return is_match_index(string,regex,0,0)
def is_match_index(string, regex, index, r_index):
is_string_end = False
is_regex_end = False
if index >= len(string)-1:
is_string_end = True
if r_index >= len(regex)-1:
is_regex_end = True
if is_string_end and is_regex_end:
return True
if not is_string_end and is_regex_end:
return False
alphabet = regex[r_index]
if r_index+1 < len(regex):
next_alphabet = regex[r_index+1]
if next_alphabet == "*" and index < len(string):
return is_match_index(string,regex,index,r_index+2) or is_match_index(string,regex,index+1,r_index)
else:
if is_string_end:
return False
string_alphabet = string[index]
if alphabet == string_alphabet:
return is_match_index(string,regex,index+1,r_index+2)
else:
return False
if __name__ == "__main__":
print is_match("aabc","a*b*c")
print is_match("aabbc","a*b*bc")
print is_match("bbc","a*b*bc")
print is_match("ac","a*b*bc")
Python Code to the problem with all the test cases covered. Any comments would be appreciated.
def is_match(string,regex):
if regex == '*':
return True
return is_match_index(string,regex,0,0)
def is_match_index(string, regex, index, r_index):
is_string_end = False
is_regex_end = False
if index >= len(string)-1:
is_string_end = True
if r_index >= len(regex)-1:
is_regex_end = True
if is_string_end and is_regex_end:
return True
if not is_string_end and is_regex_end:
return False
alphabet = regex[r_index]
if r_index+1 < len(regex):
next_alphabet = regex[r_index+1]
if next_alphabet == "*" and index < len(string):
return is_match_index(string,regex,index,r_index+2) or is_match_index(string,regex,index+1,r_index)
else:
if is_string_end:
return False
string_alphabet = string[index]
if alphabet == string_alphabet:
return is_match_index(string,regex,index+1,r_index+2)
else:
return False
if __name__ == "__main__":
print is_match("aabc","a*b*c")
print is_match("aabbc","a*b*bc")
print is_match("bbc","a*b*bc")
print is_match("ac","a*b*bc")
This is based on GoodMaker's code.
public static boolean matches(String str, String pattern) {
return matches(str, pattern, str.length()-1, pattern.length()-1);
}
public static boolean matches(String str, String pattern,int strIdx, int patternIdx) {
if("*".equals(pattern)) {
return true;
}
if(strIdx < 0 && patternIdx < 0) {
return true;
} else if(strIdx < 0) {
while(patternIdx >= 0) {
if(pattern.charAt(patternIdx) == '*') {
patternIdx = patternIdx-2;
if(patternIdx < 0) {
return true;
}
} else {
return false;
}
}
} else if(patternIdx < 0){
while(strIdx >= 0) {
if(str.charAt(strIdx) == '*') {
strIdx = strIdx-2;
if(strIdx < 0) {
return true;
}
} else {
return false;
}
}
} else {
if(pattern.charAt(patternIdx) == str.charAt(strIdx)) {
return matches(str, pattern, --strIdx, --patternIdx);
} else {
if(pattern.charAt(patternIdx) == '*') {
--patternIdx;
while(strIdx >=0 && pattern.charAt(patternIdx) == str.charAt(strIdx)) {
strIdx--;
}
return matches(str, pattern, --strIdx, --patternIdx);
}
if(str.charAt(patternIdx) == '*') {
strIdx = strIdx - 2;
return matches(str, pattern, strIdx, --patternIdx);
}
}
}
return false;
}
This is based on Good Maker's code.
public static boolean matches(String str, String pattern) {
return matches(str, pattern, str.length()-1, pattern.length()-1);
}
public static boolean matches(String str, String pattern,int strIdx, int patternIdx) {
if("*".equals(pattern)) {
return true;
}
if(strIdx < 0 && patternIdx < 0) {
return true;
} else if(strIdx < 0) {
while(patternIdx >= 0) {
if(pattern.charAt(patternIdx) == '*') {
patternIdx = patternIdx-2;
if(patternIdx < 0) {
return true;
}
} else {
return false;
}
}
} else if(patternIdx < 0){
while(strIdx >= 0) {
if(str.charAt(strIdx) == '*') {
strIdx = strIdx-2;
if(strIdx < 0) {
return true;
}
} else {
return false;
}
}
} else {
if(pattern.charAt(patternIdx) == str.charAt(strIdx)) {
return matches(str, pattern, --strIdx, --patternIdx);
} else {
if(pattern.charAt(patternIdx) == '*') {
--patternIdx;
while(strIdx >=0 && pattern.charAt(patternIdx) == str.charAt(strIdx)) {
strIdx--;
}
return matches(str, pattern, --strIdx, --patternIdx);
}
if(str.charAt(patternIdx) == '*') {
strIdx = strIdx - 2;
return matches(str, pattern, strIdx, --patternIdx);
}
}
}
return false;
}
Implemented using two stacks. Runs in O(n), but also has a space complexity of O(n).
public static boolean isMatch(String str, String regex) {
//If the regex is just a * it's always a match
if (regex.equals("*")) return true;
//Create 2 stacks. One for the characters of the
//regular expression, and the other for the string
Stack<Character> strStack = new Stack<Character>();
Stack<Character> regStack = new Stack<Character>();
//Push the characters from each onto their corresponding stacks
for (int i = 0; i < str.length(); i++) {
strStack.push(str.charAt(i));
}
for (int i = 0; i < regex.length(); i++) {
regStack.push(regex.charAt(i));
}
//As long as at least one of the stack is not empty
while (!strStack.empty() || !regStack.empty()) {
//Get the next letter from the string (or null)
char strChar = (strStack.empty()) ? '\0' : strStack.peek();
//Get the next letter from the regex (or null)
char regChar = (regStack.empty()) ? '\0' : regStack.peek();
//If the character is a star, get the letter in the regex
//that is under (proceding in the string) the star (denoted L),
//and continue popping from strStack until the top letter no
//longer equals L.
if (regChar == '*') {
regStack.pop();
regChar = regStack.pop();
while (!strStack.empty() && strStack.peek() == regChar)
strStack.pop();
}
//If the two characters match, pop them off the stack
else if (strChar == regChar) {
strStack.pop();
regStack.pop();
}
//If they don't, and one isn't a star, return false.
else return false;
}
return true;
}
Attached below the detailed code and algorithm with explanation, it covers all of the mentioned cases ...
- Good Maker August 08, 2015