Google Interview Question
Software Engineer / DevelopersCountry: United States
C# with Test method
public class TreeNode
{
public int val;
public TreeNode left;
public TreeNode right;
public TreeNode(int x) { val = x; }
}
public static List<TreeNode> GetDuplicateSubTrees(TreeNode tree)
{
List<TreeNode> matches = new List<TreeNode>();
FindDuplicateSubTree(tree, tree.left, matches);
FindDuplicateSubTree(tree, tree.right, matches);
return matches;
}
//Find if there is at least one duplicate of the subtree in tree
private static Boolean FindDuplicateSubTree(TreeNode tree, TreeNode subTree, List<TreeNode> matches)
{
if (tree == null || subTree == null) return false;
Boolean subTreeMatched = ((tree.val == subTree.val) && tree != subTree); //values match and not the same tree
if (subTreeMatched)
{
if (tree.left != null && subTree.left != null)
{
subTreeMatched = subTreeMatched & FindDuplicateSubTree(tree.left, subTree.left, matches);
}
if (subTreeMatched)
{
if (tree.right != null && subTree.right != null)
{
subTreeMatched = subTreeMatched & FindDuplicateSubTree(tree.right, subTree.right, matches);
}
}
}
if (!subTreeMatched)
{
FindDuplicateSubTree(tree.left, subTree, matches);
FindDuplicateSubTree(tree.right, subTree, matches);
}
else
{
if (matches != null)
{
AddNewSubTree(matches, subTree);
}
}
return subTreeMatched;
}
//Add if not found
private static void AddNewSubTree(List<TreeNode> subTrees, TreeNode tree)
{
foreach (TreeNode n in subTrees)
{
if (FindDuplicateSubTree(n, tree, null))
{
return;
}
}
subTrees.Add(tree);
}
public static void Test()
{
TreeNode n = new TreeNode(1);
n.left = new TreeNode(2);
n.left.left = new TreeNode(4);
n.right = new TreeNode(3);
n.right.left = new TreeNode(2);
n.right.left.left = new TreeNode(4);
n.right.right = new TreeNode(4);
Dump(n);
Console.WriteLine();
List<TreeNode> dups = GetDuplicateSubTrees(n);
foreach(TreeNode t in dups)
{
Dump(t);
Console.WriteLine();
}
}
private static void Dump(TreeNode t)
{
if (t == null) return;
Console.Write(t.val + ", ");
Dump(t.left);
Dump(t.right);
}
def tree_hash(tree, depth=1):
if tree is None:
return 0
return tree_hash(tree.left, depth + 1) + tree_hash(tree.right, depth + 1) \
+ depth * tree.value
class Tree:
def __init__(self, value, left=None, right=None):
self.value = value
self.left = left
self.right = right
self.rank = None
def set_rank(self, rank):
self.rank = rank
def __repr__(self):
return 'T({0})'.format(tree_hash(self))
def heights(tree, res):
if tree is None:
return 0
height = max(heights(tree.left, res), heights(tree.right, res)) + 1
res.append((height, tree))
return height
def rank(node):
return -1 if node is None else node.rank
def unique_ids(nodes):
return [(node.value, rank(node.left), rank(node.right), node) for _, node in nodes]
def equal_subtrees(tree):
res = []
height_list = []
heights(tree, height_list)
height_list.sort()
i = 0
while i < len(height_list):
j = i
while j < len(height_list) and height_list[i][0] == height_list[j][0]:
j += 1
# Now [i, j) is a block of nodes with equal height
ids = unique_ids(height_list[i:j])
ids.sort()
current_rank = 0
k = 0
while k < j - i:
m = k
while m < j - i and ids[k][:3] == ids[m][:3]:
ids[m][3].set_rank(current_rank)
m += 1
# Now [k, m) is a block of nodes with equal ids,
# and therefore with equal subtrees
current_rank += 1
res.append(map(lambda tup: tup[3], ids[k:m]))
k = m
i = j
return res
t = Tree(1,
Tree(2,
Tree(3),
Tree(3)),
Tree(2,
Tree(3),
Tree(3)))
print equal_subtrees(t)
Solution in Java. If, for example, the output is 2l4, this represents a tree with head 2 and a node in the left as 4. It also shows all duplicates. In the example of the question there are 3, so 3 results are printed in the output
public class BinaryTree {
private class Node {
int data;
Node p_left;
Node p_right;
public Node(int data) {
this.data = data;
}
}
private final Node head;
public BinaryTree() {
head = new Node(1);
head.p_left = new Node(2);
head.p_left.p_left = new Node(4);
head.p_right = new Node(3);
head.p_right.p_left = new Node(2);
head.p_right.p_left.p_left = new Node(4);
head.p_right.p_right = new Node(4);
}
public void findDuplicates() {
ArrayList<String> paths = getPaths();
Set<String> uniquePaths = new HashSet<>();
for (String aPath : paths) {
while (aPath.length() > 1) {
if (!uniquePaths.contains(aPath)) {
uniquePaths.add(aPath);
} else {
System.out.println(aPath);
}
aPath = aPath.substring(2);
}
if (!uniquePaths.contains(aPath)) {
uniquePaths.add(aPath);
} else {
System.out.println(aPath);
}
}
}
private ArrayList<String> getPaths() {
ArrayList<String> paths = new ArrayList<>();
getPaths(head, "", paths);
return paths;
}
private void getPaths(Node root, String currentPath, ArrayList<String> paths) {
if (root == null) {
return;
}
if (root.p_left == null && root.p_right == null) {
paths.add(currentPath + root.data);
return;
}
getPaths(root.p_left, currentPath + root.data + "l", paths);
getPaths(root.p_right, currentPath + root.data + "r", paths);
}
Output:
2l4
4
4
1. For each subtree compute a unique representation of it. We can use a pre-order traversal and construct a string for each subtree. However 2 different subtrees can have the same pre-order output. So represent each missing child with a "NULL" or something.
2. For each node, add the string representation of the subtree rooted at that node to a hashmap. If the hashmap already contains such a string then we found a duplicate. Add the duplicate to a set and continue
Code :
public class DuplicateSubtrees {
public static void main(String[] args) {
TreeNode root = new TreeNode(1);
TreeNode two = new TreeNode(2);
TreeNode three = new TreeNode(3);
TreeNode four1 = new TreeNode(4);
TreeNode four2 = new TreeNode(4);
TreeNode four3 = new TreeNode(4);
TreeNode two2 = new TreeNode(2);
root.left=two;
root.right = three;
three.left = two2;
three.right = four3;
two2.left = four2;
two.left = four1;
Set<String> results = new HashSet<String>();
findAllSubtrees(root, new HashMap<String, Integer>(), results);
for (String str : results) {
System.out.println(str.replace("-", " ").replace("N", ""));
}
}
public static String findAllSubtrees(TreeNode root, HashMap<String, Integer> map, Set<String> results) {
if (root == null) return "N";
String str = root.val + "-";
String leftStr = findAllSubtrees(root.left, map, results);
String rightStr = findAllSubtrees(root.right, map, results);
str = str + leftStr + rightStr;
if (map.containsKey(str)) {
results.add(str);
}
map.put(str, 1);
return str;
}
public static class TreeNode {
TreeNode right;
TreeNode left;
int val;
public TreeNode(int x) {
this.val = x;
}
}
}
C++ approach
A basic Node structure
struct Node
{
Node* left;
Node* right;
int val;
};
Part 1 - Check if a tree has duplicate sub-trees
bool hasDuplicates(Node* root)
{
std::vector<Node*> nodesToCheck;
nodesToCheck.reserve(100); // Helps with vector push_back efficiency
Node* node = root;
// Compare the children
while (node != NULL)
{
// Push the right tree onto a list to compare
nodesToCheck.push_back(node->right);
// Compare the each right sub-tree to the left tree
for (int i = 0; i < nodesToCheck.size(); ++i)
{
if (duplicateFound(node->left, nodesToCheck[i]) return true;
}
// Check for duplicates purely within the right trees
if hasDuplicates(node->right) return true;
// Get the next left node to check
node = node->left;
}
return false;
}
// Recursively compare two trees
bool duplicateFound(Node* left, Node* right)
{
if (left == right == NULL) return true;
if (left == NULL && right != NULL) return false;
if (left != NULL && right == NULL) return false;
// Recursive check
if (left->val == right->val &&
compare(left->left, right->left &&
compare(left->right, right->right)
return true;
// The trees are not the same, so check all the right children
if (duplicateFound(left, right->left)) return true;
return duplicateFound(left, right->right));
}
Part 2 - Return all duplicates
With the above code, this isn't too hard. Just replace the main
"return true" with "duplicates.push_back(...);"
std::vector<Node*> hasDuplicates(Node* root)
{
std::vector<Node*> duplicates;
duplicates.reserve(100); // Helps with vector push_back efficiency
std::vector<Node*> nodesToCheck;
nodesToCheck.reserve(100); // Helps with vector push_back efficiency
Node* node = root;
// Compare the children
while (node != NULL)
{
// Push the right tree onto a list to compare
nodesToCheck.push_back(node->right);
// Compare the each right sub-tree to the left tree
for (int i = 0; i < nodesToCheck.size(); ++i)
{
if (duplicateFound(node->left, nodesToCheck[i])
duplicates.push_back(node->left);
}
// Check for duplicates purely within the right trees
hasDuplicates(node->right); // No need for an extra push_back()
// Get the next left node to check
node = node->left;
}
return duplicates
}
// Recursively compare two trees
bool duplicateFound(Node* left, Node* right)
{
if (left == right == NULL) return true;
if (left == NULL && right != NULL) return false;
if (left != NULL && right == NULL) return false;
// Recursive check
if (left->val == right->val &&
compare(left->left, right->left &&
compare(left->right, right->right)
return true;
// The trees are not the same, so check all the right children
if (duplicateFound(left, right->left)) return true;
return duplicateFound(left, right->right));
}
Another C++ solution, but with hash maps for better efficiency.
The style here is to add up the values of each node, then
hash the result and store the node into a map with the hashed
key. The idea is to save time by only comparing trees when
there is a hash collision.
Basic Node structure
struct Node
{
Node* left;
Node* right;
int val;
};
Part 1 - Check if a tree has duplicate sub-trees
bool hasDuplicates(Node* root)
{
if (root == NULL) return false;
bool dupFound = false;
// Multimap allows multiple values with same key
std::unordered_multimap<int, Node*> hashMap;
hashTree(root, hashMap, dupFound);
return dupFound;
}
// Returns recursively calculated hash value
int hashTree(Node* node,
std::unordered_multimap<int, Node*>& hashMap,
bool& dup)
{
if (dup) return; // Kick out of recursion early
if (node == NULL) return 0;
// Get hash value of left tree
int leftHash = hashTree(node->left, map, dup);
if (dup) return; // Kick out of recursion early
// Get hash value of right tree
int rightHash = hashTree(node->right, map, dup);
if (dup) return; // Kick out of recursion early
int hashValue = hash(leftHash + rightHash + node->val);
// Loop through collisions
for (std::unordered_multimap<int, Node*>::iterator collisionMap =
hashMap.find(hashValue);
collisionMap != hashMap.end();
++collisionMap)
{
// Compare due to a hash collision
dup = compareTrees(node, collisionMap->second);
// Return on duplicate found
if (dup) return;
}
// Add the tree to the map
hashMap.insert(std::make_pair(hashValue, node));
}
// Simple hash function
int hash(int key)
{
// Increase number to sacrifice space for time, and vice-versa
return key % 100;
}
// Recursive tree comparison method
bool compareTrees(Node* left, Node* right)
{
if (NULL == left == right) return true;
if (left == NULL || right == NULL) return false;
return left->val == right->val &&
compareTrees(left->left, right->left) &&
compareTrees(left->right, right->right);
}
Part 2 - Return all duplicates
For efficiency, the duplicate list should be passed in by reference
void hasDuplicates(Node* root, std::list<Node*>& duplicates)
{
duplicates.clear(); // Good coding practice
if (root == NULL) return;
// Multimap allows multiple values with same key
std::unordered_multimap<int, Node*> hashMap;
hashTree(root, hashMap, duplicates);
return dupFound;
}
// Returns recursively calculated hash value
int hashTree(Node* node,
std::unordered_multimap<int, Node*>& hashMap,
std::list<Node*>& duplicates)
{
if (node == NULL) return 0;
// Get hash value of left tree
int leftHash = hashTree(node->left, map, dup);
if (dup) return; // Kick out of recursion early
// Get hash value of right tree
int rightHash = hashTree(node->right, map, dup);
if (dup) return; // Kick out of recursion early
int hashValue = hash(leftHash + rightHash + node->val);
// Loop through collisions
for (std::unordered_multimap<int, Node*>::iterator collisionMap =
hashMap.find(hashValue);
collisionMap != hashMap.end();
++collisionMap)
{
// Compare due to a hash collision
if (compareTrees(node, collisionMap->second))
duplicates.push_back(node);
}
// Add the tree to the map
hashMap.insert(std::make_pair(hashValue, node));
}
// Simple hash function
int hash(int key)
{
// Increase number to sacrifice space for time, and vice-versa
return key % 100;
}
// Recursive tree comparison method
bool compareTrees(Node* left, Node* right)
{
if (NULL == left == right) return true;
if (left == NULL || right == NULL) return false;
return left->val == right->val &&
compareTrees(left->left, right->left) &&
compareTrees(left->right, right->right);
}
package helloworld;
import java.awt.List;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import javax.swing.text.html.HTMLDocument.Iterator;
public class TreeRep{
public static class TreeNode{
TreeNode left;
TreeNode right;
int val;
public TreeNode(int val){this.val = val;}
}
private HashMap<Integer, ArrayList<TreeNode>> map = new HashMap<Integer, ArrayList<TreeNode>>();
private void hashTree(TreeNode tn){
if(tn == null){
return;
}
hashTree(tn.left);
if(!map.containsKey(tn.val)){
map.put(tn.val, new ArrayList<TreeNode>());
}
map.get(tn.val).add(tn);
hashTree(tn.right);
}
private ArrayList<Integer> getInorderScan(TreeNode tn, ArrayList<Integer> result){
if(tn == null)
return result;
getInorderScan(tn.left,result);
result.add(tn.val);
getInorderScan(tn.right,result);
return result;
}
private boolean isContained(ArrayList<ArrayList<Integer>> set, ArrayList<Integer> a){
if(a == null){
return false;
}
for(ArrayList<Integer> arr : set){
if(a.equals(arr))
return true;
}
return false;
}
public ArrayList<ArrayList<Integer>> findRepitions(TreeNode tn){
System.out.println("Hashing the tree");
ArrayList<ArrayList<Integer>> result = new ArrayList<ArrayList<Integer>>();
hashTree(tn);
System.out.println("Finished hashing the tree");
System.out.println("Check repitions in the lists and check if duplicate trees");
Set<Integer> keys = map.keySet();
for(Integer key: keys){
ArrayList<TreeNode> repeatedNodes = map.get(key);
ArrayList<ArrayList<Integer>> inorderScan = new ArrayList<ArrayList<Integer>>();
for(TreeNode node : repeatedNodes){
ArrayList<Integer> newScan = new ArrayList<Integer>();
getInorderScan(node,newScan);
if(!isContained(inorderScan,newScan)){
inorderScan.add(newScan);
}else{
result.add(newScan);
}
}
}
return result;
}
public static void main(String[] args){
TreeNode n2 = new TreeNode(2);
TreeNode n4 = new TreeNode(4);
TreeNode n5 = new TreeNode(5);
TreeNode n3 = new TreeNode(3);
TreeNode nr2 = new TreeNode(2);
TreeNode nr4 = new TreeNode(4);
TreeNode nr5 = new TreeNode(5);
TreeNode nr3 = new TreeNode(3);
n2.left=n4;
n4.left=n5;
n2.right=n3;
n3.right=nr2;
n3.left=nr4;
nr4.left=nr5;
System.out.println("The repetitions are:");
System.out.println(new TreeRep().findRepitions(n2));
}
}
create a hash list. then on each list that has duplicates, run inorder and check if the result is the same
package helloworld;
import java.awt.List;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import javax.swing.text.html.HTMLDocument.Iterator;
public class TreeRep{
public static class TreeNode{
TreeNode left;
TreeNode right;
int val;
public TreeNode(int val){this.val = val;}
}
private HashMap<Integer, ArrayList<TreeNode>> map = new HashMap<Integer, ArrayList<TreeNode>>();
private void hashTree(TreeNode tn){
if(tn == null){
return;
}
hashTree(tn.left);
if(!map.containsKey(tn.val)){
map.put(tn.val, new ArrayList<TreeNode>());
}
map.get(tn.val).add(tn);
hashTree(tn.right);
}
private ArrayList<Integer> getInorderScan(TreeNode tn, ArrayList<Integer> result){
if(tn == null)
return result;
getInorderScan(tn.left,result);
result.add(tn.val);
getInorderScan(tn.right,result);
return result;
}
private boolean isContained(ArrayList<ArrayList<Integer>> set, ArrayList<Integer> a){
if(a == null){
return false;
}
for(ArrayList<Integer> arr : set){
if(a.equals(arr))
return true;
}
return false;
}
public ArrayList<ArrayList<Integer>> findRepitions(TreeNode tn){
System.out.println("Hashing the tree");
ArrayList<ArrayList<Integer>> result = new ArrayList<ArrayList<Integer>>();
hashTree(tn);
System.out.println("Finished hashing the tree");
System.out.println("Check repitions in the lists and check if duplicate trees");
Set<Integer> keys = map.keySet();
for(Integer key: keys){
ArrayList<TreeNode> repeatedNodes = map.get(key);
ArrayList<ArrayList<Integer>> inorderScan = new ArrayList<ArrayList<Integer>>();
for(TreeNode node : repeatedNodes){
ArrayList<Integer> newScan = new ArrayList<Integer>();
getInorderScan(node,newScan);
if(!isContained(inorderScan,newScan)){
inorderScan.add(newScan);
}else{
result.add(newScan);
}
}
}
return result;
}
public static void main(String[] args){
TreeNode n2 = new TreeNode(2);
TreeNode n4 = new TreeNode(4);
TreeNode n5 = new TreeNode(5);
TreeNode n3 = new TreeNode(3);
TreeNode nr2 = new TreeNode(2);
TreeNode nr4 = new TreeNode(4);
TreeNode nr5 = new TreeNode(5);
TreeNode nr3 = new TreeNode(3);
n2.left=n4;
n4.left=n5;
n2.right=n3;
n3.right=nr2;
n3.left=nr4;
nr4.left=nr5;
System.out.println("The repetitions are:");
System.out.println(new TreeRep().findRepitions(n2));
}
}
A c++ solution. running at O(nlogn).
First step - convert all of the sub trees into strings, and them to a vector. The hack is that i'm building the strings with a recursive function, because each sub tree is subTreeLeft + node + subTreeRight. It tooks O(n).
Second Step - Sort the vector, then run over the vector and find duplicate strings and print them. O(nlogn).
// Example program
#include <iostream>
#include <string>
#include <vector>
#include <algorithm> // std::sort
struct Node {
int val;
Node* r;
Node* l;
Node (int x) {
val = x;
r=NULL;
l=NULL;
}
};
std::vector<std::string> all;
std::string getStringForNodeAddToVec(Node* head) {
std::string retVal;
if (head != NULL) {
retVal = getStringForNodeAddToVec(head->l) + ' ' + std::to_string(head->val) + ' ' + getStringForNodeAddToVec(head->r);
all.push_back(retVal);
//std::cout << retVal << "\n";
}
return retVal;
}
void DuplicateSubTrees(Node* head) {
getStringForNodeAddToVec(head);
std::sort(all.begin(), all.end() );
std::string prev;
int t=0;
for (unsigned int i=0; i< all.size(); i++ ) {
if (prev != all[i] ) {
t=0;
}
else {
if (t == 0 ) {
std::cout << all[i] << "\n";
t++;
}
}
prev = all[i];
}
}
int main()
{
Node a(1);
Node b(2);
Node c(3);
Node d(4);
Node e(2);
Node f(4);
Node g(4);
a.l=&b;
a.r=&c;
b.l=&d;
c.l=&e;
c.r=&f;
e.l=&g;
DuplicateSubTrees(&a);
}
public static List<TreeNode> findDuplicateTree(TreeNode root){
List<TreeNode> res = new ArrayList<>();
findDuplicateTreeHelper(res,new HashSet<>(),root,new StringBuilder());
return res;
}
public static String findDuplicateTreeHelper(List<TreeNode> res,Set<String> set,TreeNode root,
StringBuilder stribuild){
if(root==null){
stribuild.append("()");
return "()";
}
String str = "(" + root.val;
str+=findDuplicateTreeHelper(res,set,root.left,stribuild);
str+=findDuplicateTreeHelper(res,set,root.right,stribuild);
str+=")";
if(!set.contains(str) && stribuild.indexOf(str)>=0){
res.add(root);
set.add(str);
}
stribuild.append(str);
return str;
}
Interesting problem, can be solved with post-order traversal. Traverse left child, then right child. Make each child traversal return a "signature". This signature identifies the tree rooted at the left and right children uniquely.
Now build current node's signature by doing:
The "identifier" tags are there to help separate out the values so that the string hash doesn't assign the same signature to 2 different trees (e.g. a signature "33344" could be left_signature="33", curr.val="3", right_signature="44" or left_signature="3", curr.val="334", right_signature="4")
Once we have this signature, simply check in a global hash set if this signature occurred before, if it did, return true (this solves the original problem). For fetching the dupes, we can do the following (Java):
Time complexity: O(n), space-complexity: O(n)
- Killedsteel March 03, 2017