showell30@yahoo.com
BAN USER- 0of 0 votes
AnswersCode up a simple Bloom Filter for four letter words. Assume all words are lowercase, exactly four letters long, and only use the letters a to z. An example set of test words might be able, band, bend, card, darn, form, grab, harm, look, make, mate, peer, and team, but assume the actual list of words is much bigger.
- showell30@yahoo.com in United States| Report Duplicate | Flag | PURGE
Amazon Software Engineer / Developer Data Structures
A good permutation is composed by pulling out an element from the list and then finding all "good" permutations of the remaining elements that don't begin with the pulled out element's successor. This is fairly easy to code up recursively, and the recursive solution prunes bad permutations effectively. A more naive solution would be to just generate all permutations and filter out bad permutations.
def good_permute(arr, bad_head = None):
if len(arr) == 1:
if arr[0] == bad_head:
return []
return [arr]
result = []
for i in range(len(arr)):
v = arr[i]
if v == bad_head: continue
others = arr[:i] + arr[i+1:]
for p2 in good_permute(others, v+1):
result.append([v] + p2)
return result
print good_permute([1,2,3,4])
Solving the general case here is almost as easy as solving the specific case:
# Find the 2nd biggest by solving the general
# problem of finding the Nth largest element in a tree.
# The method is efficient, because it only traverses
# the left side of the tree when the right side
# has fewer than N-1 elements.
def test():
tree123 = ((None, 1, None), 2, (None, 3, None))
tree567 = ((None, 5, None), 6, (None, 7, None))
tree = (tree123, 4, tree567)
assert 6 == nth_element(tree, 2, 7) # second biggest
assert 2 == nth_element(tree, 5, 6) # fifth biggest <= 6
def nth_element(tree, n, max):
# Our inner function tries to return the nth largest
# element <= max, but the tree might be too small, so we
# also indicate the number of elements found.
def nth_element_and_num_checked(tree, n):
if tree is None:
return None, 0
left, val, right = tree
if val > max:
n_right = 0
else:
# For a big tree, we just recurse on the right tree.
r_nth, n_right = nth_element_and_num_checked(right, n)
if n_right == n:
return r_nth, n
if val <= max:
n_right += 1
# If the right side has exactly n-1 elements, the root
# value is the solution.
if n_right == n:
return val, n
# When we recurse to the left, we need to find a higher
# ranking element than nth, depending on how many nodes
# were on the right side.
n_left = n - n_right
result, n_left = nth_element_and_num_checked(left, n_left)
return result, n_left + n_right
result, num_checked = nth_element_and_num_checked(tree, n)
if num_checked == n:
return result
else:
raise Exception("tree too small")
test()
This is a non-recursive solution that uses backtracking. It simulates what most humans would do in a maze. You leave breadcrumbs, go as far as you can, and when you hit a dead end, pick up the last breadcrumb and try a new direction.
# This is an iterative solution for traversing a maze. We try
# to extend our path as long as we can, but then backtrack
# when we hit a dead end or solution.
def solve(maze):
num_solutions = 0
traversal = {}
traversal['directions'] = ''
traversal['point'] = (0, 0)
endpoint = (len(maze)-1, len(maze[0])-1)
forbidden = ''
while True:
new_traversal = extend_traversal(maze, traversal, forbidden)
if not new_traversal:
if traversal['directions'] == '':
print 'NO MORE OPTIONS'
break
traversal, forbidden = backtrack(maze, traversal)
continue
forbidden = ''
traversal = new_traversal
r, c = traversal['point']
if (r, c) == endpoint:
print 'solution:', traversal['directions']
num_solutions += 1
return num_solutions
def extend_traversal(maze, t, forbidden):
new_t = {}
r, c = t['point']
if 'D' not in forbidden:
if can_go_down(maze, r, c):
new_t['directions'] = t['directions'] + 'D'
new_t['point'] = (r+1, c)
maze[r+1][c] = 2
return new_t
if 'U' not in forbidden:
if can_go_up(maze, r, c):
new_t['directions'] = t['directions'] + 'U'
new_t['point'] = (r-1, c)
maze[r-1][c] = 2
return new_t
if 'R' not in forbidden:
if can_go_right(maze, r, c):
new_t['directions'] = t['directions'] + 'R'
new_t['point'] = (r, c+1)
maze[r][c+1] = 2
return new_t
if 'L' not in forbidden:
if can_go_left(maze, r, c):
new_t['directions'] = t['directions'] + 'L'
new_t['point'] = (r, c-1)
maze[r][c-1] = 2
return new_t
return None
def backtrack(maze, t):
new_t = {}
last_dir = t['directions'][-1]
r, c = t['point']
maze[r][c] = 0
new_t['directions'] = t['directions'][:-1]
if last_dir == 'D':
new_t['point'] = (r-1, c)
return new_t, 'D'
if last_dir == 'U':
new_t['point'] = (r+1, c)
return new_t, 'DU'
if last_dir == 'R':
new_t['point'] = (r, c-1)
return new_t, 'DUR'
if last_dir == 'L':
new_t['point'] = (r, c+1)
return new_t, 'DURL'
def can_go_left(maze, r, c):
return c-1 >= 0 and maze[r][c-1] == 0
def can_go_right(maze, r, c):
return c+1 < len(maze[0]) and maze[r][c+1] == 0
def can_go_down(maze, r, c):
return r+1 < len(maze) and maze[r+1][c] == 0
def can_go_up(maze, r, c):
return r-1 >= 0 and maze[r-1][c] == 0
maze = [
[0, 1, 0, 0, 0],
[0, 1, 0, 1, 0],
[0, 0, 0, 1, 0],
[0, 1, 0, 1, 0],
[0, 0, 0, 1, 0],
]
num_solutions = solve(maze)
print num_solutions
I took the challenge here as being able to do a generic sort in place. The qsort() function doesn't need to know anything about the indexing scheme; it works using get_elem() and put_elem() passed in.
def qsort(get_elem, put_elem, lo, hi, reverse):
def partition(lo, hi):
pivot = get_elem(hi)
i = lo
mid = lo
for i in range(lo, hi):
less_than = get_elem(i) <= pivot
if reverse:
less_than = not less_than
if less_than:
if mid < i:
tmp = get_elem(i)
put_elem(i, get_elem(mid))
put_elem(mid, tmp)
mid += 1
if mid < hi:
tmp = get_elem(mid)
put_elem(mid, get_elem(hi))
put_elem(hi, tmp)
return mid
def _sort(lo, hi):
if lo >= hi:
return
mid = partition(lo, hi)
_sort(lo, mid - 1)
_sort(mid + 1, hi)
_sort(lo, hi)
def even_odd_sort(lst):
def get_even_elem(i):
return lst[i*2]
def put_even_elem(i, v):
lst[i*2] = v
def get_odd_elem(i):
return lst[i*2+1]
def put_odd_elem(i, v):
lst[i*2 + 1] = v
qsort(get_even_elem, put_even_elem, 0, (len(lst)-1) / 2, False)
qsort(get_odd_elem, put_odd_elem, 0, len(lst) / 2 - 1, True)
lst = [4, 1, 2, 3, 0, 5, 6, 7]
even_odd_sort(lst)
print lst
For the first element in the list, solve the problem recursively for each symbol. For +, recursively look for a way to combine all the remaining elements to equal the target minus the current element. For *, recursively look for a way to comine all the elements to equal the target divided by the current element. For -, look for target+x as well x - target.
If pulling out the first element doesn't yield a solution, move on the second, and so on.
This can be solved iteratively by putting players in line one by one and bubbling them up in the line past runners they beat.
Put A in line.
Put B at the back of the line, and then keep moving him one space forward as long as the person in front lost the race to him. Let's say B beat A. The line is now BA.
Put C at the back of the line. Let's say C lost to A. The line is now BAC.
Last, put D at the back of the line. Let's say D beat A and C, but lost to B. The line is now BDAC, and we're done.
A linked list would be a good data structure for this.
This solution seems like the simplest approach, although it wouldn't shock me if there were more efficient approaches.
Just to try out one case, consider a scenario where a is faster than b is faster than c is faster than d, except that a has a bad race against d, and d wins.
Apply quicksort with "a" as pivot: dabc
Apply quicksort with "b" as pivot: abcd
Apply quicksort with "c" as pivot: abcd
Apply quicksort with "d" as pivot: bcda
All solutions are valid.
The arrangement abc satisifies all the conditions. Even though c beat a, the person to her immediate left, b, beat c. Other solutions: bca, cab.
- showell30@yahoo.com February 18, 2013This is an N-squared solution. Every element in the matrix gets visited a maximum of five times.
# In this solution as soon as we find a 1, we replace
# all the elements in the shape with 2 to avoid double
# counting the 1s. Otherwise, it's simple iteration.
def count_shapes(m):
num_shapes = 0
for r in range(len(m)):
for c in range(len(m[0])):
if m[r][c] == 1:
fill_shape_with_twos(m, r, c)
num_shapes += 1
# restore the original values
for r in range(len(m)):
for c in range(len(m[0])):
m[r][c] = m[r][c] / 2
return num_shapes
def fill_shape_with_twos(m, r, c):
m[r][c] = 2
if r > 0 and m[r-1][c] == 1:
fill_shape_with_twos(m, r-1, c)
if r + 1 < len(m) and m[r+1][c] == 1:
fill_shape_with_twos(m, r+1, c)
if c > 0 and m[r][c-1] == 1:
fill_shape_with_twos(m, r, c-1)
if c + 1 < len(m[0]) and m[r][c+1] == 1:
fill_shape_with_twos(m, r, c+1)
test_matrix = [
[1, 1, 0, 0, 1],
[1, 0, 0, 1, 0],
[1, 1, 0, 1, 0],
[0, 0, 1, 0, 0],
]
test_matrix2 = [
[1, 1, 0, 0, 1],
[1, 0, 0, 1, 0],
[1, 1, 0, 1, 0],
[0, 0, 1, 1, 0],
]
test_matrix3 = [
[1, 1, 1, 1, 1],
[1, 1, 0, 1, 1],
[1, 1, 0, 1, 1],
[1, 1, 1, 1, 1],
]
print count_shapes(test_matrix)
print count_shapes(test_matrix2)
print count_shapes(test_matrix3)
No worries. pop() is somewhat idiomatic in Python, but for folks coming from other languages, I don't think I express my intent in the code very well. As far as I can tell, my algorithm works correctly and efficiently, but I bet the expressiveness of the code could be much improved. Gotta run...
- showell30@yahoo.com October 31, 2012Biswajit, I don't think you're reasoning about my code correctly.
I added the the test case for "DDIIDI", and it produced the answer [3, 2, 1, 4, 6, 5, 7]. Is there a better solution?
Do you understand what pop() does in Python? It removes an item from the list, preventing the exact non-problem problem that you describe.
def num_ways_to_score(target_score, scores):
ways_to_score = 0
counts = [0] * len(scores)
while True:
score = sum([scores[i] * count for i, count in enumerate(counts)])
if score == target_score:
ways_to_score += 1
print counts
i = 0
if score > target_score:
for i, cnt in enumerate(counts):
if cnt > 0:
counts[i] = 0
i += 1
break
if i == len(counts):
break
if i == 0:
score = sum([scores[j] * count for j, count in enumerate(counts)])
score -= scores[0] * counts[0]
diff = target_score - score
jump = diff / scores[0]
if jump == 0:
jump = 1
else:
jump = 1
counts[i] += jump
return ways_to_score
print "\nBasketball"
scores = [1,2,3]
score = 8
solution = num_ways_to_score(score, scores)
print score, solution
Speed isn't a huge issue for the stated problem, since basketball scores are typically pretty small, but you are correct that this algorithm can benefit from simple memoization and run way faster.
You can memoize results with a decorator like the following:
def memoize(f):
dct = {}
def _f(score, scores):
key = dict(
score = score,
scores = scores
)
key = json.dumps(key)
if key in dct:
return dct[key]
val = f(score, scores)
dct[key] = val
return val
return _f
@memoize
def num_ways_to_score(score, scores):
...
This is a simple recursive algorithm that iterates through candidate solutions and terminates fairly quickly when a dead end is reached.
def min_solution(s):
nums = range(1, len(s) + 2)
def _min_solution(legal, nums, s):
if s == '':
if legal(nums[0]):
return nums
else:
return None
start = 0
for c in s:
if c == 'D':
start += 1
else:
break
for i in range(start, len(nums)):
if legal(nums[i]):
new_nums = nums[:]
val = new_nums.pop(i)
if s[0] == 'D':
new_legal = lambda n: n < val
else:
new_legal = lambda n: n > val
rest = _min_solution(new_legal, new_nums, s[1:])
if rest:
return [val] + rest
return None
always = lambda n: True
solution = _min_solution(always, nums, s)
return solution
tests = [
"",
"D",
"I",
"DD",
"II",
"DI",
"ID",
"DID",
"IDI",
"DDD",
"IDIDIDI",
"IIIIIIIIIII",
"DDDDDDDDDDI",
"IDDDDDDDDDD",
"DIDDDDDDDDD",
]
for s in tests:
print repr(s), min_solution(s)
This uses binary search to perform the check in log n time, but it depends on multiple-bit bit-shifts to be constant time. For this time of optimization, O() time is much less important than actually understanding how fast a processor performs specific operations, so take my solution with a huge grain of salt.
def msb(n):
def _msb(n, num_bits):
if num_bits == 1:
if n >= 1:
return 1
else:
return 0
m = num_bits / 2
if n >= 1 << m:
return m + _msb(n >> m, num_bits - m)
else:
return _msb(n, m)
return _msb(n, 31)
for i in range(100):
print i, msb(i)
Read the question more carefully. You need to determine the maximum number of swaps.
- showell30@yahoo.com October 28, 2012Also, one minor quibble. Why do say "< 2" when "<= 1" would express your intentions much better?
- showell30@yahoo.com October 28, 2012If I were interviewing you, I would first applaud you for an elegant solution that works. (I did test it--it does work.)
Then I would ask you to dig deeper.
Can you optimize this for the specific scoring system of basketball?
Can you generalize it to other scoring systems?
See my two Python examples for different takes on this.
This problem is analogous to the making-change problem. Do I get bonus points for generalizing? :)
def num_ways_to_score(score, scores):
if score < 0:
return 0
if not scores:
if score == 0:
return 1
else:
return 0
head = scores[0]
cnt = num_ways_to_score(score - head, scores)
cnt += num_ways_to_score(score, scores[1:])
return cnt
print "\nBasketball"
scores = [3,2,1]
for score in range(20):
print score, num_ways_to_score(score, scores)
# American football actually has a additional constraint
# that we don't capture--you cannot score a single point
# without first scoring a touchdown (6 points).
print "\nAmerican football"
scores = [6,3,2,1]
for score in range(20):
print score, num_ways_to_score(score, scores)
The logic behind f_2_1 is that there are approximately n/2 different ways to score n points when you have 2-pointers and 1-pointers. The distinct cases are all driven by how many 2-pointers the team scores. To give an example, if a team scored 5 points with no 3-pointers, then the team either did 0*2 + 5*1 or 1*2 + 3*1 or 2*2 + 1.
The logic behind f_3_2_1 is that we enumerate all the possibilities of scoring 3-pointers, and then for each number of 3-pointers, we count the different number of ways that the team would score the remainder of their points using just 1s and 2s.
We need to keep track of strings we already encountered, so we need a "set". If we can do this in-memory, then we use our language's native set implementation, if it has one (e.g. Python has sets), or, if not, we use a hash where the strings are the keys and the values are some set value like True or 1.
The basic algorithm is this:
visited = set()
cnt = 0
for each string s:
if s not in visited:
cnt += 1
visited.add(s)
If memory is a constraint, use an RDBMS to implement your set of strings. Create a table with a single column called string, and index on that column. Use SELECT to determine if a string has been encountered, and use INSERT to add a string to the table.
- showell30@yahoo.com October 28, 2012def stream(n):
for i in range(n):
yield 100 * (i + 1)
def find_mid_value(m, data):
# Take advantage of the fact that
# we can overwrite early values, so
# we use an array that is just
# a little bigger than m / 4.
max_mid = (m+1) / 2 - 1
arr_size = (max_mid/2) + 2
arr = []
n = 0
for value in data:
if n <= max_mid:
if n < arr_size:
arr.append(value)
else:
arr[n - arr_size] = value
n += 1
mid_pos = (n + 1) / 2 - 1
return arr[mid_pos % arr_size]
for n in [1, 2, 3, 4, 5]:
for m in range(n, 20):
data = stream(n)
mid = find_mid_value(m, data)
print n, mid, m
The maximum number of integers that you will need to store is M/4 + 1, which happens when you have encountered M/2 integers. To make this concrete, let M = 8. When you encounter the 4th element, the possible values for N are 4, 5, 6, 7, and 8, so the possible values for N/2 are 2, 3, and 4, so you need to hold on to the 2nd, 3rd, and 4th elements.
From a pragmatic standpoint, one of the other posters pointed out that you might as well pre-allocate an array of M/2 integers, to avoid having to resize the array as N increases and early values are no longer relevant. Keeping things simple is always sound advice, and this program is gonna require linear space for sure.
If you really want to save space, I'd recommend a circular queue. A circular queue is a perfect data structure for a queue that you know will never exceed a certain capacity, which is exactly in the wheelhouse of this problem.
def f_3_2_1(n):
cnt = 0
i = 0
while i <= n:
cnt += f_2_1(n - i)
i += 3
return cnt
def f_2_1(n):
return n / 2 + 1
def test():
for i in range(20):
print i, f_3_2_1(i)
test()
The actual number of swaps is determined by the first element in sequence that is out of the order in the unsorted list.
def f(arr):
# O(n log n) time to sort the list
n = len(arr)
tuples = [(x, i) for i, x in enumerate(arr)]
sorted_tuples = sorted(tuples)
# linear time to count the swaps
positions = [0] * n
for i, tuple in enumerate(sorted_tuples):
val, pos = tuple
positions[i] = pos
def count_swaps():
for i in range(n - 1):
if positions[i+1] < positions[i]:
return n - (i + 1)
return 0
num_swaps = count_swaps()
print
print 'arr', arr
print num_swaps, 'swaps'
# actually performing the swaps is N-squared
if num_swaps:
print 'start:', arr
which_elem = n - num_swaps
for i in range(which_elem, n):
pos = positions[i]
if pos < n - 1:
val = arr.pop(pos)
arr.append(val)
print 'swap: ', arr
for j in range(n):
if positions[j] > pos:
positions[j] -= 1
arr = [2, 3, 5, 7, 11, 13]
f(arr)
arr = list(reversed(arr))
f(arr)
import random
for i in range(3):
random.shuffle(arr)
f(arr)
54321
54312
54123
51234
12345
Python version of the O(N) algorithm:
- showell30@yahoo.com February 19, 2013