anonymous
A concurrent hash map using multiple locks. I'm using a global array for simplicity and omitted the delete operation. A real hash map should not be like this.
#include <iostream>
#include <vector>
#include <mutex>
#include <string>
#include <algorithm>
using namespace std;
// Node of a singly linked chain; each hash bucket stores a list of these.
struct node {
// key looked up in the map
int key;
// value stored for the key
string val;
// next node in the same bucket's chain
node* next;
};
// Let's assume this hash function is a really good one :)
// Maps an int key onto one of the 1001 bucket indices.
// (Pretend this is a high-quality hash function.)
size_t my_hash(int val) {
    const unsigned bucket_count = 1001;
    return static_cast<unsigned>(val) % bucket_count;
}
// Four stripe locks; bucket index / 250 selects the lock (lock striping).
mutex m[4];
// 1001 hash buckets, each the head of a node chain; guarded by m[...].
node* my_map[1001];
// Let's assume that this is an internal API
static node* hash_search_impl(int key, unsigned h) {
auto c = my_map[h];
while (c) {
if (c->key == key) {
return c;
}
c = c->next;
}
return nullptr;
}
// Returns the value stored under `key`, or an empty string when absent.
// Holds the stripe lock covering the key's bucket for the lookup only.
string hash_get(int key) {
    auto h = my_hash(key);
    // Bug fix: std::min cannot deduce a common type from size_t and
    // unsigned on LP64 platforms (h is size_t, 3u is unsigned int);
    // pin the template argument to size_t.
    size_t mi = min<size_t>(h / 250, 3);
    {
        lock_guard<mutex> lck(m[mi]);
        auto n = hash_search_impl(key, h);
        if (n) {
            return n->val;
        }
    }
    return string();
}
bool hash_put(int key, const string& val) {
auto h = my_hash(key);
int mi = min(h / 250, (unsigned)3);
{
unique_lock<mutex> lck(m[mi]);
auto n = hash_search_impl(key, h);
if (n) {
return false;
}
n = new node{ key, val, my_map[h] };
my_map[h] = n;
}
return true;
}
To avoid locks, we need to use atomic compare and swap operation. For example,
#include <iostream>
#include <string>
#include <algorithm>
#include <atomic>
using namespace std;
// Chain node for the lock-free map below: key/value plus next-in-bucket.
struct node {
int key;
string val;
node* next;
};
// Hashes an int into one of 1001 buckets.
size_t my_hash(int val) {
    unsigned u = (unsigned)val;  // well-defined wraparound for negatives
    return u % 1001;
}
// 1001 lock-free buckets; each head pointer is only updated with CAS.
atomic<node*> my_map[1001];
// Lock-free lookup: snapshot the bucket head, then walk the chain.
// Returns the node holding `key`, or nullptr when absent.
node* hash_search(int key) {
    for (auto cur = my_map[my_hash(key)].load(); cur; cur = cur->next) {
        if (cur->key == key) {
            return cur;
        }
    }
    return nullptr;
}
// Returns the value for `key`, or an empty string when not present.
string hash_get(int key) {
    auto found = hash_search(key);
    return found ? found->val : string();
}
// Lock-free insert of (key, val); returns false if the key is already
// present. Retries the head CAS until it succeeds.
bool hash_put(int key, const string& val) {
    auto n = new node{ key, val, nullptr };
    auto h = my_hash(key);
    do {
        // Another thread may have inserted the same key concurrently.
        auto nn = hash_search(key);
        if (nn) {
            delete n;
            return false;
        }
        n->next = my_map[h].load();
        // Bug fix: std::atomic<node*> has no operator->; the CAS must be
        // called on the atomic itself with '.', not '->'.
        // NOTE(review): a racing insert of the same key between the search
        // above and this CAS can still create duplicates — confirm this is
        // acceptable for the intended use.
    } while (!my_map[h].compare_exchange_strong(n->next, n));
    return true;
}
This is a naive implementation, but can show a basic idea on how to implement lock-free hash map. If we need to support remove operation, then algorithm will be much more complex.
- anonymous, December 18, 2014

If we need to print out only the maximum weight, the algorithm can be simplified as follows.
// Bottom-up DP over a triangle: best[i][j] is the maximum path weight from
// cell (i, j) to the base row; the answer is best[0][0].
auto W = [](const vector<vector<int>>& data) {
    int N = data.size();
    vector<vector<int>> best(N + 1, vector<int>(N + 1, 0));
    for (int row = N - 1; row >= 0; --row) {
        for (int col = row; col >= 0; --col) {
            best[row][col] = data[row][col] + max(best[row + 1][col], best[row + 1][col + 1]);
        }
    }
    return best[0][0];
};
If we should print out the path, then R matrix may store the max cell too.
- anonymous, December 18, 2014

If only 256 characters are allowed, I'd like to implement it like this. In C++, vector<bool> is implemented using a bitmask, so the additional space will be 32 bytes plus the vector overhead. I think there is a similar data structure in Java.
// Removes duplicate characters from `str` in place, keeping the first
// occurrence of each character; the string shrinks to the unique prefix.
void remove_dup(string& str) {
    if (str.empty()) {
        return;
    }
    vector<bool> seen(256, false);
    size_t next = 0; // next position to receive a unique char
    for (size_t i = 0; i < str.length(); ++i) {
        // Bug fix: index with unsigned char — plain char may be negative
        // for bytes >= 0x80, which would index out of bounds.
        const unsigned char c = static_cast<unsigned char>(str[i]);
        if (!seen[c]) {
            seen[c] = true;
            str[next++] = str[i]; // in-place compaction
        }
    }
    str.resize(next);
}
Same algorithm with C++ code.
#include <iostream>
#include <vector>
#include <functional>
#include <algorithm>
#include <cassert>
using namespace std;
// Matches bolts to nuts (classic "nuts and bolts" problem): partitions both
// arrays against each other quicksort-style so equal pairs line up at the
// same indices. Returns false when the two multisets cannot be matched.
auto sort_bolts_and_nuts = [](vector<int>& bolts, vector<int>& nuts) {
if (bolts.size() != nuts.size()) {
return false;
}
// Quicksort-style partition of data[left..right] around the element equal
// to `pivot`; returns the pivot's final index, or -1 if pivot is absent.
auto partition = [](vector<int>& data, int left, int right, int pivot) {
int pivot_idx = -1;
int i = left;
while (i <= right) {
// Park the pivot at the right end the first time it is met; i is not
// advanced so the swapped-in element is examined next.
if (pivot_idx < 0 && data[i] == pivot) {
swap(data[i], data[right]);
pivot_idx = right;
continue;
}
if (data[i] < pivot) {
swap(data[left++], data[i]);
}
++i;
}
// Finally move the pivot between the smaller and larger elements.
return pivot_idx >= 0 ? swap(data[left], data[pivot_idx]), left : -1;
};
// Recursively partition nuts by a bolt, then bolts by the matched nut;
// both partitions must place their pivot at the same index.
function<bool(int, int)> impl = [&](int left, int right) {
if (left > right) {
return true;
}
if (left == right) {
return bolts[left] == nuts[left];
}
auto mid = left + (right - left) / 2;
auto r = partition(nuts, left, right, bolts[mid]);
if (r < 0) {
return false;
}
if (r != partition(bolts, left, right, nuts[r])) {
return false;
}
if (!impl(left, r - 1)) {
return false;
}
return impl(r + 1, right);
};
return impl(0, bolts.size() - 1);
};
// Streams the elements separated (and followed) by single spaces.
ostream& operator<<(ostream& os, const vector<int>& data) {
    for (size_t i = 0; i < data.size(); ++i) {
        os << data[i] << " ";
    }
    return os;
}
// Demo: sorts a matchable bolts/nuts pair, then verifies that an
// unmatchable pair (nut 5 has no bolt) is rejected.
int main() {
vector<int> bolts = { 8, 4, 3, 9, 1 };
vector<int> nuts = { 4, 9, 8, 1, 3 };
auto is_matched = sort_bolts_and_nuts(bolts, nuts);
// after a successful run both sequences are ordered identically
cout << bolts << endl;
cout << nuts << endl;
assert(is_matched);
bolts = { 8, 4, 3, 9, 1 };
nuts = { 5, 9, 8, 1, 3 };
assert(!sort_bolts_and_nuts(bolts, nuts));
return 0;
}
An algorithm using BFS.
#include <iostream>
#include <vector>
#include <string>
#include <functional>
#include <utility>
#include <queue>
#include <unordered_map>
#include <algorithm>
using namespace std;
// 2-D board coordinate used by the BFS below.
struct pos {
int x, y;
};
namespace std {
// hash function for unordered_map<pos,pos>
template <>
struct hash<pos> : private hash<long long> {
size_t operator()(const pos& val) const {
return hash<long long>::operator()(((long long)val.x << 32) | val.y);
}
};
// equality function for unordered_map<pos,pos>
bool operator==(const pos& pos1, const pos& pos2) {
return pos1.x == pos2.x && pos1.y == pos2.y;
}
bool operator!=(const pos& pos1, const pos& pos2) {
return !(pos1 == pos2);
}
ostream& operator<<(ostream& os, const pos& val) {
os << "(" << val.x << "," << val.y << ")";
return os;
}
}
// BFS shortest path on an N x N board from spos to epos using the four
// axis-aligned moves. Returns the path including both endpoints, or an
// empty vector when epos is unreachable.
auto find_shortest_path = [](int N, pos spos, pos epos) {
vector<vector<bool>> visited(N, vector<bool>(N, false));
// Here I assume horse can move up/down/left/right at each position
auto get_next_pos = [&](pos cpos) {
vector<pos> npos;
if (cpos.x-1 >= 0 && !visited[cpos.x-1][cpos.y]) {
npos.push_back({cpos.x-1, cpos.y});
}
if (cpos.x+1 < N && !visited[cpos.x+1][cpos.y]) {
npos.push_back({cpos.x+1, cpos.y});
}
if (cpos.y-1 >= 0 && !visited[cpos.x][cpos.y-1]) {
npos.push_back({cpos.x, cpos.y-1});
}
if (cpos.y+1 < N && !visited[cpos.x][cpos.y+1]) {
npos.push_back({cpos.x, cpos.y+1});
}
return npos;
};
auto get_path = [&]() {
queue<pos> q;
q.push(spos);
visited[spos.x][spos.y] = true;
// map to save path: path[child] == parent in the BFS tree
unordered_map<pos,pos> path;
while (!q.empty()) {
auto cpos = q.front();
// arrive end position
if (cpos == epos) {
vector<pos> ret;
ret.push_back(cpos);
// path can be composed by following a path
// from end position to start position
while (cpos != spos) {
auto ppos = path[cpos];
ret.push_back(ppos);
cpos = ppos;
}
// finally reverse it
reverse(ret.begin(), ret.end());
return ret;
}
q.pop();
// mark neighbors visited at enqueue time so each cell enters the
// queue at most once
for (auto npos : get_next_pos(cpos)) {
q.push(npos);
visited[npos.x][npos.y] = true;
path[npos] = cpos;
}
}
return vector<pos>();
};
return get_path();
};
// Demo: prints one shortest path across a 5x5 board from (0,0) to (4,4).
int main() {
for (auto pos : find_shortest_path(5, {0,0}, {4,4})) {
cout << pos << " ";
}
cout << endl;
return 0;
}
I'd like to solve the problem like this. O(N) algorithm. This algorithm is using the fact that if one edit is made, then the remaining string should be same.
#include <cstring>
#include <cassert>
using namespace std;
// Returns true iff s and t are exactly one edit (insert, delete, or
// replace) apart. Uses the fact that after the single differing position,
// the remaining characters must match exactly.
bool one_edit_apart(const char* s, const char* t) {
    size_t n = strlen(s), m = strlen(t);
    if (m > n) {
        // make s the string that is not shorter
        swap(n, m);
        swap(s, t);
    }
    while (*t) {
        if (*s != *t) {
            return n == m ? strcmp(s + 1, t + 1) == 0  // replacement
                          : strcmp(s + 1, t) == 0;     // deletion from s
        }
        ++s;
        ++t;
    }
    // t is a prefix of s: one edit apart only for one extra trailing char
    return n - m == 1;
}
C++11 sieve of eratosthenes
// Prints every prime in [2, n] using a sieve of Eratosthenes, followed by
// a newline; prints nothing when n < 2.
auto print_primes2 = [](int n) {
    if (n < 2) {
        return;
    }
    // composite[k] refers to the number k + 1
    vector<bool> composite(n);
    for (int num = 2; num <= n; ++num) {
        if (composite[num - 1]) {
            continue;
        }
        cout << num << " ";
        // cross out all larger multiples of this prime
        for (int multiple = num * 2; multiple <= n; multiple += num) {
            composite[multiple - 1] = true;
        }
    }
    cout << endl;
};
Another O(N) approach using hash map and bitset. While scanning the each element in each array, turn on the corresponding bit position in the hash map's value, bitset. If all bits are on, that element exist in all arrays.
// Returns the values present in every input array (any order).
// Generalized: the original hard-coded 5 arrays (out-of-bounds for fewer,
// silently ignoring extras); this version works for any data.size().
// Duplicates within one array are counted once via the last-seen marker.
auto find_commons2 = [](const vector<vector<int>>& data) {
    const size_t k = data.size();
    // last: 1-based index of the last array that counted this value;
    // count: number of distinct arrays containing the value so far.
    struct mark { size_t last = 0; size_t count = 0; };
    unordered_map<int, mark> seen;
    for (size_t i = 0; i < k; ++i) {
        for (int v : data[i]) {
            auto& e = seen[v];
            if (e.last != i + 1) {  // first time v appears in array i
                e.last = i + 1;
                ++e.count;
            }
        }
    }
    vector<int> ret;
    for (const auto& p : seen) {
        if (p.second.count == k) {
            ret.push_back(p.first);
        }
    }
    return ret;
};
To make a class behave like a pointer, we need to overload two operators: * and ->. If the interviewer mean reference-counted smart pointer(shared_ptr in C++11), then we need to implement reference counting whenever a copy is made(copy ctor, copy assignment operator) or destroyed(dtor).
If the interviewer mean uniquely-owned smart pointer(unique_ptr in C++11), then copy is not allowed but move. There could be a move ctor and move assignment operator.
To make a class behave like a pointer, we need to overload two operators: * and ->. If the interviewer mean reference-counted smart pointer(shared_ptr in C++11), then we need to implement reference counting whenever a copy is made(copy ctor, copy assignment operator) or destroyed(dtor).
If the interviewer mean uniquely-owned smart pointer(unique_ptr in C++11), then copy is not allowed but move. There could be a move ctor and move assignment operator.
Here is the code for your 2nd solution
#pragma warning(disable:4996)
#include <iostream>
#include <vector>
#include <unordered_set>
#include <unordered_map>
#include <algorithm>
#include <cassert>
using namespace std;
// Returns the values present in every input array (any order) by
// intersecting a running set with each successive array.
auto find_commons = [](const vector<vector<int>>& data) {
    // Robustness fix: the original dereferenced data[0] unconditionally,
    // which is undefined behavior for an empty input.
    if (data.empty()) {
        return vector<int>();
    }
    unordered_set<int> prev_commons{ data[0].begin(), data[0].end() };
    for (size_t i = 1; i < data.size(); ++i) {
        unordered_set<int> cur_commons;
        for (int v : data[i]) {
            if (prev_commons.count(v)) {
                cur_commons.insert(v);
            }
        }
        prev_commons.swap(cur_commons);
    }
    return vector<int>{prev_commons.begin(), prev_commons.end()};
};
int main() {
vector<vector<int>> data = {
{ 1, 2, 3, 4, 5, 6 },
{ 2, 3, 4, 5, 6, 7 },
{ 3, 4, 5, 6, 7, 8 },
{ 4, 5, 6, 7, 8, 9 },
{ 5, 6, 7, 8, 9, 9 }
};
auto r = find_commons(data);
auto ans = { 5, 6 };
assert(r.size() == 2 && equal(r.begin(), r.end(), ans.begin()));
for (auto e : r) {
cout << e << " ";
}
cout << endl;
}
C++ solution using hash map & doubly linked list of standard library.
#include <iostream>
#include <list>
#include <unordered_map>
#include <memory>
#include <cassert>
using namespace std;
// Fixed-capacity LRU cache: a doubly linked list keeps usage order
// (front = most recently used) and a hash map gives O(1) lookup from key
// to list position.
class lru_cache {
// one list node: the key plus its value
struct cache_entry {
int key;
shared_ptr<string> val;
};
// maximum number of cached entries before eviction kicks in
const unsigned max_elem = 3;
// mutable so the logically-const get() can reorder and reindex
mutable list<cache_entry> cache_;
mutable unordered_map<int,list<cache_entry>::iterator> map_;
public:
shared_ptr<string> get(int key) const;
void set(int key, shared_ptr<string> val);
};
// Returns the cached value for `key`, promoting its entry to the front of
// the LRU list; returns an empty shared_ptr on a miss.
shared_ptr<string> lru_cache::get(int key) const {
auto r = map_.find(key);
if (r != map_.end()) {
// cache_.push_front(*(r->second));
// cache_.erase(r->second);
// splice relinks the node to the front in O(1) without copying the
// entry (unlike the push_front/erase pair above).
cache_.splice(cache_.begin(), cache_, r->second);
map_[key] = cache_.begin();
return cache_.begin()->val;
}
else {
return shared_ptr<string>();
}
}
// Stores (key, val), evicting the least recently used entry when the
// cache is full. Setting an existing key updates it in place.
void lru_cache::set(int key, shared_ptr<string> val) {
    // Bug fix: the original always pushed a new front entry, so setting an
    // existing key left a stale duplicate in the list; when that duplicate
    // later reached the back, eviction erased the key's LIVE map entry.
    auto it = map_.find(key);
    if (it != map_.end()) {
        it->second->val = val;
        cache_.splice(cache_.begin(), cache_, it->second);
        map_[key] = cache_.begin();
        return;
    }
    if (cache_.size() >= max_elem) {
        map_.erase(cache_.back().key);
        cache_.pop_back();
    }
    cache_.push_front({key, val});
    map_[key] = cache_.begin();
}
// Exercises the 3-entry LRU cache: inserting a 4th entry evicts key 1
// (least recently used), and after touching 3 and 4, inserting key 5
// evicts key 2.
int main() {
lru_cache cache;
cache.set(1, make_shared<string>("abcd"));
cache.set(2, make_shared<string>("bcde"));
cache.set(3, make_shared<string>("cdef"));
cache.set(4, make_shared<string>("defg"));
assert(cache.get(2) && *cache.get(2) == "bcde");
cout << *cache.get(2) << endl;
assert(cache.get(3) && *cache.get(3) == "cdef");
cout << *cache.get(3) << endl;
assert(cache.get(4) && *cache.get(4) == "defg");
cout << *cache.get(4) << endl;
assert(!cache.get(1));
assert(!cache.get(5));
cache.set(5, make_shared<string>("efgh"));
assert(!cache.get(2));
assert(cache.get(3));
assert(cache.get(4));
assert(cache.get(5));
return 0;
}
The problem is that we should be able to print out any n-dimensional array, not assuming a specific dimension. I think any n-dimensional array is expressed as just contiguous memory in memory. So we can express any n-dimensional array as follows.
// C++
// An n-dimensional array stored as one flat buffer: dim holds the extent
// of each dimension, data points at dim[0]*dim[1]*... ints (not owned).
struct md_arr {
vector<size_t> dim;
int* data;
};
With this representation, I can print out n-dim array as follows using suffix product.
#include <iostream>
#include <vector>
#include <functional>
#include <memory>
using namespace std;
// Flat representation of an n-dimensional int array: dim lists each
// dimension's extent; data is the row-major element buffer (not owned).
struct md_arr {
vector<size_t> dim;
int* data;
};
// Prints an n-dimensional array in row-major order using suffix products
// of the dimensions as strides; a newline follows each completed group.
auto print_md_arr = [](const md_arr& arr) {
    // Robustness fix: an empty dim vector would underflow sprd.size()-1
    // (size_t wraps) and index out of bounds.
    if (arr.dim.empty()) {
        return;
    }
    // sprd[i] = number of elements spanned by one step along dimension i
    vector<size_t> sprd(arr.dim.size());
    sprd[sprd.size()-1] = 1;
    for (int i = (int)sprd.size() - 2; i >= 0; --i) {
        sprd[i] = sprd[i+1] * arr.dim[i+1];
    }
    // Depth-first walk of dimension i, offsetting into the flat buffer.
    function<void(size_t, size_t)> impl = [&](size_t i, size_t start) {
        if (i >= arr.dim.size()) {
            cout << arr.data[start] << " ";
            return;
        }
        for (size_t j = 0; j < arr.dim[i]; ++j) {
            impl(i+1, start + j * sprd[i]);
        }
        cout << endl;
    };
    impl(0, 0);
};
// Demo: builds a 2x2x2x3 array, prints it, and releases the buffer.
int main() {
    md_arr ma = {
        vector<size_t>{2,2,2,3},
        new int[24]{1,2,3,4,5,6,1,2,3,4,5,6,1,2,3,4,5,6,1,2,3,4,5,6}
    };
    print_md_arr(ma);
    // Bug fix: the heap buffer was leaked.
    delete[] ma.data;
    return 0;
}
If the interviewer tell me to throw an exception, I'll do the following.
#include <iostream>
#include <climits>
using namespace std;
// Thrown when the reversed value cannot be represented in an int.
class overflow {};

// Returns n with its decimal digits reversed (sign preserved); throws
// overflow when the reversed value would not fit in an int.
int reverse_int(int n) {
    int result = 0;
    // Limits derived once: appending a digit is safe only while
    // result * 10 + digit stays within [INT_MIN, INT_MAX].
    const int limit = n > 0 ? INT_MAX / 10 : INT_MIN / 10;
    const int last = n > 0 ? INT_MAX % 10 : INT_MIN % 10;
    while (n != 0) {
        const int digit = n % 10;
        const bool pos_overflow =
            result > 0 && (result > limit || (result == limit && digit > last));
        const bool neg_overflow =
            result < 0 && (result < limit || (result == limit && digit < last));
        if (pos_overflow || neg_overflow) {
            throw overflow();
        }
        result = result * 10 + digit;
        n /= 10;
    }
    return result;
}
Yes, you're right. Here is the non-recursive version and not checking 3 times.
// Returns true iff s and t are exactly one edit (replace, insert, or
// delete) apart. Non-recursive, O(N) time, and only one tail comparison.
auto one_edit_apart = [](const string& s, const string& t) {
    const auto n = s.length(), m = t.length();
    // true iff s[i..) equals t[j..)
    auto is_same_str = [&](size_t i, size_t j) {
        while (i < n && j < m) {
            if (s[i++] != t[j++]) {
                return false;
            }
        }
        return i >= n && j >= m;
    };
    for (size_t i = 0, j = 0; i < n && j < m; ++i, ++j) {
        if (s[i] != t[j]) {
            if (n == m)
                return is_same_str(i+1,j+1); // replace
            else if (n < m)
                return is_same_str(i,j+1); // insertion
            else
                return is_same_str(i+1,j); // deletion
        }
    }
    // Bug fix: the original used labs(n - m) on size_t operands — the
    // unsigned subtraction wraps before the conversion to long, which is
    // implementation-dependent. Compute the difference unsigned-safely.
    return (n > m ? n - m : m - n) == 1;
};
Two phase processing.
1st phase : election of a server which gathers IP addresses and # of hash buckets(which is dependent on size of each hash bucket and storage amount of each server) per each server. The elected server sends this information to every server. Or you can manually discover information about servers and distribute it to all servers. This server will take the IP address which should be exposed to clients.
2nd phase:
- The elected server hashes the top url into IP_hash_bucket_number using consistent hashing and send the retrieval request to the mapped server
- Each bot waits for retrieval request
- if a request is received and (the url is not fetched yet or the url is updated), then retrieve the url and parse the returned page and extract linked urls(in this phase, maybe external link can be excluded)
- for each url in urls in the page
- hash it into IP_hash_bucket_number using consistent hashing and send the retrieval request to the mapped server
The elected server sends heartbeat messages to keep track of active servers. If timeout occurs, the elected server starts 1st phase.
If each server does not receive a heartbeat message within timeout, it voluntarily starts 1st phase.
Any client can query for a url to the elected server and the elected server hashes the url and send redirect message to the client and client query the url again to the redirected server.
No recursion. O(N) time & O(1) space complexity. This algorithm is using the fact that if one edit is made, the remaining string should be same.
// Returns true iff s and t are exactly one edit (replace, insert, or
// delete) apart. No recursion; O(N) time, O(1) space.
auto one_edit_apart = [](const string& s, const string& t) {
    const auto n = s.length(), m = t.length();
    // true iff s[i..) equals t[j..)
    auto is_same_str = [&](size_t i, size_t j) {
        while (i < n && j < m) {
            if (s[i++] != t[j++]) {
                return false;
            }
        }
        return i >= n && j >= m;
    };
    for (size_t i = 0, j = 0; i < n && j < m; ++i, ++j) {
        if (s[i] != t[j]) {
            return is_same_str(i+1,j+1) || // replace
                   is_same_str(i,j+1) ||   // insertion
                   is_same_str(i+1,j);     // deletion
        }
    }
    // Bug fix: the shorter string is a prefix of the longer one here; that
    // is one edit apart exactly when the lengths differ by 1. The original
    // returned false unconditionally, rejecting e.g. "xy" / "xyz".
    return (n > m ? n - m : m - n) == 1;
};
N = length of one string, M = length of the other string. This is an O(max(N,M)) time complexity and O(max(N,M)) space complexity algorithm. I didn't insert the sum at the first position of the result string because that may cause reallocation and copying, making the complexity O(max(N,M)^2). Instead, I append the sum digits and return the reversed string.
// Adds two digit strings with ripple-carry full-adder logic (sum via XOR,
// carry via AND/OR) and returns the sum string.
// NOTE(review): the XOR/AND adder is only correct when every digit is
// '0' or '1', i.e. the inputs are binary numbers — confirm with callers.
// Digits are appended and the string reversed at the end so no O(N^2)
// front insertions occur.
auto add2 = [](const string& ons1, const string& ons2) {
    const char* longer = ons1.c_str();
    const char* shorter = ons2.c_str();
    int li = (int)ons1.length() - 1;
    int si = (int)ons2.length() - 1;
    if (li < si) {
        // keep `longer` pointing at the string that is not shorter, which
        // also lets the loop condition test a single index
        swap(longer, shorter);
        swap(li, si);
    }
    string ret;
    ret.reserve(li + 2); // longest length + room for a final carry
    int carry = 0;
    for (; li >= 0; --li, --si) {
        const int a = longer[li] - '0';
        const int b = si >= 0 ? shorter[si] - '0' : 0;
        ret.push_back((a ^ b ^ carry) + '0');
        carry = (a & b) | (a & carry) | (b & carry);
    }
    if (carry) {
        ret.push_back(carry + '0');
    }
    reverse(ret.begin(), ret.end());
    return ret;
};
Two more words:
1. Made ns1 the longer string to avoid additional O(max(N,M)) comparisons for i >= 0 when calculating n1 and comparisons for j >= 0 loop exit condition.
2. Computed sum and carry using only bit operations which is faster than arithmetic operations.
I'd like to divide the functionality into 4 classes:
Downloader, Session, LRUCache, CacheEntry
1. Downloader: singleton, active object which has its own thread and manages Sessions and Caches. internally opens a unix domain socket so that it can receive download request. It runs with event loop by calling select or poll API for ongoing download session fd and its domain socket to allow non-blocking operations and simultaneous multiple downloads. We can use multi-threaded downloader but typically user callback for downloads does not involve complex processing. So event-driven processing is enough and good thing with this approach is to avoid most synchronization overheads.
It keeps fd to Session map.
It should store downloaded data via LRUCache by giving LRUCache class url and downloaded data buffer and notify Sessions of timeout, downloaded data size, success, and/or failure.
It provides download(url, path) method which will run in the context of client's and do following things.
1. check whether the url already exist in cache and it does not expire by calling LRUCache.
2. if the test is passed, create session and notify download complete event and success event with cache data path
3. Otherwise, open the url and request a new cache entry and create a session. store the fd and session into the fd-to-session map. send a new download request with information about fd & url through the unix domain socket.
In the event loop, it should do following things.
call select() for all fds with the fastest timeout.
if there are readable fd sets,
- if the fd is its domain socket, process a new download request.
- otherwise, read the data and find out the mapped url and request LRUcache to store the data for the url.
if timeout event occurs,
- notify the mapped session of the timeout event.
2. LRUCache: maintains caches. Each cache entry keeps information about requested url, expiration information, cache data path.
Caches are organized as two different data structure: linked list of url and hash map of url and cache entry. This should store information persistently whenever an update is made.
If total data size is over maximum allowed data size, it should pick a LRU entry and extinct it. This process is done when a download is completed.
3. CacheEntry: as mentioned above, keeps information about the requested url, expiration information, and the cache data path
4. Session: keeps track of each download request status. It should keep requested url, requested local path, downloaded data size, progress callback, success callback, failure callback. When a download is completed, it is given a cached data path and should copy it into requested data path.
It should call user's callback as per events like timeout, process, download complete, download failure. We may just return cache data path but in that case, it become much harder to manage cache consistency between real cache data and cache metadata.
One more word.
If we want to support operations like suspend(), resume(), stop() on Session, then multi-thread downloader is more convenient to implement and Session can interact with LRUCache directly. In that case, LRUCache should be implemented in a thread-safe way.
O(N) time complexity & O(1) space complexity
// Compacts all non-zero elements of `data` to the front, preserving their
// order, and returns how many there are. Elements past the returned count
// are left unchanged (the vector is not resized).
auto remove_zeros = [](vector<int>& data) {
    int write = 0; // next slot to receive a non-zero element
    for (int read = 0; read < (int)data.size(); ++read) {
        if (data[read]) {
            data[write] = data[read];
            ++write;
        }
    }
    return write;
};
Note that this algorithm is using the fact that if one edit is made, the remaining string should be same.
#include <cstring>
#include <cassert>
using namespace std;
// True iff C strings s and t differ by exactly one edit (insert, delete,
// or replace): after the first mismatch, the tails must match exactly.
bool one_edit_apart(const char* s, const char* t) {
    auto n = strlen(s), m = strlen(t);
    if (m > n) {
        // make s the string that is not shorter
        swap(n, m);
        swap(s, t);
    }
    for (; *t; ++s, ++t) {
        if (*s != *t) {
            if (n == m) {
                return strcmp(s + 1, t + 1) == 0; // replacement
            }
            return strcmp(s + 1, t) == 0;         // deletion from s
        }
    }
    return n - m == 1; // t is a prefix of s: one extra trailing char
}
// Test cases: single deletions, insertions, and replacements are accepted;
// identical strings, transpositions, and two-edit gaps are rejected.
int main() {
assert(one_edit_apart("xyz", "xz"));
assert(one_edit_apart("xz", "xyz"));
assert(one_edit_apart("xyz", "xaz"));
assert(!one_edit_apart("abc", "abc"));
assert(!one_edit_apart("abc", "acb"));
assert(!one_edit_apart("ab", "abcd"));
return 0;
}
O(N) time complexity and O(1) space complexity algorithm using dynamic programming
// Counts the decodings of digit string S where "1".."26" map to letters
// ("Decode Ways"), in O(N) time and O(1) space via a rolling window of
// three DP values: N[(i+1)%3] = ways to decode the first i+1 characters.
auto W = [](const string& S) {
    const auto n = S.length();
    int N[] = { 1, 0, 0 }; // the empty prefix has exactly one decoding
    for (size_t i = 0; i < n; ++i) {
        // single digit: valid only for '1'..'9'
        N[(i+1)%3] = '1' <= S[i] && S[i] <= '9' ? N[i%3] : 0;
        if (i >= 1) {
            // Bug fix: the pair must be in 10..26 — without the lower
            // bound, pairs with a leading zero ("01".."09") were counted,
            // e.g. W("101") returned 2 instead of 1.
            const int two = (S[i-1]-'0') * 10 + (S[i]-'0');
            N[(i+1)%3] += 10 <= two && two <= 26 ? N[(i-1)%3] : 0;
        }
    }
    return N[n%3];
};
O(N) solution using a hash map
#include <iostream>
#include <cassert>
#include <functional>
#include <unordered_map>
using namespace std;
// Singly linked node with an extra random pointer r (clone-list problem).
struct node {
int v;
node* n;
node* r;
node(int v, node* n = nullptr, node* r = nullptr) : v(v), n(n), r(r) {}
};
// Clones a list whose nodes carry an extra random pointer r, in two
// passes: clone_list copies the next-chain while recording each
// original->clone pair in a hash map; set_rand then rewires every clone's
// r through that map.
auto clone_random_list = [](const node* list) {
unordered_map<const node*,node*> cloned;
// Recursively builds the cloned next-chain; the assignment inside the
// ternary also records the original->clone mapping.
function<node*(const node*)> clone_list = [&](const node* n) {
return n ? cloned[n] = new node(n->v, clone_list(n->n)) : nullptr;
};
auto ret = clone_list(list);
// Comma expression: set the clone's random pointer, recurse down the
// list, then yield 0 to satisfy the ternary.
// NOTE(review): assumes every n->r points into the list or is null
// (a null r maps to null via the map's default-inserted value).
function<void(const node*)> set_rand = [&](const node* n) {
n ? cloned[n]->r = cloned[n->r], set_rand(n->n), 0 : 0;
};
set_rand(list);
return ret;
};
// Structural equality of two random-pointer lists: same length, same
// values, and matching random pointers (compared by target value; two
// null randoms also match).
auto is_same_list = [](const node* a, const node* b) {
    function<bool(const node*, const node*)> impl = [&](const node* a, const node* b) {
        if (!a || !b) {
            return !a && !b; // equal only if both ended together
        }
        if (a->v != b->v) {
            return false;
        }
        // Robustness fix: the original dereferenced a->r/b->r without a
        // null check and crashed on lists with unset random pointers.
        if ((a->r == nullptr) != (b->r == nullptr)) {
            return false;
        }
        if (a->r && a->r->v != b->r->v) {
            return false;
        }
        return impl(a->n, b->n);
    };
    return impl(a,b);
};
// Streams each node as "value->random_value ".
// NOTE(review): dereferences list->r unconditionally — every node's random
// pointer must be non-null, as in the demo below; confirm for other users.
ostream& operator<<(ostream& os, const node* list) {
while (list) {
os << list->v << "->" << list->r->v << " ";
list = list->n;
}
return os;
}
// Demo: builds the list 1->2->3 with random pointers 1->3, 2->1, 3->2,
// clones it, and checks the clone is structurally identical.
// (Demo code: the nodes are intentionally leaked.)
int main() {
auto n3 = new node(3);
auto n2 = new node(2, n3);
auto n1 = new node(1, n2, n3);
n2->r = n1;
n3->r = n2;
cout << n1 << endl;
auto c = clone_random_list(n1);
cout << c << endl;
assert(is_same_list(n1, c));
return 0;
}
Another solution using recursion & stack. O(n*m) time complexity and O(n+m) space complexity
#include <iostream>
#include <functional>
#include <stack>
using namespace std;
// Linked-list digit node for the multiply demo: v holds one decimal
// digit, n points to the next digit node.
struct node {
int v;
node* n;
node(int v, node* n = nullptr) : v(v), n(n) {}
};
// Multiplies two numbers stored as linked lists of decimal digits. The
// digits of n2 are pushed onto a stack so they are processed last-first;
// for each one, inner_impl adds n1 * digit into the running output list
// with carry propagation, extending the list as needed.
// NOTE(review): appears to assume most-significant digit first in inputs
// and output, and the result may carry a leading zero node — confirm with
// callers.
auto multiply = [](node* n1, node* n2) {
// Adds n1 * v into the partial result o (creating nodes where o runs
// out); returns the carry out of the last processed digit, which the
// caller stores into the node above o.
function<int(node*,int,node*&)> inner_impl = [&](node* n1, int v, node*& o) {
if (!n1) {
return 0;
}
if (o) {
// accumulate into an existing output digit
auto res = o->v + n1->v * v + inner_impl(n1->n, v, o->n);
o->v = res % 10;
return res / 10;
}
else {
// grow the output list with a fresh digit node
node* next = nullptr;
auto res = n1->v * v + inner_impl(n1->n, v, next);
o = new node(res % 10, next);
return res / 10;
}
};
// reverse n2's digits so the least significant is handled first
stack<int> s;
auto n2cur = n2;
while(n2cur) {
s.push(n2cur->v);
n2cur = n2cur->n;
}
auto out = new node(0, nullptr);
while(!s.empty()) {
// the carry from this partial product becomes the current head digit
out->v = inner_impl(n1, s.top(), out->n);
s.pop();
// prepend a slot for the next (more significant) partial product
out = new node(0, out);
}
return out;
};
I came up with another solution based on the similar idea to the above.
Let's assume that each server provides the following operation.
* get_median(lb, ub) for which the server returns the median among values [lb, ub], not among all values which it has. If there are no values in the range, return NoValues exception
1. initializes lb = min of possible values and ub = max of possible values such as Long.MIN or Long.MAX
2. requests every server get_median(lb, ub) and ignore NoValues exception and gather response -> { mi | i = 0 ... 9999 }
3. new_lb = min(mi), new_ub = max(mi)
4. if new_lb == new_ub, then new_lb is median
or if new_lb == lb and new_ub == ub, then (new_lb + new_ub) * 0.5 is median.
otherwise, lb = new_lb, ub = new_ub, repeat 2 ~ 4
I think probabilistically it will lose over billion numbers(N) at each iteration. # of messages would be bounded by O(X^2) where X is number of servers if we ignore processing time inside each server which is reasonable because we are talking about performance of whole distributed system, not a single machine. In worst case, at each iteration it will lose N and at each iteration it needs X response messages and X iterations are required to lose all numbers.
If each server maintains the sorted list, get_median operation can be done in O(log N) time on it because upper_bound and lower_bound algorithm can run in O(log N) and if indices of upper_bound and lower_bound are found, then it's trivial to find the median in the range.
I'd like to solve this problem like the following.
Thread A: will manage two data structures for set A
- hash_map with key = number and value = true if A intersection B
- hash_set for A - B
similarly
Thread B: will manage two data structures for set B
- hash_map with key = number and value = true if A intersection B
- hash_set for B - A
Thread C: will manage two data structures for A U B and A intersection B
- A U B: hash_map with key = number and value = bit fields(true if number contained in A, true if number contained in B)
- A intersection B: hash_set
If a number N is about to come to A, the producer notifies Thread A and Thread C
Thread A:
- updates hash_map with N
- waits for Thread C is done with N
Thread C:
- updates hash_map with N
- if N is contained in A and B, update hash_set for A intersection B and notifies Thread A with updated N, otherwise, just notifies Thread A
Thread A:
- wakes up
- if A intersection B is updated with N, it updates its hash_set. otherwise, just finish.
Similar procedure can be applied to a new number to set B.
What's your algorithm's complexity? Other solutions are trying to do it in O(N), where N is the length of the original string. But yours is O(N^2). I think this is the interviewer's point.
- anonymous December 18, 2014