losmi
BAN USERHere is a working C++11 solution with some tests. I didn't cover any special cases apart form the query not being found at all. I assume that all words in query are different.
The solution is based on the sliding window approach and is related to Rabin-Karp algorithm, with the difference of using a window that varies in size.
I first reduced the original text by removing all words that do not appear in the query. This step is not necessary, but in reduces the runtime (we can assume that it will have way more unique words than the query) and makes the code somewhat nicer with the cost of saving two additional arrays.
After that I go through all possible window sizes and for each size I initially create a hash map containing the number of occurrences of each word. After that in order to recalculate the hash map for the next window I simply have to remove the "oldest" element and add a new one, which is the idea from Rabin-Karp and brings the biggest speedup. As the hash map contains only element which occur in current window it is sufficient to simply compare its size with size of the query to determine if the current window contains all words from the query.
Let's say that:
L - total number of words in original text
Q - number of words in query
R - number of words in original text after removing all the words that do not appear in the query
W - longest word in original text (for calculating hash)
V - Longest word in query (for calculating hash)
Time complexity:
Preparing for shortening: O(Q*V)
Shortening text: O(L*W)
Calculating: O(R^2 * V)
Space complexity:
Dictionary: O(Q*V)
Shortened text: O(R*V) (not mandatory)
Hash: O(Q*V)
#include <iostream>
#include <string>
#include <vector>
#include <unordered_set>
#include <unordered_map>
#include <utility>
#include <limits>
std::pair<int, int> smallest_window(const std::vector<std::string>& original, const std::vector<std::string>& query) {
std::unordered_set<std::string> query_check; // could've also been a trie
for(const std::string& word : query) query_check.insert(word);
// reduce the long text by removing all words which do not exist in the query
std::vector<std::string> reduced;
std::vector<int> red_orig;
for(int i = 0; i < original.size(); ++i) {
if(query_check.find(original[i]) != query_check.end()) {
reduced.push_back(original[i]);
red_orig.push_back(i);
}
}
std::pair<int, int> res = {-1, -1};
int min_window = std::numeric_limits<int>::max();
for(int window_size = query.size(); window_size <= reduced.size(); ++window_size) {
std::unordered_map<std::string, int> content;
// fill up the initial content
for(int i = 0; i < window_size; ++i) {
if(content.find(reduced[i]) == content.end()) content[reduced[i]] = 1;
else ++content[reduced[i]];
}
// now move the window until a match is found
int window_start = 0;
while (window_start <= reduced.size() - window_size) {
if(content.size() == query.size()) { // found a window!
if(red_orig[window_start + window_size - 1] - red_orig[window_start] < min_window) {
res = {red_orig[window_start], red_orig[window_start + window_size - 1]};
min_window = red_orig[window_start + window_size - 1] - red_orig[window_start];
}
} else { // move window
--content[reduced[window_start]]; // removing word furthest to the left
if(0 == content[reduced[window_start]]) content.erase(reduced[window_start]);
if(content.find(reduced[window_start + window_size]) == content.end()) { // adding new word
content[reduced[window_start + window_size]] = 1;
} else {
++content[reduced[window_start + window_size]];
}
}
++window_start;
}
}
return res;
}
int main() {
{
std::vector<std::string> original = {"aa", "ab", "ac", "ad", "ae", "ba", "bb", "bc", "bd", "be", "ca", "cb", "cc", "cd", "ce", "da", "db", "dc", "dd", "de"};
std::vector<std::string> query = {"db", "ae", "bc"};
std::pair<int, int> res = smallest_window(original, query);
std::cout << "Expected: 4 (\"ae\") : 16 (\"db\")" << std::endl;
std::cout << "Result : " << res.first << " (\"" << original[res.first] << "\") : " << res.second << " (\"" << original[res.second] << "\")" << std::endl;
}
{
std::vector<std::string> original = {"aa", "ab", "ac", "ad", "aa"};
std::vector<std::string> query = {"aa", "ad"};
std::pair<int, int> res = smallest_window(original, query);
std::cout << "Expected: 3 (\"ad\") : 4 (\"aa\")" << std::endl;
std::cout << "Result : " << res.first << " (\"" << original[res.first] << "\") : " << res.second << " (\"" << original[res.second] << "\")" << std::endl;
}
{
std::vector<std::string> original = {"aa", "ab", "ac", "ad", "aa"};
std::vector<std::string> query = {"ac"};
std::pair<int, int> res = smallest_window(original, query);
std::cout << "Expected: 2 (\"ac\") : 2 (\"ac\")" << std::endl;
std::cout << "Result : " << res.first << " (\"" << original[res.first] << "\") : " << res.second << " (\"" << original[res.second] << "\")" << std::endl;
}
{
std::vector<std::string> original = {"aa", "ab", "ac", "ad", "aa"};
std::vector<std::string> query = {"aa", "af"};
std::pair<int, int> res = smallest_window(original, query);
std::cout << "Expected: no pair found!" << std::endl;
std::cout << "Result : ";
if(-1 == res.first) std::cout << "no pair found!" << std::endl;
else std::cout << "pair found" << std::endl;
}
return 0;
}
Here is a regular Weighted Digraph. It shouldn't be that hard to modify it so that it solves this problem.
#include <iostream>
#include <vector>
#include <stack>
#include <algorithm>
class WeightedEdge {
public:
WeightedEdge(int a, int b, int weight)
: a_(a), b_(b), weight_(weight)
{}
int either() const {
return a_;
}
int other(int some) const {
if(a_ == some) return b_;
else if(b_ == some) return a_;
else return -1;
}
int weight() const {
return weight_;
}
bool operator==(const WeightedEdge& other) const {
return (a_ == other.a_ && b_ == other.b_ && weight_ == other.weight_)
|| (a_ == other.b_ && b_ == other.a_ && weight_ == other.weight_);
}
bool operator<(const WeightedEdge& other) const {
return weight_ < other.weight_;
}
bool operator>(const WeightedEdge& other) const {
return weight_ > other.weight_;
}
WeightedEdge& operator=(const WeightedEdge& other) {
a_ = other.a_;
b_ = other.b_;
weight_ = other.weight_;
return *this;
}
private:
int a_;
int b_;
int weight_;
};
class EdgeWeightedGraph {
public:
EdgeWeightedGraph(int size)
: adjacency_(size)
{}
void add_edge(int a, int b, int weight) {
if(a < adjacency_.size() && b < adjacency_.size() && std::find(adjacency_[a].begin(), adjacency_[a].end(), WeightedEdge(a, b, weight)) == adjacency_[a].end()) {
adjacency_[a].emplace_back(a, b, weight);
adjacency_[b].emplace_back(a, b, weight);
}
}
const std::vector<WeightedEdge>& adjacent(int a) const {
return adjacency_[a];
}
void print() const {
for(std::size_t i = 0; i < adjacency_.size(); ++i)
for(const auto& e : adjacency_[i])
std::cout << i << " " << e.other(i) << " " << e.weight() << std::endl;
}
std::vector<WeightedEdge> edges() const {
std::vector<WeightedEdge> res;
for(std::size_t i = 0; i < adjacency_.size(); ++i) {
for(const WeightedEdge& e : adjacency_[i])
if(i < e.other(i)) res.push_back(e);
}
return res;
}
std::vector<int> dfs(int start) const {
std::vector<bool> visited(adjacency_.size(), false);
std::stack<int> nodes_stack;
std::vector<int> res;
nodes_stack.push(start);
while(!nodes_stack.empty()) {
int curr = nodes_stack.top();
nodes_stack.pop();
if(!visited[curr]) {
res.push_back(curr);
visited[curr] = true;
for(const auto& e : adjacent(curr)) {
if(!visited[e.other(curr)])
nodes_stack.push(e.other(curr));
}
}
}
return res;
}
std::size_t size() const {
return adjacency_.size();
}
private:
std::vector<std::vector<WeightedEdge>> adjacency_;
};
int main() {
EdgeWeightedGraph graph(6);
graph.add_edge(0, 1, 1);
graph.add_edge(0, 2, 2);
graph.add_edge(0, 5, 3);
graph.add_edge(2, 4, 4);
graph.add_edge(3, 4, 5);
graph.add_edge(3, 5, 6);
graph.add_edge(4, 5, 7);
// graph.print();
// for(int i : graph.dfs(0))
// std::cout << i << std::endl;
for(const WeightedEdge& e : graph.edges())
std::cout << e.either() << " " << e.other(e.either()) << " " << e.weight() << std::endl;
return 0;
}
Assuming that there cannot be two neighboring cliffs of the same height, as those shall be treated as the same cliff.
Model the island as a directed graph where each cliff is a node with edges pointing to neighboring cliffs of lower height. Now use DFS from the starting cliff to find all cliffs which are surrounded only by higher cliffs. Remember which cliffs you have already visited in order to save checking the same path after some "fork-merge". After each fork determine which path went to the lower cliff. At the end return the lowest cliff.
This problem is known as Collatz conjecture (check Wikipedia). Brute-force method was proposed by Jerry. Such solution would waste a lot of time because it would recalculate for many number. For each number there is only one way to reach 1, so no need to recalculate. Solution for this problem - dynamic programming, i.e. caching!
We cannot use an array/vector because the numbers do not always necessarily decrease (consider 9999) and thus we cannot know how large it should be and even if we resize we would waste lots of space. Solution for this - hash map!
Here is a working C++11 solution for an arbitrary number of elements. It is hard to define complexity because it depends on how the sequence goes.
#include <iostream>
#include <unordered_map>
int collatz(std::unordered_map<int, int>* cache_ptr, int n) {
std::unordered_map<int, int>& cache = *cache_ptr;
const auto& calculated = cache.find(n);
if(calculated != cache.end()) // value already calculated (including 1), return the value
return calculated->second;
if(n%2) { // odd
cache[n] = collatz(cache_ptr, 3*n+1) + 1;
} else { // even
cache[n] = collatz(cache_ptr, n/2) + 1;
}
return cache[n];
}
int collatz(int max) {
std::unordered_map<int, int> cache;
int longest_path = 0;
int longest_start = 1;
cache.insert({1, 0});
for(int i = 2; i < max; ++i) {
int collatz_path = collatz(&cache, i);
if(collatz_path > longest_path) {
longest_path = collatz_path;
longest_start = i;
}
}
return longest_start;
}
int main() {
std::cout << collatz(10000) << std::endl;
return 0;
}
A working C++11 O(1) in time and O(N) in space solution with test program. I assumed that we work with unsigned numbers, but implementing negative ones would not be a problem wither.
#include <iostream>
#include <vector>
#include <random>
#include <algorithm>
class ValueGenerator{
public:
ValueGenerator() : vals_(4500), left_(4500), rd_(), rg_(rd_()) {
for(int i = 0; i < vals_.size(); ++i)
vals_[i] = 1000 + 2*i;
}
int generate() {
if(0 == left_) {
return 0;
} else {
int index = std::uniform_int_distribution<>{0, left_ - 1}(rg_);
std::swap(vals_[index], vals_[left_-1]);
--left_;
return vals_[left_];
}
}
private:
std::vector<int> vals_;
int left_;
std::random_device rd_;
std::mt19937 rg_;
};
int main() {
ValueGenerator vg1;
// generate some number to see if they are random
for(int i = 0; i < 10; ++i)
std::cout << vg1.generate() << std::endl;
// check if no number was repeated
ValueGenerator vg2;
std::vector<bool> check(10000, false);
for(int i = 0; i < 1000; ++i) check[i] = true;
for(int i = 1001; i < 10000; i += 2) check[i] = true;
for(int i = 0; i < 4500; ++i) {
int val = vg2.generate();
if(check[val]) {
std::cout << "Problem with: " << val << std::endl;
i = 4500;
}
check[val] = true;
}
//check if all values are now set to true
for(int i = 0; i < 10000; ++i)
if(!check[i]) std::cout << "Number " << i << "was not set" << std::endl;
return 0;
}
A working C++11 O(1) in time and O(N) in space solution with test program. I assumed that we work with unsigned numbers, but implementing negative ones would not be a problem wither.
#include <iostream>
#include <vector>
#include <random>
#include <algorithm>
class ValueGenerator{
public:
ValueGenerator() : vals_(4500), left_(4500), rd_(), rg_(rd_()) {
for(int i = 0; i < vals_.size(); ++i)
vals_[i] = 1000 + 2*i;
}
int generate() {
if(0 == left_) {
return 0;
} else {
int index = std::uniform_int_distribution<>{0, left_ - 1}(rg_);
std::swap(vals_[index], vals_[left_-1]);
--left_;
return vals_[left_];
}
}
private:
std::vector<int> vals_;
int left_;
std::random_device rd_;
std::mt19937 rg_;
};
int main() {
ValueGenerator vg1;
// generate some number to see if they are random
for(int i = 0; i < 10; ++i)
std::cout << vg1.generate() << std::endl;
// check if no number was repeated
ValueGenerator vg2;
std::vector<bool> check(10000, false);
for(int i = 0; i < 1000; ++i) check[i] = true;
for(int i = 1001; i < 10000; i += 2) check[i] = true;
for(int i = 0; i < 4500; ++i) {
int val = vg2.generate();
if(check[val]) {
std::cout << "Problem with: " << val << std::endl;
i = 4500;
}
check[val] = true;
}
//check if all values are now set to true
for(int i = 0; i < 10000; ++i)
if(!check[i]) std::cout << "Number " << i << "was not set" << std::endl;
return 0;
}
Here is a working dynamic programming iterative solution in C++11.
The solution is O(ranges*data_size) because it does not fully recalculate all following maximums in the row in a loop, but instead just compares value starting at that element with maximum of the previous element. Otherwise the solution would be O(ranges*data_size*data_size). Size complexity is also O(ranges*data_size).
I have not implemented any protection against invalid inputs.
#include <iostream>
#include <vector>
#include <algorithm>
#include <utility>
#include <numeric>
std::pair<int, std::vector<std::pair<int, int>>> max_ranges(const std::vector<int>& data, int ranges, int range_size) {
std::vector<std::vector<int>> cache(ranges + 1, std::vector<int>(data.size()));
std::vector<std::vector<int>> came_from(ranges + 1, std::vector<int>(data.size(), -1));
std::vector<std::vector<int>> starting_from(ranges + 1, std::vector<int>(data.size(), -1));
int last_valid = data.size() - range_size;
std::vector<int> range_at(last_valid + 1);
range_at[0] = std::accumulate(data.begin(), data.begin() + range_size, 0);
for(int i = 1; i < range_at.size(); ++i)
range_at[i] = range_at[i - 1] - data[i - 1] + data[i + range_size - 1];
cache[1][last_valid] = range_at[last_valid];
starting_from[1][last_valid] = last_valid;
for(int i = last_valid - 1; i >= 0; --i) {
if(range_at[i] >= cache[1][i+1]) {
cache[1][i] = range_at[i];
starting_from[1][i] = i;
} else {
cache[1][i] = cache[1][i+1];
starting_from[1][i] = starting_from[1][i+1];
}
}
for(int range = 2; range <= ranges; ++range) {
last_valid -= range_size;
cache[range][last_valid] = cache[range-1][last_valid + range_size] + range_at[last_valid];
came_from[range][last_valid] = last_valid + range_size;
starting_from[range][last_valid] = last_valid;
}
last_valid = data.size() - range_size;
for(int range = 2; range <= ranges; ++range) {
last_valid -= range_size;
for(int start = last_valid - 1; start >= 0; --start) {
int new_val = range_at[start] + cache[range-1][start + range_size];
if(new_val >= cache[range][start+1]) {
cache[range][start] = new_val;
came_from[range][start] = starting_from[range-1][start + range_size];
starting_from[range][start] = start;
} else {
cache[range][start] = cache[range][start+1];
came_from[range][start] = came_from[range][start+1];
starting_from[range][start] = starting_from[range][start+1];
}
}
}
std::vector<std::pair<int, int>> ranges_res;
for(int range = ranges, cf = 0; range >= 1; --range) {
ranges_res.emplace_back(starting_from[range][cf], starting_from[range][cf] + range_size - 1);
cf = came_from[range][cf];
}
return {cache[ranges][0], std::move(ranges_res)};
}
int main() {
std::vector<int> data = {1, 2, 1, 2, 6, 7, 5, 1};
auto res = max_ranges(data, 3, 2);
std::cout << "Max sum: " << res.first << std::endl;
for(const auto& pr : res.second)
std::cout << pr.first << ", " << pr.second << std::endl;
return 0;
}
Killedsteel's solution is a good starting point, but I think that we have to start the search from every node because it could happen that for example we randomly select a node which has no leaving edges.
Below I present a solution which looks for all intersection pairs with only one path between them because that way it is easier to check the solution, but you could also stop the search as soon as at least one pair is found.
It is not specified, but I supposed that going through a cycle several times does not count as separate paths, thus there is on_stack check (as well as for the sake of not entering an infinite loop).
There is a path counter for each node. As soon as counter reaches two there is no need to consider that node anymore. We could omit that check and get all paths, but that would worsen the worst case performance, especially in dense graphs. When increasing counter from one to two it is necessary to continue with the DFS in order to update counters of the children.
For each starting node we visit edges leaving a certain node at most twice, which leads to the time complexity of O(VE). Space complexity is O(V), if we exclude the resulting vector.
Here is a working C++11 solution with two samples. I could have moved the check to parent's DFS call and thus spare additional functions call, but I find it more readable this way.
#include <iostream>
#include <vector>
#include <memory>
#include <utility>
#include <algorithm>
class Intersection {
public:
Intersection(int id) : id_(id) {}
const std::vector<std::weak_ptr<Intersection>>& get_neighbours() const {
return neighbours_;
}
void add_neighbour(std::weak_ptr<Intersection> neighbour) {
neighbours_.push_back(neighbour);
}
int get_id() const {
return id_;
}
private:
int id_;
std::vector<std::weak_ptr<Intersection>> neighbours_;
};
class CityMap {
public:
std::shared_ptr<Intersection> get_intersection(int id) {
return intersections_[id];
}
const std::shared_ptr<Intersection> get_intersection(int id) const {
return intersections_[id];
}
void add_intersection(std::shared_ptr<Intersection> intersection) {
intersections_.push_back(intersection);
}
std::size_t size() const {
return intersections_.size();
}
private:
std::vector<std::shared_ptr<Intersection>> intersections_;
};
void extract_pairs(const std::vector<int>& ways, int from, std::vector<std::pair<int, int>>* res_ptr) {
std::vector<std::pair<int, int>>& res = *res_ptr;
for(int to = 0; to < ways.size(); ++to) {
if(1 == ways[to]) res.emplace_back(from, to);
}
}
void dfs(const Intersection& curr, std::vector<int>* ways_ptr, std::vector<bool>* on_stack_ptr) {
int curr_id = curr.get_id();
std::vector<int>& ways = *ways_ptr;
std::vector<bool>& on_stack = *on_stack_ptr;
if(ways[curr_id] >= 2 || on_stack[curr_id]) return;
on_stack[curr_id] = true;
//ways[curr_id] += ways[parent];
++ways[curr_id];
for(const std::weak_ptr<Intersection> inters_ptr : curr.get_neighbours()) {
dfs(*(inters_ptr.lock()), ways_ptr, on_stack_ptr);
}
on_stack[curr_id] = false;
}
std::vector<std::pair<int, int>> find_single_ways(const CityMap& cm) {
std::vector<std::pair<int, int>> res;
std::vector<int> ways(cm.size());
std::vector<bool> on_stack(cm.size());
for(int i = 0; i < cm.size(); ++i ) {
std::fill(std::begin(ways), std::end(ways), 0);
std::fill(std::begin(on_stack), std::end(on_stack), false);
on_stack[i] = true;
for(const std::weak_ptr<Intersection> inters_ptr : cm.get_intersection(i)->get_neighbours())
dfs(*(inters_ptr.lock()), &ways, &on_stack);
extract_pairs(ways, i, &res);
}
return std::move(res);
}
int main() {
CityMap cm;
cm.add_intersection(std::make_shared<Intersection>(0));
cm.add_intersection(std::make_shared<Intersection>(1));
cm.add_intersection(std::make_shared<Intersection>(2));
cm.add_intersection(std::make_shared<Intersection>(3));
cm.add_intersection(std::make_shared<Intersection>(4));
cm.add_intersection(std::make_shared<Intersection>(5));
cm.add_intersection(std::make_shared<Intersection>(6));
cm.get_intersection(0)->add_neighbour(cm.get_intersection(1));
cm.get_intersection(0)->add_neighbour(cm.get_intersection(2));
cm.get_intersection(1)->add_neighbour(cm.get_intersection(3));
cm.get_intersection(2)->add_neighbour(cm.get_intersection(1));
cm.get_intersection(3)->add_neighbour(cm.get_intersection(2));
cm.get_intersection(3)->add_neighbour(cm.get_intersection(4));
cm.get_intersection(4)->add_neighbour(cm.get_intersection(5));
cm.get_intersection(4)->add_neighbour(cm.get_intersection(6));
cm.get_intersection(6)->add_neighbour(cm.get_intersection(5));
/*cm.get_intersection(0)->add_neighbour(cm.get_intersection(1));
cm.get_intersection(0)->add_neighbour(cm.get_intersection(4));
cm.get_intersection(0)->add_neighbour(cm.get_intersection(6));
cm.get_intersection(1)->add_neighbour(cm.get_intersection(2));
cm.get_intersection(2)->add_neighbour(cm.get_intersection(3));
cm.get_intersection(2)->add_neighbour(cm.get_intersection(5));
cm.get_intersection(4)->add_neighbour(cm.get_intersection(2));
cm.get_intersection(5)->add_neighbour(cm.get_intersection(4));
cm.get_intersection(5)->add_neighbour(cm.get_intersection(6));*/
auto res = find_single_ways(cm);
for(auto r : res)
std::cout << "From " << r.first << " to " << r.second << std::endl;
return 0;
}
And one note. If I haven't used the input shrinking I could stop the search as soon as I find one proper window because the window size increases so the first one would also be the smallest one. So it's is really about analyzing the input data and discussing it.
- losmi May 03, 2017