popescu.af
BAN USEROK, corrected it, thanks.
- popescu.af, January 24, 2015 — I think this is the only way to do it in constant space.
- popescu.af, January 23, 2015 — You cannot reverse the linked list, since doing so implies that you either alter the list or use O(N) extra space, neither of which is allowed.
Assuming that reversing in place were allowed, this would take O(N^2) time, not O(N), since the list is single-linked.
A possible solution (remove() is linear in worst case):
template <typename key_type, typename value_type, uint32_t size_value>
class CustomLruCache
{
public:
// Priority function: maps (current timestamp, element cost) to a priority.
// Lower priorities are evicted first (mPriorities is a min-heap).
typedef std::function<uint32_t(const time_t &, const uint32_t &)> tKeyFunction;
public:
CustomLruCache(const tKeyFunction &f)
: mF(f)
, mData()
, mPriorities()
, mPriorityMap()
{ }
// Access an existing element and refresh its priority.
// Throws std::out_of_range when the key is absent (the original code
// default-inserted a half-initialized Element that was unknown to the
// heap, corrupting the eviction bookkeeping).
value_type & operator[] (const key_type &key)
{
return get(key);
}
// Read-only access; does not refresh the priority (a const object cannot
// update the bookkeeping structures). The original forwarded to the
// non-const get(), which does not compile when instantiated.
// Throws std::out_of_range when the key is absent.
const value_type & operator[] (const key_type &key) const
{
return mData.at(key).value;
}
// Insert or overwrite an element. When the cache is full and the key is
// new, the lowest-priority element is evicted first. Overwriting an
// existing key does not evict (the size does not grow).
void insert(const key_type &key, const value_type &value, const uint32_t cost)
{
auto foundIt = mData.find(key);
if (foundIt != mData.end())
detach(key, foundIt->second.priority); // overwrite: drop the stale priority mapping
else if (mData.size() == size_value)
remove();
const uint32_t prio = mF(time(nullptr), cost);
mData[key] = Element(value, cost, prio);
mPriorities.push(prio);
mPriorityMap[prio].insert(key);
}
private:
struct Element
{
value_type value;
uint32_t cost;     // cost given at insertion, reused when the priority is refreshed
uint32_t priority; // last priority computed by mF for this element
Element()
: value()
, cost(0)
, priority(0)
{ }
Element(const value_type &value, const uint32_t cost, const uint32_t priority)
: value(value)
, cost(cost)
, priority(priority)
{ }
};
private:
// Remove `key` from the bucket of `priority`; drop the bucket when it
// becomes empty. The matching heap entry is left behind on purpose and
// skipped lazily by remove().
void detach(const key_type &key, const uint32_t priority)
{
auto bucketIt = mPriorityMap.find(priority);
if (bucketIt == mPriorityMap.end())
return;
bucketIt->second.erase(key);
if (bucketIt->second.empty())
mPriorityMap.erase(bucketIt);
}
value_type & get(const key_type &key)
{
Element &el = mData.at(key); // throws std::out_of_range when absent
detach(key, el.priority);
el.priority = mF(time(nullptr), el.cost);
mPriorities.push(el.priority); // the original omitted this push, so a
// refreshed element could never be evicted and the heap went stale
mPriorityMap[el.priority].insert(key);
return el.value;
}
// Evict one element with the lowest live priority, discarding stale heap
// entries (left behind by detach()) along the way.
void remove()
{
while (!mPriorities.empty())
{
const uint32_t lowestPrio = mPriorities.top();
auto bucketIt = mPriorityMap.find(lowestPrio);
if (bucketIt == mPriorityMap.end() || bucketIt->second.empty())
{
mPriorities.pop(); // stale entry: nothing lives at this priority
continue;
}
auto keyIt = bucketIt->second.begin();
mData.erase(*keyIt);
bucketIt->second.erase(keyIt);
if (bucketIt->second.empty())
{
mPriorityMap.erase(bucketIt);
mPriorities.pop();
}
return;
}
}
private:
tKeyFunction mF;                             // priority generator
std::unordered_map<key_type, Element> mData; // key -> payload + bookkeeping
std::priority_queue<uint32_t, std::vector<uint32_t>, std::greater<uint32_t>> mPriorities; // min-heap of priorities (may contain stale entries)
std::unordered_map<uint32_t, std::unordered_set<key_type>> mPriorityMap; // priority -> keys currently at that priority
};
You can do Mode in O(1), if you use two hash tables, one from value to count and one from count to values. See my solution below.
- popescu.af, January 23, 2015 — As the previous comment says, timestamps should be used instead of elapsed time.
The timestamp function is strictly increasing, thus a Min Priority Queue is enough to tell us
how to remove elements in O(1).
We also need a hashtable, for storing the data and retrieving it from the outside.
Here is my solution, in C++11. Unfortunately, I could not find something better than O(N^2) yet.
std::string findMissingNumber(const std::string &str)
{
// Increment a decimal digit string by one (handles the "999" -> "1000" growth).
auto increment = [](std::string digits) -> std::string
{
for (auto rit = digits.rbegin(); rit != digits.rend(); ++rit)
{
if (*rit != '9')
{
++(*rit);
return digits;
}
*rit = '0';
}
digits.insert(digits.begin(), '1');
return digits;
};
// Try to read str as consecutive increasing numbers whose first element has
// `width` digits; at most one number may be skipped. Returns the skipped
// number, or "" when this interpretation of str fails.
auto scan = [&increment, &str](size_t width) -> std::string
{
std::string missing;
std::string current = str.substr(0, width);
size_t pos = width;
while (pos < str.length())
{
std::string expected = increment(current);
if (pos + expected.length() > str.length())
return "";
current = str.substr(pos, expected.length());
if (current != expected)
{
if (!missing.empty())
return ""; // a second gap: not a valid sequence
std::string afterGap = increment(expected); // allow exactly one skipped value
if (pos + afterGap.length() > str.length())
return "";
current = str.substr(pos, afterGap.length());
if (current != afterGap)
return "";
missing = expected;
}
pos += current.length();
}
return missing;
};
// The first number cannot have more than half of the string's digits,
// so try widths from len/2 down to 1.
for (size_t width = str.length() / 2; width > 0; --width)
{
std::string candidate = scan(width);
if (!candidate.empty())
return candidate;
}
return "";
}
Hi,
This seems to be the complete answer, in my opinion.
By using hashing to split the original input into chunks, it is guaranteed that all the duplicates fit into the same chunk, thus any URL that is unique within its chunk is also globally unique.
template<typename value_type>
class DS
{
std::list<value_type> mData;                                                  // values in insertion order (FIFO window)
std::unordered_map<value_type, size_t> mValueToCountMap;                      // value -> number of occurrences
std::unordered_map<size_t, std::unordered_set<value_type>> mCountToValuesMap; // occurrence count -> values at that count
size_t mBestCount;                                                            // highest live occurrence count (0 when empty)
public:
DS()
: mData()
, mValueToCountMap()
, mCountToValuesMap()
, mBestCount(0)
{ }
// Append a value to the window and update the mode bookkeeping. O(1) expected.
void insert(const value_type &value)
{
mData.push_back(value);
auto foundIt = mValueToCountMap.find(value);
if (foundIt == mValueToCountMap.end())
{
mValueToCountMap[value] = 1;
mCountToValuesMap[1].insert(value);
if (mBestCount == 0)
mBestCount = 1;
}
else
{
size_t &count = foundIt->second;
// Move the value from its old count bucket to the next one.
mCountToValuesMap[count].erase(value);
if (mCountToValuesMap[count].empty())
mCountToValuesMap.erase(count);
++count;
mCountToValuesMap[count].insert(value);
if (count > mBestCount)
mBestCount = count;
}
}
// Remove the oldest value from the window. O(1) expected. No-op when empty.
void erase()
{
if (mData.empty())
return;
const value_type value = mData.front(); // copy: the node is popped below
auto countIt = mValueToCountMap.find(value);
size_t &count = countIt->second;
auto &bucket = mCountToValuesMap[count];
bucket.erase(value);
if (bucket.empty())
{
// Always drop an emptied bucket (the original leaked it whenever
// count != mBestCount). When it held the best count, the new best is
// count - 1: the removed value is reinserted there below, or the
// structure is now empty and mBestCount becomes 0.
const bool wasBest = (count == mBestCount);
mCountToValuesMap.erase(count);
if (wasBest)
--mBestCount;
}
if (--count != 0)
mCountToValuesMap[count].insert(value);
else
mValueToCountMap.erase(countIt);
mData.pop_front();
}
// The set of most frequent values. Returns an empty set when no data is
// present (the original dereferenced an end() iterator in that case: UB).
const std::unordered_set<value_type> & mode() const
{
static const std::unordered_set<value_type> kEmptySet;
auto foundIt = mCountToValuesMap.find(mBestCount);
return foundIt != mCountToValuesMap.end() ? foundIt->second : kEmptySet;
}
};
Hello,
You can use a hash table (unordered_map) for unique URLs found so far and another hash table (unordered_set) for those which were repeated.
The table for unique URLs would translate from URL value to its index in the file.
Then you would need to do a run on the whole data set to fill the two tables and another run on the unique URL table to extract the entry that has the smallest index.
Space complexity would be O(N), each entry having (AVG URL length + sizeof(int)) bytes.
Time complexity would be O(N) overall (two linear passes).
Distributed approach: by using a distributed storage, you don't have to hold the whole file into memory, nor the entire hash tables.
Of course, the IO / Network overhead would slow down the process considerably.
Actually, you can reverse the list in O(N) time, I apologize.
- popescu.af January 24, 2015