Amazon Interview Question
Country: United States
Interview Type: Phone Interview
@ChrisK, you forgot to add the message to the msgQueue, other than that I think you nailed it.
Here's the Java code for the same solution.
public static void checkFast(Message[] messages) {
Map<Long, Message> msgByUserId = new HashMap<>();
Queue<Message> pq = new LinkedList<>();
for ( Message m : messages ) {
Message found = msgByUserId.get(m.getUserId());
if ( found != null && found.getTimestamp() >= (m.getTimestamp()-1000) ) {
too_fast(found, m);
}
while ( !pq.isEmpty() && pq.peek().getTimestamp() < (m.getTimestamp()-1000) ) {
Message popped = pq.poll();
/* make sure we don't remove from the map a message with same userId but different timestamp */
if ( msgByUserId.get(popped.getUserId()).equals(popped) ) {
msgByUserId.remove(popped.getUserId());
}
}
pq.offer(m);
msgByUserId.put(m.getUserId(), m);
}
}
#include <unordered_map>
#include <deque>
#include <thread>
#include <chrono>
struct Message
{
uint64_t timestamp;
int user_id, container_id, item_id;
bool success;
};
typedef std::shared_ptr<Message> MessagePtr;
typedef std::deque< MessagePtr > MessageQue_t;
typedef std::unordered_map<int, MessagePtr> UserHash_t;
typedef std::unordered_map<int, MessagePtr>::iterator UserHashIter_t;
MessageQue_t MessageQue;
uint64_t milliseconds_now();
void too_fast(MessagePtr oldMsg, MessagePtr newMsg) {}
/*
* Scan queue, assuming that messages are appended externally
*/
void FindShortIntervals()
{
using namespace std::chrono_literals;
while (true)
{
UserHash_t userMessages;
uint64_t ts = milliseconds_now() - 1000;
// scan queueu tail backwards
for (MessageQue_t::reverse_iterator iter(MessageQue.rbegin()); iter != MessageQue.rend(); ++iter)
{
// scan for messages added during last second only,
// considering prev messages already reported
if ((*iter)->timestamp < ts)
break;
UserHashIter_t userIt = userMessages.find((*iter)->user_id);
if (userIt == userMessages.end())
{ // new user
userMessages.insert(UserHash_t::value_type((*iter)->user_id, *iter) );
}
else
{
if (userIt->second->timestamp > (*iter)->timestamp - 1000)
{
too_fast(userIt->second, *iter);
userIt->second = *iter;
}
}
}
// sleep for next loop
std::this_thread::sleep_for(1000ms);
}
}
package examples;
class Message
{
int container_id;
public int getContainer_id() {
return container_id;
}
public int getItem_id() {
return item_id;
}
public boolean isSuccess() {
return success;
}
public long getTimestamp() {
return timestamp;
}
public int getUserId() {
return userId;
}
int item_id;
boolean success;
long timestamp;
int userId;
Message(int container_id, int item_id, boolean success, long timestamp, int userId)
{
this.container_id = container_id;
this.item_id = item_id;
this.success = success;
this.timestamp = timestamp;
this.userId = userId;
}
}
public class Messages {
public static void main(String[] args )
{
Message[] msgArr = new Message[5];
msgArr[0] = new Message(123,456,true,1499351653,789);
msgArr[1] = new Message(111,222,false,1499351654,333);
msgArr[2] = new Message(444,555,true,1499351654,789);
msgArr[3] = new Message(123,456,true,1499351655,999);
msgArr[4] = new Message(123,456,true,1499351655,789);
Messages msg = new Messages();
msg.iterateMessages(msgArr);
}
public void too_fast(int msgId, int userId)
{
System.out.println("Message "+msgId+" too fast for UserId " +userId);
}
public void iterateMessages(Message[] msgArr)
{
for(int i=0;i<5;i++)
{
for (int j=i+1;j<5;j++)
{
if(msgArr[i].getUserId() == msgArr[j].getUserId() && msgArr[j].getTimestamp()-msgArr[i].getTimestamp() <=1)
{
too_fast(j+1,msgArr[j].userId);
}
}
}
}
}
package examples;
class Message
{
int container_id;
public int getContainer_id() {
return container_id;
}
public int getItem_id() {
return item_id;
}
public boolean isSuccess() {
return success;
}
public long getTimestamp() {
return timestamp;
}
public int getUserId() {
return userId;
}
int item_id;
boolean success;
long timestamp;
int userId;
Message(int container_id, int item_id, boolean success, long timestamp, int userId)
{
this.container_id = container_id;
this.item_id = item_id;
this.success = success;
this.timestamp = timestamp;
this.userId = userId;
}
}
public class Messages {
public static void main(String[] args )
{
Message[] msgArr = new Message[5];
msgArr[0] = new Message(123,456,true,1499351653,789);
msgArr[1] = new Message(111,222,false,1499351654,333);
msgArr[2] = new Message(444,555,true,1499351654,789);
msgArr[3] = new Message(123,456,true,1499351655,999);
msgArr[4] = new Message(123,456,true,1499351655,789);
Messages msg = new Messages();
msg.iterateMessages(msgArr);
}
public void too_fast(int msgId, int userId)
{
System.out.println("Message "+msgId+" too fast for UserId " +userId);
}
public void iterateMessages(Message[] msgArr)
{
for(int i=0;i<5;i++)
{
for (int j=i+1;j<5;j++)
{
if(msgArr[i].getUserId() == msgArr[j].getUserId() && msgArr[j].getTimestamp()-msgArr[i].getTimestamp() <=1)
{
too_fast(j+1,msgArr[j].userId);
}
}
}
}
}
@ChrisK:
approach 2:
===
approach 1 is fine, it works, but the HT will eventually grow large, consuming a lot of memory, whereas we actually only need the messages from unique user_ids from the last second, which I assume, is considerably less than the set of users over a longer period (mustn't be, but for most system it's a good assumption).
===
You were there, but then the question is why don't you expire the hashtable entries? :-)
That is, of course another cleaner, meaner alternative approach.
Now the algorithm looks like:
def fire_func( iterator ){
map = dict()
while ( iterator.hasNext ){
item = iterator.next()
if ( item.user_id @ map ){
// i found it in map, so check old time
diff = item.timestamp - map[user_id].timestamp
if ( diff <= critical ){
// that is the requirement anyways
too_fast( map[user_id], item )
}
}
// new entry, store new time - always
map[user_id] = item
// now, expire items in the map ?
cur_time = int( time() )
// a base delay beyond which message will not come
critical_diff = diff * 10
map = dict ( map ) as {
continue( cur_time - $.value.timestamp > critical_diff )
$.o // else, preserve
}
}
}
assumtions:
1) timestamp is an integer, with 1 ms resolution, large enough it won't overflow
2) if there are more than two messages a second per user id, e.g. with timestamps 100, 101, 102, it's sufficient to call too_fast(100, 101) and too_fast(101, 102) and leave out too_fast(100, 102)
approach 1:
for each message, get the unique user id user_id, put it into a hash table (user_id as key, message as value). Before putting it in, check if there is already a message stored for this user, if so compare the times, if the two are within 1 second, call too_fast
approach 2:
approach 1 is fine, it works, but the HT will eventually grow large, consuming a lot of memory, whereas we actually only need the messages from unique user_ids from the last second, which I assume, is considerably less than the set of users over a longer periode (mustn't be, but for most system it's a good assumption). Therefore I would keep a queue which stores user_id's and times and I would constantly check if I can remove an element from the queue because it's "expired" and if so, I would check if I can remove from the HT because it may be there is a younger message already in the HT for the same user...
in c++
- Chris July 25, 2017