Amazon Interview Question
Country: United States
Interview Type: Phone Interview
WITH stud as
(SELECT student_id, department, start_date, row_number() over(partition by student_id order by start_date) as start_rank from student)
SELECT a.student_id, a.department, a.start_date, b.start_date as end_date FROM stud a
INNER JOIN stud b ON (a.student_id = b.student_id AND a.start_rank = b.start_rank-1);
with cte_student as
(
select student_id, department, start_date, rank() over (partition by student_id order by Start_date) as 'Ranking' from Student
)
select a.student_id, a.department, a.start_date, b.start_date as End_Date from cte_student a
join cte_student b on a.student_id = b.student_id
and a.Ranking = b.ranking - 1
I tested Mahi's answer and Sudarsan's answer, but results are not correct.
here is my SQL, tested correct.
Select a.student_id, a.department_id, a.start_dt , b.start_dt as End_Date
From student a left join student b on a.student_id = b.student_id
and b.start_dt = (select min(start_dt) from student c where c.student_id = a.student_id and c.start_dt > a.start_dt )
order by 1,3
WITH NEW_CASE AS (SELECT STUDENT_ID, DEPARTMENT,START_DATE, RANK () OVER (ORDER BY START_DATE) AS RANK1 FROM STUDENT)
SELECT
A.STUDENT_ID,
A.DEPARTMENT,
A.START_DATE,
B.START_DATE
FROM
(SELECT STUDENT_ID, DEPARTMENT,START_DATE, RANK () OVER (ORDER BY START_DATE) AS RANK1 FROM STUDENT) A,
NEW_CASE B
WHERE
A.STUDENT_ID = B.STUDENT_ID (+) AND A.RANK1+1 = B.RANK1 (+)
WITH NEW_CASE AS (SELECT STUDENT_ID, DEPARTMENT,START_DATE, RANK () OVER (ORDER BY START_DATE) AS RANK1 FROM STUDENT)
SELECT
A.STUDENT_ID,
A.DEPARTMENT,
A.START_DATE,
B.START_DATE
FROM
(SELECT STUDENT_ID, DEPARTMENT,START_DATE, RANK () OVER (ORDER BY START_DATE) AS RANK1 FROM STUDENT) A,
NEW_CASE B
WHERE
A.STUDENT_ID = B.STUDENT_ID (+) AND A.RANK1+1 = B.RANK1 (+)
It is important to not to forget to include the current department in the output. Follwoing are the three solutions:
--Using rank()
with stud as
(
select student_id, department, start_date,
rank() over (partition by student_id order by Start_date) as ranking from Student
)
select a.student_id, a.department, a.start_date, b.start_date as End_Date from stud a
left join stud b on a.student_id = b.student_id
and a.Ranking = b.ranking - 1
--Using row_number()
WITH stud as
(SELECT student_id, department, start_date, row_number() over(partition by student_id order by start_date) as start_rank from student)
SELECT a.student_id, a.department, a.start_date, b.start_date as end_date FROM stud a
LEFT JOIN stud b ON (a.student_id = b.student_id AND a.start_rank = b.start_rank-1);
--Using aggregate function
SELECT student_id,
department,
start_date,
LEAD(START_DATE) OVER (partition by student_id ORDER BY department) END_DATE
FROM STUDENT
With the Sql Server's Lead function:
SELECT StudentID, Department, StartDate, LEAD(StartDate) OVER (ORDER BY StartDate) EndDate FROM Student
Without any function.
SELECT a.studentid, a.department, a.startdate,
(SELECT TOP 1 b.startdate FROM student b WHERE a.startdate < b.startdate
ORDER BY b.startdate) AS EndDate
FROM student a
This a solution using sqlite3 on python so the queries can be checked
import sqlite3
SQL_QUERY = '''
SELECT a.student_id, a.department_id, a.start_dt , b.start_dt as End_Date
FROM student a
LEFT JOIN student b ON a.student_id = b.student_id
WHERE b.start_dt = (SELECT min(start_dt) FROM student c WHERE c.student_id = a.student_id AND c.start_dt > a.start_dt )
ORDER BY 1,3;
'''
conn = sqlite3.connect(':memory:')
c = conn.cursor()
c.execute('''CREATE TABLE Student
(student_id int, department_id text, start_dt text)''')
c.execute("INSERT INTO Student VALUES ('1', 'A', '2017-01-1')")
c.execute("INSERT INTO Student VALUES ('1', 'B', '2017-07-1')")
c.execute("INSERT INTO Student VALUES ('1', 'C', '2017-12-1')")
conn.commit()
for row in c.execute(SQL_QUERY):
print row
conn.close()
There are quite a few wrong answers on here so just be cautious. It helps if you can run a few test cases w/ real sql to verify. Here are two simple validated answers (one w/ window function, one without):
1.
select *, lead(start_date) over (partition by student_id order by start_date) as end_date
from student
2.
select s.*,
(select min(start_date) from student where student_id=s.student_id and start_date > s.start_date)
from student s
I dont think we need to order by or partition by any columns because the question says that they need start_date of the next record as end_date of first record irrespective of its value.
This is what I came up with -
with lead function -
select student_id,department,start_date,lead(start_date,1) over() as "End_date" from student;
without lead function -
SELECT a.student_id, a.department, a.start_date,
(SELECT b.start_date FROM student b WHERE a.start_date < b.start_date
ORDER BY b.start_date limit 1) AS End_Date
FROM student a
Please let me know if this seems correct.
select q1.*, q2.start_date as end_date from
(select row_number() over (partition by t1.student_id order by t1.start_date) as int_id,* from student t1)q1
left join
(select row_number() over (partition by t1.student_id order by t1.start_date) as int_id,* from student t1)q2
on
q1.int_id =q2.int_id-1
and q1.student_id = q2.student_id
select q1.*, q2.start_date as end_date from
(select row_number() over (partition by t1.student_id order by t1.start_date) as int_id,* from student t1)q1
left join
(select row_number() over (partition by t1.student_id order by t1.start_date) as int_id,* from student t1)q2
on
q1.int_id =q2.int_id-1
and q1.student_id = q2.student_id
SELECT student_id,
- sudarsan March 15, 2017department,
start_date,
LEAD(START_DATE) OVER (ORDER BY STUDENT_ID) END_DATE
FROM STUDENT