Amazon Interview Question
Software Engineer / DevelopersCountry: United States
Interview Type: Phone Interview
You probably needed to clarify with questions like:
What does it mean to be efficient? What does it mean by in place?
Does it mean with a low memory profile, only write the file once or really fast?
Because depending on the response you should design the algorithm.
Low memory profile in place:
Might mean write directly at another file and replace the original one once done.
No additional memory than the array then perform just swaps in memory.
Really fast might mean just load into an array in and write in reverse order.
In place probably means you cannot copy data into temp memory and then write to file again..
O(n) space and time
But the big O notation here doens't really matter, the I/O and memory allocation is what matters. In this case, it uses optimal block size to reverse any type of file.
The code is a bit complicated, but it is safe and robust. But to be honest, I doubt that the engineer would require that much...
Also, it only runs on Unix, of course. Why would you even consider having something else? :)
#include <stdio.h>
#include <sys/stat.h>
#include <fcntl.h>
inline void swap(unsigned char *a, unsigned char *b)
{
unsigned char tmp = *b;
*b = *a;
*a = tmp;
}
void reverse_file(int fd, size_t file_size, size_t buffer_size)
{
if (file_size <= buffer_size) {
buffer_size = file_size;
unsigned char buffer[buffer_size];
size_t i, io_size = 0;
lseek(fd, 0, SEEK_SET);
do {
io_size += read(fd, buffer + io_size, buffer_size - io_size);
} while (io_size < buffer_size);
for (i = 0; i < buffer_size / 2; ++i)
swap(&buffer[i], &buffer[buffer_size - 1 - i]);
lseek(fd, 0, SEEK_SET);
do {
io_size += write(fd, buffer + io_size, buffer_size - io_size);
} while (io_size < buffer_size);
} else {
size_t total_io_size = 0;
/* make sure buffer_size is at maximum half of the file */
buffer_size = buffer_size > (file_size / 2) ? file_size / 2 : buffer_size;
unsigned char buffer1[buffer_size], buffer2[buffer_size];
do {
size_t i, io_size = 0;
lseek(fd, total_io_size, SEEK_SET);
io_size = 0;
do {
io_size += read(fd, buffer1 + io_size, buffer_size - io_size);
} while (io_size < buffer_size);
lseek(fd, file_size - buffer_size - total_io_size, SEEK_SET);
io_size = 0;
do {
io_size += read(fd, buffer2 + io_size, buffer_size - io_size);
} while (io_size < buffer_size);
for (i = 0; i < buffer_size / 2; ++i) {
swap(&buffer1[i], &buffer1[buffer_size - 1 - i]);
swap(&buffer2[i], &buffer2[buffer_size - 1 - i]);
}
lseek(fd, total_io_size, SEEK_SET);
io_size = 0;
do {
io_size += write(fd, buffer2 + io_size, buffer_size - io_size);
} while (io_size < buffer_size);
lseek(fd, file_size - buffer_size - total_io_size, SEEK_SET);
io_size = 0;
do {
io_size += write(fd, buffer1 + io_size, buffer_size - io_size);
} while (io_size < buffer_size);
total_io_size += buffer_size;
} while (total_io_size < file_size / 2);
}
}
int main(int argc, char *argv[])
{
char *file = argv[1];
struct stat st;
size_t file_size, buffer_size;
int fd;
fd = open(file, O_RDWR);
fstat(fd, &st);
reverse_file(fd, st.st_size, st.st_blksize);
close(fd);
return 0;
}
#include <stdio.h>
#include <stdlib.h>
int main()
{
FILE* fptr = fopen("input.txt","r+");
fseek(fptr,0,SEEK_END);
int sizeOfFile = ftell(fptr);
fseek(fptr,0,SEEK_SET);
sizeOfFile--; // to eliminate EOF
char *buff = (char*)malloc(sizeOfFile);
//read file to buffer
int result = fread(buff,1,sizeOfFile,fptr);
buff[sizeOfFile]=NULL;
//reverse buffer
int i;
for(i=0;i< sizeOfFile/2;i++){
char tmp = buff[i];
buff[i] = buff[sizeOfFile - i - 1];
buff[sizeOfFile - i - 1] = tmp;
}
//write to file
fseek(fptr,0,SEEK_SET);
result = fwrite(buff,1,sizeOfFile,fptr);
fclose(fptr);
return 0;
}
Clearly we cannot swap first character for last character, etc. Way too much I/O consumption (reading from disk is wayyy slow).
What we can do is read in a block B_f of characters from the front of the file and a block of the same size B_e from the end of the file.
Then, reverse B_f and reverse B_e. Now, swap the blocks. Now continue this process with block reversing and swapping like we would have done with character swaps until you reach the center of the file.
abcd
B_f = ab, B_e = cd
Rev(B_f) = ba, Rev(B_e) = dc
Swap(B_f, B_e) => dcba
Thoughts???
I am not sure about the requirement but following programme will solve our purpose:
private void reverseFile(String inputFile, String outputFile){
try {
PrintWriter printWriter = new PrintWriter(outputFile);
FileReader fileReader = new FileReader(new File(inputFile));
BufferedReader bufferedReader = new BufferedReader(fileReader);
String line = null;
while((line = bufferedReader.readLine())!=null){
StringBuffer buffer = new StringBuffer(line);
buffer=buffer.reverse();
String rs=buffer.toString();
printWriter.write(rs);
printWriter.write("\n");
}
bufferedReader.close();
printWriter.flush();
printWriter.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
I think they want us to read from both ends, swap and replace these lines in the same file. Your approach creates additional file hence it is not in place.
C++ solution using only FILE-API. It can be adjusted to use at most n bytes of extra memory
class MyFILE {
public:
MyFILE(const char* fname)
: f_(fopen(fname, "r+"))
{}
~MyFILE() {
fclose(f_);
}
int Size() {
fseek(f_, 0, SEEK_END);
return ftell(f_);
}
void ReadAt(int pos, int len, uint8_t *v) {
fseek(f_, pos, SEEK_SET);
fread(&v[0], 1, len, f_);
}
void WriteAt(int pos, int len, const uint8_t *v) {
fseek(f_, pos, SEEK_SET);
fwrite(&v[0], 1, len, f_);
}
private:
FILE *f_;
};
void ReverseFileInplace(const char *fname, const int max_bufsize) {
int bufsize = max_bufsize/2;
vector<uint8_t> buf(2*bufsize, 0);
MyFILE f(fname);
int size = f.Size();
int b = 0;
int e = size-bufsize;
while (b+bufsize <= e) {
f.ReadAt(b, bufsize, &buf[0]);
f.ReadAt(e, bufsize, &buf[bufsize]);
reverse(buf.begin(), buf.end());
f.WriteAt(e, bufsize, &buf[bufsize]);
f.WriteAt(b, bufsize, &buf[0]);
b += bufsize;
e -= bufsize;
}
int rest = e+bufsize-b;
f.ReadAt(b, rest, &buf[0]);
reverse(buf.begin(), buf.begin()+rest);
f.WriteAt(b, rest, &buf[0]);
}
Can you elaborate that? It does exactly the same amount of I/O as your solution if called with 2*st_blksize and has a lot less unneeded complexity
I got confused, your I/O is fine if called with the right block size, altough you are not specifing the optiomal size. Also you are always dividing it by half of the file size, which is not optimal if the file size is smaller then the block size or if it is not divisible by the block size.
Some observations:
* The OOP API can be worked out more. You should use STL like implementation, itarators and operators.
* Using a vector<> for that is not optimal since you don't need it. Use array instead. Altough some compilers generate std::vector assembly as arrays when possible, still I wouldn't recommend for best portabilty and performance.
* For C++ implementation fopen family fucntions (C lib std) is not recommented. Use fstreams instead.
* You fstat(), or any OS system call, instead of ftell() for file size.
* The implementation is not robust.
* &v[0] == v
But for an interview for a Jr position should be fine.
Good luck.
#include<stdio.h>
#include<string.h>
int main()
{
FILE *fw;
FILE *fr;
char a[]="HiCarrerCup";
char b,e,t;
int i;
int l;
l = strlen(a);
fw = fopen("myfile","w");
fwrite(a,1,sizeof(a),fw);
fflush(fw);
fr=fopen("my","r");
for(i=0;i<l/2;i++)
{
fseek(fr,i,SEEK_SET);
fread(&b,1,1,fr);
printf(" read char is [%c] \n",b);
fseek(fr,l-i-1,SEEK_SET);
fread(&e,1,1,fr);
printf(" read char is [%c] \n",e);
fseek(fw,i,SEEK_SET);
fwrite(&e,1,sizeof(e),fw);
//fflush(fw);
fseek(fw,l-i-1,SEEK_SET);
//fflush(fw);
fwrite(&b,1,sizeof(b),fw);
}
fclose(fw);
fclose(fr);
As this is a file then reading 1 character is really make IO consumption too high
better is take a good buffer (especially equal to sector size/or divisible of sector size i.e. multiple of 1024 byte, nowadays 4096 is best) say this as BUFFSIZE
This is a general idea. Hope it will help
- Bharat Kumar Arya January 11, 2015