Interview Question
Software Engineers | Country: United States
Interview Type: Phone Interview
def split_fields(line):
    """Split one line of comma-separated text into its fields.

    A double quote toggles "inside quotes" mode and is not copied to the
    output. A comma ends the current field only when it appears outside
    quotes; inside quotes it is kept as data. Whitespace is preserved, so
    'a, b' yields ['a', ' b'].

    Args:
        line: the text of a single line (no line terminator handling is done).

    Returns:
        A list of field strings; always contains at least one element
        (an empty line yields ['']).
    """
    fields = []
    elem = ""
    in_quotes = False
    for ch in line:
        if ch == '"':
            in_quotes = not in_quotes
        elif ch == ',' and not in_quotes:
            fields.append(elem)
            elem = ""
        else:
            # Ordinary character, or a comma protected by quotes.
            elem += ch
    fields.append(elem)
    return fields


# e.g. input
# NOTE: `input` shadows the builtin of the same name; the name is kept from
# the original snippet.
input = "a, \"b, x\", c, \"q, p, r\", n, t"
for x in input.split('\n'):
    fields = split_fields(x)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    for elem in fields[:-1]:
        print(elem)
    # The last field of each line is followed by an extra blank line.
    print(fields[-1] + "\n")
elem = ""
# Splits a single CSV record into fields using a two-pointer scan over its
# characters.
#
# Rules implemented:
# * fields are separated by commas;
# * a double-quoted section may contain commas, which are kept as data;
# * inside a quoted section, two consecutive quotes ("") stand for one
#   literal quote character;
# * empty fields are returned as empty strings; empty input yields no fields;
# * whitespace is preserved as-is, line terminators get no special treatment,
#   and an unterminated quote simply runs to the end of the input.
#
# Usage: build the parser, call #run, then read #elements.
class CSVParser
  attr_accessor :str, :strt_ptr, :elements, :cur_ptr, :partial_word, :open_quotes

  # @param str [String] the CSV record to parse
  def initialize(str)
    @str = str.chars
    @elements = []
    @strt_ptr = 0      # first character of the not-yet-collected text
    @cur_ptr = 0       # scan position
    @partial_word = "" # text of the current field collected so far
    @open_quotes = false
  end

  # Parses the whole input and stores the fields in #elements.
  #
  # Calling it again after a completed parse is a no-op.
  #
  # @return [Array<String>] the parsed fields (same object as #elements)
  def run
    # Empty input has no fields; a scan pointer past the end means the final
    # field was already flushed by an earlier call.
    return @elements if @str.empty? || @cur_ptr > @str.size

    while @cur_ptr < @str.size
      # Inside quotes a comma is plain data, so only a quote can stop the scan.
      slide(@open_quotes ? ['"'] : [',', '"'])
      case @str[@cur_ptr]
      when '"' then handle_quote
      when ',' then handle_comma
      end
    end
    handle_comma # flush the last field (it has no trailing comma)
    @elements
  end

  # Ends the current field at the scan position: appends it to #elements and
  # moves both pointers just past the separator. Also used at end of input,
  # where it closes any unterminated quote.
  def handle_comma
    @elements << build_word(@strt_ptr, @cur_ptr, @partial_word)
    @partial_word = ""
    @open_quotes = false
    @cur_ptr += 1
    @strt_ptr = @cur_ptr
  end

  # Handles the quote character at the scan position. Text seen since the
  # last pointer reset is first moved into #partial_word so that nothing
  # before the quote is lost.
  def handle_quote
    @partial_word = build_word(@strt_ptr, @cur_ptr, @partial_word)
    if @open_quotes && @str[@cur_ptr + 1] == '"'
      # Escaped quote ("") inside a quoted section: keep one literal quote.
      @partial_word += '"'
      @cur_ptr += 2
    else
      # Opening or closing quote: switch mode and drop the quote itself.
      @open_quotes = !@open_quotes
      @cur_ptr += 1
    end
    @strt_ptr = @cur_ptr
  end

  private

  # Returns init followed by the characters in positions s...e (e exclusive).
  # An empty or out-of-range span contributes nothing.
  def build_word(s, e, init)
    init + Array(@str[s...e]).join
  end

  # Advances the scan position until it rests on one of the delimiters or
  # reaches the end of the input.
  def slide(delimiters)
    @cur_ptr += 1 while @cur_ptr < @str.size && !delimiters.include?(@str[@cur_ptr])
  end
end
# Demo: parse a sample record whose middle field is quoted and contains
# doubled quotes, then print each parsed field on its own line.
sample_record = "abc,\"d\"\"e\"\"f\",efc"
csv_parser = CSVParser.new(sample_record).tap(&:run)
puts(csv_parser.elements)
This takes care of all required CSV scenarios. It uses "yield return" to hand each line back to the caller as it is needed, taking only as much space as needed.
The input is an IEnumerable<char>, so the function can parse the CSV data as it is being loaded; there is no need to load the entire file into memory as a string before parsing starts. The same goes for the caller: only the required data is loaded from the file, no matter its size.
This provides a minimal memory footprint which will be at most a single line of the CSV file.
The memory footprint could be optimized even further by parsing one token at a time, but because lines don't tend to be very big, I did not bother.
- Nelson Perez February 27, 2015