I am iterating through the lines of a file using Node.js with CoffeeScript and the following function:
# Calls `func` with every line of `stream`, one at a time, as chunks arrive.
# Each line is passed as a string that still carries its trailing '\n'; a
# final line without one is delivered when the stream ends.
#
# stream - readable stream that emits Buffer chunks via 'data', then 'end'.
# func   - called once per line, in order.
#
# Returns undefined.
each_line_in = (stream, func) ->
  # Buffer pieces of the line currently being assembled; a line may span
  # several chunks.
  previous = []

  emit = ->
    # Concatenate the raw bytes before decoding so that a multi-byte character
    # split across two chunks is not corrupted.
    func Buffer.concat(previous).toString()
    previous = []
    return

  # Listeners are attached immediately (not from inside an async callback) so
  # no early chunk can be missed.
  stream.on 'data', (d) ->
    start = 0
    for c, i in d when c is 10   # 10 is the byte value of '\n'
      previous.push d.slice(start, i + 1)
      emit()
      start = i + 1
    # Keep the unterminated tail of this chunk for the next one.
    previous.push d.slice(start) if start isnt d.length
    return

  stream.on 'end', ->
    # Flush a last line that has no trailing newline.
    emit() if previous.length
    return

  return
Is there a better way to do this without reading the entire file into memory? By "better" I mean more succinct, built into Node.js, faster, or more correct. If I were writing Python, I would do something like this:
def each_line_in(file_obj, func):
    """Call ``func`` once for every line yielded by iterating ``file_obj``.

    Lines are consumed lazily, one at a time, and passed to ``func`` exactly
    as the iterator produced them (for a text file this includes the trailing
    newline). Return values of ``func`` are discarded; the function itself
    returns ``None``.
    """
    # A plain loop rather than a list comprehension: the comprehension built a
    # throwaway list with one entry per line just to trigger side effects.
    for line in file_obj:
        func(line)
I saw this question, which uses Peteris Krumins' "lazy" module, but I would like to accomplish this without adding an external dependency.
Here's a fairly efficient approach:
# Reads the file at `filePath` in fixed-size blocks and calls `func` once per
# line (without the trailing '\n'), holding only one block plus the current
# partial line in memory.
#
# filePath - path of the file to read; opened synchronously, read asynchronously.
# func     - called with each line as a string, in file order. A final line
#            that is not newline-terminated is delivered too.
#
# Returns undefined. A read error is thrown from the read callback after the
# file descriptor has been closed.
eachLineIn = (filePath, func) ->
  {StringDecoder} = require 'string_decoder'

  blockSize = 4096
  buffer = Buffer.alloc blockSize
  # The decoder holds back a multi-byte character that is split across two
  # blocks instead of decoding each half into a replacement character.
  decoder = new StringDecoder 'utf8'
  fd = fs.openSync filePath, 'r'
  lastLine = ''

  # Every read uses a null position, which reads from (and advances) the
  # descriptor's current offset. An explicit integer position leaves the
  # offset untouched, so mixing the two would re-read the first block.
  readNext = ->
    fs.read fd, buffer, 0, blockSize, null, callback
    return

  callback = (err, bytesRead) ->
    if err
      fs.closeSync fd
      throw err

    if bytesRead is 0
      # End of file: release the descriptor, flush the decoder, and emit the
      # last line if the file did not end with a newline.
      fs.closeSync fd
      lastLine += decoder.end()
      func lastLine if lastLine
      return

    # Decode the block before scheduling the next read, which reuses `buffer`.
    text = lastLine + decoder.write(buffer.subarray(0, bytesRead))
    [completeLines..., lastLine] = text.split '\n'
    func(line) for line in completeLines
    readNext()
    return

  readNext()
  return
You should benchmark this on your hardware and OS to find the optimal value of blockSize for large files.
Note that this assumes that file lines are divided by \n only. If you're not sure what your files use, you should use a regex for split, e.g.:
.split(/\r\n|\r|\n/)
If you love us, you can donate via PayPal or buy us a coffee so we can maintain and grow the site. Thank you!
Donate Us With