Re: node.jsで行処理

これ。node.jsで行処理 - NullPointer's Blog
リンク先の実装は fs.createReadStream のインスタンスのラッパー。
これって fs.createReadStream を拡張した形で実装できないかなと思って実装試しました。結果、**イケてません**。

each-line.js

'use strict';
var fs = require('fs');

/**
 * 'data' handler: splits incoming text on the configured separator and
 * re-emits every complete line on the wrapped stream as a 'line' event with
 * the arguments (line, lineIndex).
 *
 * Must be invoked with `this` bound to the per-stream state object
 * ({ buf, lineLength, spliter, readStream }).
 *
 * The stream is intentionally not paused/resumed around the loop: the loop is
 * fully synchronous, so no other chunk can arrive while it runs, and an
 * unconditional resume() afterwards would undo a pause() issued by a 'line'
 * listener (breaking consumer backpressure).
 *
 * @param {string} chunk - Decoded text read from the stream.
 */
function onData (chunk) {
    // Prepend the unterminated remainder left over from the previous chunk.
    var text  = this.buf ? this.buf + chunk : chunk;
    var lines = text.split(this.spliter);

    // The last piece has not been terminated by a separator yet; keep it
    // until more data (or 'end') arrives. split() always yields at least one
    // element, so pop() never returns undefined here.
    this.buf = lines.pop();

    for (var i = 0; i < lines.length; i++) {
        this.readStream.emit('line', lines[i], this.lineLength);
        this.lineLength++;
    }
}
/**
 * 'end' handler: flushes whatever text is still buffered (a final line that
 * was not terminated by the separator) as one last 'line' event, then clears
 * the buffer.
 *
 * Must be invoked with `this` bound to the per-stream state object
 * ({ buf, lineLength, readStream }). The line counter is left untouched.
 */
function onEnd () {
    var remainder = this.buf;
    if (remainder) {
        this.readStream.emit('line', remainder, this.lineLength);
    }
    this.buf = '';
}

/**
 * Extends a readable stream so that it additionally emits a 'line' event
 * (line, lineIndex) for every separator-delimited line it reads.
 *
 * @param {(string|Object)} [readStream] - A readable stream, or a file path
 *     that is opened with fs.createReadStream(path, option). Falls back to
 *     process.stdin when omitted.
 * @param {Object} [option] - Options forwarded to fs.createReadStream when a
 *     path is given. `option.encoding` selects the text decoding
 *     (default 'utf8'). Non-object values are ignored.
 * @returns {Object} The same stream instance, now emitting 'line' events.
 * @throws {Error} If the resolved stream is not readable.
 */
function createReadLine (readStream, option) {
    option = (typeof option === 'object' && option !== null) ? option : {};
    if (! readStream) readStream = process.stdin;
    if (typeof readStream === 'string') readStream = fs.createReadStream(readStream, option);
    if (! readStream.readable) throw new Error('"readStream" is not "readable"');

    // Per-stream parsing state, shared by the bound 'data' and 'end' handlers.
    var state = {
        buf: ''
      , lineLength: 0
      , spliter: '\n'
      , readStream: readStream
    };

    // The line splitter operates on strings, so the stream must always decode
    // its chunks. Apply the caller's encoding explicitly: a stream object
    // handed in by the caller never saw `option`, and would otherwise deliver
    // Buffers whenever option.encoding was set.
    readStream.setEncoding(option.encoding || 'utf8');

    readStream.on('data', onData.bind(state));
    readStream.on('end',  onEnd.bind(state));
    return readStream;
}

module.exports.createReadLine = createReadLine;

/**
 * Shortcut for simple per-line processing: wraps `file` with createReadLine,
 * subscribes `onLine` to the 'line' event and destroys the stream once it
 * has ended.
 *
 * @param {(string|Object)} [file] - File path or readable stream, passed
 *     straight to createReadLine.
 * @param {Function} onLine - Called as onLine(line, lineIndex) per line.
 * @param {Object} [option] - Passed straight to createReadLine.
 * @returns {Object} The underlying stream, so callers can attach further
 *     listeners such as 'error' or 'close'.
 */
module.exports.eachline = function (file, onLine, option) {
    var reader = createReadLine(file, option);
    reader.on('line', onLine);
    reader.on('end', reader.destroy.bind(reader));
    return reader;
};

test_read_line.js

// Demo: print each line of a file prefixed with the index passed to the
// 'line' listener, then report when the stream closes.
var eachLine = require('./each-line');
var textFile = 'path/to/sample.txt';
var rl       = eachLine.createReadLine(textFile);

function printLine (line, i) {
    console.log('%d: %s', i, line);
}

function reportClose () {
    console.log('CLOSE %s', textFile);
}

rl.on('line', printLine);
rl.on('close', reportClose);

単純に一行毎に処理をするだけなら eachline がショートカットっぽいので、それでもいい。

test_eachline.js

// Demo: same output as the createReadLine example, using the eachline
// shortcut.
var textFile   = 'path/to/sample.txt';
var lineReader = require('./each-line');

function printLine (line, i) {
    console.log('%d: %s', i, line);
}

lineReader.eachline(textFile, printLine);

`data` イベントの際に読み込んだ `chunk` を splitで配列にしてるところで メモリー食ってるのがイケてません。


追記 (2012.09.30). 第一引数をファイル名ではなく、`readableStream` に変更しました。
例えば、http://example.com/some.txt を行番号付きで表示するなどは

// Demo: fetch a text file over HTTP and print each line with its index.
var http     = require('http');
var eachLine = require('./each-line');

var url = 'http://example.com/some.txt';

// The HTTP response is itself a readable stream, so it can be wrapped directly.
function printNumberedLines (res) {
	eachLine.createReadLine(res).on('line', function (line, i) {
		console.log('%d: %s', i, line);
	});
}

http.get(url, printNumberedLines);

標準入力を表示するとか

// Demo: log every line typed on standard input; a line starting with ".exit"
// destroys the stream (that line is still logged).
var util           = require('util');
var createReadLine = require('./each-line').createReadLine;

var EXIT_COMMAND = /^\.exit/;
var rs = createReadLine(); // no argument: falls back to process.stdin

function handleLine (line) {
    if (EXIT_COMMAND.test(line)) {
        rs.destroy();
    }
    util.log(line);
}

function handleClose () {
    console.log('close readline');
}

rs.on('line', handleLine);
rs.on('close', handleClose);

rs.resume();