kinjouj.github.io

Hadoop (12) - FSDataInputStream/FSDataOutputStream -

2011-04-12T00:00:00+00:00 Java Hadoop

FSDataInputStream/FSDataOutputStreamを使うと、HDFS上のファイルを通常のInputStream/OutputStreamと同じAPIで読み書きできる。

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.io.IOUtils;

/**
 * Minimal HDFS client example: copies one HDFS file to another using
 * {@link FSDataInputStream} and {@link FSDataOutputStream}.
 *
 * <p>The NameNode address and both file paths are hard-coded for the demo.
 */
public class Client {
    /** URI of the HDFS NameNode used by this example. */
    private static final String NAME_NODE_URI = "hdfs://localhost:9000";

    /** File that is read. */
    private static final String SOURCE_FILE = NAME_NODE_URI + "/user/kinjouj/input/data.txt";

    /** File that is created and receives a copy of {@link #SOURCE_FILE}. */
    private static final String TARGET_FILE = NAME_NODE_URI + "/user/kinjouj/input/test.txt";

    /**
     * Opens {@link #SOURCE_FILE}, creates {@link #TARGET_FILE} and copies the
     * bytes of the former into the latter.
     *
     * @param args ignored
     * @throws Exception if the file system cannot be obtained or any open,
     *         create or copy step fails; the failure is propagated so the
     *         process terminates with an error instead of silently succeeding
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // A single value: use set() rather than the multi-value setStrings().
        conf.set("fs.default.name", NAME_NODE_URI);

        FileSystem fs = FileSystem.get(conf);
        FSDataInputStream is = null;
        FSDataOutputStream os = null;

        try {
            is = fs.open(new Path(SOURCE_FILE));

            /* Reading example: dump the file to stdout, then rewind.
            byte[] b = new byte[4096];
            int i;

            while((i = is.read(b)) != -1) {
                // Decode only the i bytes actually read, not the whole buffer.
                System.out.print(new String(b, 0, i));
            }

            is.seek(0);
            */

            os = fs.create(new Path(TARGET_FILE), new Progressable() {
                @Override
                public void progress() {
                    // Progress notifications are not needed for this example.
                }
            });

            // Copy from the FSDataInputStream to the FSDataOutputStream.
            // Per the Hadoop IOUtils documentation, this overload also closes
            // both streams once the copy is done.
            IOUtils.copyBytes(is, os, conf);
        } finally {
            // Safety net for paths where copyBytes was never reached (e.g.
            // create() failed after open() succeeded). closeStream() accepts
            // null and never throws, so a failure while closing the output
            // stream can no longer prevent the input stream from being closed.
            IOUtils.closeStream(os);
            IOUtils.closeStream(is);
        }
    }
}