Hadoop (13) - Compression -
Trying out GZIP compression with org.apache.hadoop.io.compress.CompressionCodec.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.IOUtils;
public class Client {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://localhost:9000");
        conf.set("io.compression.codecs", "org.apache.hadoop.io.compress.GzipCodec");

        FileSystem fs = FileSystem.get(conf);

        // The output path ends in .gz, so CompressionCodecFactory resolves GzipCodec from the extension
        Path path = new Path("hdfs://localhost:9000/user/kinjouj/input/data.txt.gz");
        CompressionCodecFactory factory = new CompressionCodecFactory(conf);
        CompressionCodec codec = factory.getCodec(path);

        FSDataInputStream is = null;
        CompressionOutputStream os = null;

        try {
            // Read the uncompressed source file and write it back out through the codec's compressing stream
            is = fs.open(new Path("hdfs://localhost:9000/user/kinjouj/input/data.txt"));
            os = codec.createOutputStream(fs.create(path));
            IOUtils.copyBytes(is, os, conf);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (os != null) {
                os.close();
            }

            if (is != null) {
                is.close();
            }
        }
    }
}
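Reading the compressed file back works the same way in reverse: CompressionCodecFactory resolves GzipCodec from the .gz extension, and CompressionCodec#createInputStream wraps the HDFS stream in a decompressing one. The following is only a minimal sketch along the lines of the example above; the class name Decompress and the output path data.txt.decompressed are illustrative choices, not part of the original example.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.IOUtils;

// Sketch only: decompresses the data.txt.gz written by the example above back to plain text
public class Decompress {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://localhost:9000");

        FileSystem fs = FileSystem.get(conf);

        // Resolve GzipCodec from the .gz extension of the compressed file
        Path path = new Path("hdfs://localhost:9000/user/kinjouj/input/data.txt.gz");
        CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);

        CompressionInputStream is = null;
        FSDataOutputStream os = null;

        try {
            // Wrap the HDFS input stream in a decompressing stream and copy the plain bytes back out
            is = codec.createInputStream(fs.open(path));
            // Illustrative output path, not from the original post
            os = fs.create(new Path("hdfs://localhost:9000/user/kinjouj/input/data.txt.decompressed"));
            IOUtils.copyBytes(is, os, conf);
        } finally {
            IOUtils.closeStream(is);
            IOUtils.closeStream(os);
        }
    }
}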