Skip to content

Commit

Permalink
ACCUMULO-375 made min input split size configurable
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1245684 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
Adam Fuchs committed Feb 17, 2012
1 parent 2e366aa commit e24faaf
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ public class WikipediaConfiguration {
// Property-name constants; judging by the names, the first three relate to bulk ingest
// (target directory, failure directory, buffer size) -- confirm against their callers.
public static final String BULK_INGEST_DIR = "wikipedia.bulk.ingest.dir";
public static final String BULK_INGEST_FAILURE_DIR = "wikipedia.bulk.ingest.failure.dir";
public static final String BULK_INGEST_BUFFER_SIZE = "wikipedia.bulk.ingest.buffer.size";
// Property name looked up by getMinInputSplitSize(Configuration).
public static final String PARTITIONED_INPUT_MIN_SPLIT_SIZE = "wikipedia.min.input.split.size";


public static String getUser(Configuration conf) {
Expand Down Expand Up @@ -130,6 +131,10 @@ public static Path getPartitionedArticlesPath(Configuration conf) {
return new Path(conf.get(PARTITIONED_ARTICLES_DIRECTORY));
}

/**
 * Returns the minimum input split size taken from the given configuration.
 *
 * <p>Looks up the {@link #PARTITIONED_INPUT_MIN_SPLIT_SIZE} property; when the property is
 * absent the default {@code 1L << 27} (134,217,728) is used. The value is presumably a size
 * in bytes -- confirm against the input format that consumes it.
 *
 * @param conf configuration to read the property from
 * @return the configured minimum split size, or {@code 1L << 27} when the property is not set
 */
public static long getMinInputSplitSize(Configuration conf) {
  // Upper-case 'L' suffix on purpose: a lower-case 'l' is easily misread as the digit '1'.
  return conf.getLong(PARTITIONED_INPUT_MIN_SPLIT_SIZE, 1L << 27);
}

/**
 * Reads the {@code RUN_PARTITIONER} boolean property from the given configuration.
 *
 * @param conf configuration to read the property from
 * @return the property's value, with {@code false} supplied as the default
 */
public static boolean runPartitioner(Configuration conf) {
  final boolean partitionerRequested = conf.getBoolean(RUN_PARTITIONER, false);
  return partitionerRequested;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,8 @@ private int runIngestJob() throws Exception
// setup input format
ingestJob.setInputFormatClass(SequenceFileInputFormat.class);
SequenceFileInputFormat.setInputPaths(ingestJob, WikipediaConfiguration.getPartitionedArticlesPath(ingestConf));
SequenceFileInputFormat.setMinInputSplitSize(ingestJob, 1l << 28);
// min split size comes from the wikipedia.min.input.split.size property (see WikipediaConfiguration.getMinInputSplitSize)
SequenceFileInputFormat.setMinInputSplitSize(ingestJob, WikipediaConfiguration.getMinInputSplitSize(ingestConf));

// setup output format
ingestJob.setMapOutputKeyClass(Text.class);
Expand Down

0 comments on commit e24faaf

Please sign in to comment.