Skip to content

Commit

Permalink
FileSpout: spread the work based on the number of instances, fix #1125
Browse files Browse the repository at this point in the history
Signed-off-by: Julien Nioche <[email protected]>
  • Loading branch information
jnioche committed Nov 28, 2023
1 parent ad706a4 commit 00f319b
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ public class FileSpout extends BaseRichSpout {
protected LinkedList<byte[]> buffer = new LinkedList<>();
protected boolean active;
private boolean withDiscoveredStatus = false;
protected int totalTasks;
protected int taskIndex;

/**
* @param dir containing the seed files
Expand Down Expand Up @@ -139,7 +141,17 @@ protected void populateBuffer() throws IOException {
while (linesRead < BATCH_SIZE && (line = currentBuffer.readLine()) != null) {
if (StringUtils.isBlank(line)) continue;
if (line.startsWith("#")) continue;
buffer.add(line.trim().getBytes(StandardCharsets.UTF_8));
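// Check whether this entry should be skipped: each spout instance only
// keeps the lines whose index modulo totalTasks equals its taskIndex.
// totalTasks could be 0 if a subclass forgot to call this class's open(),
// in which case every line is kept.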
if (totalTasks == 0 || linesRead % totalTasks == taskIndex) {
LOG.debug(
"Adding to buffer for spout {} -> line ({}) {}",
taskIndex,
linesRead,
line);
buffer.add(line.trim().getBytes(StandardCharsets.UTF_8));
}
linesRead++;
}

Expand All @@ -154,11 +166,11 @@ protected void populateBuffer() throws IOException {
public void open(
Map<String, Object> conf, TopologyContext context, SpoutOutputCollector collector) {
_collector = collector;
try {
populateBuffer();
} catch (IOException e) {
throw new RuntimeException(e);
}

// if more than one instance is used we expect their number to be the
// same as the number of shards
totalTasks = context.getComponentTasks(context.getThisComponentId()).size();
taskIndex = context.getThisTaskIndex();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,9 @@ private void addVerbatimHttpHeaders(
@Override
public void open(
Map<String, Object> conf, TopologyContext context, SpoutOutputCollector collector) {
_collector = collector;

super.open(conf, context, collector);

record = Optional.empty();

maxContentSize = ConfUtils.getInt(conf, "http.content.limit", -1);
Expand Down

0 comments on commit 00f319b

Please sign in to comment.