Skip to content

Commit

Permalink
Update CellRangerVDJWrapper to support CR9
Browse files: browse the repository at this point in the history
  • Loading branch information
bbimber committed Nov 22, 2024
1 parent 60c109d commit cb7d5e1
Showing 1 changed file with 22 additions and 12 deletions.
34 changes: 22 additions & 12 deletions singlecell/src/org/labkey/singlecell/run/CellRangerVDJWrapper.java
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,6 +3,7 @@
import au.com.bytecode.opencsv.CSVReader;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.stream.IntStreams;
import org.apache.logging.log4j.Logger;
import org.jetbrains.annotations.Nullable;
import org.json.JSONObject;
Expand Down Expand Up @@ -61,6 +62,7 @@
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.IntStream;

public class CellRangerVDJWrapper extends AbstractCommandWrapper
{
Expand Down Expand Up @@ -862,6 +864,7 @@ private static void processCSV(PrintWriter writer, boolean printHeader, File inp
Map<String, Integer> chimericCallsRecovered = new HashMap<>();
int restoredTRDVAV = 0;

final Map<String, Integer> headerIdx = new HashMap<>();
int lineIdx = 0;
while ((line = reader.readLine()) != null)
{
Expand All @@ -873,49 +876,56 @@ private static void processCSV(PrintWriter writer, boolean printHeader, File inp
writer.println(line + ",chain_type");
}

String[] header = line.split(",");
IntStream.range(0, header.length).forEach(idx -> headerIdx.put(header[idx], idx));
continue;
}

//Infer correct chain from the V, J and C genes
String[] tokens = line.split(",", -1); // -1 used to preserve trailing empty strings

// Restore original value for TRD/TRA
if (tokens[6].contains("TRDV") && tokens[6].contains("/") && tokens[6].contains("AV"))
final int vGeneIdx = headerIdx.get("v_gene");
final int jGeneIdx = headerIdx.get("j_gene");
final int cGeneIdx = headerIdx.get("c_gene");
final int chainIdx = headerIdx.get("chain");

if (tokens[vGeneIdx].contains("TRDV") && tokens[vGeneIdx].contains("/") && tokens[vGeneIdx].contains("AV"))
{
restoredTRDVAV++;
String[] split = tokens[6].split("/");
tokens[6] = "TR" + split[1] + "/" + split[0].replaceAll("TR", "");
String[] split = tokens[vGeneIdx].split("/");
tokens[vGeneIdx] = "TR" + split[1] + "/" + split[0].replaceAll("TR", "");
}

List<String> chains = new ArrayList<>();
String vGeneChain = null;
String jGeneChain = null;
String cGeneChain = null;
for (int idx : new Integer[]{6,8,9})
for (int idx : new Integer[]{vGeneIdx,jGeneIdx,cGeneIdx})
{
String val = StringUtils.trimToNull(tokens[idx]);
if (val != null)
{
val = val.substring(0, 3);

chains.add(val);
if (idx == 6)
if (idx == vGeneIdx)
{
vGeneChain = val;
}
if (idx == 8)
if (idx == jGeneIdx)
{
jGeneChain = val;
}
else if (idx == 9)
else if (idx == cGeneIdx)
{
cGeneChain = val;
}
}
}

Set<String> uniqueChains = new HashSet<>(chains);
String originalChain = StringUtils.trimToNull(tokens[5]);
String originalChain = StringUtils.trimToNull(tokens[chainIdx]);

// Recover TRDV/TRAJ/TRAC:
if (uniqueChains.size() > 1)
Expand All @@ -925,7 +935,7 @@ else if (idx == 9)
{
uniqueChains.clear();
uniqueChains.add(cGeneChain);
String key = originalChain + "->" + cGeneChain + " (based on C-GENE)";
String key = vGeneChain + ":" + jGeneChain + ":" + originalChain + "->" + cGeneChain + " (based on C-GENE)";
chimericCallsRecovered.put(key, chimericCallsRecovered.getOrDefault(key, 0) + 1);
}
else if (uniqueChains.size() == 2)
Expand All @@ -950,14 +960,14 @@ else if (uniqueChains.size() == 2)
if (uniqueChains.size() == 1)
{
String chain = uniqueChains.iterator().next();
tokens[5] = chain;
tokens[chainIdx] = chain;
}
else
{
log.info("Multiple chains detected [" + StringUtils.join(chains, ",")+ "], leaving original call alone: " + originalChain + ". " + tokens[6] + "/" + tokens[8] + "/" + tokens[9]);
log.info("Multiple chains detected [" + StringUtils.join(chains, ",")+ "], leaving original call alone: " + originalChain + ". " + tokens[vGeneIdx] + "/" + tokens[jGeneIdx] + "/" + tokens[cGeneIdx]);
}

if (acceptableChains.contains(tokens[5]))
if (acceptableChains.contains(tokens[chainIdx]))
{
writer.println(StringUtils.join(tokens, ",") + "," + chainType);
}
Expand Down

0 comments on commit cb7d5e1

Please sign in to comment.