Skip to content

Commit

Permalink
fix minor bugs in sex prediction
Browse files Browse the repository at this point in the history
  • Loading branch information
vyepez88 authored Oct 5, 2024
1 parent 9568515 commit 2aba823
Showing 1 changed file with 27 additions and 21 deletions.
48 changes: 27 additions & 21 deletions drop/modules/aberrant-expression-pipeline/Counting/Summary.R
Original file line number Diff line number Diff line change
Expand Up @@ -196,15 +196,13 @@ if(isEmpty(sex_idx)){
} else{

# Verify if both XIST and UTY were counted
xist_id <- 'XIST'
uty_id <- 'UTY'

if(grepl('ENSG0', rownames(ods)[1])){
xist_id <- 'ENSG00000229807'
uty_id <- 'ENSG00000183878'
xist_idx <- grep('ENSG00000229807', rownames(ods))
uty_idx <- grep('ENSG00000183878', rownames(ods))
} else{
xist_idx <- which(rownames(ods) == 'XIST')
uty_idx <- which(rownames(ods) == 'UTY')
}
xist_idx <- grep(xist_id, rownames(ods))
uty_idx <- grep(uty_id, rownames(ods))

if(isEmpty(xist_idx) | isEmpty(uty_idx)){
print('Either XIST or UTY is not expressed')
Expand All @@ -222,23 +220,31 @@ if(isEmpty(sex_idx)){
# Train only in male/female in case there are other values
train_dt <- sex_dt[SEX %in% c('f', 'm', 'female', 'male')]

library("MASS")
lda <- lda(SEX ~ log2(XIST+1) + log2(UTY+1), data = train_dt)
if(uniqueN(train_dt$SEX) > 1){
library("MASS")
lda <- lda(SEX ~ log2(XIST+1) + log2(UTY+1), data = train_dt)

sex_dt[, predicted_sex := predict(lda, sex_dt)$class]
sex_dt[, match_sex := SEX == predicted_sex]
table(sex_dt[, .(SEX, predicted_sex)])
sex_dt[, predicted_sex := predict(lda, sex_dt)$class]
sex_dt[, match_sex := SEX == predicted_sex]
table(sex_dt[, .(SEX, predicted_sex)])

g <- ggplot(sex_dt, aes(XIST+1, UTY+1)) +
geom_point(aes(col = SEX, shape = predicted_sex, alpha = match_sex)) +
scale_x_log10(limits = c(1,NA)) + scale_y_log10(limits = c(1,NA)) +
scale_alpha_manual(values = c(1, .1)) +
theme_cowplot() + background_grid(major = 'xy', minor = 'xy') +
annotation_logticks(sides = 'bl') +
labs(color = 'Sex', shape = 'Predicted sex', alpha = 'Matches sex')
plot(g)
g <- ggplot(sex_dt, aes(XIST+1, UTY+1)) +
geom_point(aes(col = SEX, shape = predicted_sex, alpha = match_sex)) +
scale_x_log10(limits = c(1,NA)) + scale_y_log10(limits = c(1,NA)) +
scale_alpha_manual(values = c(1, .1)) +
theme_cowplot() + background_grid(major = 'xy', minor = 'xy') +
annotation_logticks(sides = 'bl') +
labs(color = 'Sex', shape = 'Predicted sex', alpha = 'Matches sex')
plot(g)

DT::datatable(sex_dt[match_sex == F], caption = 'Sex mismatches')
DT::datatable(sex_dt[match_sex == F], caption = 'Sex mismatches')
} else {
g <- ggplot(sex_dt, aes(XIST+1, UTY+1)) + geom_point(aes(col = SEX)) +
scale_x_log10(limits = c(1,NA)) + scale_y_log10(limits = c(1,NA)) +
theme_cowplot() + background_grid(major = 'xy', minor = 'xy') +
annotation_logticks(sides = 'bl')
plot(g)
}

# Write table
fwrite(sex_dt, gsub('ods_unfitted.Rds', 'xist_uty.tsv', snakemake@input$ods),
Expand Down

0 comments on commit 2aba823

Please sign in to comment.