diff --git a/hamazaki1990/.gitignore b/hamazaki1990/.gitignore new file mode 100644 index 0000000..86c26cf --- /dev/null +++ b/hamazaki1990/.gitignore @@ -0,0 +1,2 @@ +*.csv +shukudai.py diff --git a/hamazaki1990/individual.py b/hamazaki1990/individual.py index b1ee698..a06c778 100644 --- a/hamazaki1990/individual.py +++ b/hamazaki1990/individual.py @@ -1,10 +1,14 @@ -from decimal import Decimal +import random +import copy class Individual: + mutationrate = 0.01 + def __init__(self, n, f=1.0): self._id = n self._fitness = f + self._genotype = [] def get_id(self): return self._id @@ -12,9 +16,20 @@ def get_id(self): def get_fitness(self): return self._fitness + def get_genotype(self): + return self._genotype + + def acquire_mutation(self): + r = random.random() + next_ind = copy.deepcopy(self) + next_genotype = next_ind._genotype + if r < Individual.mutationrate: + next_genotype.append(random.random()) + next_ind._genotype = next_genotype + return next_ind + def __repr__(self): return str(self._id) - return Decimal(self._fitness) def main(): @@ -22,6 +37,27 @@ def main(): print(ind.get_id()) print(ind.get_fitness()) + ind2 = Individual(30) + print(ind2.get_id()) + print(ind2.get_fitness()) + print(ind2.get_genotype()) + ind2_1 = ind2.acquire_mutation() + print(ind2.get_genotype()) + print(ind2_1.get_genotype()) + ind2.acquire_mutation() + print(ind2.get_genotype()) + print(ind2.get_id()) + print(ind2.get_fitness()) + + ind3 = Individual(0) + ind4 = Individual(1) + pop = [ind3, ind4] + print([x.get_genotype() for x in pop]) + pop1 = [x.acquire_mutation() for x in pop] + print([x.get_genotype() for x in pop1]) + [x.acquire_mutation() for x in pop] + print([x.get_genotype() for x in pop]) + if __name__ == '__main__': main() diff --git a/hamazaki1990/population.py b/hamazaki1990/population.py index f46fed1..bae86e7 100644 --- a/hamazaki1990/population.py +++ b/hamazaki1990/population.py @@ -10,10 +10,10 @@ def roulettechoice(individuals, cumsum_fitness): class Population: - def __init__(self, n, mutantrate=0, s=0): - num_mutant = int(n * mutantrate) + def __init__(self, n, mutant=0, s=0): + num_mutant = int(n * mutant) mutant_inds = [Individual(x, 1 + s) for x in range(num_mutant)] - wild_inds = [Individual(x) for x in range(num_mutant, n)] + wild_inds = [Individual(x, 1) for x in range(num_mutant, n)] self._inds = mutant_inds + wild_inds def get_ids(self): @@ -23,10 +23,9 @@ def get_fitnesses(self): fitness = [x.get_fitness() for x in self._inds] return fitness - def get_mutantfreq(self): - fitness = [x.get_fitness() for x in self._inds] - mutantfreq = 1 - fitness.count(1.0)/len(fitness) - return mutantfreq + def get_genotypes(self): + genotypes = [x.get_genotype() for x in self._inds] + return genotypes def next_genwf(self): fitness = [x.get_fitness() for x in self._inds] @@ -34,7 +33,9 @@ def next_genwf(self): cumsum_fitness = [sum(fitness[:i]) for i in range(1, size + 1)] next_generation = [] for x in range(size): - next_generation.append(roulettechoice(self._inds, cumsum_fitness)) + parent_inds = roulettechoice(self._inds, cumsum_fitness) + next_inds = parent_inds.acquire_mutation() + next_generation.append(next_inds) self._inds = next_generation def next_genmo(self): @@ -42,7 +43,39 @@ def next_genmo(self): size = len(self._inds) cumsum_fitness = [sum(fitness[:i]) for i in range(1, size + 1)] i_dying = random.randrange(size) - self._inds[i_dying] = roulettechoice(self._inds, cumsum_fitness) + parent_inds = roulettechoice(self._inds, cumsum_fitness) + next_inds = parent_inds.acquire_mutation() + self._inds[i_dying] = next_inds + + def list_mutation(self): + genotypes = [x.get_genotype() for x in self._inds] + m_sites = [] + for x in genotypes: + m_sites.extend(x) + m_sites = sorted(list(set(m_sites))) + m_list = ([[0 for x in range(len(m_sites))] + for y in range(len(self._inds))]) + for i in range(len(self._inds)): + for j in range(len(genotypes[i])): + k = m_sites.index(genotypes[i][j]) + m_list[i][k] += 1 + return m_list + + def mutantfreq_per_site(self): + genotypes = [x.get_genotype() for x in self._inds] + m_sites = [] + for x in genotypes: + m_sites.extend(x) + m_sites = sorted(list(set(m_sites))) + m_count = [0 for x in range(len(m_sites))] + for i in range(len(self._inds)): + for j in range(len(genotypes[i])): + k = m_sites.index(genotypes[i][j]) + m_count[k] += 1 + mutantfreq = [x/len(self._inds) for x in m_count] + mutantfreq_per_site = ([[m_sites[i], mutantfreq[i]] + for i in range(len(m_sites))]) + return mutantfreq_per_site def is_not_fixed(self): for x in range(1, len(self._inds)): @@ -51,60 +84,47 @@ def is_not_fixed(self): else: return False - def mutation_is_not_fixed(self): - fitness = [x.get_fitness() for x in self._inds] - for x in range(1, len(fitness)): - if fitness[0] != fitness[x]: - return True - else: - return False - def main(): p1_1 = Population(10) print(p1_1.get_ids()) print(p1_1.get_fitnesses()) print(p1_1.is_not_fixed()) - print(p1_1.mutation_is_not_fixed()) - for x in range(20): + for x in range(10): p1_1.next_genwf() print(p1_1.get_ids()) - print(p1_1.get_fitnesses()) + print(p1_1.get_genotypes()) + print(p1_1.list_mutation()) + print(p1_1.mutantfreq_per_site()) + print(p1_1.is_not_fixed()) - print(p1_1.mutation_is_not_fixed()) + p1_1.list_mutation() p1_2 = Population(10, 0.3, 0.2) print(p1_2.get_ids()) print(p1_2.get_fitnesses()) print(p1_2.is_not_fixed()) - print(p1_2.mutation_is_not_fixed()) - print(p1_2.get_mutantfreq()) - - for x in range(20): - p1_2.next_genwf() - print(p1_2.get_ids()) - print(p1_2.get_fitnesses()) - print(p1_2.is_not_fixed()) - print(p1_2.mutation_is_not_fixed()) p2_1 = Population(10) print(p2_1.get_ids()) print(p2_1.get_fitnesses()) - for x in range(20): + for x in range(10): p2_1.next_genmo() print(p2_1.get_ids()) print(p2_1.get_fitnesses()) + p2_1.list_mutation() p2_2 = Population(10, 0.3, 0.2) print(p2_2.get_ids()) print(p2_2.get_fitnesses()) - for x in range(20): + for x in range(10): p2_2.next_genmo() print(p2_2.get_ids()) print(p2_2.get_fitnesses()) + p2_2.list_mutation() if __name__ == '__main__': diff --git a/hamazaki1990/visualize.py b/hamazaki1990/visualize.py index d4f43d4..7857cf3 100644 --- a/hamazaki1990/visualize.py +++ b/hamazaki1990/visualize.py @@ -2,37 +2,65 @@ from population import Population -def change_allelewf(population): +def get_step(t, mutations): + t_mutations = [] + for i in mutations: + t_mutation = [t] + t_mutation.extend(i) + t_mutations.append(t_mutation) + return t_mutations + + +def change_allelewf(population, generation): t = 0 - fixprocess = [[t, population.get_mutantfreq()]] - while population.mutation_is_not_fixed(): + change_allele = population.mutantfreq_per_site() + derived_allelefreq = get_step(t, change_allele) + for x in range(generation): t += 1 population.next_genwf() - fixprocess.append([t, population.get_mutantfreq()]) - return fixprocess + change_allele = population.mutantfreq_per_site() + next_gen = get_step(t, change_allele) + derived_allelefreq.extend(next_gen) + return derived_allelefreq -def change_allelemo(population): +def change_allelemo(population, generation): t = 0 - fixprocess = [[t, population.get_mutantfreq()]] - while population.mutation_is_not_fixed(): + change_allele = population.mutantfreq_per_site() + derived_allelefreq = get_step(t, change_allele) + for x in range(generation): t += 1 population.next_genmo() - fixprocess.append([t, population.get_mutantfreq()]) - return fixprocess + change_allele = population.mutantfreq_per_site() + next_gen = get_step(t, change_allele) + derived_allelefreq.extend(next_gen) + return derived_allelefreq -def output_allelechange(filename, population, function): +def output_allelechange(filename, function, population, generation): with open(filename, "w", encoding="utf-8") as outfile: writer = csv.writer(outfile) - writer.writerow(["generation", "derived_allele_frequency"]) - fixprocess = function(population) - for x in range(len(fixprocess)): - writer.writerow(fixprocess[x]) + freq = function(population, generation) + col_name = ["step", "locus", "frequency"] + writer.writerow(col_name) + for x in range(len(freq)): + writer.writerow(freq[x]) + + +example = [[0.1, 0.2], [0.2, 0.2], [0.8, 0.1]] +print(example[1]) +print(get_step(2, example)) + +p1_1 = Population(10) +fixprocess1 = change_allelewf(p1_1, 10) +print(fixprocess1) +p1_2 = Population(10) +fixprocess2 = change_allelemo(p1_2, 20) +print(fixprocess2) -p1 = Population(10, 0.1, 0.2) -output_allelechange("allelefreqwf.csv", p1, change_allelewf) +p1 = Population(1000) +output_allelechange("allelefreqwf.csv", change_allelewf, p1, 1000) -p2 = Population(10, 0.1, 0.5) -output_allelechange("allelefreqmo.csv", p2, change_allelemo) +p2 = Population(1000) +output_allelechange("allelefreqmo.csv", change_allelemo, p2, 1000) diff --git a/hamazaki1990/visualize.r b/hamazaki1990/visualize.r index 9e100f6..fdf9975 100644 --- a/hamazaki1990/visualize.r +++ b/hamazaki1990/visualize.r @@ -1,13 +1,16 @@ library("tidyverse") setwd("~/github/practice-py/hamazaki1990/") freq_wf <- read_csv("allelefreqwf.csv") %>% - dplyr::mutate(model = "wf") + dplyr::arrange(locus, step) freq_wf +plotwf <- ggplot(freq_wf, aes(x=step, y=frequency, group=locus)) +plotwf <-plotwf +geom_line() +plotwf <- plotwf + ylim(0, 1) +print(plotwf) freq_mo <- read_csv("allelefreqmo.csv") %>% - dplyr::mutate(model = "moran") + dplyr::arrange(locus, step) freq_mo -freq <- dplyr::bind_rows(freq_wf, freq_mo) -print(freq) -freqplot <- ggplot(freq, aes(x=generation, y=derived_allele_frequency, group=model, color=model)) -freqplot <-freqplot + geom_line() -print(freqplot) +plotmo <- ggplot(freq_mo, aes(x=step, y=frequency, group=locus)) +plotmo <-plotmo +geom_line() +plotmo <- plotmo + ylim(0, 1) +print(plotmo)