From 66c07118722c5dc1b98cbce2290ecae6aad17fb1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 15 Apr 2024 22:36:16 +0000 Subject: [PATCH] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ...s by genre category- Numbers over time.Rmd | 562 +++++++++--------- src/narges/RQ-Trello.Rmd | 56 +- 2 files changed, 319 insertions(+), 299 deletions(-) diff --git a/code/narges/Visualization- Society-related genres by genre category- Numbers over time.Rmd b/code/narges/Visualization- Society-related genres by genre category- Numbers over time.Rmd index defffd0f6..4930b37d7 100644 --- a/code/narges/Visualization- Society-related genres by genre category- Numbers over time.Rmd +++ b/code/narges/Visualization- Society-related genres by genre category- Numbers over time.Rmd @@ -22,19 +22,21 @@ pp <- function(percentage, accuracy = 0.01) { ``` ```{r} -#fbs_records_a +# fbs_records_a ``` ```{r} -fbs_links_a <- select(fbs_links_a,-c('method')) +fbs_links_a <- select(fbs_links_a, -c("method")) ``` ```{r} -fbs_records <- fbs_records_a %>% inner_join(vd17_id_a) %>% inner_join(fbs_links_a, by=('record_number')) %>% - inner_join(fbs_metadata_a%>% select(GND,member_number,First_name_new,Last_name_new,Estimated_admission_year))%>% - mutate(first_name=First_name_new,last_name=Last_name_new,admission_year=Estimated_admission_year)%>% - collect()%>% - select('record_number','vd17_id','member_number','method','set','first_name','last_name','admission_year') +fbs_records <- fbs_records_a %>% + inner_join(vd17_id_a) %>% + inner_join(fbs_links_a, by = ("record_number")) %>% + inner_join(fbs_metadata_a %>% select(GND, member_number, First_name_new, Last_name_new, Estimated_admission_year)) %>% + mutate(first_name = First_name_new, last_name = Last_name_new, admission_year = Estimated_admission_year) %>% + collect() %>% + select("record_number", "vd17_id", "member_number", "method", "set", "first_name", "last_name", "admission_year") ``` @@ -45,37 +47,37 @@ vd17_normalized_years_local <- vd17_normalized_years_a %>% ```{r} -#vd17_genres_local <- vd17_genres_a %>% - #collect()%>% - #select(record_number,genre) +# vd17_genres_local <- vd17_genres_a %>% +# collect()%>% +# select(record_number,genre) -#genre_cat1 <- vd17_genres_local%>% - #filter(grepl(paste(c(genre_list1_a,genre_list1_b,genre_list1_c,genre_list1_d,genre_list1_e,genre_list1_f,genre_list1_g,genre_list1_h,genre_list1_i),collapse="|"), genre))%>% - #select(record_number,genre) +# genre_cat1 <- vd17_genres_local%>% +# filter(grepl(paste(c(genre_list1_a,genre_list1_b,genre_list1_c,genre_list1_d,genre_list1_e,genre_list1_f,genre_list1_g,genre_list1_h,genre_list1_i),collapse="|"), genre))%>% +# select(record_number,genre) ``` ```{r} core_society <- fbs_records %>% - filter(set=="Society-contributed and society-purpose related") + filter(set == "Society-contributed and society-purpose related") society_interest <- fbs_records %>% - filter(set=="Society-contributed") + filter(set == "Society-contributed") ``` ```{r} vd17_normalized_langs_local <- vd17_normalized_langs_a %>% - collect()%>% - filter(publication_language=="ger") + collect() %>% + filter(publication_language == "ger") ``` # Visualization: Overall output of Society members in German: Numbers over time ```{r} fbs_german <- fbs_records %>% - inner_join(vd17_normalized_langs_local, by=c("record_number"))%>% + inner_join(vd17_normalized_langs_local, by = c("record_number")) %>% left_join(vd17_normalized_years_local, by = c("record_number")) %>% filter(normalized_year >= 1600, normalized_year <= 1700) %>% - filter(nchar(normalized_year)==4)%>% - select(record_number,normalized_year,vd17_id)%>% + filter(nchar(normalized_year) == 4) %>% + select(record_number, normalized_year, vd17_id) %>% group_by(normalized_year) %>% summarize(records = n_distinct(record_number), .groups = "drop") %>% mutate(phase = case_when( @@ -84,50 +86,52 @@ fbs_german <- fbs_records %>% normalized_year >= 1651 & normalized_year <= 1667 ~ "phase 2", normalized_year >= 1668 & normalized_year <= 1682 ~ "phase 3", normalized_year > 1682 ~ "> 1682" - ))%>% + )) %>% distinct() -group.colors <- c("< 1617" = "grey", "phase 1" = "cyan", "phase 2" ="#9633FF", "phase 3" = "pink", "> 1682" = "grey") -fbs_german%>% - ggplot(aes(x=normalized_year, y=records, fill = phase)) + - geom_col()+ - xlab("Year") + ylab("")+ - ggtitle("Member Publications in German")+ - scale_x_continuous(breaks = seq(1000, 2000, by = 2))+ - scale_y_continuous(breaks = seq(0, 500, by = 50))+ - theme_hsci_discrete()+ - theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))+ - theme(plot.title = element_text(hjust = 0.5))+ - scale_fill_manual(values=group.colors) - +group.colors <- c("< 1617" = "grey", "phase 1" = "cyan", "phase 2" = "#9633FF", "phase 3" = "pink", "> 1682" = "grey") +fbs_german %>% + ggplot(aes(x = normalized_year, y = records, fill = phase)) + + geom_col() + + xlab("Year") + + ylab("") + + ggtitle("Member Publications in German") + + scale_x_continuous(breaks = seq(1000, 2000, by = 2)) + + scale_y_continuous(breaks = seq(0, 500, by = 50)) + + theme_hsci_discrete() + + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) + + theme(plot.title = element_text(hjust = 0.5)) + + scale_fill_manual(values = group.colors) ``` # Visualization: Overall output of Society members in German: Average output by member over time # Considering 10 years prior to admission_year ```{r} fbs_records_german <- fbs_records %>% - inner_join(vd17_normalized_langs_local, by=c("record_number"))%>% + inner_join(vd17_normalized_langs_local, by = c("record_number")) %>% left_join(vd17_normalized_years_local, by = c("record_number")) %>% filter(normalized_year >= 1600, normalized_year <= 1700) %>% - filter(nchar(normalized_year)==4)%>% - select(record_number,normalized_year,vd17_id,member_number,admission_year) - -fbs_records_10 <-fbs_records_german%>% - filter((admission_year>=(normalized_year-10))&(admission_year<=normalized_year))%>% - with(aggregate(member_number ~ normalized_year,FUN=function(x){length(unique(x))}))%>% - mutate(n_active_members=member_number)%>% - select(n_active_members,normalized_year) - -fbs_records_german_10 <- fbs_records_german %>% - left_join(fbs_records_10,by=c("normalized_year"))%>% + filter(nchar(normalized_year) == 4) %>% + select(record_number, normalized_year, vd17_id, member_number, admission_year) + +fbs_records_10 <- fbs_records_german %>% + filter((admission_year >= (normalized_year - 10)) & (admission_year <= normalized_year)) %>% + with(aggregate(member_number ~ normalized_year, FUN = function(x) { + length(unique(x)) + })) %>% + mutate(n_active_members = member_number) %>% + select(n_active_members, normalized_year) + +fbs_records_german_10 <- fbs_records_german %>% + left_join(fbs_records_10, by = c("normalized_year")) %>% distinct() - + number_year_10 <- fbs_records_german_10 %>% - group_by(normalized_year,n_active_members) %>% + group_by(normalized_year, n_active_members) %>% summarize(number_pub = n_distinct(record_number), .groups = "drop") -number_year_10$average_output=round(number_year_10$number_pub / number_year_10$n_active_members) -number_year_10$number_of_years_prior_to_admission="10" +number_year_10$average_output <- round(number_year_10$number_pub / number_year_10$n_active_members) +number_year_10$number_of_years_prior_to_admission <- "10" number_year_10 <- number_year_10 %>% distinct() @@ -135,23 +139,24 @@ number_year_10 <- number_year_10 %>% # Considering 20 years prior to admission_year ```{r} - -fbs_records_20 <-fbs_records_german%>% - filter((admission_year>=(normalized_year-20))&(admission_year<=normalized_year))%>% - with(aggregate(member_number ~ normalized_year,FUN=function(x){length(unique(x))}))%>% - mutate(n_active_members=member_number)%>% - select(n_active_members,normalized_year) - -fbs_records_german_20 <- fbs_records_german %>% - left_join(fbs_records_20,by=c("normalized_year"))%>% +fbs_records_20 <- fbs_records_german %>% + filter((admission_year >= (normalized_year - 20)) & (admission_year <= normalized_year)) %>% + with(aggregate(member_number ~ normalized_year, FUN = function(x) { + length(unique(x)) + })) %>% + mutate(n_active_members = member_number) %>% + select(n_active_members, normalized_year) + +fbs_records_german_20 <- fbs_records_german %>% + left_join(fbs_records_20, by = c("normalized_year")) %>% distinct() - + number_year_20 <- fbs_records_german_20 %>% - group_by(normalized_year,n_active_members) %>% + group_by(normalized_year, n_active_members) %>% summarize(number_pub = n_distinct(record_number), .groups = "drop") -number_year_20$average_output=round(number_year_20$number_pub / number_year_20$n_active_members) -number_year_20$number_of_years_prior_to_admission="20" +number_year_20$average_output <- round(number_year_20$number_pub / number_year_20$n_active_members) +number_year_20$number_of_years_prior_to_admission <- "20" number_year_20 <- number_year_20 %>% distinct() @@ -159,23 +164,24 @@ number_year_20 <- number_year_20 %>% # Considering 30 years prior to admission_year ```{r} - -fbs_records_30 <-fbs_records_german%>% - filter((admission_year>=(normalized_year-30))&(admission_year<=normalized_year))%>% - with(aggregate(member_number ~ normalized_year,FUN=function(x){length(unique(x))}))%>% - mutate(n_active_members=member_number)%>% - select(n_active_members,normalized_year) - -fbs_records_german_30 <- fbs_records_german %>% - left_join(fbs_records_30,by=c("normalized_year"))%>% +fbs_records_30 <- fbs_records_german %>% + filter((admission_year >= (normalized_year - 30)) & (admission_year <= normalized_year)) %>% + with(aggregate(member_number ~ normalized_year, FUN = function(x) { + length(unique(x)) + })) %>% + mutate(n_active_members = member_number) %>% + select(n_active_members, normalized_year) + +fbs_records_german_30 <- fbs_records_german %>% + left_join(fbs_records_30, by = c("normalized_year")) %>% distinct() - + number_year_30 <- fbs_records_german_30 %>% - group_by(normalized_year,n_active_members) %>% + group_by(normalized_year, n_active_members) %>% summarize(number_pub = n_distinct(record_number), .groups = "drop") -number_year_30$average_output=round(number_year_30$number_pub / number_year_30$n_active_members) -number_year_30$number_of_years_prior_to_admission="30" +number_year_30$average_output <- round(number_year_30$number_pub / number_year_30$n_active_members) +number_year_30$number_of_years_prior_to_admission <- "30" number_year_30 <- number_year_30 %>% distinct() @@ -183,111 +189,116 @@ number_year_30 <- number_year_30 %>% # Considering 40 years prior to admission_year ```{r} - -fbs_records_40 <-fbs_records_german%>% - filter((admission_year>=(normalized_year-40))&(admission_year<=normalized_year))%>% - with(aggregate(member_number ~ normalized_year,FUN=function(x){length(unique(x))}))%>% - mutate(n_active_members=member_number)%>% - select(n_active_members,normalized_year) - -fbs_records_german_40 <- fbs_records_german %>% - left_join(fbs_records_40,by=c("normalized_year"))%>% +fbs_records_40 <- fbs_records_german %>% + filter((admission_year >= (normalized_year - 40)) & (admission_year <= normalized_year)) %>% + with(aggregate(member_number ~ normalized_year, FUN = function(x) { + length(unique(x)) + })) %>% + mutate(n_active_members = member_number) %>% + select(n_active_members, normalized_year) + +fbs_records_german_40 <- fbs_records_german %>% + left_join(fbs_records_40, by = c("normalized_year")) %>% distinct() - + number_year_40 <- fbs_records_german_40 %>% - group_by(normalized_year,n_active_members) %>% + group_by(normalized_year, n_active_members) %>% summarize(number_pub = n_distinct(record_number), .groups = "drop") -number_year_40$average_output=round(number_year_40$number_pub / number_year_40$n_active_members) -number_year_40$number_of_years_prior_to_admission="40" +number_year_40$average_output <- round(number_year_40$number_pub / number_year_40$n_active_members) +number_year_40$number_of_years_prior_to_admission <- "40" number_year_40 <- number_year_40 %>% distinct() ``` ```{r} -avg_number_per_year <- rbind(number_year_10,number_year_20,number_year_30,number_year_40) - +avg_number_per_year <- rbind(number_year_10, number_year_20, number_year_30, number_year_40) ``` ```{r} -#avg_number_per_year%>% - #ggplot(aes(x=normalized_year,y=average_output,fill=number_of_years_prior_to_admission))+ - #geom_col()+ - #ggtitle("Average Number of Member Publications in German")+ - #xlab("") + ylab("")+ - #scale_x_continuous(breaks = seq(1000, 2000, by = 2))+ - #scale_y_continuous(breaks = seq(0, 300, by = 10))+ - #theme_hsci_discrete()+ - #theme(plot.title = element_text(hjust = 0.5))+ - #theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5)) +# avg_number_per_year%>% +# ggplot(aes(x=normalized_year,y=average_output,fill=number_of_years_prior_to_admission))+ +# geom_col()+ +# ggtitle("Average Number of Member Publications in German")+ +# xlab("") + ylab("")+ +# scale_x_continuous(breaks = seq(1000, 2000, by = 2))+ +# scale_y_continuous(breaks = seq(0, 300, by = 10))+ +# theme_hsci_discrete()+ +# theme(plot.title = element_text(hjust = 0.5))+ +# theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5)) ``` ```{r} library(ggalt) -avg10<-number_year_10%>% - ggplot(aes(x=normalized_year,y=average_output))+ - geom_xspline(color="blue")+ - ggtitle("10 years")+ - xlab("") + ylab("")+ - scale_x_continuous(breaks = seq(1000, 2000, by = 2))+ - scale_y_continuous(breaks = seq(0, 300, by = 10))+ - theme_hsci_discrete()+ - theme(plot.title = element_text(hjust = 0.5))+ - theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5)) - -avg20<-number_year_20%>% - ggplot(aes(x=normalized_year,y=average_output))+ - geom_xspline(color="green")+ - ggtitle("20 years")+ - xlab("") + ylab("")+ - scale_x_continuous(breaks = seq(1000, 2000, by = 2))+ - scale_y_continuous(breaks = seq(0, 300, by = 10))+ - theme_hsci_discrete()+ - theme(plot.title = element_text(hjust = 0.5))+ - theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5)) - -avg30<-number_year_30%>% - ggplot(aes(x=normalized_year,y=average_output))+ - geom_xspline(color="red")+ - ggtitle("30 years")+ - xlab("") + ylab("")+ - scale_x_continuous(breaks = seq(1000, 2000, by = 2))+ - scale_y_continuous(breaks = seq(0, 300, by = 10))+ - theme_hsci_discrete()+ - theme(plot.title = element_text(hjust = 0.5))+ - theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5)) - -avg40<-number_year_40%>% - ggplot(aes(x=normalized_year,y=average_output))+ - geom_xspline(color="purple")+ - ggtitle("40 years")+ - xlab("") + ylab("")+ - scale_x_continuous(breaks = seq(1000, 2000, by = 2))+ - scale_y_continuous(breaks = seq(0, 300, by = 10))+ - theme_hsci_discrete()+ - theme(plot.title = element_text(hjust = 0.5))+ - theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5)) - -plot <- ggarrange(avg10,avg20,avg30,avg40,nrow = 2,ncol = 2) -annotate_figure(plot, top = text_grob("Average number of member publications in German per active person per year, with active persons calculated as having joined at most 20/40/60 years prior to the year under calculation", - color = "black", face = "bold", size = 9)) +avg10 <- number_year_10 %>% + ggplot(aes(x = normalized_year, y = average_output)) + + geom_xspline(color = "blue") + + ggtitle("10 years") + + xlab("") + + ylab("") + + scale_x_continuous(breaks = seq(1000, 2000, by = 2)) + + scale_y_continuous(breaks = seq(0, 300, by = 10)) + + theme_hsci_discrete() + + theme(plot.title = element_text(hjust = 0.5)) + + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) + +avg20 <- number_year_20 %>% + ggplot(aes(x = normalized_year, y = average_output)) + + geom_xspline(color = "green") + + ggtitle("20 years") + + xlab("") + + ylab("") + + scale_x_continuous(breaks = seq(1000, 2000, by = 2)) + + scale_y_continuous(breaks = seq(0, 300, by = 10)) + + theme_hsci_discrete() + + theme(plot.title = element_text(hjust = 0.5)) + + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) + +avg30 <- number_year_30 %>% + ggplot(aes(x = normalized_year, y = average_output)) + + geom_xspline(color = "red") + + ggtitle("30 years") + + xlab("") + + ylab("") + + scale_x_continuous(breaks = seq(1000, 2000, by = 2)) + + scale_y_continuous(breaks = seq(0, 300, by = 10)) + + theme_hsci_discrete() + + theme(plot.title = element_text(hjust = 0.5)) + + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) + +avg40 <- number_year_40 %>% + ggplot(aes(x = normalized_year, y = average_output)) + + geom_xspline(color = "purple") + + ggtitle("40 years") + + xlab("") + + ylab("") + + scale_x_continuous(breaks = seq(1000, 2000, by = 2)) + + scale_y_continuous(breaks = seq(0, 300, by = 10)) + + theme_hsci_discrete() + + theme(plot.title = element_text(hjust = 0.5)) + + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) + +plot <- ggarrange(avg10, avg20, avg30, avg40, nrow = 2, ncol = 2) +annotate_figure(plot, top = text_grob("Average number of member publications in German per active person per year, with active persons calculated as having joined at most 20/40/60 years prior to the year under calculation", + color = "black", face = "bold", size = 9 +)) ``` ```{r} vd17_genre_categorisation_a ``` ```{r} -#list_society_genre <- c("Lyrik/Poetry","Drama/Theatre","Epik/Prose","Other Literature Related","Linguistic and Language","Virtue, Civility and Ethics","Other Society Related","Occasional Literature","German as a scientific language") -vd17_society_genres <- vd17_genre_categorisation_a%>% - left_join(vd17_genres_a,by=c("genre"))%>% - collect()%>% - filter(!(group_2=="Formats"))%>% - mutate(genre_category=group_2,genre_group=group_1)%>% - select(record_number,genre_category,genre_group,genre) - -vd17_society_genres$genre_category[vd17_society_genres$genre_category == 'General works'] <- 'General Works' +# list_society_genre <- c("Lyrik/Poetry","Drama/Theatre","Epik/Prose","Other Literature Related","Linguistic and Language","Virtue, Civility and Ethics","Other Society Related","Occasional Literature","German as a scientific language") +vd17_society_genres <- vd17_genre_categorisation_a %>% + left_join(vd17_genres_a, by = c("genre")) %>% + collect() %>% + filter(!(group_2 == "Formats")) %>% + mutate(genre_category = group_2, genre_group = group_1) %>% + select(record_number, genre_category, genre_group, genre) + +vd17_society_genres$genre_category[vd17_society_genres$genre_category == "General works"] <- "General Works" ``` # Visualization: Society-related genres by genre category: Numbers over time @@ -295,17 +306,17 @@ vd17_society_genres$genre_category[vd17_society_genres$genre_category == 'Genera core_society_genre <- core_society %>% left_join(vd17_normalized_years_local, by = c("record_number")) %>% filter(normalized_year >= 1600, normalized_year <= 1700) %>% - filter(nchar(normalized_year)==4)%>% - left_join(vd17_society_genres,by = c("record_number"))%>% - select(record_number,normalized_year,vd17_id,genre_category,genre_group,genre)%>% + filter(nchar(normalized_year) == 4) %>% + left_join(vd17_society_genres, by = c("record_number")) %>% + select(record_number, normalized_year, vd17_id, genre_category, genre_group, genre) %>% distinct() society_interest_genre <- society_interest %>% left_join(vd17_normalized_years_local, by = c("record_number")) %>% filter(normalized_year >= 1600, normalized_year <= 1700) %>% - filter(nchar(normalized_year)==4)%>% - left_join(vd17_society_genres,by = c("record_number"))%>% - select(record_number,normalized_year,vd17_id,genre_category,genre_group,genre)%>% + filter(nchar(normalized_year) == 4) %>% + left_join(vd17_society_genres, by = c("record_number")) %>% + select(record_number, normalized_year, vd17_id, genre_category, genre_group, genre) %>% distinct() ``` @@ -315,163 +326,172 @@ colour_list <- c( "dodgerblue2", "#E31A1C", "green4", "#6A3D9A", - "#FF7F00", + "#FF7F00", "black", "gold1", "skyblue2", "#FB9A99", "palegreen2", - "#CAB2D6", + "#CAB2D6", "#FDBF6F", "gray70", "khaki2", "maroon", "orchid1", "deeppink1" ) -plot1 <-core_society_genre%>% - group_by(normalized_year)%>% - count(genre_category,genre_group)%>% - arrange(desc(n))%>% - distinct()%>% - ggplot(aes(x = normalized_year, y = n, fill= genre_category,colour = genre_category)) + geom_point(size = 3)+ - ggtitle("Member Publications in Society-Related Genre Categories (core_society)")+ - ylab("")+xlab("Year")+ +plot1 <- core_society_genre %>% + group_by(normalized_year) %>% + count(genre_category, genre_group) %>% + arrange(desc(n)) %>% + distinct() %>% + ggplot(aes(x = normalized_year, y = n, fill = genre_category, colour = genre_category)) + + geom_point(size = 3) + + ggtitle("Member Publications in Society-Related Genre Categories (core_society)") + + ylab("") + + xlab("Year") + scale_x_continuous(breaks = seq(1000, 2000, by = 3)) + scale_y_continuous(breaks = seq(0, 500, by = 50)) + theme_hsci_discrete() + - theme(legend.position="bottom",legend.text=element_text(size=15))+ - theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5),,text = element_text(size = 15))+ - theme(plot.title = element_text(hjust = 0.5))+ - facet_wrap(~ genre_group) - -plot2 <-society_interest_genre%>% - group_by(normalized_year)%>% - count(genre_category,genre_group)%>% - arrange(desc(n))%>% - distinct()%>% - ggplot(aes(x = normalized_year, y = n, fill= genre_category,colour = genre_category)) + geom_point(size = 3)+ - ggtitle("Member Publications in Society-Related Genre Categories (links_of_interest)")+ - ylab("")+xlab("Year")+ + theme(legend.position = "bottom", legend.text = element_text(size = 15)) + + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5), , text = element_text(size = 15)) + + theme(plot.title = element_text(hjust = 0.5)) + + facet_wrap(~genre_group) + +plot2 <- society_interest_genre %>% + group_by(normalized_year) %>% + count(genre_category, genre_group) %>% + arrange(desc(n)) %>% + distinct() %>% + ggplot(aes(x = normalized_year, y = n, fill = genre_category, colour = genre_category)) + + geom_point(size = 3) + + ggtitle("Member Publications in Society-Related Genre Categories (links_of_interest)") + + ylab("") + + xlab("Year") + scale_x_continuous(breaks = seq(1000, 2000, by = 3)) + scale_y_continuous(breaks = seq(0, 500, by = 50)) + theme_hsci_discrete() + - theme(legend.position="bottom",legend.text=element_text(size=15))+ - theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5),text = element_text(size = 15))+ - theme(plot.title = element_text(hjust = 0.5))+ - facet_wrap(~ genre_group) - -ggarrange(plot1,plot2,nrow = 2,ncol = 1) -ggsave(file="plot1.jpg", width=20, height=20, dpi=200) + theme(legend.position = "bottom", legend.text = element_text(size = 15)) + + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5), text = element_text(size = 15)) + + theme(plot.title = element_text(hjust = 0.5)) + + facet_wrap(~genre_group) +ggarrange(plot1, plot2, nrow = 2, ncol = 1) +ggsave(file = "plot1.jpg", width = 20, height = 20, dpi = 200) ``` # Visualization: Society-related genres by genre category: Proportion over time ```{r} -core_society_frequency <- core_society_genre%>% - select(vd17_id,record_number,normalized_year,genre,genre_category,genre_group)%>% - distinct(vd17_id,record_number,normalized_year,genre,genre_category,genre_group)%>% - group_by(normalized_year)%>% - count(genre_category,genre_group)%>% - arrange(desc(n))%>% - mutate(percentage=n/sum(n))%>% +core_society_frequency <- core_society_genre %>% + select(vd17_id, record_number, normalized_year, genre, genre_category, genre_group) %>% + distinct(vd17_id, record_number, normalized_year, genre, genre_category, genre_group) %>% + group_by(normalized_year) %>% + count(genre_category, genre_group) %>% + arrange(desc(n)) %>% + mutate(percentage = n / sum(n)) %>% distinct() -society_interest_frequency <- society_interest_genre%>% - select(vd17_id,record_number,normalized_year,genre,genre_category,genre_group)%>% - distinct(vd17_id,record_number,normalized_year,genre,genre_category,genre_group)%>% - group_by(normalized_year)%>% - count(genre_category,genre_group)%>% - arrange(desc(n))%>% - mutate(percentage=n/sum(n))%>% +society_interest_frequency <- society_interest_genre %>% + select(vd17_id, record_number, normalized_year, genre, genre_category, genre_group) %>% + distinct(vd17_id, record_number, normalized_year, genre, genre_category, genre_group) %>% + group_by(normalized_year) %>% + count(genre_category, genre_group) %>% + arrange(desc(n)) %>% + mutate(percentage = n / sum(n)) %>% distinct() - ``` ```{r} -p1 <-core_society_frequency%>% - ggplot(aes(x = normalized_year, y = percentage, fill= genre_category,colour = genre_category)) + geom_point(size = 3)+ - ggtitle("Member Publications in Society-Related Genres (core_society)")+ - ylab("")+xlab("Year")+ - scale_x_continuous(breaks = seq(1000, 2000, by = 2))+ - scale_y_continuous(labels = scales::percent)+ - theme_hsci_discrete()+ - theme(legend.position="bottom",legend.text=element_text(size=15))+ - theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5),,text = element_text(size = 15))+ - theme(plot.title = element_text(hjust = 0.5))+ - facet_wrap(~ genre_group) - -p2 <-society_interest_frequency%>% - ggplot(aes(x = normalized_year, y = percentage, fill= genre_category,colour = genre_category)) + geom_point(size = 3)+ - ggtitle("Member Publications in Society-Related Genres (links_of_interest)")+ - ylab("")+xlab("Year")+ - scale_x_continuous(breaks = seq(1000, 2000, by = 2))+ - scale_y_continuous(labels = scales::percent)+ - theme_hsci_discrete()+ - theme(legend.position="bottom",legend.text=element_text(size=15))+ - theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5),,text = element_text(size = 15))+ - theme(plot.title = element_text(hjust = 0.5))+ - facet_wrap(~ genre_group) - -ggarrange(p1,p2,nrow = 2,ncol = 1) -ggsave(file="plot2.jpg", width=20, height=20, dpi=200) +p1 <- core_society_frequency %>% + ggplot(aes(x = normalized_year, y = percentage, fill = genre_category, colour = genre_category)) + + geom_point(size = 3) + + ggtitle("Member Publications in Society-Related Genres (core_society)") + + ylab("") + + xlab("Year") + + scale_x_continuous(breaks = seq(1000, 2000, by = 2)) + + scale_y_continuous(labels = scales::percent) + + theme_hsci_discrete() + + theme(legend.position = "bottom", legend.text = element_text(size = 15)) + + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5), , text = element_text(size = 15)) + + theme(plot.title = element_text(hjust = 0.5)) + + facet_wrap(~genre_group) + +p2 <- society_interest_frequency %>% + ggplot(aes(x = normalized_year, y = percentage, fill = genre_category, colour = genre_category)) + + geom_point(size = 3) + + ggtitle("Member Publications in Society-Related Genres (links_of_interest)") + + ylab("") + + xlab("Year") + + scale_x_continuous(breaks = seq(1000, 2000, by = 2)) + + scale_y_continuous(labels = scales::percent) + + theme_hsci_discrete() + + theme(legend.position = "bottom", legend.text = element_text(size = 15)) + + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5), , text = element_text(size = 15)) + + theme(plot.title = element_text(hjust = 0.5)) + + facet_wrap(~genre_group) + +ggarrange(p1, p2, nrow = 2, ncol = 1) +ggsave(file = "plot2.jpg", width = 20, height = 20, dpi = 200) ``` # Visualization: Comparison with VD17: Society-related genre categories ```{r} -vd17_local_id <- vd17_id_a%>% +vd17_local_id <- vd17_id_a %>% collect() genres_vd17_society <- vd17_local_id %>% - inner_join(vd17_normalized_langs_local, by=c("record_number"))%>% + inner_join(vd17_normalized_langs_local, by = c("record_number")) %>% left_join(vd17_normalized_years_local, by = c("record_number")) %>% filter(normalized_year >= 1600, normalized_year <= 1700) %>% - filter(nchar(normalized_year)==4)%>% - left_join(vd17_society_genres,by = c("record_number"))%>% - select(record_number,normalized_year,vd17_id,genre_category,genre_group,genre)%>% + filter(nchar(normalized_year) == 4) %>% + left_join(vd17_society_genres, by = c("record_number")) %>% + select(record_number, normalized_year, vd17_id, genre_category, genre_group, genre) %>% distinct() -genres_vd17_society%>% - group_by(normalized_year)%>% - count(genre_category,genre_group)%>% - arrange(desc(n))%>% - distinct()%>% - ggplot(aes(x = normalized_year, y = n, fill= genre_category,colour = genre_category)) + geom_point(size = 3)+ - ggtitle("Seventeenth-Century Publications in Society-Related Genres")+ - ylab("")+xlab("Year")+ +genres_vd17_society %>% + group_by(normalized_year) %>% + count(genre_category, genre_group) %>% + arrange(desc(n)) %>% + distinct() %>% + ggplot(aes(x = normalized_year, y = n, fill = genre_category, colour = genre_category)) + + geom_point(size = 3) + + ggtitle("Seventeenth-Century Publications in Society-Related Genres") + + ylab("") + + xlab("Year") + scale_x_continuous(breaks = seq(1000, 2000, by = 3)) + scale_y_continuous(breaks = seq(0, 700, by = 50)) + theme_hsci_discrete() + - theme(legend.position="bottom",legend.text=element_text(size=15))+ - theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5),,text = element_text(size = 15))+ - theme(plot.title = element_text(hjust = 0.5))+ - facet_wrap(~ genre_group) + theme(legend.position = "bottom", legend.text = element_text(size = 15)) + + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5), , text = element_text(size = 15)) + + theme(plot.title = element_text(hjust = 0.5)) + + facet_wrap(~genre_group) -ggsave(file="plot3.jpg", width=20, height=20, dpi=200) +ggsave(file = "plot3.jpg", width = 20, height = 20, dpi = 200) ``` # Visualization: Comparison with VD17: Society output in German ```{r} -vd17_fbs <- vd17_normalized_langs_local%>% +vd17_fbs <- vd17_normalized_langs_local %>% left_join(vd17_normalized_years_local, by = c("record_number")) %>% filter(normalized_year >= 1600, normalized_year <= 1700) %>% - filter(nchar(normalized_year)==4)%>% - left_join(fbs_records,by=c('record_number')) %>% - select ('normalized_year','record_number','vd17_id','member_number')%>% - mutate(group = case_when( + filter(nchar(normalized_year) == 4) %>% + left_join(fbs_records, by = c("record_number")) %>% + select("normalized_year", "record_number", "vd17_id", "member_number") %>% + mutate(group = case_when( (is.na(member_number)) ~ "VD17", (!is.na(member_number)) ~ "Society" - ))%>% - distinct()%>% + )) %>% + distinct() %>% group_by(normalized_year) %>% - count(group)%>% + count(group) %>% arrange(desc(n)) -vd17_fbs%>% - ggplot(aes(x=normalized_year,y=n,fill=group))+ - geom_col()+ - ggtitle("Seventeenth-Century Publications in German")+ - xlab("") + ylab("")+ - scale_x_continuous(breaks = seq(1000, 2000, by = 2))+ - scale_y_continuous(breaks = seq(0, 10000, by = 200))+ - theme_hsci_discrete()+ - theme(plot.title = element_text(hjust = 0.5))+ - theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5)) -``` \ No newline at end of file +vd17_fbs %>% + ggplot(aes(x = normalized_year, y = n, fill = group)) + + geom_col() + + ggtitle("Seventeenth-Century Publications in German") + + xlab("") + + ylab("") + + scale_x_continuous(breaks = seq(1000, 2000, by = 2)) + + scale_y_continuous(breaks = seq(0, 10000, by = 200)) + + theme_hsci_discrete() + + theme(plot.title = element_text(hjust = 0.5)) + + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +``` diff --git a/src/narges/RQ-Trello.Rmd b/src/narges/RQ-Trello.Rmd index 31c0a75ca..b79987908 100644 --- a/src/narges/RQ-Trello.Rmd +++ b/src/narges/RQ-Trello.Rmd @@ -85,28 +85,27 @@ unified_places_of_publications <- unified_places_of_publication %>% collect() vd17_normalized_years <- vd17_normalized_years_a %>% - select(record_number,normalized_year)%>% + select(record_number, normalized_year) %>% collect() vd17_genres <- vd17_genres_a %>% collect() -vd17_genre_categorisation <-vd17_genre_categorisation_a %>% +vd17_genre_categorisation <- vd17_genre_categorisation_a %>% collect() ``` ```{r} - fbsgenre_place <- fbs_records_a %>% - filter(set=="Active member substantive role and society purpose related") %>% - inner_join(vd17_id_a, join_by(vd17_id)) %>% - collect()%>% - inner_join(vd17_genres, by=c("record_number")) %>% - inner_join(vd17_normalized_years, by=c("record_number")) %>% - left_join(vd17_genre_categorisation)%>% - left_join(unified_places_of_publications,by=c("record_number")) - -fbsgenre_place <- select(fbsgenre_place,-c('3','7')) + filter(set == "Active member substantive role and society purpose related") %>% + inner_join(vd17_id_a, join_by(vd17_id)) %>% + collect() %>% + inner_join(vd17_genres, by = c("record_number")) %>% + inner_join(vd17_normalized_years, by = c("record_number")) %>% + left_join(vd17_genre_categorisation) %>% + left_join(unified_places_of_publications, by = c("record_number")) + +fbsgenre_place <- select(fbsgenre_place, -c("3", "7")) ``` ```{r} @@ -114,27 +113,27 @@ colour_list <- c( "dodgerblue2", "#E31A1C", "green4", "#6A3D9A", - "#FF7F00", + "#FF7F00", "black", "gold1", "skyblue2", "#FB9A99", "palegreen2", - "#CAB2D6", + "#CAB2D6", "#FDBF6F", "gray70", "khaki2", "maroon", "orchid1", "deeppink1" ) fbsgenre_place %>% - count(genre, places_of_publication,group_1, group_3) %>% - filter(group_1=="Society-related") %>% - group_by(places_of_publication,group_3)%>% - #summarise(average=mean(n),.groups="drop") %>% - ggplot(aes(x=places_of_publication,y=n,color=group_3)) + + count(genre, places_of_publication, group_1, group_3) %>% + filter(group_1 == "Society-related") %>% + group_by(places_of_publication, group_3) %>% + # summarise(average=mean(n),.groups="drop") %>% + ggplot(aes(x = places_of_publication, y = n, color = group_3)) + geom_point() + - theme_hsci_discrete()+ - theme(legend.position="bottom",legend.text=element_text(size=8))+ - theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5),text = element_text(size = 8,face = "bold"))+ - theme(plot.title = element_text(hjust = 0.5))+ - scale_fill_manual(values=colour_list) + theme_hsci_discrete() + + theme(legend.position = "bottom", legend.text = element_text(size = 8)) + + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5), text = element_text(size = 8, face = "bold")) + + theme(plot.title = element_text(hjust = 0.5)) + + scale_fill_manual(values = colour_list) ``` @@ -164,11 +163,12 @@ gs4_auth(cache = ".secrets", email = "narges.azizifard@gmail.com") # Creating the google spreadsheet ```{r} -fbs_genre_place_link <- fbsgenre_place%>% - mutate(vd17_id=gs4_formula(str_c('=HYPERLINK("https://kxp.k10plus.de/DB=1.28/CMD?ACT=SRCHA&IKT=8079&TRM=%27',vd17_id,'%27","',vd17_id,'")'))) +fbs_genre_place_link <- fbsgenre_place %>% + mutate(vd17_id = gs4_formula(str_c('=HYPERLINK("https://kxp.k10plus.de/DB=1.28/CMD?ACT=SRCHA&IKT=8079&TRM=%27', vd17_id, '%27","', vd17_id, '")'))) -fbs_genre_place<- gs4_create( +fbs_genre_place <- gs4_create( "fbs_genre_place", - sheets = fbs_genre_place_link) + sheets = fbs_genre_place_link +) fbs_genre_place ```