Added more R scripts to Scripts library

africanmathsinitiative · Nov 22, 2024 · 55920ee · 55920ee
1 parent d91d395
commit 55920ee
Show file tree

Hide file tree

Showing 5 changed files with 742 additions and 0 deletions.
diff --git a/data/Scripts/Express start r-script.R b/data/Scripts/Express start r-script.R
@@ -0,0 +1,101 @@
+## Express Start
+## This R-Instat introduction is intended for professional users who have some experience with similar software
+
+# Dialog: Import Dataset
+
+# Get the current working directory
+current_dir <- getwd()
+
+# Construct the desired file path by navigating up two directories and into "Library"
+file_path <- file.path(dirname(dirname(current_dir)), "Library", "tutorials_after.RDS")
+
+# Read the RDS file
+new_RDS <- readRDS(file = file_path)
+
+# Dialog: Import Dataset
+data_book$import_RDS(data_RDS=new_RDS)
+rm(new_RDS)
+## Pivot tables in R-Instat as an excellent tool for data exploration
+
+# Producing a Pivot Table in R-Instat via the Pivot table dialog
+
+dodoma <- data_book$get_data_frame(data_name="dodoma")
+last_table <- rpivotTable::rpivotTable(data=dodoma, rows=c("year","day"), cols="month_abbr", val="tmax", rendererName="Table", aggregatorName="Average")
+data_book$add_object(data_name="dodoma", object_name="last_table", object_type_label="table", object_format="html", object=last_table)
+data_book$get_object_data(data_name="dodoma", object_name="last_table", as_file=TRUE)
+rm(list=c("last_table", "dodoma"))
+
+
+# Producing a Pivot Table in R-Instat with the factor levels
+
+dodoma <- data_book$get_data_frame(data_name="dodoma")
+survey_levels <- stringr::str_flatten(string=paste0("\"", levels(x=dodoma $ month_abbr), "\","))
+
+dodoma <- data_book$get_data_frame(data_name="dodoma")
+relevel_variables <- paste0("function(attr) {  var sortAs = $.pivotUtilities.sortAs;  return sortAs([", survey_levels,"]); }")
+last_table <- rpivotTable::rpivotTable(data=dodoma, rows=c("year","day"), cols="month_abbr", sorters=relevel_variables, val="tmax", rendererName="Table", aggregatorName="Average")
+data_book$add_object(data_name="dodoma", object_name="last_table", object_type_label="table", object_format="html", object=last_table)
+data_book$get_object_data(data_name="dodoma", object_name="last_table", as_file=TRUE)
+rm(list=c("last_table", "dodoma", "relevel_variables", "survey_levels"))
+
+
+## Calculations in R-Instat using our powerful calculator dialog
+
+# Calculating the price per carat
+
+diamonds <- data_book$get_data_frame(data_name="diamonds", use_current_filter=FALSE)
+attach(what=diamonds)
+scalars <- data_book$get_scalars(data_name="diamonds")
+attach(what=scalars)
+price_c <- price / carat
+data_book$add_columns_to_data(data_name="diamonds", col_name="price_c", col_data=price_c, before=FALSE, adjacent_column="carat")
+
+detach(name=diamonds, unload=TRUE)
+detach(name=scalars, unload=TRUE)
+data_book$append_to_variables_metadata(data_name="diamonds", col_names="price_c", property="labels", new_val="")
+rm(list=c("price_c", "diamonds", "scalars"))
+
+
+# Calculating the mean of the price per carat
+
+diamonds <- data_book$get_data_frame(data_name="diamonds", use_current_filter=FALSE)
+attach(what=diamonds)
+scalars <- data_book$get_scalars(data_name="diamonds")
+attach(what=scalars)
+mean_price <- mean(rep( price_c ), na.rm=TRUE)
+mean_price
+data_book$add_scalar(data_name="diamonds", scalar_name="mean_price", scalar_value=mean_price)
+detach(name=diamonds, unload=TRUE)
+detach(name=scalars, unload=TRUE)
+rm(list=c("mean_price", "diamonds", "scalars"))
+
+
+## Righ click and filters in R-Instat
+
+# Right click menu: Remove Current Filter
+data_book$remove_current_filter(data_name="dodoma")
+
+
+## Reshaping data into summaries in R-Instat
+
+# Using the Column Summaries to get the annual rainfalls from daily values
+
+data_book$calculate_summary(data_name="dodoma", columns_to_summarise="rain", factors="year", 
+store_results=TRUE, j=1, summaries=c("summary_count_non_missing", "summary_max", "summary_sum"), silent=TRUE)
+
+
+## The describe menu for a graph
+
+# Create Filter subdialog: Created new filter
+data_book$add_filter(filter=list(C0=list(column="color", operation="%in%", value=c("D","E","F"))), data_name="diamonds", filter_name="filter")
+
+# Data Options subdialog: Set the current filter
+data_book$set_current_filter(data_name="diamonds", filter_name="filter")
+
+# Creating a General graph tp highlight the vast flexibility of our system
+
+diamonds <- data_book$get_data_frame(data_name="diamonds")
+last_graph <- ggplot2::ggplot(data=diamonds, mapping=ggplot2::aes(x=clarity, y=carat, fill=clarity)) + ggplot2::geom_boxplot() + theme_grey() + ggplot2::theme(legend.position="none") + ggplot2::facet_grid(facets=cut ~ color, margins=TRUE, space="fixed")
+data_book$add_object(data_name="diamonds", object_name="last_graph", object_type_label="graph", object_format="image", object=check_graph(graph_object=last_graph))
+data_book$get_object_data(data_name="diamonds", object_name="last_graph", as_file=TRUE)
+rm(list=c("last_graph", "diamonds"))
diff --git a/data/Scripts/Prepare Menu practice r-script.R b/data/Scripts/Prepare Menu practice r-script.R
@@ -0,0 +1,185 @@
+# Prepare menu practice r-script
+
+# Importing the 2013 stats data of garuoua dataset to preview
+# Export the data frame to an Excel file in the user's Documents directory
+# Current working directory
+current_dir <- getwd()
+
+# Construct the desired file path
+file_path <- file.path(dirname(dirname(current_dir)), "Library", "Climatic","Cameroon", "garoua_1999_tr.xlsx") 
+
+# Importing the Dodoma Dataset into R-Instat
+
+X2013Stats <- rio::import(file=file_path, guess_max=Inf, which=1)
+data_book$import_data(data_tables=list(X2013Stats=X2013Stats))
+# Delete the 2013 stats data frame so that it can be imported again in the right format
+
+data_book$delete_dataframes(data_names="X2013Stats")
+
+
+# Importing the garoua_1999_tr dataset into R-Instat with a specified number of rows to import
+current_dir <- getwd()
+
+# Construct the desired file path
+file_path <- file.path(dirname(dirname(current_dir)), "Library", "Climatic","Cameroon", "garoua_1999_tr.xlsx") 
+
+garoua_1999_tr_list <- rio::import_list(file = file_path, guess_max = Inf, na = c("", ""), n_max = 31)
+data_book$import_data(data_tables=garoua_1999_tr_list, data_names=c("X2013.Stats", "X2012.Stats", "X2011.Stats", "X2010.Stats", "X2009.Stats", "X2008.Stats", "X2007.Stats", "X2006.Sats", "X2005.Stats", "X2004.Stats", "X2003.Stats", "X2002.Stats", "X2001.Stats", "X2000.Stats", "X1999.Stats"))
+
+rm(garoua_1999_tr)
+
+
+# Putting all the data sheets together by appending them to achieve a more systematic and reliable way to tidy the data
+
+append <- dplyr::bind_rows(data_book$get_data_frame(data_name=c("X2013.Stats","X2012.Stats","X2011.Stats","X2010.Stats","X2009.Stats","X2008.Stats","X2007.Stats","X2006.Sats","X2005.Stats","X2004.Stats","X2003.Stats","X2002.Stats","X2001.Stats","X2000.Stats","X1999.Stats")), .id="id")
+data_book$import_data(data_tables=list(append=append))
+
+rm(append)
+
+
+# Transforming the text column to extract the year
+
+id <- data_book$get_columns_from_data(data_name="append", col_names="id", use_current_filter=FALSE)
+year <- stringr::str_sub(string=id, start=2, end=5)
+data_book$add_columns_to_data(data_name="append", col_name="year", col_data=year, before=FALSE, adjacent_column="id")
+
+rm(list=c("year", "id"))
+
+
+# Rename the "date" column to day
+data_book$rename_column_in_data(data_name="append", column_name="Date", new_val="day", label="")
+
+
+# Stacking the months to go down rather than across
+
+append <- data_book$get_data_frame(data_name="append")
+append_stacked <- tidyr::pivot_longer(append  %>%  dplyr::select(January, February, March, April, May, June, July, August, September, October, November, December, year, day), cols=c("January","February","March","April","May","June","July","August","September","October","November","December"), names_to="month",  values_to="rain")
+data_book$import_data(data_tables=list(append_stacked=append_stacked))
+
+rm(list=c("append_stacked", "append"))
+
+
+# Making a date variable
+
+date <- data_book$make_date_yearmonthday(data_name="append_stacked", day="day", month="month", year="year", year_format="%Y", month_format="%B")
+data_book$add_columns_to_data(data_name="append_stacked", col_name="date", col_data=date, before=TRUE, adjacent_column="year")
+
+rm(date)
+
+
+# Create Filter subdialog: Created new filter
+data_book$add_filter(filter=list(C0=list(column="date", operation="is.na")), data_name="append_stacked", filter_name="filter")
+
+
+# Filtering the date column to find missing dates
+
+data_book$set_current_filter(data_name="append_stacked", filter_name="filter")
+
+
+# Right click menu: Delete Row(s)
+data_book$remove_rows_in_data(data_name="append_stacked", row_names=c("338","350","362","364","366","369","371","722","734","736","738","741","743","1082","1094","1106","1108","1110","1113","1115","1454","1466","1478","1480","1482","1485","1487","1826","1838","1850","1852","1854","1857","1859","2210","2222","2224","2226","2229","2231","2570","2582","2594","2596","2598","2601","2603","2942","2954","2966","2968","2970","2973","2975","3314","3326","3338","3340","3342","3345","3347","3698","3710","3712","3714","3717","3719","4058","4070","4082","4084","4086","4089","4091","4430","4442","4454","4456","4458","4461","4463","4802","4814","4826","4828","4830","4833","4835","5186","5198","5200","5202","5205","5207","5546","5558","5570","5572","5574","5577","5579"))
+
+
+# Right click menu: Remove Current Filter
+data_book$remove_current_filter(data_name="append_stacked")
+
+
+# Sorting the date column to make it into the right order
+
+data_book$sort_dataframe(data_name="append_stacked", col_names="date")
+
+# Right click menu: Convert Column(s) To Numeric
+data_book$convert_column_to_type(data_name="append_stacked", col_names="year", to_type="numeric", ignore_labels=TRUE)
+
+# Right click menu: Convert Column(s) To Factor
+data_book$convert_column_to_type(data_name="append_stacked", col_names="month", to_type="factor")
+
+
+# Using the One Variable Summarise dialog to check the data
+
+append_stacked <- data_book$get_data_frame(data_name="append_stacked")
+last_summary <- summary(data=append_stacked, object=data_book$get_columns_from_data(data_name="append_stacked", col_names=c("date","year","day","month","rain")), maxsum=12, na.rm=FALSE)
+data_book$add_object(data_name="append_stacked", object_name="last_summary", object_type_label="summary", object_format="text", object=last_summary)
+data_book$get_object_data(data_name="append_stacked", object_name="last_summary", as_file=TRUE)
+rm(list=c("last_summary", "append_stacked"))
+
+
+# Generating a rainday column
+
+rain <- data_book$get_columns_from_data(data_name="append_stacked", col_names="rain", use_current_filter=FALSE)
+rainday <- rain >= 1
+data_book$add_columns_to_data(data_name="append_stacked", col_name="rainday", col_data=rainday, before=FALSE, adjacent_column="rain")
+
+data_book$append_to_variables_metadata(data_name="append_stacked", col_names="rainday", property="labels", new_val="")
+rm(list=c("rainday", "rain"))
+
+
+# Using the Column Summaries dialog to make the annual rainfall summaries
+
+data_book$calculate_summary(data_name="append_stacked", columns_to_summarise=c("rain","rainday"), factors="year", store_results=TRUE, j=1, summaries=c("summary_count_non_missing", "summary_sum"), silent=TRUE)
+
+
+# Using the Column Summaries dialog to make the annual rainfall summaries without the missing values
+
+data_book$calculate_summary(data_name="append_stacked", columns_to_summarise=c("rain","rainday"), factors="year", store_results=TRUE, na.rm=TRUE, na_type=c(), j=1, summaries=c("summary_count_non_missing", "summary_sum"), silent=TRUE)
+
+
+# Using the Column Summaries dialog to find the maximum daily rainfall yearly
+
+data_book$calculate_summary(data_name="append_stacked", columns_to_summarise="rain", factors="year", store_results=TRUE, na.rm=TRUE, na_type=c(), j=1, summaries=c("summary_count_non_missing", "summary_max", "summary_sum"), silent=TRUE)
+
+
+# Using the Column Summaries dialog to find the maximum daily rainfall monthly
+
+data_book$calculate_summary(data_name="append_stacked", columns_to_summarise="rain", store_results=TRUE, factors=c("year","month"), na.rm=TRUE, na_type=c(), j=1, summaries=c("summary_count_non_missing", "summary_max", "summary_sum"), silent=TRUE)
+
+
+# Using the Column Summaries dialog to find the sumof the raindays monthly
+
+data_book$calculate_summary(data_name="append_stacked", columns_to_summarise=c("rain","rainday"), store_results=TRUE, factors=c("year","month"), na.rm=TRUE, na_type=c(), j=1, summaries=c("summary_sum"), silent=TRUE)
+
+
+# Sorting the year column to organize the data by year
+
+data_book$sort_dataframe(data_name="append_stacked_by_year_month", col_names="year")
+
+
+# Save the data for later use
+
+saveRDS(file=file.path(Sys.getenv("USERPROFILE"), "Documents", "garoua.RDS"), object=data_book)
+
+
+# Exporting datasets as Excel files
+
+rio::export(x = data_book$get_data_frame(data_name = "append_stacked"),file = file.path(Sys.getenv("USERPROFILE"), "Documents", "append_stacked.xlsx"))
+rio::export(x = data_book$get_data_frame(data_name = "append_stacked_by_year"),file = file.path(Sys.getenv("USERPROFILE"), "Documents", "append_stacked_by_year.xlsx"))
+rio::export( x = data_book$get_data_frame(data_name="append_stacked_by_year_month"), file = file.path(Sys.getenv("USERPROFILE"), "Documents", "append_stacked_by_year_month.xlsx"))
+
+# Renaming the append_stacked Data Frame
+
+data_book$rename_dataframe(data_name="append_stacked", new_value="garoua", label="")
+
+
+# Renaming the append_stacked_by_year Data Frame
+
+data_book$rename_dataframe(data_name="append_stacked_by_year", new_value="garoua_by_year", label="")
+
+
+# Renaming the append_stacked_by_year_by_month Data Frame
+
+data_book$rename_dataframe(data_name="append_stacked_by_year_month", new_value="garouar_month", label="")
+
+
+# File > Save: save file
+saveRDS(file=file.path(Sys.getenv("USERPROFILE"), "Documents", "garoua.RDS"), object=data_book)
+
+
+
+# Adding a  comment to make a report about the data
+
+data_book$add_new_comment(data_name="garoua_by_year", row="2", column="sum_rainday", comment="Missing value on February 29th in each leap year.")
+
+
+# Adding a  comment to make a report about the data
+
+data_book$add_new_comment(data_name="garoua_by_year", row="8", column="sum_rainday", comment="31st September was wrongly identified as having rainfall and September has 30 days")