diff --git a/Presentation/Figures/Images/Styling-Line-Graphs/styling_line_graphs_Python.png b/Presentation/Figures/Images/Styling-Line-Graphs/styling_line_graphs_Python.png index 422e2b28..7eb0e741 100644 Binary files a/Presentation/Figures/Images/Styling-Line-Graphs/styling_line_graphs_Python.png and b/Presentation/Figures/Images/Styling-Line-Graphs/styling_line_graphs_Python.png differ diff --git a/Presentation/Figures/styling_line_graphs.md b/Presentation/Figures/styling_line_graphs.md index 7a593c19..3f38c0b7 100644 --- a/Presentation/Figures/styling_line_graphs.md +++ b/Presentation/Figures/styling_line_graphs.md @@ -24,75 +24,63 @@ import pandas as pd import seaborn.objects as so import numpy as np import matplotlib.pyplot as plt +from seaborn import axes_style + + + # Download the economics dataset (from ggplot2 so comparison is apples-to-apples) url = "https://raw.githubusercontent.com/tidyverse/ggplot2/main/data-raw/economics.csv" economics = pd.read_csv(url) -# Disclaimer on seaborn.objects. -""" -seaborn.objects is still under development.. Which means some things are sub-optimal -This includes how it handles datetime x-variables, for example. -It skips far too many years on the xticks, so, a way around that, -is to manually extract just the year value from the date column -and plot that. However, that would mean you have to aggregate per year, which could be -not what you want. -""" + +# Quick manipulation of dataframe to convert column to datetime df = ( economics .assign( - date = lambda df: pd.to_datetime(df['date']), - year = lambda df: df['date'].dt.year - ) - .groupby(by='year') - .aggregate( - Mean_Unemploy = ('unemploy', 'mean'), - Mean_psavert = ('psavert', 'mean'), - Mean_unempmed = ('uempmed', 'mean') + date = lambda df: pd.to_datetime(df['date']) ) - .reset_index() ) -# Basic Plot with the most basic renamings + +# Default plots (Notice the xaxis only has 2 years! 
We'll fix this in p2) p1 = ( - so.Plot(data=df, x='year', y='Mean_unempmed') + so.Plot(data=df, x='date', y='uempmed') .add(so.Line()) - ) - p1 -## Change line color and chart labels +## Change line color and chart labels, and fix xaxis ## Note here that color is inside of the Line call, so this would color the line. ## If color were instead *inside* the so.Plot() object, SO would assign it -## a different line for each value of the factor variable (column), colored differently. -# Howeever, in our case, we can pass a color directly. +## a different line for each value of the factor variable (column), colored differently. (Commonly referred to as hue in seaborn) +# However, in our case, we can pass a color directly. p2 = ( - so.Plot(data=df, x='year', y='Mean_unempmed') + so.Plot(data=df, x='date', y='uempmed') .add(so.Line(color='purple')) - .label(title='Annual avg of median unemployment duration', x='Year', y='Mean Unemployed') - + .label(title='Median Duration of Unemployment', x='Date', y='') + .scale(x=so.Temporal().tick(upto=10)) #Needed for current configuration of seaborn.objects so xaxis prints more than 2 ticks + .theme(axes_style("whitegrid")) #use a function from parent seaborn library, that will pass a prebuilt selection based on what you pass ) p2 - - ## plotting multiple charts (of different line types and sizes) p3 = ( so.Plot(data=df) - .add(so.Line(color='darkblue', linewidth=5), x='year', y='Mean_unempmed') - .add(so.Line(color='red', linewidth=2, linestyle='dotted'), x='year', y='Mean_psavert') - .label(title='Economics', + .add(so.Line(color='darkblue', linewidth=5), x='date', y='uempmed') + .add(so.Line(color='red', linewidth=2, linestyle='dotted'), x='date', y='psavert') + .label(title='Unemployment Duration (Blue)\n & Savings Rate (Red)', x='Year', y='') - + .scale(x=so.Temporal().tick(upto=10)) #Needed for current configuration of seaborn.objects so xaxis prints more than 2 ticks + .theme(axes_style("whitegrid")) #use a function from 
parent seaborn library, that will pass a prebuilt selection based on what you pass ) p3 @@ -101,23 +89,21 @@ p3 ## Plotting a different line type for each group ## There isn't a natural factor in this data so let's just duplicate the data and make one up df['fac'] = 1 - df2 = df.copy() - df2['fac'] = 2 - -df2['Mean_unempmed'] = df2['Mean_unempmed'] - 2 + np.random.normal(size=len(df2)) - +df2['uempmed'] = df2['uempmed'] - 2 + np.random.normal(size=len(df2)) df_final = pd.concat([df, df2], ignore_index=True).astype({'fac':'category'}) p4 = ( - so.Plot(data=df_final, x='year', y='Mean_unempmed', color='fac') + so.Plot(data=df_final, x='date', y='uempmed', color='fac') .add(so.Line()) .label(title = "Median Duration of Unemployment", x = "Date", y = "", color='Random Factor') + .scale(x=so.Temporal().tick(upto=10)) #Needed for current configuration of seaborn.objects so xaxis prints more than 2 ticks + .theme(axes_style("whitegrid")) #use a function from parent seaborn library, that will pass a prebuilt selection based on what you pass ) p4 @@ -144,6 +130,7 @@ The four plots generated by the code are (in order p1, p2, then p3 and p4): + ## R ```r ## If necessary