Skip to content

Commit

Permalink
Fixed the xaxis date use (and showing the default plots as not having…
Browse files Browse the repository at this point in the history
… that fix)
  • Loading branch information
RommelArtola committed Aug 20, 2024
1 parent f2ec2de commit 3edc0f2
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 40 deletions.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
67 changes: 27 additions & 40 deletions Presentation/Figures/styling_line_graphs.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,75 +24,63 @@ import pandas as pd
import seaborn.objects as so
import numpy as np
import matplotlib.pyplot as plt
from seaborn import axes_style




# Download the economics dataset (from ggplot2 so comparison is apples-to-apples)
url = "https://raw.githubusercontent.com/tidyverse/ggplot2/main/data-raw/economics.csv"
economics = pd.read_csv(url)

# Disclaimer on seaborn.objects.
"""
seaborn.objects is still under development, which means some things are sub-optimal.
This includes how it handles datetime x-variables, for example:
it skips far too many years on the x-axis ticks. One way around that
is to manually extract just the year value from the date column
and plot that. However, that means you have to aggregate per year, which may
not be what you want.
"""

# Quick manipulation of dataframe to convert column to datetime
df = (
economics
.assign(
date = lambda df: pd.to_datetime(df['date']),
year = lambda df: df['date'].dt.year
)
.groupby(by='year')
.aggregate(
Mean_Unemploy = ('unemploy', 'mean'),
Mean_psavert = ('psavert', 'mean'),
Mean_unempmed = ('uempmed', 'mean')
date = lambda df: pd.to_datetime(df['date'])
)
.reset_index()
)


# Basic Plot with the most basic renamings

# Default plot (notice the x-axis only shows 2 years! We'll fix this in p2)
p1 = (
so.Plot(data=df, x='year', y='Mean_unempmed')
so.Plot(data=df, x='date', y='uempmed')
.add(so.Line())

)

p1




## Change line color and chart labels
## Change line color and chart labels, and fix xaxis
## Note here that color is inside of the Line call, so this would color the line.
## If color were instead *inside* the so.Plot() object, SO would assign it
## a different line for each value of the factor variable (column), colored differently.
# Howeever, in our case, we can pass a color directly.
## a different line for each value of the factor variable (column), colored differently. (Commonly referred to as hue in seaborn)
# However, in our case, we can pass a color directly.
p2 = (
so.Plot(data=df, x='year', y='Mean_unempmed')
so.Plot(data=df, x='date', y='uempmed')
.add(so.Line(color='purple'))
.label(title='Annual avg of median unemployment duration', x='Year', y='Mean Unemployed')

.label(title='Median Duration of Unemployment', x='Date', y='')
.scale(x=so.Temporal().tick(upto=10)) #Needed for current configuration of seaborn.objects so xaxis prints more than 2 ticks
.theme(axes_style("whitegrid")) #use a function from parent seaborn library, that will pass a prebuilt selection based on what you pass
)

p2





## Plotting multiple lines on one chart (of different line types and sizes)
p3 = (
so.Plot(data=df)
.add(so.Line(color='darkblue', linewidth=5), x='year', y='Mean_unempmed')
.add(so.Line(color='red', linewidth=2, linestyle='dotted'), x='year', y='Mean_psavert')
.label(title='Economics',
.add(so.Line(color='darkblue', linewidth=5), x='date', y='uempmed')
.add(so.Line(color='red', linewidth=2, linestyle='dotted'), x='date', y='psavert')
.label(title='Unemployment Duration (Blue)\n & Savings Rate (Red)',
x='Year',
y='')

.scale(x=so.Temporal().tick(upto=10)) #Needed for current configuration of seaborn.objects so xaxis prints more than 2 ticks
.theme(axes_style("whitegrid")) #use a function from parent seaborn library, that will pass a prebuilt selection based on what you pass
)

p3
Expand All @@ -101,23 +89,21 @@ p3
## Plotting a different line type for each group
## There isn't a natural factor in this data so let's just duplicate the data and make one up
df['fac'] = 1

df2 = df.copy()

df2['fac'] = 2

df2['Mean_unempmed'] = df2['Mean_unempmed'] - 2 + np.random.normal(size=len(df2))

df2['uempmed'] = df2['uempmed'] - 2 + np.random.normal(size=len(df2))
df_final = pd.concat([df, df2], ignore_index=True).astype({'fac':'category'})


p4 = (
so.Plot(data=df_final, x='year', y='Mean_unempmed', color='fac')
so.Plot(data=df_final, x='date', y='uempmed', color='fac')
.add(so.Line())
.label(title = "Median Duration of Unemployment",
x = "Date",
y = "",
color='Random Factor')
.scale(x=so.Temporal().tick(upto=10)) #Needed for current configuration of seaborn.objects so xaxis prints more than 2 ticks
.theme(axes_style("whitegrid")) #use a function from parent seaborn library, that will pass a prebuilt selection based on what you pass
)

p4
Expand All @@ -144,6 +130,7 @@ The four plots generated by the code are (in order p1, p2, then p3 and p4):




## R
```r
## If necessary
Expand Down

0 comments on commit 3edc0f2

Please sign in to comment.