-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy path03_analyses_tutorial.py
143 lines (122 loc) · 5.66 KB
/
03_analyses_tutorial.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from utils import load_markdown_file_combined
import os
import json
def plot_barplot(data, column, ax, distribution_text, count_text,
                 *, categories=(2, 3, 4), y_max=500):
    """Draw a bar chart of how often each category value occurs in a column.

    Args:
        data: Table whose ``data[column]`` supports ``value_counts``
            (the caller passes a pandas DataFrame).
        column: Name of the column to count; also used as the x-axis label
            and appended to the title.
        ax: Axes object to draw on (``bar``, ``set_xlabel``, ``set_ylabel``,
            ``set_title``, ``set_xticks`` and ``set_ylim`` are called on it).
        distribution_text: Title prefix; the column name is appended to it.
        count_text: Label for the y-axis.
        categories: Values to show on the x-axis, in order. A category that
            does not occur in the column gets a bar of height 0.
        y_max: Upper limit of the y-axis. It is fixed rather than autoscaled,
            so a bar taller than ``y_max`` is cut off at the top of the plot.
    """
    counts = data[column].value_counts(sort=False)
    positions = list(categories)
    # Look every category up explicitly so absent values still get a (zero) bar.
    heights = [counts.get(category, 0) for category in positions]

    ax.bar(positions, heights, alpha=0.7, color='orange')
    ax.set_xlabel(column)
    ax.set_ylabel(f"{count_text}")
    ax.set_title(f"{distribution_text} {column}")
    ax.set_xticks(positions)
    ax.set_ylim(0, y_max)  # Fixed y-axis
def plot_histogram(data, column, ax, histogram_text, frequency_text, bins=20,
                   *, value_range=(0, 700), y_max=190, x_margin=30):
    """Draw a histogram of a column using equal-width bins over a fixed range.

    Args:
        data: Table indexable by ``column`` (the caller passes a pandas
            DataFrame).
        column: Name of the column to histogram; also used as the x-axis
            label and appended to the title.
        ax: Axes object to draw on (``hist``, ``set_xlabel``, ``set_ylabel``,
            ``set_title``, ``set_xlim`` and ``set_ylim`` are called on it).
        histogram_text: Title prefix; the column name is appended to it.
        frequency_text: Label for the y-axis.
        bins: Number of equal-width bins spanning ``value_range``.
        value_range: ``(low, high)`` edges of the first and last bin. Values
            outside this interval lie outside every bin edge and are
            therefore not counted.
        y_max: Upper limit of the y-axis (fixed, not autoscaled).
        x_margin: Extra space shown to the left of ``low`` on the x-axis.
    """
    low, high = value_range
    # Explicit edges keep the binning identical no matter which subset of
    # the data is plotted.
    bin_edges = np.linspace(low, high, bins + 1)

    ax.hist(data[column], bins=bin_edges, alpha=0.7)
    ax.set_xlabel(column)
    ax.set_ylabel(f"{frequency_text}")
    ax.set_title(f"{histogram_text} {column}")
    ax.set_xlim(low - x_margin, high)  # Fixed x-axis
    ax.set_ylim(0, y_max)  # Fixed y-axis
def _render_tutorial_plots(frame, extras):
    """Show the nLeptons bar chart and the LeadingLeptonEnergy histogram side by side."""
    left_col, right_col = st.columns(2)
    with left_col:
        fig, ax = plt.subplots()
        plot_barplot(frame, "nLeptons", ax, extras['distribution'], extras['count'])
        st.pyplot(fig)
        # pyplot keeps every figure alive until it is closed; without this,
        # each Streamlit rerun would leave more figures in memory.
        plt.close(fig)
    with right_col:
        fig, ax = plt.subplots()
        plot_histogram(frame, "LeadingLeptonEnergy", ax, extras['histogram'], extras['frequency'])
        st.pyplot(fig)
        plt.close(fig)


def run(selected_language):
    """Render the analysis tutorial page.

    The page loads the translated plot labels from
    ``docs/<language>/analyses/tutorial/extras.json``, reads
    ``event_dataset.csv``, and then walks through three stages, each followed
    by the same pair of plots: the full dataset, the dataset after a
    selection on ``nLeptons``, and the dataset after an additional range cut
    on ``LeadingLeptonEnergy``.

    Args:
        selected_language: Language name; it is lower-cased to build the
            path of the extras file and passed unchanged to
            ``load_markdown_file_combined``.

    If the extras file is missing or is not valid JSON, an error is shown
    with ``st.error`` and the script is halted with ``st.stop``.
    """
    # Opening the file with the extras analysis part
    extras_file_path = os.path.join('docs', selected_language.lower(), 'analyses/tutorial', 'extras.json')

    # Load the extras JSON file
    try:
        with open(extras_file_path, 'r', encoding='utf-8') as extras_file:
            extras = json.load(extras_file)
    except FileNotFoundError:
        st.error(
            f"Extras file not found for language: {selected_language}. "
            f"Check the file path: {extras_file_path}"
        )
        st.stop()  # Stop the app if the file is missing
    except json.JSONDecodeError:
        st.error("Failed to decode the extras JSON file. Please check its format.")
        st.stop()  # Stop the app if the JSON is malformed

    # Load mock data
    data = pd.read_csv("event_dataset.csv")

    # Introduction to tutorial
    load_markdown_file_combined(filename='intro.md',
                                folder='analyses/tutorial',
                                language=selected_language)
    load_markdown_file_combined(filename='cuts.md',
                                folder='analyses/tutorial',
                                language=selected_language)
    load_markdown_file_combined(filename='dataset.md',
                                folder='analyses/tutorial',
                                language=selected_language)
    st.dataframe(data)

    # Column info
    load_markdown_file_combined(filename='columns.md',
                                folder='analyses/tutorial',
                                language=selected_language,
                                data=data.shape[0])

    # Initial visualization
    _render_tutorial_plots(data, extras)

    # Selection: Filter by number of particles
    load_markdown_file_combined(filename='selection_cut.md',
                                folder='analyses/tutorial',
                                language=selected_language)
    selected_nLeptons = st.multiselect(
        "Choose the number of particles to include:",
        options=[2, 3, 4],
        default=[]
    )
    # Nothing is selected by default, so the filtered table starts out empty.
    filtered_data = data[data["nLeptons"].isin(selected_nLeptons)].reset_index(drop=True)

    load_markdown_file_combined(filename='visualization.md',
                                folder='analyses/tutorial',
                                language=selected_language,
                                filtered_data_size=len(filtered_data),
                                filtered_data=filtered_data)
    _render_tutorial_plots(filtered_data, extras)

    # Range: Filter by energy range
    load_markdown_file_combined(filename='range_cut.md',
                                folder='analyses/tutorial',
                                language=selected_language,
                                filtered_data_size=len(filtered_data),
                                filtered_data=filtered_data)

    # Round the bounds outward so the slider can reach every event: plain
    # int() truncation would put the maximum just below the largest energy.
    energies = data["LeadingLeptonEnergy"]
    slider_min = int(np.floor(energies.min()))
    slider_max = int(np.ceil(energies.max()))
    # Keep the default window inside the slider bounds; a default outside
    # [min_value, max_value] is rejected by st.slider.
    default_low = min(max(20, slider_min), slider_max)
    default_high = max(min(150, slider_max), default_low)

    min_energy, max_energy = st.slider(
        "Select energy range (GeV):",
        min_value=slider_min,
        max_value=slider_max,
        value=(default_low, default_high)
    )
    in_energy_window = (
        (filtered_data["LeadingLeptonEnergy"] >= min_energy)
        & (filtered_data["LeadingLeptonEnergy"] <= max_energy)
    )
    filtered_data = filtered_data[in_energy_window].reset_index(drop=True)

    load_markdown_file_combined(filename='visualization_again.md',
                                folder='analyses/tutorial',
                                language=selected_language,
                                filtered_data_size=len(filtered_data),
                                filtered_data=filtered_data)
    _render_tutorial_plots(filtered_data, extras)

    load_markdown_file_combined(filename='summary.md',
                                folder='analyses/tutorial',
                                language=selected_language)