new imgs

eunomia-bpf · Sep 17, 2024 · bb901f9 · bb901f9
1 parent ed9f0c1
commit bb901f9
Show file tree

Hide file tree

Showing 32 changed files with 310,017 additions and 163 deletions.
diff --git a/README.md b/README.md
@@ -11,7 +11,6 @@ Code-Survey helps you `explore` and `analyze`. the world's largest and most intr
 
 With the power of AI and Large Language Models (LLMs), you can ask questions, run queries, and gain a deeper understanding of how systems evolve over time. AI Agents can also help you analyze that. Whether you're a developer, researcher, or enthusiast, Code-Survey bridges the gap between `design`, `implementation`, `maintenance`, `reliability` and `security`, making complex systems more accessible.
 
-
 Unlike other approaches:
 
 - **No human could do that before, but AI can.**

diff --git a/analysis/bpf/pie_commits.py b/analysis/bpf/pie_commits.py
@@ -21,7 +21,7 @@ def plot_commit_classification_pie():
     truncated_labels = [label[:20] + '...' if len(label) > 10 else label for label in value_counts.index]
 
     # Plot the pie chart
-    fig, ax = plt.subplots(figsize=(8, 8))
+    fig, ax = plt.subplots(figsize=(7, 5))
     ax.pie(value_counts, labels=truncated_labels, autopct='%1.1f%%', startangle=90)
     ax.set_title('Commit Classification', fontsize=12)
 
@@ -43,7 +43,7 @@ def plot_commit_complexity_pie():
     truncated_labels = [label[:20] + '...' if len(label) > 10 else label for label in value_counts.index]
 
     # Plot the pie chart
-    fig, ax = plt.subplots(figsize=(8, 8))
+    fig, ax = plt.subplots(figsize=(7, 5))
     ax.pie(value_counts, labels=truncated_labels, autopct='%1.1f%%', startangle=90)
     ax.set_title('Commit Complexity', fontsize=12)
 
@@ -65,7 +65,7 @@ def plot_implementation_component_pie():
     truncated_labels = [label[:20] + '...' if len(label) > 10 else label for label in value_counts.index]
 
     # Plot the pie chart
-    fig, ax = plt.subplots(figsize=(8, 8))
+    fig, ax = plt.subplots(figsize=(7, 5))
     ax.pie(value_counts, labels=truncated_labels, autopct='%1.1f%%', startangle=90)
     ax.set_title('Major Implementation Component', fontsize=12)
 
@@ -87,7 +87,7 @@ def plot_logic_component_pie():
     truncated_labels = [label[:20] + '...' if len(label) > 10 else label for label in value_counts.index]
 
     # Plot the pie chart
-    fig, ax = plt.subplots(figsize=(8, 8))
+    fig, ax = plt.subplots(figsize=(7, 5))
     ax.pie(value_counts, labels=truncated_labels, autopct='%1.1f%%', startangle=90)
     ax.set_title('Major Logic Component', fontsize=12)
 
@@ -135,7 +135,7 @@ def plot_usecases_or_submodule_pie():
     truncated_labels = [label[:20] + '...' if len(label) > 10 else label for label in value_counts.index]
 
     # Plot the pie chart
-    fig, ax = plt.subplots(figsize=(8, 8))
+    fig, ax = plt.subplots(figsize=(7, 5))
     ax.pie(value_counts, labels=truncated_labels, autopct='%1.1f%%', startangle=90)
     ax.set_title('Use Cases or Submodule Events', fontsize=12)
 

diff --git a/analysis/bpf/timeline_commits.py → analysis/bpf/timeline_commits_3m.py b/analysis/bpf/timeline_commits.py → analysis/bpf/timeline_commits_3m.py
@@ -70,14 +70,14 @@ def get_significant_categories(series, max_labels, threshold=0.01):
     """
     # Calculate frequency proportion
     freq = series.value_counts(normalize=True)
-    
+
     # Determine categories above the threshold
     significant = freq[freq >= threshold].index.tolist()
-    
+
     # If significant categories exceed max_labels, take the top max_labels -1 and add 'Other'
     if len(significant) > (max_labels - 1):
         significant = freq.nlargest(max_labels - 1).index.tolist()
-    
+
     return significant
 
 # Function to plot frequency-based timeline charts for categorical fields
@@ -93,17 +93,18 @@ def plot_frequency_timeline(field_name, title, max_labels, threshold, save_path)
     - save_path: File path to save the chart.
     """
     print(f"\nGenerating timeline for: {title}")
-
-    # Group by month and category, count commits
-    monthly_counts = filtered_data.resample('M')[field_name].value_counts().unstack(fill_value=0)
-
+
+    # Group by 3-month intervals and category, count commits
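+    # '3M' is the pandas alias for a 3-month (month-end) frequency; 'Q' gives calendar quarter-end bins instead.
+    # NOTE: pandas >= 2.2 deprecates the 'M' alias in favour of 'ME' (i.e. '3ME' here).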
+    monthly_counts = filtered_data.resample('3M')[field_name].value_counts().unstack(fill_value=0)
+
     # Determine significant categories
     if field_name == 'usecases_or_submodule_events':
         data_series = flattened_usecases
     else:
         data_series = filtered_data[field_name]
     significant_categories = get_significant_categories(data_series, max_labels, threshold)
-    
+
     # If 'Other' needs to be added, sum the non-significant categories
     if len(significant_categories) < len(monthly_counts.columns):
         # Ensure 'Other' is not already a category
@@ -114,28 +115,32 @@ def plot_frequency_timeline(field_name, title, max_labels, threshold, save_path)
             monthly_counts['Other'] += monthly_counts.drop(significant_categories, axis=1).sum(axis=1)
         # Keep only significant categories and 'Other'
         monthly_counts = monthly_counts[significant_categories + ['Other']]
-    
+
     # Plotting
-    fig, ax = plt.subplots(figsize=(14, 14))
-    
+    fig, ax = plt.subplots(figsize=(14, 8))
+
     # Plot each category
     for column in monthly_counts.columns:
         ax.plot(monthly_counts.index, monthly_counts[column], label=column)
-    
+
     # Formatting the x-axis with date labels
     ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
     plt.xticks(rotation=45)
-    
+
     ax.set_title(title, fontsize=16)
-    ax.set_xlabel('Time (Monthly)', fontsize=14)
+    ax.set_xlabel('Time (3-Month Intervals)', fontsize=14)
     ax.set_ylabel('Number of Commits', fontsize=14)
-    ax.legend(loc='upper left', bbox_to_anchor=(1,1))  # Place legend outside the plot
-
-    # plt.tight_layout()
+
+    truncated_labels = [label[:20] + '...' if len(label) > 20 else label for label in monthly_counts.columns]
+
+    ax.legend(truncated_labels, loc='upper left', bbox_to_anchor=(1,1))  # Place legend outside the plot
+
+    plt.tight_layout()
     plt.savefig(save_path)
     plt.close()
-    
+
     print(f"Saved timeline chart to {save_path}")
+
 # Function to plot timeline for use cases or submodule events
 def plot_usecases_timeline(title, save_path, max_labels=8, threshold=0.005):
     """
@@ -148,7 +153,7 @@ def plot_usecases_timeline(title, save_path, max_labels=8, threshold=0.005):
     - threshold: Minimum frequency proportion to consider as significant.
     """
     print(f"\nGenerating timeline for: {title}")
-    
+
     # Explode the 'parsed_usecases' lists into separate rows
     exploded_data = filtered_data.explode('parsed_usecases')
 
@@ -159,12 +164,12 @@ def plot_usecases_timeline(title, save_path, max_labels=8, threshold=0.005):
     # Remove NaN entries
     exploded_data = exploded_data.dropna(subset=['parsed_usecases'])
 
-    # Group by month and use case, count commits
-    monthly_counts = exploded_data.resample('ME')['parsed_usecases'].value_counts().unstack(fill_value=0)
-    
+    # Group by 3-month intervals and use case, count commits
+    monthly_counts = exploded_data.resample('3M')['parsed_usecases'].value_counts().unstack(fill_value=0)
+
     # Determine significant categories
     significant_categories = get_significant_categories(flattened_usecases, max_labels, threshold)
-    
+
     # Only drop categories that exist in the DataFrame
     columns_to_drop = [col for col in monthly_counts.columns if col not in significant_categories]
 
@@ -173,32 +178,32 @@ def plot_usecases_timeline(title, save_path, max_labels=8, threshold=0.005):
         # Add 'Other' column as the sum of non-significant categories
         monthly_counts['Other'] = monthly_counts[columns_to_drop].sum(axis=1)
         monthly_counts = monthly_counts.drop(columns=columns_to_drop, axis=1)
-    
+
     # Plotting
-    fig, ax = plt.subplots(figsize=(14, 14))
-    
+    fig, ax = plt.subplots(figsize=(14, 8))
+
     # Plot each category
     for column in monthly_counts.columns:
         ax.plot(monthly_counts.index, monthly_counts[column], label=column)
-    
+
     # Formatting the x-axis with date labels
     ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
     plt.xticks(rotation=45)
-    
+
     ax.set_title(title, fontsize=16)
-    ax.set_xlabel('Time (Monthly)', fontsize=14)
+    ax.set_xlabel('Time (3-Month Intervals)', fontsize=14)
     ax.set_ylabel('Number of Commits', fontsize=14)
-    
+
     # Truncate long labels for the legend
     truncated_labels = [label[:20] + '...' if len(label) > 20 else label for label in monthly_counts.columns]
 
     # Add the legend with truncated labels
     ax.legend(truncated_labels, loc='upper left', bbox_to_anchor=(1, 1))  # Place legend outside the plot
-    
+
     plt.tight_layout()
     plt.savefig(save_path)
     plt.close()
-    
+
     print(f"Saved timeline chart to {save_path}")
 
 # Define thresholds and max_labels per field
@@ -224,7 +229,7 @@ def plot_usecases_timeline(title, save_path, max_labels=8, threshold=0.005):
 plot_usecases_timeline(
     title='Use Cases or Submodule Events Over Time',
     save_path='imgs/timeline_usecases_or_submodule_events.png',
-    max_labels=10,
+    max_labels=12,
     threshold=0.005  # Adjusted threshold for more use cases
 )
Original file line number	Diff line number	Diff line change
Expand Up		@@ -11,7 +11,6 @@ Code-Survey helps you `explore` and `analyze`. the world's largest and most intr

		With the power of AI and Large Language Models (LLMs), you can ask questions, run queries, and gain a deeper understanding of how systems evolve over time. AI Agents can also help you analyze that. Whether you're a developer, researcher, or enthusiast, Code-Survey bridges the gap between `design`, `implementation`, `maintenance`, `reliability` and `security`, making complex systems more accessible.


		Unlike other approaches:

		- No human could do that before, but AI can.
Expand Down