openclimatefix · rachel-labri-tipton · Dec 11, 2023 · Dec 11, 2023 · Dec 11, 2023 · Dec 11, 2023
diff --git a/examples/india_box_plots.py b/examples/india_box_plots.py
@@ -0,0 +1,95 @@
+"""Box plots of max cumulative energy generation per system"""
+import os
+
+import h5py
+import numpy as np
+import pandas as pd
+import plotly.graph_objects as go
+
+# load hdf file with the generation data for each system
+pv_systems_hdf = os.environ.get("PV_DATA_HDF")
+
+# ids of the systems to read from the hdf file
+systems_with_data = [
+    "56151",
+    "56709",
+    "58780",
+    "59687",
+    "59710",
+    "60294",
+    "60602",
+    "60673",
+    "66634",
+    "67861",
+    "71120",
+    "72742",
+    "73347",
+    "77684",
+    "77710",
+    "78186",
+    "79612",
+    "81408",
+    "82081",
+    "85738",
+    "86244",
+    "87410",
+    "90559",
+    "91554",
+    "97094",
+    "99833",
+]
+
+
+# one dataframe of daily max values per system
+pv_systems = []
+
+with h5py.File(pv_systems_hdf, "r") as f:
+    for system_id in systems_with_data:
+        df_pv_system = pd.DataFrame(np.array(f["timeseries"][system_id]["table"]))
+        df_pv_system["index"] = pd.to_datetime(df_pv_system["index"], unit="ns")
+        # keep the max of every column for each calendar day
+        df_pv_system = df_pv_system.groupby(pd.Grouper(key="index", freq="D")).max()
+        df_pv_system["System ID"] = system_id
+        df_pv_system = pd.DataFrame(df_pv_system, columns=["cumulative_energy_gen_Wh", "System ID"])
+        # convert Wh to kWh
+        df_pv_system["cumulative_energy_gen_kWh"] = (
+            df_pv_system["cumulative_energy_gen_Wh"] / 1000
+        ).astype(float)
+        pv_systems.append(df_pv_system)
+
+
+fig = go.Figure()
+for pv_system in pv_systems:
+    # a system without any rows has nothing to draw and no ID to read
+    if pv_system.empty:
+        continue
+    fig.add_trace(
+        go.Box(
+            y=pv_system["cumulative_energy_gen_kWh"],
+            x=pv_system["System ID"],
+            # the frame is indexed by date, so read the ID by position
+            name=pv_system["System ID"].iloc[0],
+            boxpoints="suspectedoutliers",
+            jitter=0.5,
+            whiskerwidth=0.2,
+            fillcolor="rgba(93, 164, 214, 0.5)",
+            marker_size=2,
+            line_width=1,
+        )
+    )
+
+# the layout applies to the whole figure, so set it once after the traces are added
+fig.update_layout(
+    title="Daily max values of cumulative energy generation per system",
+    yaxis=dict(
+        autorange=True,
+        showgrid=True,
+        zeroline=True,
+        gridcolor="rgb(255, 255, 255)",
+        gridwidth=1,
+        zerolinecolor="rgb(255, 255, 255)",
+        zerolinewidth=2,
+    ),
+)
+
+fig.show()
diff --git a/examples/india_chart_per_system.py b/examples/india_chart_per_system.py
@@ -0,0 +1,67 @@
+"""Chart generation for a single system.
+
+This shows the cumulative energy generation or instantaneous power generation
+over time. Some of the commented out code can be used to filter the data by
+date or plot max, mean, min, median values, etc.
+"""
+import os
+
+import h5py
+import numpy as np
+import pandas as pd
+import plotly.express as px
+
+# load hdf file with the generation data for each system
+pv_systems_hdf = os.environ.get("PV_DATA_HDF")
+
+# this plots one system as a line chart in plotly
+
+# replace this placeholder with the id of the system to plot
+system_id = "SYSTEM_ID"
+# read the hdf file and get the data for the system id
+with h5py.File(pv_systems_hdf, "r") as f:
+    # np.array copies the table into memory, so the file can be closed before plotting
+    pv_system_data = pd.DataFrame(np.array(f["timeseries"][system_id]["table"]))
+
+pv_system_data["index"] = pd.to_datetime(pv_system_data["index"], unit="ns")
+pv_system_data = pv_system_data.set_index("index")
+# this code can be used to get the mean weekly ("W") or monthly ("M")
+# production per system or the max daily production ("D")
+# pv_system_data = pv_system_data.groupby(pd.Grouper(freq="M")).mean()
+
+pv_system_data["System ID"] = system_id
+# the timestamps are the dataframe index now, so only data columns are selected here
+pv_system_data = pd.DataFrame(
+    pv_system_data,
+    columns=[
+        "cumulative_energy_gen_Wh",
+        "System ID",
+        "instantaneous_power_gen_W",
+    ],
+)
+# filter the data by date
+# pv_system_data = pv_system_data.loc[
+#     pv_system_data.index > pd.Timestamp("2019-05-01 00:00:00")
+# ]
+# pv_system_data = pv_system_data.loc[
+#     pv_system_data.index < pd.Timestamp("2019-07-01 00:00:00")
+# ]
+
+# convert Wh to kWh
+pv_system_data["cumulative_energy_gen_kWh"] = (
+    pv_system_data["cumulative_energy_gen_Wh"] / 1000
+).astype(float)
+
+# plot the data as a line chart
+fig = px.line(
+    pv_system_data,
+    x=pv_system_data.index,
+    # here you can swap between cumulative energy generation and instantaneous
+    # power generation on the y axis
+    y=pv_system_data["cumulative_energy_gen_kWh"],
+    title=f"Generation for System ID: {system_id}",
+)
+fig.update_layout(
+    xaxis_title="Time",
+)
+fig.show()
diff --git a/examples/india_gantt_chart.py b/examples/india_gantt_chart.py
@@ -0,0 +1,76 @@
+"""Gantt chart for India PV systems. This shows where there are gaps in the data."""
+import os
+
+import h5py
+import numpy as np
+import pandas as pd
+import plotly.express as px
+
+# load hdf file with the generation data for each system
+pv_data_hdf = os.environ.get("PV_DATA_HDF")
+
+# these are the current systems with some data in the hdf file for india
+systems_with_data = [
+    "56151",
+    "56709",
+    "58780",
+    "59687",
+    "59710",
+    "60294",
+    "60602",
+    "66634",
+    "71120",
+    "72742",
+    "73347",
+    "77684",
+    "77710",
+    "78186",
+    "79612",
+    "81408",
+    "82081",
+    "85738",
+    "86244",
+    "87410",
+    "90559",
+    "91554",
+    "97094",
+    "99833",
+    "100451",
+]
+
+
+# one {"System ID", "Start", "Finish"} record per unbroken run of data
+pv_systems = []
+
+# read the hdf file and get start and end dates of available data per site
+with h5py.File(pv_data_hdf, "r") as f:
+    # loop through each pv system in the hdf file
+    for system_id in systems_with_data:
+        df = pd.DataFrame(np.array(f["timeseries"][system_id]["table"]))
+        timestamps = pd.to_datetime(df["index"], unit="ns")
+        timestamps = timestamps[timestamps > pd.Timestamp("2018-01-01")]
+        # nothing to plot for a system with no data after the cut-off date
+        if timestamps.empty:
+            continue
+        # a run of data starts at the first timestamp and after every gap of
+        # more than one day (assumes the timestamps are in ascending order)
+        starts_run = timestamps.diff() > pd.Timedelta("1D")
+        starts_run.iloc[0] = True
+        # a run ends on the row just before the next run starts; the last
+        # timestamp closes the final run
+        ends_run = starts_run.shift(-1, fill_value=True)
+
+        # make a dictionary for the gantt chart to plot
+        for start, finish in zip(timestamps[starts_run], timestamps[ends_run]):
+            pv_systems.append({"System ID": system_id, "Start": start, "Finish": finish})
+
+# plot the data as gantt chart in plotly; the blank stretches are the gaps
+fig = px.timeline(
+    pd.DataFrame(pv_systems, columns=["System ID", "Start", "Finish"]),
+    x_start="Start",
+    x_end="Finish",
+    y="System ID",
+    color="System ID",
+    title="Gantt Chart of PV Systems in India",
+)
+fig.show()
diff --git a/examples/india_map.py b/examples/india_map.py
@@ -0,0 +1,27 @@
+""" Example of plotting PVOutput India system locations on a map."""
+import pandas as pd
+import plotly.express as px
+
+# load csv file with system metadata
+pv_system_metadata = "./examples/pv_data/PVOutput_India_systems.csv"
+# pv_system_metadata = os.environ.get("PV_SYSTEM_FILE")
+pv_system_metadata = pd.read_csv(pv_system_metadata)
+pv_systems_lat_lon = pd.DataFrame(
+    pv_system_metadata, columns=["system_id", "latitude", "longitude", "system_size_W"]
+)
+# remove systems that don't have a lat/lon coordinate
+if pv_systems_lat_lon["latitude"].isnull().values.any():
+    pv_systems_lat_lon = pv_systems_lat_lon.dropna()
+
+
+fig = px.scatter_geo(
+    pv_systems_lat_lon,
+    lat="latitude",
+    lon="longitude",
+    size="system_size_W",
+    color="system_size_W",
+    hover_name="system_id",
+    scope="asia",
+    title="PVOutput India System Locations",
+)
+fig.show()
diff --git a/examples/india_mean_production.py b/examples/india_mean_production.py
@@ -0,0 +1,99 @@
+"""Mean Production per System by time interval (month, week,etc.)
+This example shows the mean daily production per system by month.
+This makes a plot with 2 columns and half as many rows as there are systems with data.
+"""
+import os
+
+import h5py
+import numpy as np
+import pandas as pd
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+
+# load hdf file with the generation data for each system
+# PV_DATA_HDF, the variable the other India examples read, is accepted as a fallback
+pv_data_hdf_file = os.environ.get("SYSTEM_DATA") or os.environ.get("PV_DATA_HDF")
+
+# these are the current systems with data in the hdf file for India
+systems_with_data = [
+    "56151",
+    "56709",
+    "58780",
+    "59687",
+    "59710",
+    "60294",
+    "60602",
+    "60673",
+    "66634",
+    "67861",
+    "71120",
+    "72742",
+    "73347",
+    "77684",
+    "77710",
+    "78186",
+    "79612",
+    "81408",
+    "82081",
+    "85738",
+    "86244",
+    "87410",
+    "90559",
+    "91554",
+    "97094",
+    "99833",
+]
+
+
+# subplots are filled left to right, top to bottom, two per row
+def row(i):
+    """Return the 1-based subplot row for the i-th system (i counts from 1)."""
+    return (i + 1) // 2
+
+
+pv_systems = []
+# read the hdf file
+with h5py.File(pv_data_hdf_file, "r") as f:
+    # loop through each pv system in the hdf file. some of the lines are commented out but can
+    # be used to filter the data by date or to get the mean weekly production per system
+    for system_id in systems_with_data:
+        df = pd.DataFrame(np.array(f["timeseries"][system_id]["table"]))
+        df["index"] = pd.to_datetime(df["index"], unit="ns")
+        # df = df[df["index"] > pd.Timestamp("2019-01-01")]
+        df_pv_system = df.groupby(pd.Grouper(key="index", freq="M")).mean()
+        df_pv_system["System ID"] = system_id
+        df_pv_system = pd.DataFrame(df_pv_system, columns=["cumulative_energy_gen_Wh", "System ID"])
+        # convert Wh to kWh
+        df_pv_system["cumulative_energy_gen_kWh"] = (
+            df_pv_system["cumulative_energy_gen_Wh"] / 1000
+        ).astype(float)
+        pv_systems.append(df_pv_system)
+
+# make the plot with subplots: two per row, so half as many rows as systems (rounded up)
+fig = make_subplots(
+    rows=max(row(len(pv_systems)), 1),
+    cols=2,
+    shared_xaxes=False,
+    horizontal_spacing=0.2,
+    vertical_spacing=0.02,
+    subplot_titles=systems_with_data,
+)
+# loop through each system and add a line to its subplot
+for i, pv_system in enumerate(pv_systems, start=1):
+    if len(pv_system) > 0:
+        fig.add_trace(
+            go.Scatter(
+                x=pv_system.index,
+                # plot the kWh column so the values match the "kWh" axis title
+                y=pv_system["cumulative_energy_gen_kWh"],
+                # the frame is indexed by date, so read the ID by position
+                name=pv_system["System ID"].iloc[0],
+                mode="lines",
+            ),
+            row=row(i),
+            col=1 if i % 2 else 2,
+        )
+fig.update_yaxes(title_text="kWh")
+fig.update_layout(height=3000, width=750, title_text="Mean Monthly Production per System")
+fig.update_annotations(font_size=12)
+fig.show()