diagram_ph/postComputation.py

123 lines
6.4 KiB
Python

import pandas as pd
class DataSegmentProcessor:
    """Split a DataFrame of pressure/enthalpy points into ordered, grouped segments.

    The methods read the columns 'Pressure' and 'Enthalpy'; the grouping step
    additionally reads 'Quality' and the 'Order' column produced by
    ``sort_and_assign_orders``.
    """

    def __init__(self, dataframe):
        """Store the DataFrame that ``run`` and the filtering step operate on."""
        self.df = dataframe

    def round_and_deduplicate(self, df, decimals=2):
        """Return ``df`` rounded to ``decimals`` places with duplicate rows removed.

        Args:
            df (pandas.DataFrame): The frame to clean up.
            decimals (int): Number of decimal places to keep (default 2).
        Returns:
            pandas.DataFrame: A new, rounded and de-duplicated frame.
        """
        return df.round(decimals).drop_duplicates()

    def filter_and_deduplicate_all_segments(self, tol):
        """
        Filters and deduplicates the DataFrame based on a tolerance value around the maximum and minimum pressure values.
        Args:
            tol (float): The tolerance value to use for filtering around the maximum and minimum pressure values.
        Returns:
            Tuple[pandas.DataFrame, pandas.DataFrame, pandas.DataFrame]: A tuple containing three DataFrames:
                - max_range: The rows where the pressure is within the tolerance range of the maximum pressure.
                - min_range: The rows where the pressure is within the tolerance range of the minimum pressure.
                - remaining_df: The rows that belong to neither range.
            When the two tolerance bands overlap, a row may appear in both
            max_range and min_range.
        """
        pressure = self.df['Pressure']
        max_pressure = pressure.max()
        min_pressure = pressure.min()
        # ``between`` is inclusive on both ends, matching ``>=`` / ``<=``.
        near_max = pressure.between(max_pressure - tol, max_pressure + tol)
        near_min = pressure.between(min_pressure - tol, min_pressure + tol)
        # Boolean masks instead of chained ``drop`` calls: dropping the
        # min-range labels after the max-range labels raised KeyError whenever
        # a row belonged to both ranges.
        max_range = self.round_and_deduplicate(self.df[near_max])
        min_range = self.round_and_deduplicate(self.df[near_min])
        remaining_df = self.round_and_deduplicate(self.df[~(near_max | near_min)])
        return max_range, min_range, remaining_df

    def split_based_on_pressure_difference(self, final_circ, pressure_diff_threshold):
        """
        Splits a given DataFrame into two halves based on a pressure difference threshold.

        The rows are sorted by 'Pressure'; the split happens in front of the
        first row whose pressure exceeds the previous row's pressure by more
        than the threshold. Both halves are returned in ascending pressure
        order and do not share any row.

        Args:
            final_circ (pandas.DataFrame): The input DataFrame to be split.
            pressure_diff_threshold (float): The pressure difference threshold value.
        Returns:
            Tuple[pandas.DataFrame, pandas.DataFrame]: A tuple containing the lower and upper halves of the input DataFrame.
            If the input DataFrame is empty, both halves will be empty DataFrames.
            If the input DataFrame has only one row, the lower half will be an empty DataFrame, and the upper half will be the original DataFrame.
            If no pressure step exceeds the threshold, both halves will be empty DataFrames.
        """
        if len(final_circ) == 0:
            return pd.DataFrame(), pd.DataFrame()
        sorted_df = final_circ.sort_values(by='Pressure')
        if len(sorted_df) == 1:
            return pd.DataFrame(), sorted_df
        # The first diff is NaN and NaN > threshold is False, so position 0
        # can never be selected as the split point.
        exceeds = (sorted_df['Pressure'].diff() > pressure_diff_threshold).to_numpy()
        if not exceeds.any():
            # Always hand back a pair so callers can unpack the result.
            return pd.DataFrame(), pd.DataFrame()
        split_pos = int(exceeds.argmax())
        # Positional slicing of the *sorted* frame: label slicing of the
        # unsorted input split by row order rather than by pressure and put
        # the boundary row into both halves.
        lower_half = sorted_df.iloc[:split_pos].copy()
        upper_half = sorted_df.iloc[split_pos:].copy()
        return lower_half, upper_half

    @staticmethod
    def _sorted_with_order(frame, by, ascending, first_order):
        """Return a sorted copy of ``frame`` numbered from ``first_order`` in 'Order'."""
        if frame.empty:
            # Nothing to sort or number; an empty frame may lack the sort columns.
            return frame.copy()
        ranked = frame.sort_values(by=by, ascending=ascending)
        ranked['Order'] = range(first_order, first_order + len(ranked))
        return ranked

    def sort_and_assign_orders(self, max_range, min_range, upper_half):
        """Sort the three segments and number their rows consecutively.

        The 'Order' column starts at 1 and runs through ``upper_half``
        (ascending 'Enthalpy', then descending 'Pressure'), then ``max_range``
        (descending 'Pressure', then descending 'Enthalpy'), then
        ``min_range`` (ascending 'Enthalpy', then descending 'Pressure').
        The input frames are left untouched.

        Args:
            max_range (pandas.DataFrame): Rows near the maximum pressure.
            min_range (pandas.DataFrame): Rows near the minimum pressure.
            upper_half (pandas.DataFrame): Upper half of the remaining rows; may be empty.
        Returns:
            pandas.DataFrame: The three sorted segments concatenated in the
            order upper_half, max_range, min_range.
        """
        upper_sorted = self._sorted_with_order(
            upper_half, ['Enthalpy', 'Pressure'], [True, False], 1)
        # Offsets come from the segment lengths, so an empty segment simply
        # contributes nothing and numbering stays contiguous.
        max_sorted = self._sorted_with_order(
            max_range, ['Pressure', 'Enthalpy'], False, len(upper_sorted) + 1)
        min_sorted = self._sorted_with_order(
            min_range, ['Enthalpy', 'Pressure'], [True, False],
            len(upper_sorted) + len(max_sorted) + 1)
        return pd.concat([upper_sorted, max_sorted, min_sorted])

    def group_by_enthalpy_and_pressure(self, combined_df, group_pressure_threshold=10000):
        """Tag rows with a 'Group' identifier and return them sorted by 'Order'.

        Rows sharing the same 'Enthalpy' whose pressures span more than
        ``group_pressure_threshold`` receive a common identifier (1, 2, ...).
        One further identifier is given to the last row in 'Order' and to the
        first row in 'Order' whose 'Quality' is greater than that last row's
        'Quality', if such a row exists. Rows in neither case keep ``None``.
        The input frame is not modified.

        Args:
            combined_df (pandas.DataFrame): Frame with 'Enthalpy', 'Pressure',
                'Quality' and 'Order' columns.
            group_pressure_threshold (float): Minimum max-min pressure spread
                within one enthalpy value for the rows to form a group.
                NOTE(review): the original comment mentioned 100 kPa while the
                code used 10000; the default keeps the code's value - confirm
                the intended unit.
        Returns:
            pandas.DataFrame: A copy of the input with a 'Group' column, sorted by 'Order'.
        """
        if combined_df.empty:
            tagged = combined_df.copy()
            tagged['Group'] = None
            return tagged

        # ``sort_values`` returns a new frame, so the caller's frame is untouched.
        ordered = combined_df.sort_values(by='Enthalpy')
        ordered['Group'] = None
        group_id = 1
        for _, same_enthalpy in ordered.groupby('Enthalpy'):
            pressure_spread = same_enthalpy['Pressure'].max() - same_enthalpy['Pressure'].min()
            if pressure_spread > group_pressure_threshold:
                ordered.loc[same_enthalpy.index, 'Group'] = group_id
                group_id += 1

        ordered = ordered.sort_values('Order')
        group_col = ordered.columns.get_loc('Group')
        # The closing row and the first row with a higher quality share one
        # extra identifier; positional access is unaffected by repeated labels.
        ordered.iloc[-1, group_col] = group_id
        last_quality = ordered['Quality'].iloc[-1]
        higher_quality = (ordered['Quality'] > last_quality).to_numpy()
        if higher_quality.any():
            ordered.iloc[int(higher_quality.argmax()), group_col] = group_id
        return ordered

    def run(self, pressure_diff_threshold=120e3, split_threshold=8000,
            group_pressure_threshold=10000):
        """Run the full pipeline on ``self.df`` and return the grouped frame.

        Args:
            pressure_diff_threshold (float): Despite its name, this is the
                tolerance around the maximum and minimum pressure passed to
                ``filter_and_deduplicate_all_segments``.
            split_threshold (float): Pressure step used to split the remaining
                rows into a lower and an upper half.
            group_pressure_threshold (float): Pressure spread forwarded to
                ``group_by_enthalpy_and_pressure``.
        Returns:
            pandas.DataFrame: The upper half plus the max and min ranges, with
            'Order' and 'Group' columns, sorted by 'Order'. The lower half of
            the split is not part of the result.
        """
        max_range_circ, min_range_circ, final_circ = (
            self.filter_and_deduplicate_all_segments(pressure_diff_threshold))
        _, upper_half = self.split_based_on_pressure_difference(final_circ, split_threshold)
        combined_df = self.sort_and_assign_orders(max_range_circ, min_range_circ, upper_half)
        return self.group_by_enthalpy_and_pressure(combined_df, group_pressure_threshold)