import pandas as pd import re import json #refactor the following class class ExcelDataProcessor: def __init__(self, config_path, config_file=True): self.errors = [] # Initialize the error list if config_file: self.config = self.load_configuration(config_path) if self.config: self.dict_df = self.load_data_from_excel() else: self.dict_df = {} def setconfiguration(self, config): try: self.config = config self.dict_df = self.load_data_from_excel() except Exception as e: self.errors.append(f"Failed to set configuration: {e}") def load_configuration(self, config_path): """Load the configuration from a JSON file.""" try: with open(config_path, 'r') as file: return json.load(file) except Exception as e: self.errors.append(f"Failed to load configuration: {e}") return None def load_data_from_excel(self): """Load data from an Excel file based on the configuration.""" try: sheet_names = list(self.config["sheet_names"].values()) excel_data = pd.read_excel( self.config["excel_file_path"], sheet_name=sheet_names, engine='openpyxl', ) return {key: excel_data[value] for key, value in self.config["sheet_names"].items() if value in excel_data} except Exception as e: self.errors.append(f"Failed to load Excel data: {e}") return {} def rename_columns(self,circuit): """Rename columns based on a mapping DataFrame.""" data = self.dict_df.get("data") mapping = self.dict_df.get("mapping") if data is None or mapping is None: self.errors.append("Data or mapping is missing.") return try: rename_dict = { row[circuit]: row['Variable in List'] for _, row in mapping.iterrows() if row[circuit] in data.columns and row[circuit] != row['Variable in List'] } # Applying the renaming renamed_data = data.rename(columns=rename_dict, inplace=False) # Use inplace=False for debugging self.dict_df['data'] = renamed_data # Update the data only if rename is successful except Exception as e: self.errors.append(f"Error renaming columns: {e}") def evaluate_equations(self): """Evaluate equations and apply them to the data, highlighting inconsistencies.""" data = self.dict_df.get("data") equations = self.dict_df.get("equations") if data is None or equations is None: self.errors.append("Data or equations are missing.") return used_columns = set() equation_columns = [] # List to store the names of the columns created by equations # Extracting specific equations based on configuration for _, equation in equations[self.config['processing_rules']['equation_columns'][0]].items(): if isinstance(equation, str): try: variables = re.findall(r'\b[\w\.]+\b', equation) non_numeric_variables = [var for var in variables if not var.isnumeric() and var in data.columns] if all(var in data.columns for var in non_numeric_variables): # Define a function to evaluate the equation: def evaluate_equation(row, eq=equation): # Default parameter captures the equation for use in lambda namespace = row.to_dict() return eval(eq, {}, namespace) # Apply this function to each row of the data DataFrame: data[equation] = data.apply(evaluate_equation, axis=1) equation_columns.append(equation) used_columns.update(non_numeric_variables) else: self.errors.append({"equation": equation, "error": "Contains unknown variables."}) except Exception as e: self.errors.append({"equation": equation, "error": str(e)}) # If there's an error in evaluation, mark the entire column to highlight inconsistencies data[equation] = 'Inconsistent' # Mark all values in this new column as 'Inconsistent' else: self.errors.append({"equation": str(equation), "error": "Not a string."}) equation_columns= equation_columns+ ['Conditions','Units'] data_r = data [equation_columns] data_r.columns = equation_columns return data_r# Return the modified data at the end of the function, not inside the loop def process(self,circuit): """Method to execute the entire processing.""" self.rename_columns(circuit) return self.evaluate_equations() # if self.errors: # print("Errors occurred during processing:") # for error in self.errors: # print(error) # else: # print("Processing completed without errors.") # config_path = r"C:\Users\serameza\impact\EMEA_MBD_GitHub\CheckLabdata\config.json" # processor = ExcelDataProcessor(config_path) # processor.process() # After processing, you can access the processed DataFrame via processor.dict_df["Labdata"] and any errors in processor.errors