Skip to content

fix: Perform the last action with the column name modified. #42

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 29 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
5121800
Fixed an error when there were many missing bool columns in the input…
Oct 24, 2023
829c3c2
Merge branch 'main' of https://github.com/tashiro-akira/core
Oct 24, 2023
96ecff3
Merge branch 'sapientml:main' into main
tashiro-akira Oct 27, 2023
d9e4704
Merge branch 'sapientml:main' into main
tashiro-akira Oct 31, 2023
b684d5c
fix:Add action to return column names
Nov 7, 2023
e47bebc
Merge branch 'sapientml:main' into main
tashiro-akira Nov 8, 2023
5572a54
fix:Reflects the findings of the review
Nov 14, 2023
334f690
fix:Fixed an error with mixed target column types.
Nov 20, 2023
5264b21
Merge branch 'sapientml:main' into main
tashiro-akira Nov 21, 2023
83438c0
fix: Return changes to move modifications to another branch.
Nov 21, 2023
8358cee
Merge branch 'sapientml:main' into main
tashiro-akira Dec 7, 2023
acabc47
Merge branch 'sapientml:main' into main
tashiro-akira Dec 11, 2023
5244dae
fix: Merged master modifications.
Dec 11, 2023
2f43a68
fix:Reflected review results
Dec 12, 2023
e678461
style:Removed unnecessary blank lines.
Dec 12, 2023
5254cfe
fix:Fixed to return column names in csv file
Dec 22, 2023
a64f3d3
Merge branch 'sapientml:main' into #8_jinja
tashiro-akira Jan 11, 2024
8381704
fix:Reflected the content of the review
Jan 11, 2024
bfb31c1
Merge branch 'main' into #8_jinja
kimusaku Feb 6, 2024
e1e4644
fix:Save Changes Temporarily
Feb 27, 2024
0a91bba
Merge branch 'sapientml:main' into #8_jinja
tashiro-akira Mar 11, 2024
1619b10
fix:Reflect Modifications
Mar 11, 2024
440db93
Merge branch '#8_jinja' of https://github.com/tashiro-akira/core into…
Mar 11, 2024
1cee451
fix:Reflected review results
Mar 22, 2024
78d1aad
fix:Remove Unnecessary Modifications
Mar 22, 2024
7724cd1
fix:The format has been modified.
Mar 22, 2024
969ded3
fix:Reflect Review Results
tashiro-akira Apr 25, 2024
426a32d
fix:Fixed error in running lint
tashiro-akira Apr 25, 2024
9f2bea0
Merge branch 'main' into #8_jinja
AkiraUra May 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 42 additions & 6 deletions sapientml_core/explain/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import collections
from typing import Literal, Optional

import pandas as pd
from sapientml.params import CancellationToken
from sapientml.util.logging import setup_logger
from sapientml_preprocess.generator import check_cols_has_symbols, remove_symbols

from .AutoEDA import EDA
from .AutoVisualization import AutoVisualization_Class
Expand Down Expand Up @@ -81,12 +83,46 @@ def process(
if visualization:
# Call AutoVisualization to generate visualization codes
AV = AutoVisualization_Class()
visualization_code = AV.AutoVisualization(
df=dataframe,
target_columns=target_columns,
problem_type=problem_type,
ignore_columns=ignore_columns,
)
cols_has_symbols = check_cols_has_symbols(dataframe.columns.to_list())
no_symbol_columns = [col for col in dataframe.columns.values if col not in cols_has_symbols]
rename_dict = {}
if cols_has_symbols:
df = list(
dataframe.rename(columns=lambda col: remove_symbols(col) if col in cols_has_symbols else col).columns
)
rename_dict = {}
same_column = {k: v for k, v in collections.Counter(df).items() if v > 1 and k in no_symbol_columns}
for target, org_column in zip(df, dataframe.columns.tolist()):
if target in same_column.keys():
rename_dict[target + str(same_column[target] - 1)] = org_column
same_column[target] = same_column[target] - 1
else:
rename_dict[target] = org_column

df = list(rename_dict.values())

if len(rename_dict) != 0:
col_has_target = [target for target in rename_dict.keys() if rename_dict.values() == target_columns]
visualization_code = AV.AutoVisualization(
df=dataframe,
target_columns=col_has_target,
problem_type=problem_type,
ignore_columns=ignore_columns,
)
else:
visualization_code = AV.AutoVisualization(
df=dataframe,
target_columns=target_columns,
problem_type=problem_type,
ignore_columns=ignore_columns,
)
else:
visualization_code = AV.AutoVisualization(
df=dataframe,
target_columns=target_columns,
problem_type=problem_type,
ignore_columns=ignore_columns,
)
else:
visualization_code = None

Expand Down
37 changes: 36 additions & 1 deletion sapientml_core/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,8 +221,43 @@ def generate_pipeline(self, dataset: Dataset, task: Task):
for pipeline in sapientml_results:
pipeline.validation = code_block.validation + pipeline.validation
pipeline.test = code_block.test + pipeline.test
pipeline.train = code_block.train + pipeline.train
pipeline.predict = code_block.predict + pipeline.predict
if "cols_has_symbols" in pipeline.test:
pipeline.test = pipeline.test.replace(
'"feature": feature_train.columns',
'"feature": feature_train.rename(columns=rename_symbol_cols).columns',
)
pipeline.test = pipeline.test.replace(
"prediction.to_csv", "prediction.rename(columns=rename_symbol_cols).to_csv"
)

pipeline.predict = pipeline.predict.replace(
'"feature": feature_train.columns',
'"feature": feature_train.rename(columns=rename_symbol_cols).columns',
)
pipeline.predict = pipeline.predict.replace(
"prediction.to_csv", "prediction.rename(columns=rename_symbol_cols).to_csv"
)

pipeline.validation = pipeline.validation.replace(
'"feature": feature_train.columns',
'"feature": feature_train.rename(columns=rename_symbol_cols).columns',
)
pipeline.validation = pipeline.validation.replace(
"prediction.to_csv", "prediction.rename(columns=rename_symbol_cols).to_csv"
)

def replace_targets(match_obj):
    """Return the matched text with ``TARGET_COLUMNS`` swapped for a lookup list.

    Intended as a replacement callback for ``re.sub``: every occurrence of
    ``TARGET_COLUMNS`` in the full match is replaced by a list comprehension
    that looks each name up in ``rename_symbol_cols`` and falls back to the
    name itself when it is not a key.
    """
    matched_text = match_obj.group(0)
    lookup_expression = "[rename_symbol_cols.get(v, v) for v in TARGET_COLUMNS]"
    return matched_text.replace("TARGET_COLUMNS", lookup_expression)

pat = r"prediction = pd.DataFrame\(y_prob, columns=.?TARGET_COLUMNS.*, index=feature_test.index\)"
pipeline.test = re.sub(pat, replace_targets, pipeline.test)
pipeline.predict = re.sub(pat, replace_targets, pipeline.predict)
pipeline.validation = re.sub(pat, replace_targets, pipeline.validation)

pipeline.train = code_block.train + pipeline.train
result_pipelines.append(pipeline)

logger.info("Executing generated pipelines...")
Expand Down
Loading