Source code for tests.test_pandas_transform

import numpy as np
import pandas as pd
from pandas.testing import assert_frame_equal

from firelink.pandas_transform import (
    Agg,
    Apply,
    Assign,
    Astype,
    Drop_duplicates,
    Fillna,
    Filter,
    Groupby,
    Query,
    Select_dtypes,
)
from firelink.pipeline import FirePipeline


def test_filter(test_pandas_df):
    """Check that ``Filter(["a", "e"])`` yields a frame with columns ``a`` and ``e``."""
    want = pd.DataFrame(
        {
            "a": range(10),
            "e": [None, "d", "a", "d", "e", "e", "a", "a", "d", "d"],
        }
    )

    column_filter = Filter(["a", "e"])
    got = column_filter.fit_transform(test_pandas_df)

    assert_frame_equal(got, want)
def test_drop_duplicates(test_pandas_df):
    """Check ``Drop_duplicates`` on column ``d`` against the nine expected values."""
    want = pd.DataFrame({"d": ["a", "n", "d", "f", "g", "h", "j", "q", "w"]})

    deduplicator = Drop_duplicates()
    deduped = deduplicator.fit_transform(test_pandas_df[["d"]])
    # Renumber the rows so the comparison uses a fresh 0..n-1 index.
    got = deduped.reset_index(drop=True)

    assert_frame_equal(got, want)
def test_select_dtypes(test_pandas_df):
    """Check that ``Select_dtypes(include=["int64"])`` returns columns a, b and c."""
    integer_columns = {
        "a": range(10),
        "b": range(10, 20),
        "c": range(20, 30),
    }
    want = pd.DataFrame(integer_columns)

    selector = Select_dtypes(include=["int64"])
    got = selector.fit_transform(test_pandas_df)

    assert_frame_equal(got, want)
def test_query(test_pandas_df):
    """Check ``Query`` with a combined comparison/membership expression.

    The expression ``a>5 and d in ['j', 'q']`` is expected to leave two rows.
    """
    want = pd.DataFrame(
        {
            "a": range(7, 9),
            "b": range(17, 19),
            "c": range(27, 29),
            "d": ["j", "q"],
            "e": ["a", "d"],
        }
    )

    row_query = Query("a>5 and d in ['j', 'q']")
    matched = row_query.fit_transform(test_pandas_df)
    # Renumber the surviving rows so the comparison uses a fresh 0..n-1 index.
    got = matched.reset_index(drop=True)

    assert_frame_equal(got, want)
def test_astype(test_pandas_df):
    """Check that ``Astype("object")`` turns column ``a`` into object dtype."""
    caster = Astype("object")
    converted = caster.fit_transform(test_pandas_df["a"])

    # "O" is the dtype code the result is compared against.
    assert converted.dtype == "O"
def test_apply(test_pandas_df):
    """Check ``Apply(np.sum, axis=0)`` on columns a, b and c.

    The result is compared after ``reset_index(drop=False)``, so the column
    names are expected in an ``index`` column and the sums in column ``0``.
    """
    want = pd.DataFrame(
        {
            "index": ["a", "b", "c"],
            0: [45, 145, 245],
        }
    )

    summer = Apply(np.sum, axis=0)
    totals = summer.fit_transform(test_pandas_df[["a", "b", "c"]])
    got = totals.reset_index(drop=False)

    assert_frame_equal(got, want)
def test_groupby(test_pandas_df):
    """Check ``Groupby("e")`` followed by ``sum()`` against per-group totals."""
    want = pd.DataFrame(
        {
            "e": ["a", "d", "e"],
            "a": [15, 21, 9],
            "b": [45, 61, 29],
            "c": [75, 101, 49],
        }
    )

    grouped = Groupby("e").fit_transform(test_pandas_df)
    # Summing and resetting the index brings the group key back as column "e".
    got = grouped.sum().reset_index(drop=False)

    assert_frame_equal(got, want)
def test_agg(test_pandas_df):
    """Check that ``Agg(["min"])`` yields a single ``min`` row for columns a-d."""
    minimums = {"a": 0, "b": 10, "c": 20, "d": ["a"]}
    want = pd.DataFrame(minimums, index=["min"])

    aggregator = Agg(["min"])
    got = aggregator.fit_transform(test_pandas_df)

    assert_frame_equal(got, want)
def test_assign(test_pandas_df):
    """Check that ``Assign`` adds a ``temp`` column computed as ``a * 9 / 5 + 32``."""
    want = pd.DataFrame(
        {"temp": [32.0, 33.8, 35.6, 37.4, 39.2, 41.0, 42.8, 44.6, 46.4, 48.2]}
    )

    new_columns = {"temp": lambda x: x.a * 9 / 5 + 32}
    assigned = Assign(new_columns).fit_transform(test_pandas_df)
    # Only the newly created column is compared.
    got = assigned[["temp"]]

    assert_frame_equal(got, want)
def test_fillna(test_pandas_df):
    """Check that ``Fillna(-1)`` puts ``-1`` in the first row of column ``e``."""
    want = pd.DataFrame({"e": [-1, "d", "a", "d", "e", "e", "a", "a", "d", "d"]})

    filler = Fillna(-1)
    filled = filler.fit_transform(test_pandas_df)
    # Only column "e" is compared.
    got = filled[["e"]]

    assert_frame_equal(got, want)