Skip to content

helper

visualize_dataframe(ax, df, xticks=None, group_by='degree', threshold=0.5, sort_by=np.nanmax, cmap='tab10', x_offset=0, min_color_coef=0.5, fontsize=12, step_linewidth=0.5, bottom_min=None, zero_offset=True)

Visualizes IPC results stored in a DataFrame using a bar plot.

Parameters:

Name Type Description Default
ax Axes

Matplotlib Axes object to plot on.

required
df DataFrame

polars.DataFrame containing IPC results.

required
xticks list | ndarray | None

X-axis tick positions. If None, default positions are used.

None
group_by str

Grouping method for IPC components. Choose from 'degree', 'component', or 'detail'.

'degree'
threshold float

Threshold for displaying IPC components. Each component's values are reduced to a single score with sort_by; components whose score does not exceed this threshold are grouped into 'rest'.

0.5
sort_by callable

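Function that reduces each component's values to a single score (e.g. np.nanmax). The score is compared with threshold: components that do not exceed it are sorted last and merged into 'rest'.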

nanmax
cmap str | Colormap

Colormap for coloring IPC components.

'tab10'
x_offset float

Horizontal offset for the x-axis. Only applied when xticks is None.

0
min_color_coef float

Minimum color coefficient for coloring. Only used when group_by is 'component' or 'detail'.

0.5
fontsize int

Font size for labels.

12
step_linewidth float

Line width for step lines. If 0, no lines are drawn.

0.5
bottom_min ndarray | None

Minimum bottom values for bars.

None
zero_offset bool

Whether the delay offset starts from zero.

True
Notes

group_by determines how IPC components are grouped and colored:

  • 'degree': Groups by sum of degrees (e.g., 3 for (3,), (2, 1), (1, 1, 1)).
  • 'component': Groups by tuple of degrees (e.g., (3,), (2, 1), (1, 1, 1) are distinct).
  • 'detail': Groups by tuple of degrees and delays.

Since the number of unique components can grow rapidly, using 'component' or 'detail' may result in many distinct colors, making it time-consuming to render. Especially for 'detail', consider setting a higher threshold to limit the number of displayed components (e.g., threshold=1.0). Use a positive threshold value to group less significant components into a rest category.

Source code in src/ipc_module/helper.py
def visualize_dataframe(
    ax: Axes,
    df: pl.DataFrame,
    xticks: list | np.ndarray | None = None,
    group_by: str = "degree",
    threshold: float = 0.5,
    sort_by: callable = np.nanmax,
    cmap: str | plt.Colormap = "tab10",
    x_offset: float = 0,
    min_color_coef: float = 0.5,
    fontsize: int = 12,
    step_linewidth: float = 0.5,
    bottom_min: np.ndarray | None = None,
    zero_offset: bool = True,
):
    """

    Visualizes IPC results stored in a DataFrame using a stacked bar plot.

    Every column of `df` whose name contains `"ipc"` becomes one bar position.
    Rows are grouped according to `group_by`, the IPC values of each group are summed,
    and each group is drawn as one layer of the stack.

    Parameters:
        ax (Axes): Matplotlib Axes object to plot on.
        df (pl.DataFrame): `polars.DataFrame` containing IPC results.
        xticks (list | np.ndarray | None, optional): X-axis tick positions, one per IPC column. If `None`, default positions are used.
            The bar width is taken from the spacing of the first two ticks (1.0 when there is a single tick).
        group_by (str, optional): Grouping method for IPC components. Choose from 'degree', 'component', or 'detail'.
        threshold (float, optional): Threshold for displaying IPC components. Components whose `sort_by` score does not exceed
            this threshold are grouped into 'rest'. The 'rest' bar is only drawn when `threshold > 0`.
        sort_by (callable, optional): Function reducing the values of one component to a single score that is compared with `threshold`.
        cmap (str | plt.Colormap, optional): Colormap for coloring IPC components.
        x_offset (float, optional): Horizontal offset for the x-axis. Only applied when `xticks` is `None`.
        min_color_coef (float, optional): Minimum color coefficient for coloring. Only used when `group_by` is 'component' or 'detail'.
        fontsize (int, optional): Font size for labels.
        step_linewidth (float, optional): Line width for step lines. If 0, no lines are drawn.
        bottom_min (np.ndarray | None, optional): Minimum bottom values for bars. A scalar or one value per IPC column.
        zero_offset (bool, optional): Whether the delay offset starts from zero. When `False`, 1 is subtracted from the
            largest delay before it is used to pick the shade and hatch in 'detail' mode.

    Returns:
        collections.defaultdict: Mapping from each group key (tuple of the grouping column values) to the list of
        summed values, one entry per IPC column.

    Notes:
        `group_by` determines how IPC components are grouped and colored:

        - `'degree'`: Groups by sum of degrees (e.g., `3` for `(3,)`, `(2, 1)`, `(1, 1, 1)`).
        - `'component'`: Groups by tuple of degrees (e.g., `(3,)`, `(2, 1)`, `(1, 1, 1)` are distinct).
        - `'detail'`: Groups by tuple of degrees and delays.

        Since the number of unique components can grow rapidly, using `'component'` or `'detail'` may result in many distinct colors, making it time-consuming to render.
        Especially for `'detail'`, consider setting a higher threshold to limit the number of displayed components (e.g., `threshold=1.0`).
        Use a positive `threshold` value to group less significant components into a `rest` category.
    """

    ipc_columns = [column for column in df.columns if "ipc" in column]
    assert group_by in ["degree", "component", "detail"], "invalid `group_by` argments"
    col_cmp = sorted([column for column in df.columns if column.startswith("cmp")])
    col_del = sorted([column for column in df.columns if column.startswith("del")])
    group_by_columns = dict(degree=["degree"], component=col_cmp, detail=col_cmp + col_del)
    if isinstance(cmap, str):
        cmap = plt.get_cmap(cmap)

    def shape_segment(segment, get_delay=False):
        # Negative entries are dropped (presumably padding for unused slots -- confirm against the producer of `df`).
        if group_by == "degree":
            return tuple(segment)
        elif group_by == "component":
            return tuple(val for val in segment if val >= 0)
        elif group_by == "detail":
            # In 'detail' mode the key is the `cmp*` values followed by the `del*` values,
            # so the first half holds degrees and the second half holds delays.
            half = len(segment) // 2
            degrees = tuple(val for val in segment[:half] if val >= 0)
            if get_delay:
                delays = tuple(val for val in segment[half:] if val >= 0)
                return degrees, delays
            else:
                return degrees

    def component_rank(degrees):
        # Position of `degrees` inside the reversed list returned by `make_degree_list`
        # (a helper defined elsewhere in this module) for the same total degree.
        degree = sum(degrees)
        degree_list = make_degree_list(degree)
        index = {item: i for i, item in enumerate(degree_list[::-1])}[degrees]
        return degree, index, len(degree_list)

    def get_color_index(segment):
        # Returns (degree, index, max_index); index / max_index controls how far the base color is faded.
        if group_by == "degree":
            return segment[0], 0, 1
        elif group_by == "component":
            return component_rank(shape_segment(segment))
        elif group_by == "detail":
            degrees, delays = shape_segment(segment, get_delay=True)
            degree, index, max_index = component_rank(degrees)
            # The fractional part grows with the largest delay so larger delays get a lighter shade.
            return degree, index + max(0, 1 - 0.9 ** (max(delays) - (not zero_offset))), max_index

    def color_func(segment):
        # Base color comes from the total degree; it is blended towards white by at most `min_color_coef`.
        white = np.ones(4)
        degree, index, max_index = get_color_index(segment)
        coef = (index / max_index) * min_color_coef
        out = np.array(cmap(degree - 1))
        out = (1 - coef) * out + coef * white
        return out

    def label_func(segment):
        # Legend label: the degree itself, or a brace-formatted tuple for 'component' / 'detail'.
        if group_by == "degree":
            return str(segment[0])
        elif group_by == "component":
            out_str = str(shape_segment(segment))
            return out_str.replace("(", "{").replace(",)", "}").replace(")", "}")
        elif group_by == "detail":
            degrees, delays = shape_segment(segment, get_delay=True)
            out_str = str(tuple(zip(degrees, delays, strict=False)))
            return out_str.replace("(", "{").replace(",)", "}").replace(")", "}")

    def hatch_func(segment):
        # Only 'detail' mode uses hatching; the pattern cycles with the largest delay.
        hatches = ["//", "\\\\", "||", "--", "++", "xx", "oo", "OO", "..", "**"]
        if group_by == "degree":
            return None
        elif group_by == "component":
            return None
        elif group_by == "detail":
            _degrees, delays = shape_segment(segment, get_delay=True)
            return hatches[(max(delays) - (not zero_offset)) % len(hatches)]

    def sort_func(arg):
        # Displayed components are ordered by total degree (then by their key);
        # everything at or below the threshold sorts last.
        segment, val = arg
        if sort_by(val) > threshold:
            if group_by == "degree":
                return segment
            elif group_by == "component":
                degrees = shape_segment(segment)
                return (sum(degrees), *(-d for d in degrees))
            elif group_by == "detail":
                degrees = shape_segment(segment)
                return (
                    sum(degrees),
                    *(-s for s in segment[: (len(segment) // 2)]),
                    *segment[(len(segment) // 2) :],
                )
        else:
            return (np.inf,)

    # Aggregation process.
    # For every IPC column, each group gets one entry: 0 by default, overwritten by the group's sum.
    out = defaultdict(list)
    segments = df[group_by_columns[group_by]].unique()
    for column in ipc_columns:
        df_agg = df.group_by(group_by_columns[group_by]).agg(pl.col(column).sum())
        for segment in segments.iter_rows():
            out[segment].append(0)
        for *segment, val in df_agg.iter_rows():
            out[tuple(segment)][-1] = val

    # Visualization process.
    # `pos` has one extra position on each side so the step outline returns to the baseline.
    n_columns = len(ipc_columns)
    if xticks is None:
        pos = x_offset + np.arange(-1, n_columns + 1)
        width = 1.0
    else:
        pos = np.zeros(n_columns + 2)
        pos[1:-1] = xticks
        # Use the spacing between the first two ticks. Reading pos[0] here would pick up the
        # zero placeholder and make the width equal to the first tick value.
        width = pos[2] - pos[1] if n_columns > 1 else 1.0
        pos[0] = pos[1] - width
        pos[-1] = pos[-2] + width

    # The bars need one floor value per column, the step outline one per entry of `pos`.
    # Edge padding keeps a scalar floor constant across the padded positions.
    floor_bar = floor_step = None
    if bottom_min is not None and n_columns > 0:
        floor_bar = np.broadcast_to(np.asarray(bottom_min, dtype=float), (n_columns,))
        floor_step = np.pad(floor_bar, 1, mode="edge")

    legend_cnt = 1
    bottom = np.zeros_like(pos, dtype=float)
    rest = np.zeros_like(pos, dtype=float)
    for segment, val in sorted(out.items(), key=sort_func):
        ipc = np.zeros_like(bottom)
        ipc[1:-1] = val
        # Score the raw values (not the zero-padded array) so this test agrees with `sort_func`.
        if sort_by(val) > threshold:
            bar_bottom = bottom[1:-1] if floor_bar is None else np.maximum(bottom[1:-1], floor_bar)
            ax.bar(
                pos[1:-1],
                ipc[1:-1],
                bottom=bar_bottom,
                width=width,
                linewidth=0.0,
                label=label_func(segment),
                color=color_func(segment),
                hatch=hatch_func(segment),
            )
            if step_linewidth > 0:
                outline = ipc + bottom if floor_step is None else np.maximum(ipc + bottom, floor_step)
                ax.step(
                    pos,
                    outline,
                    "#333333",
                    where="mid",
                    linewidth=step_linewidth,
                )
            legend_cnt += 1
            bottom += ipc
        else:
            rest += ipc
    if threshold > 0:
        # NOTE(review): the 'rest' bar is stacked on the raw `bottom` and does not apply `bottom_min` -- confirm this is intended.
        ax.bar(
            pos[1:-1],
            rest[1:-1],
            bottom=bottom[1:-1],
            width=width,
            label="rest",
            color="darkgray",
            hatch="/",
            linewidth=0.0,
        )
        if step_linewidth > 0:
            ax.step(pos, rest + bottom, "#333333", where="mid", linewidth=step_linewidth)
    ax.set_ylim([0, None])
    ax.tick_params(axis="both", which="major", labelsize=fontsize)
    # The legend is placed outside the axes; a new legend column is started for every 18 entries.
    ax.legend(
        loc="upper left",
        bbox_to_anchor=(1.05, 1.0),
        borderaxespad=0,
        ncol=math.ceil(legend_cnt / 18),
        fontsize=fontsize,
    )
    return out