Index Plot Gallery¶
Index plots are used for comparing different categories or segments against a baseline or average value, typically set at 100. They're ideal for retail analytics to identify which categories over- or underperform relative to the average.
Index plots excel at:
- Performance Comparison: Compare sales performance across product categories or regions against a baseline
- Segment Analysis: Analyze customer segments against overall average behavior
- Benchmarking: Evaluate stores or regions relative to company-wide metrics
- Opportunity Identification: Highlight high-potential areas for growth or investment
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from openretailscience.plots import index
Basic Index Plot¶
Create a basic index plot using default aggregation function (agg_func="sum") and sorting by group.
# Create sample sales data that guarantees bars outside 80-120 range
sales_data = pd.DataFrame(
{
"product_category": (["Electronics"] * 20 + ["Apparel"] * 20 + ["Home"] * 20 + ["Sports"] * 20),
"region": ["North", "South", "East", "West"] * 20,
"sales_amount": (
[100, 20, 30, 50] * 5 # Electronics (baseline)
+ [10, 120, 15, 25] * 5 # Apparel
+ [15, 25, 150, 30] * 5 # Home
+ [20, 30, 40, 110] * 5 # Sports
),
},
)
index.plot(
sales_data,
value_col="sales_amount",
group_col="region",
index_col="product_category",
value_to_index="Electronics",
title="Regional Sales Performance Index",
y_label="Region",
)
plt.show()
Multiple Series Index Plot¶
Use the series_col parameter to create grouped index plots showing multiple time periods or segments.
# Create sample data with multiple quarters
rng = np.random.default_rng(42)
quarterly_data = pd.DataFrame(
{
"product_category": ["Electronics", "Apparel", "Home", "Sports", "Beauty"] * 12,
"store_type": ["Mall", "Outlet", "Online"] * 20,
"quarter": ["Q1", "Q2", "Q3", "Q4"] * 15,
"revenue": rng.gamma(2.5, 800, 60),
},
)
index.plot(
quarterly_data,
value_col="revenue",
group_col="store_type",
index_col="product_category",
value_to_index="Electronics",
series_col="quarter", # Creates grouped bars for each quarter
title="Store Performance by Quarter",
y_label="Store Type",
legend_title="Quarter",
)
plt.show()
Different Aggregation Function¶
Use agg_func="mean" instead of the default "sum" to show average performance rather than total performance.
# Create sample transaction data with multiple transactions per category
rng = np.random.default_rng(42)
transaction_data = pd.DataFrame(
{
"product_category": ["Electronics", "Apparel", "Home", "Sports", "Beauty"] * 30,
"customer_segment": ["Premium", "Standard", "Budget"] * 50,
"avg_order_value": rng.gamma(1.8, 120, 150),
},
)
index.plot(
transaction_data,
value_col="avg_order_value",
group_col="customer_segment",
index_col="product_category",
value_to_index="Electronics",
agg_func="mean", # Use mean instead of sum
title="Order Value by Customer Segment",
y_label="Customer Segment",
)
plt.show()
Value-Based Sorting¶
Use sort_by="value" to sort groups by their index values rather than alphabetically by group name.
# Create sample performance data across different regions
rng = np.random.default_rng(42)
performance_data = pd.DataFrame(
{
"product_category": ["Electronics", "Apparel", "Home", "Sports", "Beauty"] * 16,
"region": ["North", "South", "East", "West", "Central", "Mountain", "Pacific", "Atlantic"] * 10,
"performance_score": rng.gamma(2.2, 50, 80),
},
)
index.plot(
performance_data,
value_col="performance_score",
group_col="region",
index_col="product_category",
value_to_index="Electronics",
sort_by="value", # Sort by index values instead of group names
sort_order="descending", # Show highest performers first
title="Regional Performance Sorted by Value",
y_label="Region",
)
plt.show()
Custom Highlight Range¶
Use custom highlight_range instead of the default (80, 120) to focus on different performance thresholds.
# Create sample efficiency data
rng = np.random.default_rng(42)
efficiency_data = pd.DataFrame(
{
"product_category": ["Electronics", "Apparel", "Home", "Sports", "Beauty"] * 12,
"department": ["Marketing", "Sales", "Operations", "Customer Service"] * 15,
"efficiency_metric": rng.gamma(2.0, 85, 60),
},
)
index.plot(
efficiency_data,
value_col="efficiency_metric",
group_col="department",
index_col="product_category",
value_to_index="Electronics",
highlight_range=(90, 110), # Tighter range for acceptable performance
title="Department Efficiency vs Target Range",
y_label="Department",
)
plt.show()
Color by Threshold¶
Use color_by_threshold=True to automatically color bars based on the highlight range. Bars at or above the upper threshold are colored with the plot.color.positive option, bars at or below the lower threshold use plot.color.negative, and bars between use plot.color.neutral.
# Reuse the performance data from the value-based sorting example
index.plot(
performance_data,
value_col="performance_score",
group_col="region",
index_col="product_category",
value_to_index="Electronics",
sort_by="value",
sort_order="descending",
color_by_threshold=True, # Color bars by highlight_range thresholds
title="Regional Performance with Threshold Coloring",
y_label="Region",
)
plt.show()
Top N Filtering¶
Use top_n parameter to show only the top performing groups, useful for focusing on best performers.
# Create sample data with many product subcategories
rng = np.random.default_rng(42)
# fmt: off
subcategories = [
"Smartphones", "Laptops", "Tablets", "Audio", "Cameras", "Gaming",
"Shirts", "Pants", "Shoes", "Accessories", "Outerwear", "Dresses",
"Furniture", "Decor", "Kitchen", "Bedding", "Lighting", "Storage",
"Running", "Gym", "Outdoor", "Team Sports", "Water Sports", "Fitness",
]
# fmt: on
product_data = []
for i in range(120):
category_idx = i % 5
subcategory_idx = (i // 5) % len(subcategories)
category = ["Electronics", "Apparel", "Home", "Sports", "Beauty"][category_idx]
subcategory = subcategories[subcategory_idx]
product_data.append(
{
"product_category": category,
"product_subcategory": subcategory,
"sales_volume": rng.gamma(1.5, 20),
},
)
product_data = pd.DataFrame(product_data)
index.plot(
product_data,
value_col="sales_volume",
group_col="product_subcategory",
index_col="product_category",
value_to_index="Electronics",
top_n=8,
title="Top 8 Product Subcategories",
y_label="Product Subcategory",
source_text="Source: Product Sales Database - Q4 2024",
)
plt.show()
Bottom N Filtering¶
Use bottom_n parameter to show only the bottom performing groups, useful for identifying underperformers.
# Create sample data with varied performance
rng = np.random.default_rng(42)
varied_data = pd.DataFrame(
{
"product_category": ["Electronics", "Apparel", "Home", "Sports", "Beauty"] * 20,
"store_location": [
"Downtown",
"Mall",
"Suburb",
"Airport",
"Online",
"Outlet",
"Resort",
"University",
"Hospital",
"Stadium",
]
* 10,
"sales_performance": rng.gamma(1.8, 15),
},
)
index.plot(
varied_data,
value_col="sales_performance",
group_col="store_location",
index_col="product_category",
value_to_index="Electronics",
bottom_n=2,
title="Bottom Performing Store Locations",
y_label="Store Location",
)
plt.show()
Filter Above/Below Thresholds¶
Use filter_above and filter_below to show only groups within specific performance ranges.
# Create strategic data to ensure multiple visually interesting bars outside gray area
rng = np.random.default_rng(42)
# fmt: off
threshold_data = pd.DataFrame(
{
"product_category": ["Electronics", "Apparel", "Home", "Sports", "Beauty"] * 20,
"sales_channel": [
"Direct Sales", "Partner Network", "E-commerce", "Retail Stores",
"Wholesale", "Franchise", "Mobile App", "Social Commerce",
] * 12 + ["Direct Sales", "Partner Network", "E-commerce", "Retail Stores"] * 1,
"performance_metric": (
# Electronics (baseline) - moderate consistent performance
[50, 50, 50, 50, 50, 50, 50, 50] * 2 + [50, 50, 50, 50]
+
# Apparel - significantly higher in digital channels
[75, 60, 120, 55, 45, 70, 110, 80] * 2 + [73, 62, 118, 57]
+
# Home - very strong e-commerce and mobile performance
[85, 65, 140, 60, 50, 80, 135, 90] * 2 + [83, 67, 138, 62]
+
# Sports - excellent digital presence
[90, 70, 130, 65, 55, 85, 125, 95] * 2 + [88, 72, 128, 67]
+
# Beauty - outstanding mobile and e-commerce performance
[80, 60, 150, 55, 45, 75, 145, 85] * 2 + [78, 62, 148, 57]
),
},
)
# fmt: on
index.plot(
threshold_data,
value_col="performance_metric",
group_col="sales_channel",
index_col="product_category",
value_to_index="Electronics",
filter_above=15,
sort_by="value",
sort_order="descending",
title="High-Performing Sales Channels (Above 115% - Multiple Strong Performers)",
y_label="Sales Channel",
)
plt.show()
# Create sample data for focused analysis with varied regional performance
# fmt: off
focus_data = pd.DataFrame(
{
"product_category": (
["Electronics"] * 18 + ["Apparel"] * 18 + ["Home"] * 18 + ["Sports"] * 18 + ["Beauty"] * 18
),
"market_region": [
"North America",
"Europe",
"Asia Pacific",
"Latin America",
"Middle East",
"Africa",
]
* 15,
"market_share": (
# Electronics (baseline) - North America stronger
[25, 20, 20, 15, 10, 10] * 3
+
# Apparel - Europe stronger
[10, 35, 15, 20, 10, 10] * 3
+
# Home - Asia Pacific much stronger
[15, 15, 45, 10, 10, 5] * 3
+
# Sports - North America stronger
[30, 18, 20, 12, 10, 10] * 3
+
# Beauty - Europe stronger
[12, 38, 18, 15, 10, 7] * 3
),
},
)
# fmt: on
index.plot(
focus_data,
value_col="market_share",
group_col="market_region",
index_col="product_category",
value_to_index="Electronics",
include_only_groups=["North America", "Europe", "Asia Pacific"],
title="Major Market Regions Performance",
y_label="Market Region",
)
plt.show()
Exclude Specific Groups¶
Use exclude_groups to remove specific groups from the analysis, useful for excluding outliers or irrelevant categories.
# Create sample data with some outlier segments
rng = np.random.default_rng(42)
segment_data = pd.DataFrame(
{
"product_category": ["Electronics", "Apparel", "Home", "Sports", "Beauty"] * 16,
"customer_type": [
"Regular",
"Premium",
"VIP",
"Corporate",
"Wholesale",
"Employee",
"Test",
"Internal",
]
* 10,
"revenue_contribution": rng.uniform(10, 30, 80), # Use uniform for consistent data
},
)
index.plot(
segment_data,
value_col="revenue_contribution",
group_col="customer_type",
index_col="product_category",
value_to_index="Electronics",
exclude_groups=["Test", "Internal"],
title="Customer Type Performance (Excluding Test Data)",
y_label="Customer Type",
)
plt.show()