Mini-project CH03
- Class 1:55
- Analysts: Olivia Smith, Theo Brito Machado
- Company: Ralph Lauren, NYSE: RL
1) Setup
Importing the functions at the top of the code. We are now using the tidy dataframes from the Project 2 outputs rather than the raw CSV files. The imports are already consolidated in MM.
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from IPython.display import display
# Notebook-wide pandas display settings: show wide tables without truncation
# and format floats with thousands separators and four decimals.
pd.set_option("display.max_columns", 200)
pd.set_option("display.width", 180)
pd.set_option("display.float_format", lambda x: f"{x:,.4f}")
def locate_base_dir(start: Path) -> Path:
    """Walk upward from *start* to find the course root directory.

    The root is recognized by containing both a ``DATA/csv_export`` folder
    and a ``Mini-projects`` folder.

    Raises:
        FileNotFoundError: if no ancestor of *start* (including *start*
            itself) looks like the course root.
    """
    for candidate in (start, *start.parents):
        has_data = (candidate / "DATA" / "csv_export").exists()
        has_projects = (candidate / "Mini-projects").exists()
        if has_data and has_projects:
            return candidate
    raise FileNotFoundError("Could not locate the course root from the current working directory.")
# Resolve course-relative paths once so the notebook runs from any subfolder.
BASE_DIR = locate_base_dir(Path.cwd())
CH02_OUT_DIR = BASE_DIR / "Mini-projects" / "outputs"
# Fix: was "Mini projects" (no hyphen), which silently wrote Chapter 3 outputs
# to a second folder instead of the shared "Mini-projects/outputs" directory
# that locate_base_dir() checks for and that Chapter 2 writes into.
OUT_DIR = BASE_DIR / "Mini-projects" / "outputs"
OUT_DIR.mkdir(parents=True, exist_ok=True)
# Chapter 2 tidy outputs: the quarterly historical pack (USDm) and its audit table.
CH02_PACK_PATH = CH02_OUT_DIR / "unit1_tidy_historical_pack_usdm.csv"
CH02_AUDIT_PATH = CH02_OUT_DIR / "unit1_tidy_historical_audit.csv"
ch02_tidy_raw = pd.read_csv(CH02_PACK_PATH, parse_dates=["Date"])
ch02_audit_raw = pd.read_csv(CH02_AUDIT_PATH)
print("Loaded quarterly Chapter 2 base files:")
print(f" Historical pack: {ch02_tidy_raw.shape} -> {CH02_PACK_PATH}")
print(f" Audit table : {ch02_audit_raw.shape} -> {CH02_AUDIT_PATH}")
print("Chapter 3 output folder:", OUT_DIR)
Loaded quarterly Chapter 2 base files: Historical pack: (40, 70) -> /Users/theomachado/Library/CloudStorage/OneDrive-UniversityofFlorida/Spring 2026/FIN4453 - OFC/Mini-projects/outputs/unit1_tidy_historical_pack_usdm.csv Audit table : (40, 8) -> /Users/theomachado/Library/CloudStorage/OneDrive-UniversityofFlorida/Spring 2026/FIN4453 - OFC/Mini-projects/outputs/unit1_tidy_historical_audit.csv Chapter 3 output folder: /Users/theomachado/Library/CloudStorage/OneDrive-UniversityofFlorida/Spring 2026/FIN4453 - OFC/Mini projects/outputs
2) Bringing in the CH02 data
The tidy outputs from the M1 mini-project give us a better base to work from; we are just reshaping and reorganizing some key parts of the data to make this project easier when we reach the PVM section.
# Maps a quarter label ("Q1".."Q4") to its numeric sort order, used to turn
# period labels into sortable integers throughout the notebook.
PERIOD_ORDER = {"Q1": 1, "Q2": 2, "Q3": 3, "Q4": 4}
# Helpers for working with fiscal-period keys.
def make_period_key(fiscal_year: int, period: str) -> str:
    """Build a canonical ``'YYYY-QN'`` key from a fiscal year and quarter label."""
    quarter_label = str(period).upper()
    return f"{int(fiscal_year)}-{quarter_label}"
def split_period_key(period_key: str) -> tuple[int, int]:
    """Split a ``'YYYY-QN'`` key into its ``(year, quarter)`` integer pair."""
    year_part, _, quarter_part = str(period_key).partition("-Q")
    return int(year_part), int(quarter_part)
def next_periods(last_period_key: str, periods_forward: int) -> list[str]:
    """Return the *periods_forward* quarter keys immediately after *last_period_key*.

    Quarters roll over from Q4 to Q1 of the following fiscal year.
    """
    year_text, _, quarter_text = str(last_period_key).partition("-Q")
    year, quarter = int(year_text), int(quarter_text)
    result: list[str] = []
    while len(result) < periods_forward:
        if quarter == 4:
            year, quarter = year + 1, 1
        else:
            quarter += 1
        result.append(f"{year}-Q{quarter}")
    return result
def parse_quarter_label(label: str) -> tuple[int, str]:
    """Extract ``(fiscal_year, 'QN')`` from a label such as ``'FY2024 Q3'``.

    Matching is case-insensitive and tolerates whitespace between the
    ``FY`` prefix, the year, and the quarter.

    Raises:
        ValueError: if the label contains no FY-year/quarter pattern.
    """
    normalized = str(label).upper()
    hit = re.search(r"FY\s*(\d{4})\s*(Q[1-4])", normalized)
    if hit is None:
        raise ValueError(f"Could not parse quarter label: {label}")
    year_text, quarter_text = hit.groups()
    return int(year_text), quarter_text
# Preparing the data received from ch02 tidy dataframe
def prepare_history_from_ch02(tidy_df: pd.DataFrame, audit_df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Reshape the Chapter 2 tidy pack into the quarterly modeling history.

    Parameters:
        tidy_df: the Chapter 2 historical pack (one row per fiscal quarter,
            USDm line items, with FiscalYear/Period/QuarterNumber/Date columns).
        audit_df: the Chapter 2 audit table, keyed by a "FY YYYY QN" label
            in its "Quarter" column.

    Returns:
        (hist, audit, schema_map):
          hist       - income-statement history indexed by "YYYY-QN" PeriodKey,
                       with derived margins, tax rate, and identity-consistent totals;
          audit      - the Chapter 2 audit columns aligned to the same PeriodKey index;
          schema_map - a small table mapping source fields to model fields and units.
    """
    h = tidy_df.copy()
    # The tidy pack's "Quarter" holds a text label; "QuarterNumber" holds 1-4.
    h = h.rename(columns={"Quarter": "QuarterLabel", "QuarterNumber": "Quarter"})
    # Coerce key columns; rows that fail to parse are dropped below.
    h["FiscalYear"] = pd.to_numeric(h["FiscalYear"], errors="coerce")
    h["Quarter"] = pd.to_numeric(h["Quarter"], errors="coerce")
    h["Period"] = h["Period"].astype(str).str.upper().str.strip()
    h["Date"] = pd.to_datetime(h["Date"], errors="coerce")
    h = h.dropna(subset=["FiscalYear", "Quarter", "Period"]).copy()
    h["FiscalYear"] = h["FiscalYear"].astype(int)
    h["Quarter"] = h["Quarter"].astype(int)
    h["PeriodKey"] = h.apply(lambda row: make_period_key(row["FiscalYear"], row["Period"]), axis=1)
    # If a period appears more than once, keep the most recent record.
    h = h.sort_values(["FiscalYear", "Quarter", "Date"]).drop_duplicates(subset=["PeriodKey"], keep="last")
    h = h.set_index("PeriodKey")
    numeric_cols = [
        "Revenue", "COGS", "GrossProfit", "OperatingExpenses", "OperatingIncome", "EBIT", "EBITDA", "DA",
        "InterestExpense", "TaxExpense", "NetIncome", "TotalDebt"
    ]
    for col in numeric_cols:
        h[col] = pd.to_numeric(h[col], errors="coerce")
    # Rebuild the income statement from Revenue down so accounting identities
    # (GP = Rev - COGS, EBITDA = EBIT + DA, etc.) hold exactly in the model.
    hist = pd.DataFrame(index=h.index)
    hist["FiscalYear"] = h["FiscalYear"]
    hist["Quarter"] = h["Quarter"]
    hist["Date"] = h["Date"]
    hist["Revenue"] = h["Revenue"]
    hist["COGS"] = h["COGS"]
    hist["GrossProfit"] = hist["Revenue"] - hist["COGS"]
    # Margins divide by Revenue with 0 replaced by NaN to avoid inf.
    hist["GrossMargin"] = hist["GrossProfit"] / hist["Revenue"].replace(0, np.nan)
    hist["OperatingExpenses"] = h["OperatingExpenses"]
    hist["OpExMargin"] = hist["OperatingExpenses"] / hist["Revenue"].replace(0, np.nan)
    hist["OperatingIncome"] = hist["GrossProfit"] - hist["OperatingExpenses"]
    # EBITBridge = reported EBIT minus derived operating income, i.e. the
    # residual needed to reconcile the two (non-operating items).
    hist["EBITBridge"] = h["EBIT"] - hist["OperatingIncome"]
    hist["EBITBridgeMargin"] = hist["EBITBridge"] / hist["Revenue"].replace(0, np.nan)
    hist["EBIT"] = hist["OperatingIncome"] + hist["EBITBridge"]
    hist["DA"] = h["DA"]
    hist["DAMargin"] = hist["DA"] / hist["Revenue"].replace(0, np.nan)
    hist["EBITDA"] = hist["EBIT"] + hist["DA"]
    # Treat interest as a positive cost regardless of the source's sign convention.
    hist["InterestExpense"] = h["InterestExpense"].abs()
    hist["InterestMargin"] = hist["InterestExpense"] / hist["Revenue"].replace(0, np.nan)
    hist["EBT"] = hist["EBIT"] - hist["InterestExpense"]
    # Effective tax rate only for profitable quarters, clipped to [0%, 40%];
    # loss quarters get zero tax (no tax-benefit modeling).
    tax_rate_raw = np.where(hist["EBT"] > 0, h["TaxExpense"] / hist["EBT"], np.nan)
    hist["TaxRate"] = pd.Series(tax_rate_raw, index=hist.index).clip(lower=0.0, upper=0.40)
    hist["TaxExpense"] = np.where(hist["EBT"] > 0, hist["EBT"] * hist["TaxRate"], 0.0)
    hist["NetIncome"] = hist["EBT"] - hist["TaxExpense"]
    hist["NetMargin"] = hist["NetIncome"] / hist["Revenue"].replace(0, np.nan)
    hist["EBITMargin"] = hist["EBIT"] / hist["Revenue"].replace(0, np.nan)
    hist["TotalDebt"] = h["TotalDebt"]
    # Align the Chapter 2 audit table to the same PeriodKey index by parsing
    # its "FY YYYY QN" quarter labels.
    a = audit_df.copy()
    parsed_labels = a["Quarter"].apply(parse_quarter_label)
    a["FiscalYear"] = [year for year, _ in parsed_labels]
    a["Period"] = [period for _, period in parsed_labels]
    a["Quarter"] = a["Period"].map(PERIOD_ORDER).astype(int)
    a["PeriodKey"] = a.apply(lambda row: make_period_key(row["FiscalYear"], row["Period"]), axis=1)
    a = a.set_index("PeriodKey").sort_index()
    # Left-join so every modeled quarter keeps a row even if the audit is missing.
    audit = hist[["FiscalYear", "Quarter", "Date"]].join(
        a[[
            "IS_RecordCount", "BS_RecordCount", "CF_RecordCount", "Ratios_RecordCount",
            "BalanceError_USDm", "GrossProfitError_USDm", "AuditPass"
        ]],
        how="left",
    )
    # Documentation table: where each model field comes from and its unit.
    schema_map = pd.DataFrame(
        {
            "Source": ["Chapter 2 tidy pack"] * 12,
            "SourceField": [
                "FiscalYear", "Period", "Revenue", "COGS", "GrossProfit", "OperatingExpenses",
                "OperatingIncome", "EBIT", "DA", "InterestExpense", "TaxExpense", "NetIncome"
            ],
            "ModelField": [
                "FiscalYear", "Quarterly Period", "Revenue", "COGS", "GrossProfit", "OperatingExpenses",
                "OperatingIncome", "EBIT", "DA", "InterestExpense", "TaxExpense", "NetIncome"
            ],
            "Unit": ["Fiscal year", "Quarter", "USDm", "USDm", "USDm", "USDm", "USDm", "USDm", "USDm", "USDm", "USDm", "USDm"],
        }
    )
    # Chronological sort by (year, quarter) parsed from the PeriodKey strings.
    return hist.sort_index(key=lambda idx: [split_period_key(x) for x in idx]), audit.sort_index(key=lambda idx: [split_period_key(x) for x in idx]), schema_map
# Running the actual functions to get the tidy data ready to go.
hist, audit, schema_map = prepare_history_from_ch02(ch02_tidy_raw, ch02_audit_raw)
print(f"History window: {hist.index.min()} to {hist.index.max()} ({len(hist)} quarters)")
print()
print("Schema map:")
display(schema_map)
print()
print("Chapter 2 audit carry-forward (tail):")
display(audit.tail(12))
print()
print("Quarterly income statement history (tail, USDm):")
display(hist[[
    "FiscalYear", "Quarter", "Revenue", "COGS", "GrossProfit", "OperatingExpenses", "OperatingIncome",
    "EBIT", "DA", "EBITDA", "InterestExpense", "EBT", "TaxExpense", "NetIncome"
]].round(2))
# Fix: this summary describes NetIncome, not EBITDA — the old name `ebitda_desc`
# was misleading (the printed output is labeled "Name: NetIncome").
net_income_desc = hist["NetIncome"].describe().T
display(net_income_desc)
History window: 2016-Q4 to 2026-Q3 (40 quarters) Schema map:
| Source | SourceField | ModelField | Unit | |
|---|---|---|---|---|
| 0 | Chapter 2 tidy pack | FiscalYear | FiscalYear | Fiscal year |
| 1 | Chapter 2 tidy pack | Period | Quarterly Period | Quarter |
| 2 | Chapter 2 tidy pack | Revenue | Revenue | USDm |
| 3 | Chapter 2 tidy pack | COGS | COGS | USDm |
| 4 | Chapter 2 tidy pack | GrossProfit | GrossProfit | USDm |
| 5 | Chapter 2 tidy pack | OperatingExpenses | OperatingExpenses | USDm |
| 6 | Chapter 2 tidy pack | OperatingIncome | OperatingIncome | USDm |
| 7 | Chapter 2 tidy pack | EBIT | EBIT | USDm |
| 8 | Chapter 2 tidy pack | DA | DA | USDm |
| 9 | Chapter 2 tidy pack | InterestExpense | InterestExpense | USDm |
| 10 | Chapter 2 tidy pack | TaxExpense | TaxExpense | USDm |
| 11 | Chapter 2 tidy pack | NetIncome | NetIncome | USDm |
Chapter 2 audit carry-forward (tail):
| FiscalYear | Quarter | Date | IS_RecordCount | BS_RecordCount | CF_RecordCount | Ratios_RecordCount | BalanceError_USDm | GrossProfitError_USDm | AuditPass | |
|---|---|---|---|---|---|---|---|---|---|---|
| PeriodKey | ||||||||||
| 2023-Q4 | 2023 | 4 | 2023-04-01 | 1 | 1 | 1 | 1 | 0.0000 | 0.0000 | True |
| 2024-Q1 | 2024 | 1 | 2023-07-01 | 1 | 1 | 1 | 1 | 0.0000 | 0.0000 | True |
| 2024-Q2 | 2024 | 2 | 2023-09-30 | 1 | 1 | 1 | 1 | 0.0000 | 0.0000 | True |
| 2024-Q3 | 2024 | 3 | 2023-12-30 | 1 | 1 | 1 | 1 | 0.0000 | 0.0000 | True |
| 2024-Q4 | 2024 | 4 | 2024-03-30 | 1 | 1 | 1 | 1 | 0.0000 | 0.0000 | True |
| 2025-Q1 | 2025 | 1 | 2024-06-29 | 1 | 1 | 1 | 1 | 0.0000 | -0.0000 | True |
| 2025-Q2 | 2025 | 2 | 2024-09-28 | 1 | 1 | 1 | 1 | 0.0000 | 0.0000 | True |
| 2025-Q3 | 2025 | 3 | 2024-12-28 | 1 | 1 | 1 | 1 | 0.0000 | 0.0000 | True |
| 2025-Q4 | 2025 | 4 | 2025-03-29 | 1 | 1 | 1 | 1 | 0.0000 | 0.0000 | True |
| 2026-Q1 | 2026 | 1 | 2025-06-28 | 1 | 1 | 1 | 1 | 0.0000 | 0.0000 | True |
| 2026-Q2 | 2026 | 2 | 2025-09-27 | 1 | 1 | 1 | 1 | 0.0000 | 0.0000 | True |
| 2026-Q3 | 2026 | 3 | 2025-12-27 | 1 | 1 | 1 | 1 | 0.0000 | -0.0000 | True |
Quarterly income statement history (tail, USDm):
| FiscalYear | Quarter | Revenue | COGS | GrossProfit | OperatingExpenses | OperatingIncome | EBIT | DA | EBITDA | InterestExpense | EBT | TaxExpense | NetIncome | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| PeriodKey | ||||||||||||||
| 2016-Q4 | 2016 | 4 | 1,871.0000 | 857.0000 | 1,014.0000 | 947.0000 | 67.0000 | 69.0000 | 7.0000 | 76.0000 | 7.0000 | 62.0000 | 21.0000 | 41.0000 |
| 2017-Q1 | 2017 | 1 | 1,552.0000 | 657.6000 | 894.4000 | 925.8000 | -31.4000 | -29.8000 | 6.0000 | -23.8000 | 3.4000 | -33.2000 | 0.0000 | -33.2000 |
| 2017-Q2 | 2017 | 2 | 1,821.0000 | 866.4000 | 954.6000 | 877.9000 | 76.7000 | 77.8000 | 76.0000 | 153.8000 | 4.1000 | 73.7000 | 28.0000 | 45.7000 |
| 2017-Q3 | 2017 | 3 | 1,714.0000 | 731.4000 | 982.6000 | 854.9000 | 127.7000 | 126.7000 | 6.0000 | 132.7000 | 3.6000 | 123.1000 | 41.8000 | 81.3000 |
| 2017-Q4 | 2017 | 4 | 1,565.8000 | 746.7000 | 819.1000 | 1,086.9000 | -267.8000 | -267.5000 | 75.5000 | -192.0000 | 1.4000 | -268.9000 | 0.0000 | -268.9000 |
| 2018-Q1 | 2018 | 1 | 1,347.1000 | 495.9000 | 851.2000 | 760.9000 | 90.3000 | 91.8000 | 72.9000 | 164.7000 | 5.0000 | 86.8000 | 27.3000 | 59.5000 |
| 2018-Q2 | 2018 | 2 | 1,664.2000 | 668.4000 | 995.8000 | 802.5000 | 193.3000 | 195.8000 | 73.8000 | 269.6000 | 4.6000 | 191.2000 | 47.4000 | 143.8000 |
| 2018-Q3 | 2018 | 3 | 1,641.8000 | 645.6000 | 996.2000 | 807.0000 | 189.2000 | 191.1000 | 72.7000 | 263.8000 | 4.8000 | 186.3000 | 74.5200 | 111.7800 |
| 2018-Q4 | 2018 | 4 | 1,529.2000 | 620.7000 | 908.5000 | 883.1000 | 25.4000 | 28.7000 | 75.8000 | 104.5000 | 3.8000 | 24.9000 | 0.0000 | 24.9000 |
| 2019-Q1 | 2019 | 1 | 1,390.6000 | 494.9000 | 895.7000 | 765.6000 | 130.1000 | 137.3000 | 70.3000 | 207.6000 | 4.4000 | 132.9000 | 23.9000 | 109.0000 |
| 2019-Q2 | 2019 | 2 | 1,690.9000 | 661.6000 | 1,029.3000 | 819.3000 | 210.0000 | 220.8000 | 69.7000 | 290.5000 | 6.0000 | 214.8000 | 44.5000 | 170.3000 |
| 2019-Q3 | 2019 | 3 | 1,725.8000 | 666.3000 | 1,059.5000 | 865.7000 | 193.8000 | 204.7000 | 72.0000 | 276.7000 | 5.2000 | 199.5000 | 79.5000 | 120.0000 |
| 2019-Q4 | 2019 | 4 | 1,505.7000 | 604.2000 | 901.5000 | 873.6000 | 27.9000 | 40.4000 | 69.3000 | 109.7000 | 5.1000 | 35.3000 | 3.7000 | 31.6000 |
| 2020-Q1 | 2020 | 1 | 1,428.8000 | 508.0000 | 920.8000 | 777.5000 | 143.3000 | 150.8000 | 66.2000 | 217.0000 | 4.2000 | 146.6000 | 29.5000 | 117.1000 |
| 2020-Q2 | 2020 | 2 | 1,706.2000 | 657.2000 | 1,049.0000 | 815.9000 | 233.1000 | 241.0000 | 66.6000 | 307.6000 | 4.4000 | 236.6000 | 54.5000 | 182.1000 |
| 2020-Q3 | 2020 | 3 | 1,750.7000 | 661.6000 | 1,089.1000 | 864.7000 | 224.4000 | 234.6000 | 68.2000 | 302.8000 | 4.2000 | 230.4000 | 0.0000 | 230.4000 |
| 2020-Q4 | 2020 | 4 | 1,274.1000 | 679.7000 | 594.4000 | 878.2000 | -283.8000 | -282.4000 | 68.5000 | -213.9000 | 4.8000 | -287.2000 | 0.0000 | -287.2000 |
| 2021-Q1 | 2021 | 1 | 487.5000 | 138.8000 | 348.7000 | 516.7000 | -168.0000 | -163.0000 | 63.7000 | -99.3000 | 9.6000 | -172.6000 | 0.0000 | -172.6000 |
| 2021-Q2 | 2021 | 2 | 1,193.5000 | 394.1000 | 799.4000 | 819.7000 | -20.3000 | -16.3000 | 61.5000 | 45.2000 | 12.8000 | -29.1000 | 0.0000 | -29.1000 |
| 2021-Q3 | 2021 | 3 | 1,432.8000 | 502.4000 | 930.4000 | 760.0000 | 170.4000 | 174.4000 | 60.3000 | 234.7000 | 12.2000 | 162.2000 | 42.4000 | 119.8000 |
| 2021-Q4 | 2021 | 4 | 1,287.0000 | 504.1000 | 782.9000 | 808.6000 | -25.7000 | -21.4000 | 62.1000 | 40.7000 | 13.9000 | -35.3000 | 0.0000 | -35.3000 |
| 2022-Q1 | 2022 | 1 | 1,376.3000 | 408.2000 | 968.1000 | 747.5000 | 220.6000 | 223.3000 | 57.2000 | 280.5000 | 13.3000 | 210.0000 | 45.3000 | 164.7000 |
| 2022-Q2 | 2022 | 2 | 1,504.1000 | 488.9000 | 1,015.2000 | 763.3000 | 251.9000 | 251.7000 | 55.9000 | 307.6000 | 13.6000 | 238.1000 | 44.8000 | 193.3000 |
| 2022-Q3 | 2022 | 3 | 1,815.4000 | 617.3000 | 1,198.1000 | 909.0000 | 289.1000 | 290.6000 | 56.2000 | 346.8000 | 13.4000 | 277.2000 | 59.5000 | 217.7000 |
| 2022-Q4 | 2022 | 4 | 1,522.7000 | 556.6000 | 966.1000 | 929.3000 | 36.8000 | 43.0000 | 60.4000 | 103.4000 | 13.7000 | 29.3000 | 4.9000 | 24.4000 |
| 2023-Q1 | 2023 | 1 | 1,490.6000 | 489.2000 | 1,001.4000 | 826.2000 | 175.2000 | 174.0000 | 54.8000 | 228.8000 | 11.8000 | 162.2000 | 38.8000 | 123.4000 |
| 2023-Q2 | 2023 | 2 | 1,579.9000 | 556.8000 | 1,023.1000 | 816.4000 | 206.7000 | 209.6000 | 53.3000 | 262.9000 | 9.5000 | 200.1000 | 49.6000 | 150.5000 |
| 2023-Q3 | 2023 | 3 | 1,832.3000 | 641.6000 | 1,190.7000 | 908.6000 | 282.1000 | 292.4000 | 55.2000 | 347.6000 | 12.0000 | 280.4000 | 63.9000 | 216.5000 |
| 2023-Q4 | 2023 | 4 | 1,540.8000 | 590.2000 | 950.6000 | 910.4000 | 40.2000 | 56.3000 | 57.2000 | 113.5000 | 7.1000 | 49.2000 | 16.9000 | 32.3000 |
| 2024-Q1 | 2024 | 1 | 1,496.5000 | 464.5000 | 1,032.0000 | 865.6000 | 166.4000 | 180.6000 | 58.3000 | 238.9000 | 10.0000 | 170.6000 | 38.5000 | 132.1000 |
| 2024-Q2 | 2024 | 2 | 1,633.0000 | 562.9000 | 1,070.1000 | 905.6000 | 164.5000 | 175.5000 | 58.5000 | 234.0000 | 10.0000 | 165.5000 | 18.6000 | 146.9000 |
| 2024-Q3 | 2024 | 3 | 1,934.0000 | 648.0000 | 1,286.0000 | 968.3000 | 317.7000 | 340.4000 | 56.2000 | 396.6000 | 10.6000 | 329.8000 | 53.2000 | 276.6000 |
| 2024-Q4 | 2024 | 4 | 1,567.9000 | 524.2000 | 1,043.7000 | 935.9000 | 107.8000 | 123.1000 | 56.0000 | 179.1000 | 11.6000 | 111.5000 | 20.8000 | 90.7000 |
| 2025-Q1 | 2025 | 1 | 1,512.2000 | 446.4000 | 1,065.8000 | 857.3000 | 208.5000 | 227.5000 | 54.4000 | 281.9000 | 10.9000 | 216.6000 | 48.0000 | 168.6000 |
| 2025-Q2 | 2025 | 2 | 1,726.0000 | 570.3000 | 1,155.7000 | 976.8000 | 178.9000 | 199.5000 | 55.9000 | 255.4000 | 11.4000 | 188.1000 | 40.2000 | 147.9000 |
| 2025-Q3 | 2025 | 3 | 2,143.5000 | 677.4000 | 1,466.1000 | 1,076.4000 | 389.7000 | 395.3000 | 54.3000 | 449.6000 | 11.6000 | 383.7000 | 86.3000 | 297.4000 |
| 2025-Q4 | 2025 | 4 | 1,697.3000 | 532.0000 | 1,165.3000 | 1,010.3000 | 155.0000 | 172.5000 | 55.0000 | 227.5000 | 10.2000 | 162.3000 | 33.3000 | 129.0000 |
| 2026-Q1 | 2026 | 1 | 1,719.1000 | 476.8000 | 1,242.3000 | 968.7000 | 273.6000 | 289.5000 | 55.5000 | 345.0000 | 11.5000 | 278.0000 | 57.6000 | 220.4000 |
| 2026-Q2 | 2026 | 2 | 2,010.7000 | 644.3000 | 1,366.4000 | 1,120.7000 | 245.7000 | 244.1000 | 57.0000 | 301.1000 | 15.9000 | 228.2000 | 35.3000 | 192.9000 |
| 2026-Q3 | 2026 | 3 | 2,406.0000 | 724.3000 | 1,681.7000 | 1,178.6000 | 503.1000 | 477.1000 | 59.7000 | 536.8000 | 13.3000 | 463.8000 | 102.2000 | 361.6000 |
count 40.0000 mean 101.2220 std 133.0328 min -287.2000 25% 38.8250 50% 121.7000 75% 173.2500 max 361.6000 Name: NetIncome, dtype: float64
3) PVM
Hardest part to source and reconcile. We used location counts, and revenue per location, across the years to create a PVM base.
All data were input from the 10-Qs. Asia's main focus is China; as highlighted in the later volume growth, the region has been in an expansion phase lately.
# Quarterly period keys covered by the regional PVM inputs (FY2021 Q3 - FY2026 Q2).
periods = [
    "2021-Q3", "2021-Q4", "2022-Q1", "2022-Q2", "2022-Q3", "2022-Q4",
    "2023-Q1", "2023-Q2", "2023-Q3", "2023-Q4", "2024-Q1", "2024-Q2",
    "2024-Q3", "2024-Q4", "2025-Q1", "2025-Q2", "2025-Q3", "2025-Q4",
    "2026-Q1", "2026-Q2",
]
# Per-region quarterly series, each aligned 1:1 with `periods`:
#   Volume  - location count per the 10-Q disclosures (proxy volume driver)
#   Revenue - regional revenue in USD millions
pvm_foundation = {
    "North America": {
        "Volume": [232, 233, 233, 235, 239, 239, 238, 239, 236, 237, 237, 236, 237, 230, 228, 228, 228, 223, 225, 225],
        "Revenue": [715.4, 569.0, 662.1, 703.1, 928.7, 674.3, 700.7, 726.6, 937.6, 655.6, 631.7, 717.8, 933.3, 667.7, 608.2, 739.5, 997.7, 704.7, 656.2, 832.4],
    },
    "Europe": {
        "Volume": [96, 92, 94, 94, 97, 95, 97, 98, 104, 104, 104, 104, 105, 103, 103, 104, 103, 104, 106, 108],
        "Revenue": [315.6, 370.1, 354.9, 495.5, 462.9, 467.4, 415.6, 493.5, 469.3, 460.8, 450.5, 526.8, 521.5, 469.2, 479.1, 565.9, 604.4, 525.5, 554.5, 688.3],
    },
    "Asia": {
        "Volume": [147, 151, 155, 164, 169, 170, 191, 197, 209, 212, 219, 224, 228, 231, 234, 238, 248, 237, 238, 249],
        "Revenue": [329.6, 289.4, 288.2, 269.9, 382.6, 346.1, 334.1, 316.4, 386.2, 390.0, 377.5, 348.4, 446.4, 394.3, 390.9, 380.2, 506.7, 431.6, 474.0, 445.6],
    },
}
def build_revenue_drivers_bottom_up(
    hist: pd.DataFrame,
    periods: list[str],
    pvm_foundation: dict[str, dict[str, list[float]]],
    forecast_quarters: int = 8,
) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Decompose regional revenue into Price/Volume/Mix effects and forecast bottom-up.

    Parameters:
        hist: quarterly company history indexed by "YYYY-QN" PeriodKey, with
            a "Revenue" column (used to anchor the forecast level).
        periods: the period keys covered by `pvm_foundation` series.
        pvm_foundation: per-region {"Volume": [...], "Revenue": [...]} lists,
            each the same length as `periods`.
        forecast_quarters: number of quarters to project past the anchor.

    Returns:
        (regional_history, regional_forecast, company_forecast, summary):
          regional_history - long table with per-region PVM decomposition;
          regional_forecast - per-region forecast with signal assumptions;
          company_forecast - regional forecasts aggregated to company level,
              indexed by Period;
          summary - one row per region plus a "Total Company" row with
              anchors and recent median drivers.
    """
    # Driver metrics carried through fitting, blending, and the summary.
    metric_cols = [
        "RevenueGrowth_q",
        "PriceGrowthEff_q",
        "VolumeGrowthEff_q",
        "MixGrowthEff_q",
    ]

    # NOTE(review): these two local helpers duplicate the module-level
    # split_period_key / next_periods definitions byte-for-byte; consider
    # removing the local copies and reusing the module-level ones.
    def split_period_key(period_key: str) -> tuple[int, int]:
        year_str, quarter_str = str(period_key).split("-Q")
        return int(year_str), int(quarter_str)

    def next_periods(start_period: str, periods_forward: int) -> list[str]:
        year, quarter = split_period_key(start_period)
        out = []
        for _ in range(periods_forward):
            quarter += 1
            if quarter > 4:
                year += 1
                quarter = 1
            out.append(f"{year}-Q{quarter}")
        return out

    def safe_recent_median(fit_df: pd.DataFrame) -> pd.Series:
        # Median of up to the last 8 fitted quarters; all-zero series when
        # nothing fits, NaN medians filled with 0.0.
        if fit_df.empty:
            return pd.Series({col: 0.0 for col in metric_cols}, dtype=float)
        out = fit_df.tail(min(8, len(fit_df)))[metric_cols].median(numeric_only=True)
        return out.reindex(metric_cols).fillna(0.0)

    def build_blended_series(
        quarter_series: pd.Series,
        seasonal: pd.DataFrame,
        recent: pd.Series,
        metric: str,
        weights: np.ndarray,
    ) -> list[float]:
        # Blend the same-quarter seasonal median with the recent overall median,
        # falling back to the recent value when the seasonal one is missing/NaN.
        values = []
        recent_value = float(recent.get(metric, 0.0))
        if pd.isna(recent_value):
            recent_value = 0.0
        for quarter, weight in zip(quarter_series, weights):
            if metric in seasonal.columns and quarter in seasonal.index:
                seasonal_value = seasonal.loc[quarter, metric]
            else:
                seasonal_value = recent_value
            if pd.isna(seasonal_value):
                seasonal_value = recent_value
            values.append(weight * float(seasonal_value) + (1.0 - weight) * recent_value)
        return values

    # 1) Expand the regional PVM foundation into a long flat table
    rows = []
    for region, values in pvm_foundation.items():
        if len(values["Volume"]) != len(periods) or len(values["Revenue"]) != len(periods):
            raise ValueError(f"PVM series length mismatch for {region}.")
        for period, volume, revenue in zip(periods, values["Volume"], values["Revenue"]):
            rows.append(
                {
                    "Region": region,
                    "Period": period,
                    "Volume": volume,
                    "Revenue_USDm": revenue,
                }
            )
    pvm_ex = pd.DataFrame(rows)
    pvm_ex["FiscalYear"] = pvm_ex["Period"].str.split("-Q").str[0].astype(int)
    pvm_ex["Quarter"] = pvm_ex["Period"].str.split("-Q").str[1].astype(int)
    pvm_ex = pvm_ex.sort_values(["Region", "FiscalYear", "Quarter"]).reset_index(drop=True)
    # ASP = revenue per unit of volume; zero volume becomes NaN to avoid inf.
    pvm_ex["ASP"] = pvm_ex["Revenue_USDm"] / pvm_ex["Volume"].replace(0, np.nan)

    # 2) Decide the forecast anchor
    # We keep the same notebook behavior: forecast starts after the last quarter in hist.
    hist_last_period = str(hist.index[-1])
    hist_last_revenue = float(hist["Revenue"].iloc[-1])
    pvm_last_period = max(pvm_ex["Period"].unique(), key=split_period_key)
    # If the regional PVM table ends before hist, allocate the latest company revenue
    # back to regions using the most recent regional revenue mix.
    latest_mix = pvm_ex.loc[pvm_ex["Period"] == pvm_last_period, ["Region", "Revenue_USDm"]].copy()
    latest_mix["RevenueShare"] = latest_mix["Revenue_USDm"] / latest_mix["Revenue_USDm"].sum()
    if split_period_key(hist_last_period) >= split_period_key(pvm_last_period):
        forecast_anchor_period = hist_last_period
        company_anchor_revenue = hist_last_revenue
        if hist_last_period == pvm_last_period:
            anchor_method = "Actual regional revenue at the forecast anchor period"
            anchor_revenue_by_region = latest_mix.set_index("Region")["Revenue_USDm"].to_dict()
        else:
            anchor_method = (
                f"Scaled latest regional mix from {pvm_last_period} "
                f"to company anchor revenue at {hist_last_period}"
            )
            anchor_revenue_by_region = (
                latest_mix.set_index("Region")["RevenueShare"] * company_anchor_revenue
            ).to_dict()
    else:
        forecast_anchor_period = pvm_last_period
        company_anchor_revenue = float(latest_mix["Revenue_USDm"].sum())
        anchor_method = "Actual regional revenue at the latest PVM period"
        anchor_revenue_by_region = latest_mix.set_index("Region")["Revenue_USDm"].to_dict()
    future_periods = next_periods(forecast_anchor_period, forecast_quarters)
    # Seasonal weight decays linearly from 1.0 to 0.55 across the horizon, so
    # later quarters lean more on the recent overall median.
    weights = np.linspace(1.0, 0.55, forecast_quarters)
    regional_history_parts = []
    regional_forecast_parts = []
    summary_rows = []

    # 3) Decompose and forecast each region separately
    for region in sorted(pvm_ex["Region"].unique()):
        region_hist = pvm_ex.loc[pvm_ex["Region"] == region].copy()
        region_hist = region_hist.sort_values(["FiscalYear", "Quarter"]).reset_index(drop=True)
        region_hist["PrevRevenue_USDm"] = region_hist["Revenue_USDm"].shift(1)
        region_hist["PrevVolume"] = region_hist["Volume"].shift(1)
        region_hist["PrevASP"] = region_hist["ASP"].shift(1)
        region_hist["RevenueGrowth_q"] = region_hist["Revenue_USDm"].pct_change()
        # PVM decomposition: price effect at prior volume, volume effect at
        # prior price, and mix as the residual so the three effects sum
        # exactly to the quarter-over-quarter revenue change.
        region_hist["PriceEffect_USDm"] = (
            (region_hist["ASP"] - region_hist["PrevASP"]) * region_hist["PrevVolume"]
        )
        region_hist["VolumeEffect_USDm"] = (
            region_hist["PrevASP"] * (region_hist["Volume"] - region_hist["PrevVolume"])
        )
        region_hist["MixEffect_USDm"] = (
            region_hist["Revenue_USDm"]
            - region_hist["PrevRevenue_USDm"]
            - region_hist["PriceEffect_USDm"]
            - region_hist["VolumeEffect_USDm"]
        )
        # Express each USDm effect as a growth rate on prior revenue.
        base_prev_revenue = region_hist["PrevRevenue_USDm"].replace(0, np.nan)
        region_hist["PriceGrowthEff_q"] = region_hist["PriceEffect_USDm"] / base_prev_revenue
        region_hist["VolumeGrowthEff_q"] = region_hist["VolumeEffect_USDm"] / base_prev_revenue
        region_hist["MixGrowthEff_q"] = region_hist["MixEffect_USDm"] / base_prev_revenue
        region_hist = region_hist.replace([np.inf, -np.inf], np.nan)
        # Fit seasonal (per-quarter) and recent medians on complete rows only.
        fit = region_hist[["Quarter", *metric_cols]].dropna().copy()
        if fit.empty:
            seasonal = pd.DataFrame(columns=metric_cols)
            recent = pd.Series({col: 0.0 for col in metric_cols}, dtype=float)
        else:
            seasonal = fit.groupby("Quarter")[metric_cols].median(numeric_only=True)
            recent = safe_recent_median(fit)
        region_future = pd.DataFrame(index=future_periods)
        region_future["Region"] = region
        region_future["Period"] = region_future.index
        region_future["FiscalYear"] = [split_period_key(p)[0] for p in region_future.index]
        region_future["Quarter"] = [split_period_key(p)[1] for p in region_future.index]
        region_future["PriceSignal_Assump_q"] = build_blended_series(
            region_future["Quarter"], seasonal, recent, "PriceGrowthEff_q", weights
        )
        region_future["VolumeSignal_Assump_q"] = build_blended_series(
            region_future["Quarter"], seasonal, recent, "VolumeGrowthEff_q", weights
        )
        region_future["MixSignal_Assump_q"] = build_blended_series(
            region_future["Quarter"], seasonal, recent, "MixGrowthEff_q", weights
        )
        # Total growth is the sum of the three signals, clipped to [-25%, +30%].
        region_future["FinalRevenueGrowth_q"] = (
            region_future["PriceSignal_Assump_q"]
            + region_future["VolumeSignal_Assump_q"]
            + region_future["MixSignal_Assump_q"]
        ).clip(lower=-0.25, upper=0.30)
        # Compound the growth path from the region's anchor revenue.
        anchor_revenue = float(anchor_revenue_by_region.get(region, region_hist["Revenue_USDm"].iloc[-1]))
        revenue = anchor_revenue
        prev_revenues = []
        revenue_path = []
        for growth in region_future["FinalRevenueGrowth_q"]:
            prev_revenues.append(revenue)
            revenue = revenue * (1.0 + float(growth))
            revenue_path.append(revenue)
        region_future["AnchorPeriod"] = forecast_anchor_period
        region_future["AnchorMethod"] = anchor_method
        region_future["AnchorRevenue_USDm"] = anchor_revenue
        region_future["PrevRevenue_USDm"] = prev_revenues
        region_future["RevenueForecast"] = revenue_path
        # Store forecast contributions in USDm so total-company drivers can be
        # aggregated after regional forecasting is complete.
        denom = region_future["PrevRevenue_USDm"].replace(0, np.nan)
        region_future["PriceEffectForecast_USDm"] = denom * region_future["PriceSignal_Assump_q"]
        region_future["VolumeEffectForecast_USDm"] = denom * region_future["VolumeSignal_Assump_q"]
        region_future["MixEffectForecast_USDm"] = denom * region_future["MixSignal_Assump_q"]
        regional_history_parts.append(region_hist)
        regional_forecast_parts.append(region_future.reset_index(drop=True))
        summary_rows.append(
            {
                "Region": region,
                "LastActualPVMPeriod": str(region_hist["Period"].iloc[-1]),
                "ForecastAnchorPeriod": forecast_anchor_period,
                "AnchorMethod": anchor_method,
                "AnchorRevenue_USDm": anchor_revenue,
                "RecentRevenueGrowth_q": float(recent.get("RevenueGrowth_q", 0.0)),
                "RecentPrice_q": float(recent.get("PriceGrowthEff_q", 0.0)),
                "RecentVolume_q": float(recent.get("VolumeGrowthEff_q", 0.0)),
                "RecentMix_q": float(recent.get("MixGrowthEff_q", 0.0)),
            }
        )
    regional_history = pd.concat(regional_history_parts, ignore_index=True)
    regional_history = regional_history.sort_values(["Region", "FiscalYear", "Quarter"]).reset_index(drop=True)
    regional_forecast = pd.concat(regional_forecast_parts, ignore_index=True)
    regional_forecast = regional_forecast.sort_values(["Region", "FiscalYear", "Quarter"]).reset_index(drop=True)

    # 4) Aggregate to company level only after all regional forecasts are done
    company_forecast = (
        regional_forecast.groupby(["Period", "FiscalYear", "Quarter"], as_index=False)[
            [
                "PrevRevenue_USDm",
                "PriceEffectForecast_USDm",
                "VolumeEffectForecast_USDm",
                "MixEffectForecast_USDm",
                "RevenueForecast",
            ]
        ]
        .sum()
        .sort_values(["FiscalYear", "Quarter"])
    )
    # Re-derive company-level growth and signal rates from the summed USDm effects.
    company_forecast["RevenueGrowth_q"] = (
        company_forecast["RevenueForecast"] / company_forecast["PrevRevenue_USDm"].replace(0, np.nan)
    ) - 1.0
    company_forecast["PriceSignal_Assump_q"] = (
        company_forecast["PriceEffectForecast_USDm"] / company_forecast["PrevRevenue_USDm"].replace(0, np.nan)
    )
    company_forecast["VolumeSignal_Assump_q"] = (
        company_forecast["VolumeEffectForecast_USDm"] / company_forecast["PrevRevenue_USDm"].replace(0, np.nan)
    )
    company_forecast["MixSignal_Assump_q"] = (
        company_forecast["MixEffectForecast_USDm"] / company_forecast["PrevRevenue_USDm"].replace(0, np.nan)
    )
    company_forecast = company_forecast.replace([np.inf, -np.inf], np.nan)
    company_forecast = company_forecast.set_index("Period")

    # 5) Build total-company summary from regional decomposition after the fact
    company_history = (
        regional_history.groupby(["Period", "FiscalYear", "Quarter"], as_index=False)[
            [
                "Revenue_USDm",
                "Volume",
                "PriceEffect_USDm",
                "VolumeEffect_USDm",
                "MixEffect_USDm",
            ]
        ]
        .sum()
        .sort_values(["FiscalYear", "Quarter"])
    )
    company_history["PrevRevenue_USDm"] = company_history["Revenue_USDm"].shift(1)
    company_history["RevenueGrowth_q"] = company_history["Revenue_USDm"].pct_change()
    company_base = company_history["PrevRevenue_USDm"].replace(0, np.nan)
    company_history["PriceGrowthEff_q"] = company_history["PriceEffect_USDm"] / company_base
    company_history["VolumeGrowthEff_q"] = company_history["VolumeEffect_USDm"] / company_base
    company_history["MixGrowthEff_q"] = company_history["MixEffect_USDm"] / company_base
    company_fit = company_history[metric_cols].replace([np.inf, -np.inf], np.nan).dropna()
    company_recent = safe_recent_median(company_fit)
    summary_rows.append(
        {
            "Region": "Total Company",
            "LastActualPVMPeriod": pvm_last_period,
            "ForecastAnchorPeriod": forecast_anchor_period,
            "AnchorMethod": anchor_method,
            "AnchorRevenue_USDm": company_anchor_revenue,
            "RecentRevenueGrowth_q": float(company_recent.get("RevenueGrowth_q", 0.0)),
            "RecentPrice_q": float(company_recent.get("PriceGrowthEff_q", 0.0)),
            "RecentVolume_q": float(company_recent.get("VolumeGrowthEff_q", 0.0)),
            "RecentMix_q": float(company_recent.get("MixGrowthEff_q", 0.0)),
        }
    )
    summary = pd.DataFrame(summary_rows)
    return regional_history, regional_forecast, company_forecast, summary
# Printing the summaries and outputs.
# NOTE(review): `revenue_summary`, `q_agg`, and `rev_assumptions` are not
# defined anywhere in this file, and build_revenue_drivers_bottom_up is never
# called in the visible code — presumably an earlier/removed cell produced
# them. Confirm before rerunning the notebook top-to-bottom.
print("Quarterly PVM driver summary:")
print(f" Recent revenue growth median: {revenue_summary['recent_revenue_growth_q']:.4f}")
print(f" Recent price effect median : {revenue_summary['recent_price_q']:.4f}")
print(f" Recent volume effect median : {revenue_summary['recent_volume_q']:.4f}")
print(f" Recent mix effect median : {revenue_summary['recent_mix_q']:.4f}")
print()
print("PVM quarterly decomposition sample:")
display(q_agg[["FiscalYear", "Quarter", "Revenue_USDm", "Volume", "ASP", "PriceGrowthEff_q", "VolumeGrowthEff_q", "MixGrowthEff_q"]].tail(8).round(6))
print()
print("Forecast revenue driver table:")
display(rev_assumptions.round(6))
Quarterly PVM driver summary: Recent revenue growth median: 0.0770 Recent price effect median : 0.0676 Recent volume effect median : 0.0089 Recent mix effect median : 0.0019 PVM quarterly decomposition sample:
| FiscalYear | Quarter | Revenue_USDm | Volume | ASP | PriceGrowthEff_q | VolumeGrowthEff_q | MixGrowthEff_q | |
|---|---|---|---|---|---|---|---|---|
| Period | ||||||||
| 2024-Q3 | 2024 | 3 | 1,901.2000 | 570 | 3.3354 | 0.1809 | 0.0106 | 0.0019 |
| 2024-Q4 | 2024 | 4 | 1,531.2000 | 564 | 2.7149 | -0.1860 | -0.0105 | 0.0020 |
| 2025-Q1 | 2025 | 1 | 1,478.2000 | 565 | 2.6163 | -0.0363 | 0.0018 | -0.0001 |
| 2025-Q2 | 2025 | 2 | 1,685.6000 | 570 | 2.9572 | 0.1303 | 0.0089 | 0.0012 |
| 2025-Q3 | 2025 | 3 | 2,108.8000 | 579 | 3.6421 | 0.2316 | 0.0158 | 0.0037 |
| 2025-Q4 | 2025 | 4 | 1,661.8000 | 564 | 2.9465 | -0.1910 | -0.0259 | 0.0049 |
| 2026-Q1 | 2026 | 1 | 1,684.7000 | 569 | 2.9608 | 0.0049 | 0.0089 | 0.0000 |
| 2026-Q2 | 2026 | 2 | 1,966.3000 | 582 | 3.3785 | 0.1411 | 0.0228 | 0.0032 |
Forecast revenue driver table:
| FiscalYear | Quarter | PriceSignal_Assump_q | VolumeSignal_Assump_q | MixSignal_Assump_q | FinalRevenueGrowth_q | RevenueForecast | |
|---|---|---|---|---|---|---|---|
| 2026-Q4 | 2026 | 4 | -0.1660 | -0.0020 | 0.0003 | -0.1676 | 2,002.6792 |
| 2027-Q1 | 2027 | 1 | -0.0296 | 0.0124 | 0.0001 | -0.0172 | 1,968.2061 |
| 2027-Q2 | 2027 | 2 | 0.0958 | 0.0144 | 0.0013 | 0.1115 | 2,187.6398 |
| 2027-Q3 | 2027 | 3 | 0.1585 | 0.0179 | 0.0034 | 0.1798 | 2,580.8810 |
| 2027-Q4 | 2027 | 4 | -0.1059 | 0.0008 | 0.0007 | -0.1044 | 2,311.5155 |
| 2028-Q1 | 2028 | 1 | -0.0029 | 0.0114 | 0.0006 | 0.0091 | 2,332.4538 |
| 2028-Q2 | 2028 | 2 | 0.0875 | 0.0128 | 0.0015 | 0.1017 | 2,569.7135 |
| 2028-Q3 | 2028 | 3 | 0.1295 | 0.0150 | 0.0029 | 0.1475 | 2,948.6466 |
4) COGS and OpEx
def same_quarter_blend(seasonal_series: pd.Series, overall_value: float, quarter_index: pd.Series, seasonal_weight: float = 0.70) -> pd.Series:
    """Blend a per-quarter seasonal level with an overall level, one value per quarter.

    For each quarter in *quarter_index*, looks up the seasonal value (falling
    back to *overall_value* when the quarter is missing or NaN) and returns
    ``seasonal_weight * seasonal + (1 - seasonal_weight) * overall``. When the
    overall value is NaN, the seasonal value stands in for it.
    """
    blended = []
    for q in quarter_index:
        season = float(seasonal_series.get(q, overall_value))
        if pd.isna(season):
            season = overall_value
        baseline = season if pd.isna(overall_value) else overall_value
        blended.append(seasonal_weight * season + (1.0 - seasonal_weight) * baseline)
    return pd.Series(blended, index=quarter_index.index)
def build_income_statement_model(hist: pd.DataFrame, rev_assumptions: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Extend the historical quarterly income statement with a driver-based forecast.

    Each forecast margin line (gross margin, OpEx margin, EBIT bridge margin,
    DA margin) is projected via ``same_quarter_blend`` — the same-quarter
    historical median blended with a trailing eight-quarter overall median —
    then clipped to a hard-coded plausibility band. Interest expense and the
    tax rate are held flat at their normalized recent levels.

    Parameters
    ----------
    hist : period-indexed historical statement; must provide the margin,
        EBT and TaxRate columns referenced below.
    rev_assumptions : forecast driver table carrying FiscalYear, Quarter,
        the PVM signal columns and RevenueForecast.

    Returns
    -------
    tuple of (combined Historical+Forecast income statement, assumption table).
    """
    hist_model = hist.copy()
    # Trailing eight-quarter medians serve as the "recent overall" anchors for the blends.
    recent_gm = float(hist_model["GrossMargin"].tail(8).median())
    recent_opex = float(hist_model["OpExMargin"].tail(8).median())
    recent_da = float(hist_model["DAMargin"].tail(8).median())
    recent_ebit_bridge = float(hist_model["EBITBridgeMargin"].tail(8).median())
    interest_margin = float(hist_model["InterestMargin"].tail(8).median())
    # Normalize the tax rate on positive-EBT quarters only; loss quarters distort it.
    tax_rate_norm = float(hist_model.loc[hist_model["EBT"] > 0, "TaxRate"].tail(8).median())
    # Hard-coded fallbacks used when history is too sparse for a median.
    defaults = {
        "recent_gm": 0.65,
        "recent_opex": 0.55,
        "recent_da": 0.03,
        "recent_ebit_bridge": 0.0,
        "interest_margin": 0.007,
        "tax_rate_norm": 0.21,
    }
    if pd.isna(recent_gm):
        recent_gm = defaults["recent_gm"]
    if pd.isna(recent_opex):
        recent_opex = defaults["recent_opex"]
    if pd.isna(recent_da):
        recent_da = defaults["recent_da"]
    if pd.isna(recent_ebit_bridge):
        recent_ebit_bridge = defaults["recent_ebit_bridge"]
    if pd.isna(interest_margin):
        interest_margin = defaults["interest_margin"]
    if pd.isna(tax_rate_norm):
        tax_rate_norm = defaults["tax_rate_norm"]
    forecast = rev_assumptions.copy()
    # Gross margin: same-quarter blend, clipped to a 25%-80% plausibility band.
    forecast["GrossMargin"] = same_quarter_blend(
        hist_model.groupby("Quarter")["GrossMargin"].median(),
        recent_gm,
        forecast["Quarter"],
    ).clip(0.25, 0.80)
    forecast["Revenue"] = forecast["RevenueForecast"]
    # COGS derived from the margin so Revenue - COGS == GrossProfit by construction.
    forecast["COGS"] = forecast["Revenue"] * (1.0 - forecast["GrossMargin"])
    forecast["GrossProfit"] = forecast["Revenue"] - forecast["COGS"]
    forecast["OpExMargin"] = same_quarter_blend(
        hist_model.groupby("Quarter")["OpExMargin"].median(),
        recent_opex,
        forecast["Quarter"],
    ).clip(0.30, 0.80)
    forecast["OperatingExpenses"] = forecast["Revenue"] * forecast["OpExMargin"]
    forecast["OperatingIncome"] = forecast["GrossProfit"] - forecast["OperatingExpenses"]
    # EBIT bridge: small non-operating adjustments, capped at +/-5% of revenue.
    forecast["EBITBridgeMargin"] = same_quarter_blend(
        hist_model.groupby("Quarter")["EBITBridgeMargin"].median(),
        recent_ebit_bridge,
        forecast["Quarter"],
    ).clip(-0.05, 0.05)
    forecast["EBITBridge"] = forecast["Revenue"] * forecast["EBITBridgeMargin"]
    forecast["EBIT"] = forecast["OperatingIncome"] + forecast["EBITBridge"]
    forecast["DAMargin"] = same_quarter_blend(
        hist_model.groupby("Quarter")["DAMargin"].median(),
        recent_da,
        forecast["Quarter"],
    ).clip(0.0, 0.10)
    forecast["DA"] = forecast["Revenue"] * forecast["DAMargin"]
    forecast["EBITDA"] = forecast["EBIT"] + forecast["DA"]
    # Interest modeled as a flat percent of revenue (trailing-median burden).
    forecast["InterestExpense"] = forecast["Revenue"] * interest_margin
    forecast["EBT"] = forecast["EBIT"] - forecast["InterestExpense"]
    forecast["TaxRate"] = tax_rate_norm
    # No tax benefit assumed on loss quarters (tax floored at zero).
    forecast["TaxExpense"] = np.where(forecast["EBT"] > 0, forecast["EBT"] * forecast["TaxRate"], 0.0)
    forecast["NetIncome"] = forecast["EBT"] - forecast["TaxExpense"]
    # Guard against division by zero-revenue quarters.
    forecast["NetMargin"] = forecast["NetIncome"] / forecast["Revenue"].replace(0, np.nan)
    forecast["EBITMargin"] = forecast["EBIT"] / forecast["Revenue"].replace(0, np.nan)
    forecast["Stage"] = "Forecast"
    historical = hist_model.copy()
    historical["Stage"] = "Historical"
    model_cols = [
        "FiscalYear", "Quarter", "Stage", "Revenue", "COGS", "GrossProfit", "GrossMargin", "OperatingExpenses",
        "OpExMargin", "OperatingIncome", "EBITBridge", "EBITBridgeMargin", "EBIT", "EBITMargin", "DA", "DAMargin",
        "EBITDA", "InterestExpense", "EBT", "TaxRate", "TaxExpense", "NetIncome", "NetMargin"
    ]
    # Stack history on top of forecast; growth recomputed on the combined series
    # so the first forecast quarter chains off the last historical quarter.
    model_is = pd.concat([historical[model_cols], forecast[model_cols]], axis=0)
    model_is["RevenueGrowth"] = model_is["Revenue"].pct_change()
    model_is = model_is[[
        "FiscalYear", "Quarter", "Stage", "Revenue", "RevenueGrowth", "COGS", "GrossProfit", "GrossMargin",
        "OperatingExpenses", "OpExMargin", "OperatingIncome", "EBITBridge", "EBITBridgeMargin", "EBIT", "EBITMargin",
        "DA", "DAMargin", "EBITDA", "InterestExpense", "EBT", "TaxRate", "TaxExpense", "NetIncome", "NetMargin"
    ]]
    # Human-readable audit of the explicit assumptions driving the forecast.
    assumption_table = pd.DataFrame(
        {
            "Assumption": [
                "Quarterly price effect",
                "Quarterly volume effect",
                "Quarterly mix effect",
                "Gross margin path",
                "Operating expense margin path",
                "EBIT bridge margin path",
                "DA margin path",
                "Interest expense burden",
                "Normalized tax rate",
                "Forecast horizon (quarters)",
            ],
            "Value": [
                float(rev_assumptions["PriceSignal_Assump_q"].median()),
                float(rev_assumptions["VolumeSignal_Assump_q"].median()),
                float(rev_assumptions["MixSignal_Assump_q"].median()),
                float(forecast["GrossMargin"].median()),
                float(forecast["OpExMargin"].median()),
                float(forecast["EBITBridgeMargin"].median()),
                float(forecast["DAMargin"].median()),
                interest_margin,
                tax_rate_norm,
                int(len(forecast)),
            ],
            "Basis": [
                "Same-quarter PVM median blended with recent overall median",
                "Same-quarter PVM median blended with recent overall median",
                "Same-quarter PVM median blended with recent overall median",
                "Same-quarter historical median blended with trailing eight-quarter median",
                "Same-quarter historical median blended with trailing eight-quarter median",
                "Same-quarter historical median blended with trailing eight-quarter median",
                "Same-quarter historical median blended with trailing eight-quarter median",
                "Trailing eight-quarter median as a percent of revenue",
                "Median tax rate on positive-EBT quarters",
                "Configured forecast horizon",
            ],
        }
    )
    return model_is, assumption_table
# Build the combined historical + forecast income statement and the assumption
# audit table, then display both. tail(20) spans the historical/forecast seam.
model_is, assumption_table = build_income_statement_model(hist, rev_assumptions)
print("Explicit quarterly modeling assumptions:")
display(assumption_table)
print()
print("Model-ready quarterly income statement (tail):")
display(model_is.tail(20).round(4))
Explicit quarterly modeling assumptions:
| Assumption | Value | Basis | |
|---|---|---|---|
| 0 | Quarterly price effect | 0.0423 | Same-quarter PVM median blended with recent ov... |
| 1 | Quarterly volume effect | 0.0126 | Same-quarter PVM median blended with recent ov... |
| 2 | Quarterly mix effect | 0.0010 | Same-quarter PVM median blended with recent ov... |
| 3 | Gross margin path | 0.6609 | Same-quarter historical median blended with tr... |
| 4 | Operating expense margin path | 0.5461 | Same-quarter historical median blended with tr... |
| 5 | EBIT bridge margin path | 0.0051 | Same-quarter historical median blended with tr... |
| 6 | DA margin path | 0.0371 | Same-quarter historical median blended with tr... |
| 7 | Interest expense burden | 0.0066 | Trailing eight-quarter median as a percent of ... |
| 8 | Normalized tax rate | 0.2105 | Median tax rate on positive-EBT quarters |
| 9 | Forecast horizon (quarters) | 8.0000 | Configured forecast horizon |
Model-ready quarterly income statement (tail):
| FiscalYear | Quarter | Stage | Revenue | RevenueGrowth | COGS | GrossProfit | GrossMargin | OperatingExpenses | OpExMargin | OperatingIncome | EBITBridge | EBITBridgeMargin | EBIT | EBITMargin | DA | DAMargin | EBITDA | InterestExpense | EBT | TaxRate | TaxExpense | NetIncome | NetMargin | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2023-Q4 | 2023 | 4 | Historical | 1,540.8000 | -0.1591 | 590.2000 | 950.6000 | 0.6170 | 910.4000 | 0.5909 | 40.2000 | 16.1000 | 0.0104 | 56.3000 | 0.0365 | 57.2000 | 0.0371 | 113.5000 | 7.1000 | 49.2000 | 0.3435 | 16.9000 | 32.3000 | 0.0210 |
| 2024-Q1 | 2024 | 1 | Historical | 1,496.5000 | -0.0288 | 464.5000 | 1,032.0000 | 0.6896 | 865.6000 | 0.5784 | 166.4000 | 14.2000 | 0.0095 | 180.6000 | 0.1207 | 58.3000 | 0.0390 | 238.9000 | 10.0000 | 170.6000 | 0.2257 | 38.5000 | 132.1000 | 0.0883 |
| 2024-Q2 | 2024 | 2 | Historical | 1,633.0000 | 0.0912 | 562.9000 | 1,070.1000 | 0.6553 | 905.6000 | 0.5546 | 164.5000 | 11.0000 | 0.0067 | 175.5000 | 0.1075 | 58.5000 | 0.0358 | 234.0000 | 10.0000 | 165.5000 | 0.1124 | 18.6000 | 146.9000 | 0.0900 |
| 2024-Q3 | 2024 | 3 | Historical | 1,934.0000 | 0.1843 | 648.0000 | 1,286.0000 | 0.6649 | 968.3000 | 0.5007 | 317.7000 | 22.7000 | 0.0117 | 340.4000 | 0.1760 | 56.2000 | 0.0291 | 396.6000 | 10.6000 | 329.8000 | 0.1613 | 53.2000 | 276.6000 | 0.1430 |
| 2024-Q4 | 2024 | 4 | Historical | 1,567.9000 | -0.1893 | 524.2000 | 1,043.7000 | 0.6657 | 935.9000 | 0.5969 | 107.8000 | 15.3000 | 0.0098 | 123.1000 | 0.0785 | 56.0000 | 0.0357 | 179.1000 | 11.6000 | 111.5000 | 0.1865 | 20.8000 | 90.7000 | 0.0578 |
| 2025-Q1 | 2025 | 1 | Historical | 1,512.2000 | -0.0355 | 446.4000 | 1,065.8000 | 0.7048 | 857.3000 | 0.5669 | 208.5000 | 19.0000 | 0.0126 | 227.5000 | 0.1504 | 54.4000 | 0.0360 | 281.9000 | 10.9000 | 216.6000 | 0.2216 | 48.0000 | 168.6000 | 0.1115 |
| 2025-Q2 | 2025 | 2 | Historical | 1,726.0000 | 0.1414 | 570.3000 | 1,155.7000 | 0.6696 | 976.8000 | 0.5659 | 178.9000 | 20.6000 | 0.0119 | 199.5000 | 0.1156 | 55.9000 | 0.0324 | 255.4000 | 11.4000 | 188.1000 | 0.2137 | 40.2000 | 147.9000 | 0.0857 |
| 2025-Q3 | 2025 | 3 | Historical | 2,143.5000 | 0.2419 | 677.4000 | 1,466.1000 | 0.6840 | 1,076.4000 | 0.5022 | 389.7000 | 5.6000 | 0.0026 | 395.3000 | 0.1844 | 54.3000 | 0.0253 | 449.6000 | 11.6000 | 383.7000 | 0.2249 | 86.3000 | 297.4000 | 0.1387 |
| 2025-Q4 | 2025 | 4 | Historical | 1,697.3000 | -0.2082 | 532.0000 | 1,165.3000 | 0.6866 | 1,010.3000 | 0.5952 | 155.0000 | 17.5000 | 0.0103 | 172.5000 | 0.1016 | 55.0000 | 0.0324 | 227.5000 | 10.2000 | 162.3000 | 0.2052 | 33.3000 | 129.0000 | 0.0760 |
| 2026-Q1 | 2026 | 1 | Historical | 1,719.1000 | 0.0128 | 476.8000 | 1,242.3000 | 0.7226 | 968.7000 | 0.5635 | 273.6000 | 15.9000 | 0.0092 | 289.5000 | 0.1684 | 55.5000 | 0.0323 | 345.0000 | 11.5000 | 278.0000 | 0.2072 | 57.6000 | 220.4000 | 0.1282 |
| 2026-Q2 | 2026 | 2 | Historical | 2,010.7000 | 0.1696 | 644.3000 | 1,366.4000 | 0.6796 | 1,120.7000 | 0.5574 | 245.7000 | -1.6000 | -0.0008 | 244.1000 | 0.1214 | 57.0000 | 0.0283 | 301.1000 | 15.9000 | 228.2000 | 0.1547 | 35.3000 | 192.9000 | 0.0959 |
| 2026-Q3 | 2026 | 3 | Historical | 2,406.0000 | 0.1966 | 724.3000 | 1,681.7000 | 0.6990 | 1,178.6000 | 0.4899 | 503.1000 | -26.0000 | -0.0108 | 477.1000 | 0.1983 | 59.7000 | 0.0248 | 536.8000 | 13.3000 | 463.8000 | 0.2204 | 102.2000 | 361.6000 | 0.1503 |
| 2026-Q4 | 2026 | 4 | Forecast | 2,002.6792 | -0.1676 | 744.9087 | 1,257.7705 | 0.6280 | 1,174.9062 | 0.5867 | 82.8643 | 10.9058 | 0.0054 | 93.7701 | 0.0468 | 79.4918 | 0.0397 | 173.2619 | 13.3122 | 80.4579 | 0.2105 | 16.9328 | 63.5251 | 0.0317 |
| 2027-Q1 | 2027 | 1 | Forecast | 1,968.2061 | -0.0172 | 625.7379 | 1,342.4682 | 0.6821 | 1,110.7202 | 0.5643 | 231.7481 | 12.7943 | 0.0065 | 244.5423 | 0.1242 | 74.5598 | 0.0379 | 319.1021 | 13.0831 | 231.4592 | 0.2105 | 48.7118 | 182.7474 | 0.0928 |
| 2027-Q2 | 2027 | 2 | Forecast | 2,187.6398 | 0.1115 | 740.3307 | 1,447.3091 | 0.6616 | 1,154.8358 | 0.5279 | 292.4733 | 10.2088 | 0.0047 | 302.6821 | 0.1384 | 79.5654 | 0.0364 | 382.2475 | 14.5417 | 288.1404 | 0.2105 | 60.6406 | 227.4998 | 0.1040 |
| 2027-Q3 | 2027 | 3 | Forecast | 2,580.8810 | 0.1798 | 876.7271 | 1,704.1539 | 0.6603 | 1,340.0457 | 0.5192 | 364.1082 | 12.2401 | 0.0047 | 376.3483 | 0.1458 | 80.2135 | 0.0311 | 456.5619 | 17.1557 | 359.1927 | 0.2105 | 75.5940 | 283.5987 | 0.1099 |
| 2027-Q4 | 2027 | 4 | Forecast | 2,311.5155 | -0.1044 | 859.7822 | 1,451.7333 | 0.6280 | 1,356.0903 | 0.5867 | 95.6430 | 12.5875 | 0.0054 | 108.2305 | 0.0468 | 91.7503 | 0.0397 | 199.9809 | 15.3651 | 92.8654 | 0.2105 | 19.5440 | 73.3214 | 0.0317 |
| 2028-Q1 | 2028 | 1 | Forecast | 2,332.4538 | 0.0091 | 741.5406 | 1,590.9132 | 0.6821 | 1,316.2765 | 0.5643 | 274.6367 | 15.1620 | 0.0065 | 289.7987 | 0.1242 | 88.3582 | 0.0379 | 378.1570 | 15.5043 | 274.2944 | 0.2105 | 57.7267 | 216.5678 | 0.0928 |
| 2028-Q2 | 2028 | 2 | Forecast | 2,569.7135 | 0.1017 | 869.6303 | 1,700.0833 | 0.6616 | 1,356.5291 | 0.5279 | 343.5541 | 11.9917 | 0.0047 | 355.5459 | 0.1384 | 93.4616 | 0.0364 | 449.0074 | 17.0814 | 338.4644 | 0.2105 | 71.2316 | 267.2328 | 0.1040 |
| 2028-Q3 | 2028 | 3 | Forecast | 2,948.6466 | 0.1475 | 1,001.6573 | 1,946.9892 | 0.6603 | 1,530.9970 | 0.5192 | 415.9923 | 13.9842 | 0.0047 | 429.9765 | 0.1458 | 91.6437 | 0.0311 | 521.6201 | 19.6003 | 410.3762 | 0.2105 | 86.3658 | 324.0104 | 0.1099 |
5) Margin Drift and Outliers
def build_exception_report(
model_df: pd.DataFrame,
margin_thresholds_bps: dict[str, float] | None = None,
z_threshold: float = 2.0,
tol: float = 1e-6,
) -> pd.DataFrame:
if margin_thresholds_bps is None:
margin_thresholds_bps = {
"GrossMargin": 250.0,
"EBITMargin": 300.0,
"NetMargin": 300.0,
}
d = model_df.copy()
rows = []
for metric, threshold in margin_thresholds_bps.items():
delta_bps = d[metric].diff(4) * 10_000
bad = delta_bps[delta_bps.abs() > threshold].dropna()
for period_key, value in bad.items():
rows.append({
"Period": period_key,
"Stage": d.loc[period_key, "Stage"],
"CheckType": "MarginDrift",
"Rule": f"|YoY Delta {metric}| > {threshold:.0f} bps",
"Value": float(value),
"Severity": "Warning",
})
hist_mask = d["Stage"].eq("Historical")
for metric in ["RevenueGrowth", "GrossMargin", "EBITMargin", "NetMargin"]:
for quarter_num, base in d.loc[hist_mask, ["Quarter", metric]].dropna().groupby("Quarter"):
base_series = base[metric]
if len(base_series) < 3 or float(base_series.std(ddof=0)) == 0.0:
continue
mu = float(base_series.mean())
sd = float(base_series.std(ddof=0))
candidates = d.loc[d["Quarter"] == quarter_num, metric].dropna()
z_scores = (candidates - mu) / sd
bad = z_scores[z_scores.abs() > z_threshold]
for period_key, value in bad.items():
rows.append({
"Period": period_key,
"Stage": d.loc[period_key, "Stage"],
"CheckType": "Outlier",
"Rule": f"|SameQuarter z({metric})| > {z_threshold:.1f}",
"Value": float(value),
"Severity": "Warning",
})
gp_diff = (d["Revenue"] - d["COGS"] - d["GrossProfit"]).abs()
for period_key, value in gp_diff[gp_diff > tol].items():
rows.append({
"Period": period_key,
"Stage": d.loc[period_key, "Stage"],
"CheckType": "Mechanics",
"Rule": "Revenue - COGS must equal GrossProfit",
"Value": float(value),
"Severity": "Critical",
})
opinc_diff = (d["GrossProfit"] - d["OperatingExpenses"] - d["OperatingIncome"]).abs()
for period_key, value in opinc_diff[opinc_diff > tol].items():
rows.append({
"Period": period_key,
"Stage": d.loc[period_key, "Stage"],
"CheckType": "Mechanics",
"Rule": "GrossProfit - OpEx must equal OperatingIncome",
"Value": float(value),
"Severity": "Critical",
})
ebit_diff = (d["OperatingIncome"] + d["EBITBridge"] - d["EBIT"]).abs()
for period_key, value in ebit_diff[ebit_diff > tol].items():
rows.append({
"Period": period_key,
"Stage": d.loc[period_key, "Stage"],
"CheckType": "Mechanics",
"Rule": "OperatingIncome + EBITBridge must equal EBIT",
"Value": float(value),
"Severity": "Critical",
})
ebitda_diff = (d["EBIT"] + d["DA"] - d["EBITDA"]).abs()
for period_key, value in ebitda_diff[ebitda_diff > tol].items():
rows.append({
"Period": period_key,
"Stage": d.loc[period_key, "Stage"],
"CheckType": "Mechanics",
"Rule": "EBIT + DA must equal EBITDA",
"Value": float(value),
"Severity": "Critical",
})
ni_diff = (d["EBT"] - d["TaxExpense"] - d["NetIncome"]).abs()
for period_key, value in ni_diff[ni_diff > tol].items():
rows.append({
"Period": period_key,
"Stage": d.loc[period_key, "Stage"],
"CheckType": "Mechanics",
"Rule": "EBT - TaxExpense must equal NetIncome",
"Value": float(value),
"Severity": "Critical",
})
out = pd.DataFrame(rows)
if len(out) == 0:
return pd.DataFrame(columns=["Period", "Stage", "CheckType", "Rule", "Value", "Severity"])
severity_order = {"Critical": 0, "Warning": 1}
out["SeverityOrder"] = out["Severity"].map(severity_order).fillna(99)
out = out.sort_values(["SeverityOrder", "Period", "CheckType", "Rule"]).drop(columns="SeverityOrder")
return out.reset_index(drop=True)
# Run the exception scan on the full model and show the result; an empty
# report is announced explicitly so a silent pass is distinguishable.
exceptions = build_exception_report(model_is)
print(f"Exceptions found: {len(exceptions)}")
if len(exceptions) == 0:
    print("No exceptions triggered under current thresholds.")
else:
    display(exceptions)
# Two-panel chart: revenue level (left) and margin profile in percent (right),
# with a dashed vertical line marking the historical/forecast boundary.
fig, axes = plt.subplots(1, 2, figsize=(15, 4))
plot_idx = np.arange(len(model_is))
# Boundary sits halfway between the last historical and first forecast point.
historical_cutoff = (model_is["Stage"] == "Historical").sum() - 0.5
# Tick every 4th quarter and always include the final period label.
# NOTE(review): xticks[-1] assumes model_is is non-empty — confirm upstream.
xticks = np.arange(0, len(model_is), 4)
if xticks[-1] != len(model_is) - 1:
    xticks = np.append(xticks, len(model_is) - 1)
axes[0].plot(plot_idx, model_is["Revenue"], marker="o", label="Revenue")
axes[0].axvline(x=historical_cutoff, color="gray", linestyle="--", linewidth=1)
axes[0].set_title("Quarterly Revenue (USDm): historical + forecast")
axes[0].set_ylabel("USDm")
axes[0].set_xticks(xticks)
axes[0].set_xticklabels(model_is.index[xticks], rotation=45, ha="right")
axes[0].legend()
# Margins scaled to percent for readability.
axes[1].plot(plot_idx, model_is["GrossMargin"] * 100, marker="o", label="GrossMargin")
axes[1].plot(plot_idx, model_is["EBITMargin"] * 100, marker="o", label="EBITMargin")
axes[1].plot(plot_idx, model_is["NetMargin"] * 100, marker="o", label="NetMargin")
axes[1].axvline(x=historical_cutoff, color="gray", linestyle="--", linewidth=1)
axes[1].set_title("Quarterly Margin Profile (%)")
axes[1].set_ylabel("Percent")
axes[1].set_xticks(xticks)
axes[1].set_xticklabels(model_is.index[xticks], rotation=45, ha="right")
axes[1].legend()
plt.tight_layout()
plt.show()
Exceptions found: 68
| Period | Stage | CheckType | Rule | Value | Severity | |
|---|---|---|---|---|---|---|
| 0 | 2017-Q1 | Historical | Outlier | |SameQuarter z(GrossMargin)| > 2.0 | -2.1544 | Warning |
| 1 | 2017-Q2 | Historical | Outlier | |SameQuarter z(GrossMargin)| > 2.0 | -2.3869 | Warning |
| 2 | 2017-Q4 | Historical | MarginDrift | |YoY Delta EBITMargin| > 300 bps | -2,077.1786 | Warning |
| 3 | 2017-Q4 | Historical | MarginDrift | |YoY Delta NetMargin| > 300 bps | -1,936.4671 | Warning |
| 4 | 2018-Q1 | Historical | MarginDrift | |YoY Delta EBITMargin| > 300 bps | 873.4742 | Warning |
| ... | ... | ... | ... | ... | ... | ... |
| 63 | 2027-Q1 | Forecast | MarginDrift | |YoY Delta GrossMargin| > 250 bps | -405.6851 | Warning |
| 64 | 2027-Q1 | Forecast | MarginDrift | |YoY Delta NetMargin| > 300 bps | -353.5687 | Warning |
| 65 | 2027-Q3 | Forecast | MarginDrift | |YoY Delta EBITMargin| > 300 bps | -524.7428 | Warning |
| 66 | 2027-Q3 | Forecast | MarginDrift | |YoY Delta GrossMargin| > 250 bps | -386.6164 | Warning |
| 67 | 2027-Q3 | Forecast | MarginDrift | |YoY Delta NetMargin| > 300 bps | -404.0648 | Warning |
68 rows Γ 6 columns
6) Export Deliverables (CSV + LaTeX)
The final deliverables remain easy to share: a quarterly full model, a forecast slice, an exception report, a driver table, and a compact assumption table.
def export_deliverables(
model_is: pd.DataFrame,
rev_assumptions: pd.DataFrame,
assumption_table: pd.DataFrame,
exceptions: pd.DataFrame,
out_dir: Path,
) -> dict[str, Path]:
export_cols = [
"FiscalYear", "Quarter", "Stage", "Revenue", "RevenueGrowth", "COGS", "GrossProfit", "GrossMargin",
"OperatingExpenses", "OpExMargin", "OperatingIncome", "EBITBridge", "EBIT", "EBITMargin", "DA", "EBITDA",
"InterestExpense", "EBT", "TaxRate", "TaxExpense", "NetIncome", "NetMargin"
]
full_model_export = model_is[export_cols].copy().round(4)
forecast_export = full_model_export[full_model_export["Stage"] == "Forecast"].copy()
full_model_path = out_dir / "mini2_income_statement_full_model.csv"
forecast_csv_path = out_dir / "mini2_income_statement_forecast.csv"
assump_path = out_dir / "mini2_driver_assumptions.csv"
exceptions_path = out_dir / "mini2_exception_report.csv"
rev_driver_path = out_dir / "mini2_revenue_driver_table.csv"
full_model_export.to_csv(full_model_path, index_label="Period")
forecast_export.to_csv(forecast_csv_path, index_label="Period")
forecast_export.to_latex(forecast_tex_path, float_format="%.4f", index=True)
assumption_table.to_csv(assump_path, index=False)
exceptions.to_csv(exceptions_path, index=False)
rev_assumptions.round(6).to_csv(rev_driver_path, index_label="Period")
return {
"full_model": full_model_path,
"forecast_csv": forecast_csv_path,
"assumptions": assump_path,
"exceptions": exceptions_path,
"revenue_drivers": rev_driver_path,
}
# Write all deliverables to the Chapter 3 output folder and echo the paths
# so the notebook output documents exactly where each file landed.
export_paths = export_deliverables(
    model_is=model_is,
    rev_assumptions=rev_assumptions,
    assumption_table=assumption_table,
    exceptions=exceptions,
    out_dir=OUT_DIR,
)
print("Export complete:")
for _, path in export_paths.items():
    print(" ", path)
Export complete: /Users/theomachado/Library/CloudStorage/OneDrive-UniversityofFlorida/Spring 2026/FIN4453 - OFC/Mini projects/outputs/mini2_income_statement_full_model.csv /Users/theomachado/Library/CloudStorage/OneDrive-UniversityofFlorida/Spring 2026/FIN4453 - OFC/Mini projects/outputs/mini2_income_statement_forecast.csv /Users/theomachado/Library/CloudStorage/OneDrive-UniversityofFlorida/Spring 2026/FIN4453 - OFC/Mini projects/outputs/mini2_driver_assumptions.csv /Users/theomachado/Library/CloudStorage/OneDrive-UniversityofFlorida/Spring 2026/FIN4453 - OFC/Mini projects/outputs/mini2_exception_report.csv /Users/theomachado/Library/CloudStorage/OneDrive-UniversityofFlorida/Spring 2026/FIN4453 - OFC/Mini projects/outputs/mini2_revenue_driver_table.csv