# 2025-10-24 15:54:54 -03:00

# 83 lines
# 2.5 KiB
# Python

# file: forecast_ruptura_multioutput.py
import pandas as pd
import numpy as np
from datetime import timedelta
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import lightgbm as lgb
# =========================
# 1. Synthetic dataset
# =========================
# Seed NumPy's global RNG so the Poisson noise below is reproducible.
np.random.seed(42)
datas = pd.date_range(start="2024-03-01", periods=400, freq="D")

# Daily sales = constant level (50) + sinusoid with a 7-day period driven by
# the day of the year (amplitude 5) + Poisson(5) noise.
weekly_phase = 2*np.pi*datas.dayofyear/7
seasonal_part = 5*np.sin(weekly_phase)
noise_part = np.random.poisson(lam=5, size=len(datas))
vendas = 50 + seasonal_part + noise_part

df = pd.DataFrame({"data": datas, "vendas": vendas})
# =========================
# 2. Feature engineering
# =========================
def criar_features(df):
    """Return a copy of ``df`` with calendar, lag and rolling-mean features.

    Args:
        df: DataFrame with a datetime column ``"data"`` and a numeric column
            ``"vendas"``. Lags and rolling windows are positional, so rows
            are expected to already be in chronological order.

    Returns:
        A new DataFrame holding the input columns plus:
        ``dia_semana`` (weekday, Monday=0), ``mes`` (month number),
        ``lag_1`` / ``lag_7`` (sales 1 and 7 rows earlier) and
        ``mm_7`` / ``mm_30`` (7- and 30-row rolling means of sales).
        Leading rows without enough history contain NaN in the lag and
        rolling columns.
    """
    # Work on a copy so the caller's frame is not modified as a side effect.
    df = df.copy()

    df["dia_semana"] = df["data"].dt.weekday
    df["mes"] = df["data"].dt.month

    df["lag_1"] = df["vendas"].shift(1)
    df["lag_7"] = df["vendas"].shift(7)

    # shift(1) makes each rolling mean end at the previous row, so the
    # current row's own sales value never enters its features.
    df["mm_7"] = df["vendas"].rolling(7).mean().shift(1)
    df["mm_30"] = df["vendas"].rolling(30).mean().shift(1)
    return df
# Attach the engineered features to the sales frame.
df = criar_features(df)
# =========================
# 3. Build multi-step targets (7 days)
# =========================
horizon = 7
# target_d{k} holds the sales value k rows ahead of the current row.
future_targets = {
    f"target_d{step}": df["vendas"].shift(-step)
    for step in range(1, horizon + 1)
}
# Drop rows with any NaN: the leading rows lacking lag/rolling history and
# the trailing rows whose future targets fall past the end of the series.
df = df.assign(**future_targets).dropna()
# =========================
# 4. Train/test split
# =========================
feature_cols = ["dia_semana", "mes", "lag_1", "lag_7", "mm_7", "mm_30"]
target_cols = [f"target_d{step}" for step in range(1, horizon + 1)]
X = df.loc[:, feature_cols]
y = df.loc[:, target_cols]

# Positional split with no shuffling: the first 80% of the rows are used for
# training and the remaining rows for testing.
train_size = int(len(df) * 0.8)
X_train = X.iloc[:train_size]
X_test = X.iloc[train_size:]
y_train = y.iloc[:train_size]
y_test = y.iloc[train_size:]
# =========================
# 5. Train the multi-output model
# =========================
# MultiOutputRegressor fits one clone of the base regressor per target
# column, i.e. one LightGBM model for each forecast step.
base_model = lgb.LGBMRegressor()
model = MultiOutputRegressor(estimator=base_model).fit(X_train, y_train)
# =========================
# 6. Evaluate on the test set
# =========================
y_pred = model.predict(X_test)
# Report the error separately for each forecast step (column i <-> D+(i+1)).
for i in range(horizon):
    mae = mean_absolute_error(y_test.iloc[:, i], y_pred[:, i])
    # RMSE is computed as sqrt(MSE) rather than with `squared=False`: that
    # keyword was deprecated in scikit-learn 1.4 and later removed, so passing
    # it raises TypeError on current releases. np.sqrt works on every version.
    rmse = np.sqrt(mean_squared_error(y_test.iloc[:, i], y_pred[:, i]))
    print(f"D+{i+1}: MAE={mae:.2f}, RMSE={rmse:.2f}")
# =========================
# 7. Example of an out-of-sample forecast
# =========================
# `df` lost its last `horizon` rows in dropna() (their targets are NaN), so
# X.iloc[[-1]] would "forecast" days that are already present in the data.
# Rebuild the features over the full sales series instead and use the row of
# the last observed day, so the predictions cover the days after the data ends.
full_history = criar_features(pd.DataFrame({"data": datas, "vendas": vendas}))
X_last = full_history[list(X.columns)].iloc[[-1]]
y_future = model.predict(X_last)[0]

# Forecast dates start the day after the last observed date.
last_date = full_history["data"].iloc[-1]
future_dates = [last_date + timedelta(days=i) for i in range(1, horizon+1)]
df_forecast = pd.DataFrame({"data": future_dates, "previsao_vendas": y_future})
print("\nPrevisão para os próximos 7 dias:")
print(df_forecast)