• Player explorer
  • Compare players
  • Disagreement
  • Reality check
  • About
    • About FFHedge
    • Methodology
    • Calibration
  • reluctant criminologists

Reality check

How good are these projections, really? The honest answer is that they are good when the choice is easy and a coin flip when it is hard, which is most of what careful prediction looks like once you stop grading it on the easy cases.

Every week you are really making a string of pairwise bets: start this player or that one. The grid below scores those bets across the 2025 season. Each cell takes a matchup between two ranking tiers and asks how often the model’s higher-projected player actually outscored the other. Start a top-twelve player over a waiver-wire replacement and the model is right roughly three-quarters to four-fifths of the time, which is genuine and useful skill on the most common call you face. Ask it to separate two players the consensus already rates as peers and it drops to the coin-flip line; when two options are truly alike, no projection here can reliably order them, and you are better off reading the calibrated probabilities and their uncertainty than trusting the pick. It is the same shape that shows up wherever prediction is tested honestly — telling unlike things apart is easy, telling alike things apart is hard — which is exactly why a model can look sharp across a whole slate and shrug at the call you actually agonized over.

Code
// Shared colour tokens for every chart and card on the page.
palette = ({
  accent: "#93c54b",
  accentDark: "#7aa83c",
  // warm brown: the expert-driven (Model A) signal
  expert: "#b5651d",
  // muted blue: the data-driven (Model B) signal
  data: "#3a6ea5",
  // ink: the blended predictive
  mixture: "#3e3f3a",
  sand: "#f8f5f0",
  bust: "#c9b8a3",
  held: "#dfe7c8",
  strong: "#b9d68a",
  leagueWinner: "#93c54b"
})

// Format a probability (0..1) as a whole-number percentage without false
// precision: values under 1% print as "<1%", values over 99% as ">99%", and a
// missing or non-numeric input prints as an em dash.
fmtPct = function (p) {
  const missing = p == null || isNaN(p);
  if (missing) return "—";
  return p < 0.01 ? "<1%"
       : p > 0.99 ? ">99%"
       : `${(100 * p).toFixed(0)}%`;
}

// Format a fantasy-point value with exactly one decimal place; a missing or
// non-numeric input prints as an em dash.
fmtFp = function (x) {
  return (x == null || isNaN(x)) ? "—" : x.toFixed(1);
}

// Turn the three exceedance probabilities — P(exceed floor), P(exceed target),
// P(exceed ceiling) — into the four narrative-bin probabilities. Each bin is the
// gap between neighbouring exceedances, clamped at zero so an out-of-order
// input never yields a negative share.
narrativeProbs = function (pFloor, pTarget, pCeiling) {
  const nonNegative = v => Math.max(0, v);
  const bust = nonNegative(1 - pFloor);
  const held_up = nonNegative(pFloor - pTarget);
  const strong = nonNegative(pTarget - pCeiling);
  const league_winner = nonNegative(pCeiling);
  return { bust, held_up, strong, league_winner };
}

// Bucket an ECR rank into its tier label (the same bins as the export's
// ecr_tier column). Returns null when the rank is missing or not a number.
ecrTier = function (ecr) {
  if (ecr == null || isNaN(ecr)) return null;
  // [inclusive upper bound, label], checked in ascending order.
  const bins = [
    [5, "1-5"],
    [12, "6-12"],
    [24, "13-24"],
    [48, "25-48"],
    [96, "49-96"]
  ];
  const match = bins.find(([upper]) => ecr <= upper);
  return match ? match[1] : "97+";
}

// Display label for a rank. Flex mode mixes WR and RB, so two players can both
// be "#6"; there the rounded rank is prefixed with the player's position
// (RB6 / WR6, with "WR" used when the position is missing). In single-position
// mode the bare rounded rank is returned. A missing or non-numeric rank prints
// as an em dash. The second argument is meant to be a rank (the dense posRank
// produced by addPosRank), not the raw continuous ECR.
ecrDisp = function (pos, ecr, isFlex) {
  if (ecr == null || isNaN(ecr)) return "—";
  const rank = Math.round(ecr);
  if (!isFlex) return `${rank}`;
  return `${pos ?? "WR"}${rank}`;
}

// Attach a gapless 1..N rank within each position. ecr is a continuous average
// rank, so rounding it for display gives repeated or skipped integers; ordering
// each position's rows by ecr and numbering them restores a clean ordinal.
// Rows without a position are grouped under "WR"; rows without an ecr sort last
// (treated as 9999). Writes `posRank` onto each row in place and returns the
// same array, in its original order.
addPosRank = function (rows) {
  const sortKey = p => p.ecr ?? 9999;
  const groups = d3.group(rows, d => d.position ?? "WR");
  for (const members of groups.values()) {
    const ordered = [...members].sort((a, b) => d3.ascending(sortKey(a), sortKey(b)));
    ordered.forEach((p, idx) => { p.posRank = idx + 1; });
  }
  return rows;
}

// Weighted average of one field from the Expert marginal (em) and the Data
// marginal (dm), where w is the weight on Expert. This is exact for exceedance
// probabilities and the mean, so use it for every Blend number — but never for
// percentiles, which do not mix linearly. If only one side has the field, that
// side's value is returned as-is; if neither has it, the result is null.
blendField = (em, dm, w, field) => {
  const expertValue = em?.[field];
  const dataValue = dm?.[field];
  const haveBoth = expertValue != null && dataValue != null;
  if (haveBoth) return w * expertValue + (1 - w) * dataValue;
  return expertValue ?? dataValue ?? null;
}

// Slider-caption reference values: the data-optimal stacked weight for each
// position, i.e. what was deployed before Stage 1 moved to the 0.50 hedge.
// There is no Flex entry because a mixed pool has no single stacked weight.
stackedLean = ({
  WR: 0.378,
  RB: 0.077
})

// Full badge text for each WR archetype flag, keyed by the flag's column name.
archetypeLabel = ({
  "fill_in_situation":         "fill-in situation",
  "emerging_player_elevation": "emerging player",
  "late_season_expansion":     "late-season expansion",
  "recent_role_change":        "recent role change",
  "rookie_or_low_sample":      "rookie / low sample",
  "stable_veteran":            "stable veteran",
  "star_returning":            "star returning"
})

// Running-back archetype flags. The RB build carries its own seven: fill_in_rb,
// is_rookie and low_sample are taken directly from the feature table, and the
// remaining four are carry-share counterparts of the WR snap-share archetypes
// (see methodology).
rbFlagKeys = [
  "fill_in_rb",
  "is_rookie",
  "low_sample",
  "late_season_expansion",
  "recent_role_change",
  "stable_veteran",
  "star_returning"
]
// Full badge text per RB flag.
rbArchetypeLabel = ({
  fill_in_rb:            "fill-in (handcuff)",
  is_rookie:             "rookie",
  low_sample:            "low sample",
  late_season_expansion: "late-season expansion",
  recent_role_change:    "recent role change",
  stable_veteran:        "stable veteran",
  star_returning:        "star returning"
})
// Abbreviated badge text per RB flag, for tight layouts.
rbCompactLabel = ({
  fill_in_rb:            "fill-in",
  is_rookie:             "rookie",
  low_sample:            "low-smp",
  late_season_expansion: "late-exp",
  recent_role_change:    "role-chg",
  stable_veteran:        "stable",
  star_returning:        "star-ret"
})

// Floor / target / ceiling thresholds for every slot of every position. Kept
// separate from the locked-config files so that the Flex position's combined
// slots (WR1/RB1, WR2/RB2) map onto one set of numbers — the WR and RB tiers
// use identical values. Each slot gets its own object, so editing one slot's
// thresholds at runtime never leaks into another.
positionThresholds = (() => {
  const tier = (floor, target, ceiling) => ({ floor, target, ceiling });
  const slotsFor = (topSlot, secondSlot) => ({
    [topSlot]: tier(12, 16, 20),
    [secondSlot]: tier(10, 12, 15),
    Flex: tier(6, 10, 15)
  });
  return {
    WR: slotsFor("WR1", "WR2"),
    RB: slotsFor("RB1", "RB2"),
    Flex: slotsFor("WR1/RB1", "WR2/RB2")
  };
})()

// Choices for the slot dropdown. RB and Flex have their own lists (Flex uses
// the combined WR/RB tier labels because it mixes both pools); every other
// value, including WR, gets the WR list. A new array is returned on each call.
slotOptionsFor = function (position) {
  switch (position) {
    case "RB":
      return ["RB1", "RB2", "Flex"];
    case "Flex":
      return ["WR1/RB1", "WR2/RB2", "Flex"];
    default:
      return ["WR1", "WR2", "Flex"];
  }
}

// Resolve a selected slot to the column to read on a given row. The combined
// Flex-mode slots pick the row's own position column (WR1/RB1 reads RB1 on an
// RB row and WR1 on anything else; likewise WR2/RB2). Any other slot name is
// already a column name and is returned unchanged.
slotColFor = function (slot, rowPosition) {
  const isRbRow = rowPosition === "RB";
  switch (slot) {
    case "WR1/RB1":
      return isRbRow ? "RB1" : "WR1";
    case "WR2/RB2":
      return isRbRow ? "RB2" : "WR2";
    default:
      return slot;
  }
}

// Row-level lookups for Flex mode, where WR and RB rows share one table: each
// returns the RB variant for an "RB" row and the WR variant for anything else.
flagKeysForRow = (rowPosition) => {
  if (rowPosition === "RB") return rbFlagKeys;
  return FLAG_KEYS_WR;
};
compactLabelForRow = (rowPosition) => {
  if (rowPosition === "RB") return rbCompactLabel;
  return compactLabelWR;
};
fullLabelForRow = (rowPosition) => {
  if (rowPosition === "RB") return rbArchetypeLabel;
  return archetypeLabel;
};

// The WR flag keys and their abbreviated labels are defined alongside the
// per-row resolvers so those resolvers work on any page, without the page
// having to declare the WR set itself.
FLAG_KEYS_WR = [
  "fill_in_situation",
  "emerging_player_elevation",
  "late_season_expansion",
  "recent_role_change",
  "rookie_or_low_sample",
  "stable_veteran",
  "star_returning"
]
compactLabelWR = ({
  fill_in_situation:         "fill-in",
  emerging_player_elevation: "emerging",
  late_season_expansion:     "late-exp",
  recent_role_change:        "role-chg",
  rookie_or_low_sample:      "rookie/ls",
  stable_veteran:            "stable",
  star_returning:            "star-ret"
})
Code
// Register the five parquet exports as tables on one DuckDB client. The object
// keys (tier, wrSurp, wrPred, rbSurp, rbPred) are the table names that the
// SELECT statements below refer to.
// NOTE(review): FileAttachment paths are presumably resolved from literal
// strings — confirm before generating these entries from a list.
db = DuckDBClient.of({
  tier:   FileAttachment("data/tier_matrix.parquet"),
  wrSurp: FileAttachment("data/surprises.parquet"),
  wrPred: FileAttachment("data/predictives.parquet"),
  rbSurp: FileAttachment("data/rb_surprises.parquet"),
  rbPred: FileAttachment("data/rb_predictives.parquet")
})
// Load every table in full, one cell per table; the cells further down filter
// and join these rows in JavaScript rather than in SQL.
tierMatrix = db.query(`SELECT * FROM tier`)
wrSurprises = db.query(`SELECT * FROM wrSurp`)
wrPredictives = db.query(`SELECT * FROM wrPred`)
rbSurprises = db.query(`SELECT * FROM rbSurp`)
rbPredictives = db.query(`SELECT * FROM rbPred`)

// Turn a display name into a URL-fragment slug: lower-case, accents removed,
// every run of non-alphanumeric characters collapsed to a single hyphen, and no
// leading or trailing hyphen. null/undefined give "", and any other non-string
// is converted to a string first instead of throwing.
slugify = function (s) {
  return String(s ?? "")
    .toLowerCase()
    // NFD splits an accented letter into base letter + combining mark...
    .normalize("NFD")
    // ...and the marks (U+0300–U+036F) are dropped. The range is written with
    // escapes so it stays visible and survives any re-encoding of this file.
    .replace(/[\u0300-\u036f]/g, "")
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-+|-+$/g, "");
}

// Build the surprise records for one position pool ("RB", "Flex" = WR + RB, or
// anything else = WR). Each surprise row is joined by player and week to that
// player-week's cross_blend predictive row, which supplies the name, team, ECR,
// mean, p10/p90 and realized score. Surprise rows with no matching predictive,
// or whose predictive has no realized score, are left out of `records`.
// `finish` is the player's 1-based place by realized score within the same week
// and position. Every record also carries its own player-explorer href, so the
// card renderer needs no separate lookup.
// Returns { surprises, records }, where `surprises` is the unjoined pool.
buildRecords = function (pos) {
  let surprises;
  let predictives;
  if (pos === "RB") {
    surprises = rbSurprises;
    predictives = rbPredictives;
  } else if (pos === "Flex") {
    surprises = wrSurprises.concat(rbSurprises);
    predictives = wrPredictives.concat(rbPredictives);
  } else {
    surprises = wrSurprises;
    predictives = wrPredictives;
  }

  // Join key shared by surprise and predictive rows.
  const keyOf = row => `${String(row.player_id)}|${Number(row.week)}`;

  const blendRows = predictives.filter(d => d.predictive === "cross_blend");
  const blendByKey = new Map(blendRows.map(d => [keyOf(d), d]));

  // Weekly finishing place within each position, among rows that have a score.
  const finishByKey = new Map();
  const scoredRows = blendRows.filter(d => d.realized_fp != null);
  const weeklyPools = d3.group(scoredRows, d => `${Number(d.week)}|${d.position ?? "WR"}`);
  for (const pool of weeklyPools.values()) {
    const ranked = pool.slice().sort((a, b) => d3.descending(a.realized_fp, b.realized_fp));
    ranked.forEach((row, idx) => finishByKey.set(keyOf(row), idx + 1));
  }

  const records = surprises.flatMap(s => {
    const key = keyOf(s);
    const b = blendByKey.get(key);
    if (!b || b.realized_fp == null) return [];
    const week = Number(s.week);
    return [{
      id: String(s.player_id), week,
      p_atleast: s.p_atleast, p_atmost: s.p_atmost,
      name: b.player_display_name, team: b.team, ecr: b.ecr_rank,
      position: b.position ?? "WR",
      mean: b.mean, p10: b.p10, p90: b.p90, realized: b.realized_fp,
      finish: finishByKey.get(key),
      href: `index.html#week=${week}&player=${slugify(b.player_display_name ?? "")}`
    }];
  });
  return { surprises, records };
}

// Small inline SVG summarising one record: a faint baseline, a bar spanning
// p10–p90, a dashed tick at the mean and a dot at the realized score. The
// horizontal scale runs from the smallest of p10, realized and 0 up to the
// larger of p90 and realized; if that range is empty the divisor falls back to
// 1 so the positions stay finite.
strip = function (r) {
  const width = 230, height = 26, margin = 7;
  const midY = height / 2;
  const domainMin = Math.min(r.p10, r.realized, 0);
  const domainMax = Math.max(r.p90, r.realized);
  const domainSpan = (domainMax - domainMin) || 1;
  const toX = v => margin + (width - 2 * margin) * (v - domainMin) / domainSpan;
  const xP10 = toX(r.p10);
  const xP90 = toX(r.p90);
  const xMean = toX(r.mean);
  const xRealized = toX(r.realized);
  // The bar is kept at least 1px wide so a degenerate range is still visible.
  const barWidth = Math.max(1, xP90 - xP10);
  return html`<svg width="${width}" height="${height}" style="display:block;margin:0.25rem 0;">
    <line x1="${margin}" y1="${midY}" x2="${width - margin}" y2="${midY}" stroke="var(--rc-sand-panel)" stroke-width="2"></line>
    <rect x="${xP10}" y="${midY - 4}" width="${barWidth}" height="8" rx="2" fill="${palette.strong}" opacity="0.55"></rect>
    <line x1="${xMean}" y1="${midY - 7}" x2="${xMean}" y2="${midY + 7}" stroke="${palette.mixture}" stroke-width="1.5" stroke-dasharray="2,2"></line>
    <circle cx="${xRealized}" cy="${midY}" r="4.5" fill="${palette.accent}" stroke="#fff" stroke-width="1"></circle>
  </svg>`;
}

// Render one surprise card: linked player name, team and week, the
// distribution strip, a sentence stating how unlikely the outcome was, and a
// ranked-versus-finished line.
//   r    — a record as produced by buildRecords
//   kind — "up" reads r.p_atleast (upside surprise); anything else reads
//          r.p_atmost (downside surprise)
// The probability goes through fmtPct so the page's "no false precision" rule
// holds here too: the biggest surprises have probabilities well under 1%, and
// plain rounding would print them all as "0%".
card = function (r, kind) {
  const isUp = kind === "up";
  const chance = fmtPct(isUp ? r.p_atleast : r.p_atmost);
  // "<1%" and ">99%" are already bounds, so "about" is dropped for them.
  const isBound = chance.startsWith("<") || chance.startsWith(">");
  // 8%, 11%, 18% and 80–89% are read with a vowel sound and take "an".
  const article = /^(8\d?|11|18)%$/.test(chance) ? "an" : "a";
  const odds = isBound ? `a ${chance}` : `about ${article} ${chance}`;
  const scored = fmtFp(r.realized);
  const line = isUp
    ? `The model gave ${r.name} ${odds} chance of scoring ${scored} or more in Week ${r.week}; they put up ${scored} fp.`
    : `The model gave ${r.name} ${odds} chance of scoring ${scored} or fewer in Week ${r.week}; they managed ${scored} fp.`;
  const ecrTxt = r.ecr != null ? Math.round(r.ecr) : "—";
  const finTxt = r.finish ?? "—";
  const pos = r.position ?? "WR";
  const ctx = isUp
    ? `Ranked ${pos}${ecrTxt}, finished as ${pos}${finTxt} that week.`
    : `Ranked ${pos}${ecrTxt}, finished as ${pos}${finTxt}.`;
  return html`<div class="disagreement-card" style="margin-bottom:0.6rem;">
    <div style="display:flex;justify-content:space-between;align-items:baseline;gap:0.5rem;">
      <a href="${r.href}" style="font-weight:700;">${r.name}</a>
      <span style="color:var(--rc-muted);font-size:0.85rem;">${r.team ?? "—"} · Week ${r.week}</span>
    </div>
    ${strip(r)}
    <div style="font-size:0.9rem;margin-top:0.2rem;">${line}</div>
    <div style="font-size:0.8rem;color:var(--rc-muted);margin-top:0.15rem;">${ctx}</div>
  </div>`;
}

The start/sit scorecard

Read each cell as a hit rate: across head-to-head weeks, how often the higher-ranked player actually outscored the lower-ranked one — whether the two sit in the same tier (the diagonal) or in different tiers (everywhere else). A coin flip is 0.50; higher is better, and the gap between two players’ tiers is roughly how far above a coin flip you can expect to be.

Code
// Ranker toggle for the grid: the deployed Blend (default) vs raw ECR consensus.
viewof gridRanker = Inputs.radio(["Blend","ECR"], { value: "Blend", label: "Ranker" })
Code
// Position selector (WR / RB), persisted within the session. Drives the grid; the
// tier matrix is per-position (no Flex matrix), so a stored "Flex" coerces to "WR".
viewof position = (() => {
  const saved0 = (typeof sessionStorage !== "undefined" ? sessionStorage.getItem("ffhedge_position") : null);
  const saved = saved0 === "RB" ? "RB" : "WR";
  const radio = Inputs.radio(["WR","RB"], { value: saved, label: "Position" });
  radio.addEventListener("input", () => {
    try { sessionStorage.setItem("ffhedge_position", radio.value); } catch (e) {}
  });
  return radio;
})()
Code
{
  const TIERS = ["T1", "T2", "Flex", "Replacement"];
  const cells = tierMatrix.filter(d => d.position === position && d.ranker === gridRanker);
  // Colorblind-safe diverging scale: orange (below the coin flip) ↔ neutral sand
  // at 0.50 ↔ blue (above it). No red–green pairing.
  const cmap = d3.scaleDiverging()
    .domain([0.40, 0.50, 0.85])
    .interpolator(d3.piecewise(d3.interpolateRgb, ["#c8772b", "#ece3d2", "#2f6ea5"]));
  const plot = Plot.plot({
    width: 600, height: 460, marginLeft: 116, marginTop: 38, marginBottom: 44, marginRight: 24,
    padding: 0,
    x: { domain: TIERS, label: "tier of the other player →", tickSize: 0 },
    y: { domain: TIERS.slice().reverse(), label: "↑ tier of this player", tickSize: 0 },
    marks: [
      Plot.cell(cells, {
        x: "tier_col", y: "tier_row", fill: d => cmap(d.accuracy), inset: 0.5,
        title: d => `${d.tier_row} vs ${d.tier_col}: ${(100 * d.accuracy).toFixed(0)}% over ${d.n_pairs.toLocaleString()} pairs (95% CI ${d.ci_lo.toFixed(2)}–${d.ci_hi.toFixed(2)})`
      }),
      Plot.text(cells, {
        x: "tier_col", y: "tier_row", text: d => d.accuracy.toFixed(2),
        fill: d => (d.accuracy >= 0.62 || d.accuracy <= 0.45) ? "white" : palette.mixture,
        fontWeight: 600, fontSize: 14
      })
    ]
  });
  const stops = [0.42, 0.46, 0.50, 0.58, 0.66, 0.74, 0.82];
  const legend = html`<div style="display:flex;align-items:center;gap:2px;margin:0.2rem 0 0.5rem 0;font-size:0.78rem;color:var(--rc-muted);flex-wrap:wrap;">
    <span style="margin-right:6px;">worse than a coin flip</span>
    ${stops.map(v => html`<span title="${v.toFixed(2)}" style="display:inline-block;width:24px;height:14px;background:${cmap(v)};border:1px solid rgba(0,0,0,0.08);"></span>`)}
    <span style="margin-left:6px;">better</span>
    <span style="margin-left:10px;">— neutral sand marks the <strong>0.50</strong> coin-flip reference</span>
  </div>`;
  const head = html`<div style="font-weight:600;margin-bottom:0.2rem;">${position} · ${gridRanker} · 2025 season — the diagonal is the within-tier toss-up, the far corners the easy calls</div>`;
  return html`<div>${head}${legend}${plot}</div>`;
}

Pick accuracy by tier matchup on the 2025 season: for each pair of ranking tiers, the share of head-to-head weeks in which the higher-projected player outscored the other. Above the 0.50 line is better than a coin flip. The far corners (a tier-1 starter against a replacement) are the easy, common calls; the diagonal is the within-tier toss-up, where even the elite tier holds only a slim edge.

Note

One thing the grid cannot tell you is who hits their ceiling. Ordering two players is one task; calling who erupts for thirty is another, and on that one every model here, and the experts too, sits at a coin flip. The biggest surprises below are mostly booms the model had already marked unlikely, which is the honest signature of calibrated uncertainty rather than a missed prediction.

Biggest surprises

Code
// The surprises list carries its OWN position radio so a reader does not scroll
// back up to switch. Seeded from the shared ffhedge_position session key (like the
// top radio and the disagreement-page chart radio), it does not write that key.
viewof surpPosition = (() => {
  const saved = (typeof sessionStorage !== "undefined" ? sessionStorage.getItem("ffhedge_position") : null) ?? "WR";
  return Inputs.radio(["WR","RB","Flex"], { value: saved, label: "Position" });
})()
Code
// Boom/bust toggle for the single list below (default Upside).
viewof surpKind = Inputs.radio(["Upside","Downside"], { value: "Upside", label: "Surprise type" })
Code
// Week selector (UI-only; the data carries every player-week). "All weeks"
// reproduces the season-retrospective default; the 2026 rollout will flip this
// default to "last week" (see ROADMAP.md).
viewof surpWeek = Inputs.select(["All weeks"].concat(d3.range(1, 18)),
  { value: "All weeks", label: "Week (2025)" })
Code
// Joined records for the position chosen on the surprises radio.
surpData = buildRecords(surpPosition)
// The raw surprise rows and the joined records, each narrowed to the selected
// week unless "All weeks" is chosen.
surprisesView = surpWeek !== "All weeks"
  ? surpData.surprises.filter(s => Number(s.week) === surpWeek)
  : surpData.surprises
recordsView = surpWeek !== "All weeks"
  ? surpData.records.filter(r => r.week === surpWeek)
  : surpData.records
Code
{
  const n = surprisesView.length;
  const above = surprisesView.filter(s => s.p_atleast < 0.10).length;
  const below = surprisesView.filter(s => s.p_atmost < 0.10).length;
  const pa = 100 * above / n, pb = 100 * below / n;
  const near = v => Math.abs(v - 10) <= 2 ? "close to" : (v > 10 ? "a little above" : "a little below");
  const qual = (near(pa) === near(pb)) ? near(pa) : "in the neighborhood of";
  const noun = surpPosition === "RB" ? "running-back" : (surpPosition === "Flex" ? "player" : "receiver");
  const span = surpWeek === "All weeks" ? "of the 2025 NFL season" : `in Week ${surpWeek} (2025)`;
  return html`<p>Across the ${n.toLocaleString()} ${noun}-weeks ${span}, <strong>${pa.toFixed(1)}%</strong> finished above the model's 90th percentile and <strong>${pb.toFixed(1)}%</strong> below its 10th — ${qual} the one-in-ten (10%) a calibrated model expects at each tail.</p>`;
}
Code
{
  const kind = surpKind === "Upside" ? "up" : "down";
  const key = kind === "up" ? "p_atleast" : "p_atmost";
  const top = recordsView.slice().sort((a, b) => d3.ascending(a[key], b[key])).slice(0, 10);
  if (top.length === 0) return html`<p style="color:var(--rc-muted);">No player-weeks for this selection.</p>`;
  return html`<div>${top.map(r => card(r, kind))}</div>`;
}

FFHedge · 2025 season validation archive · a reluctant criminologists project.