From 584e738bd76a8cfb6247701dce5dde0dd52a724e Mon Sep 17 00:00:00 2001
From: winter-again <63322884+winter-again@users.noreply.github.com>
Date: Wed, 11 Mar 2026 15:16:06 -0400
Subject: [PATCH 1/4] Consolidate county and state population data into a
 single module

---
 README.md                  |   6 +-
 src/kintsugi/_data.py      |  27 +-
 src/kintsugi/county_pop.py | 176 ----------
 src/kintsugi/population.py | 678 ++++++++++++++++++++++++++++++++++++
 tests/county_pop_test.py   | 175 ----------
 tests/population_test.py   | 681 +++++++++++++++++++++++++++++++++++++
 6 files changed, 1380 insertions(+), 363 deletions(-)
 delete mode 100644 src/kintsugi/county_pop.py
 create mode 100644 src/kintsugi/population.py
 delete mode 100644 tests/county_pop_test.py
 create mode 100644 tests/population_test.py

diff --git a/README.md b/README.md
index c5e5ca0..d1a2131 100644
--- a/README.md
+++ b/README.md
@@ -39,13 +39,13 @@ counties = county_geo(2024)
 states = state_geo(2024)
 ```
 
-County and county-age population counts
+State and county population data, stratified by several different variables:
 
 ```python
-from kintsugi.county_pop import county_pop, county_age_pop
+from kintsugi.population import county_pop, state_age_pop
 
 lf_county_pop = county_pop(2024)
-lf_county_age_pop = county_age_pop(2024)
+lf_state_age_pop = state_age_pop(2024)
 ```
 
 Low-population county groups
diff --git a/src/kintsugi/_data.py b/src/kintsugi/_data.py
index ab4f6ff..69d334e 100644
--- a/src/kintsugi/_data.py
+++ b/src/kintsugi/_data.py
@@ -22,15 +22,24 @@
     "geo/cb_2020_us_state_5m.zip": "aedc60e0d1924a9030ee6d39ff0ed27ad7d1b0bc86807ea809391a6b9008ffb3",
     "geo/cb_2024_us_county_5m.zip": "a867f8734059b45d1d54a0ba56189dd7e73c42eb451418fa56de44c35232614b",
     "geo/cb_2024_us_state_5m.zip": "c9db0e395c11a1f94a8017fde4f4c7cbee1dca6eb37ba8f1ccaab927df70885f",
-    "pop/county_cc/county_pop_2016.parquet": "1d337d32b401b1d101f643e4f734dc62f6fc4659d9c168cab025fcfacdc930ec",
-    "pop/county_cc/county_pop_2017.parquet": "7f3d834d37d505baee184352cc3c2144cb5dde1745a51356c8b5debc0fddc768",
-    "pop/county_cc/county_pop_2018.parquet": "45e476c3bbe375b2de44b261bccec032609320de311cc95c02b1125216d8c748",
-    "pop/county_cc/county_pop_2019.parquet": "06081711d88339c4e2af398e3e7345d336b26b5c3a6148b00f0c4273b51a7f4b",
-    "pop/county_cc/county_pop_2020.parquet": "4ba406a680041dd3cb4025733fffe852383a9171dcd7ceaaa3fa4e551573dc57",
-    "pop/county_cc/county_pop_2021.parquet": "527c058c14b8de7826748bb883969bed8960a9e060e2aa010bd6367f41458306",
-    "pop/county_cc/county_pop_2022.parquet": "bffccaf83d23245378cbc900f5f7bc1740c7dd2c5085570b20d44649d5afcbc1",
-    "pop/county_cc/county_pop_2023.parquet": "dc5941017a40488424faae38fcca8b7032024523e823af17c0d539b657ee239a",
-    "pop/county_cc/county_pop_2024.parquet": "cae4e9e5d956dfdd60a68a06887e0c4a1f8918f81e09c8fe2015f3b1feb85d82",
+    "pop/county_cc/county_pop_2016.parquet": "74caad19bf5eed856ad9b6f63c65f7fceca612dec680d0768890de2265116607",
+    "pop/county_cc/county_pop_2017.parquet": "d93d027929861e115cf34b15f1ff7c697c8eaa327b73cd8132710a11860a63d5",
+    "pop/county_cc/county_pop_2018.parquet": "be3d3bab642a9f6f111c792a431f940b1753373194993885e4d47c136feed91a",
+    "pop/county_cc/county_pop_2019.parquet": "98801f118cd795c026a8269d5ac6674f98b9d47e0207c6a2721a5b7f4b6e5c08",
+    "pop/county_cc/county_pop_2020.parquet": "f1e4f282d297dc5498b6f839412c0815ca6f9e0a15d83d5d3867f2d70aa8413d",
+    "pop/county_cc/county_pop_2021.parquet": "3af369564ebb0e1fda25b440e5bf133ecb2d2eab60ab40f5db1f0a0955db713b",
+    "pop/county_cc/county_pop_2022.parquet": "977856eb5fffd508442ccedaa54c92e338b037135e5a9be55a03c7132863d9ca",
+    "pop/county_cc/county_pop_2023.parquet": "a4d66c302a557c1565ec9f43bad5ea9d4267576d1fbd17d8939e5a858a3d73e7",
+    "pop/county_cc/county_pop_2024.parquet": "12b16c7c20329a3df2f4120f6ec9a9a7313147fad0fd03bc360b1de5769c8abd",
+    "pop/state/state_pop_2016.parquet": "bac51c5ba4a9ff7305e92b3b2804c854fc20b9cbcf01156e5439d92668c0c81e",
+    "pop/state/state_pop_2017.parquet": "6fb950b1b78409af8130317b08b437b742c0906ff9d5c38655c1189103b8dddc",
+    "pop/state/state_pop_2018.parquet": "913fca35299028a842325000e58e33cd3912c1e900d480f00b468095398e57f8",
+    "pop/state/state_pop_2019.parquet": "7ca2c87065f24857178bb33a7512cb799a92890596bac6fff1cbeb3c69f6fc36",
+    "pop/state/state_pop_2020.parquet": "275b861e07f1c2327fb5382a28e84a5fb7ac4f896ae9f91b06612f6197af9611",
+    "pop/state/state_pop_2021.parquet": "8b47a5c9fdca838954c8ddac8265ad00d590281c7b444019070c81b9942a727e",
+    "pop/state/state_pop_2022.parquet": "ea113b3766c44bbf250e01b0b9509e810590119b3b9470b13dc347d43aed042b",
+    "pop/state/state_pop_2023.parquet": "e96a982342510fe6a1ba90fc85a9bd6fbdd8687bceaf76e6e117606429d2d160",
+    "pop/state/state_pop_2024.parquet": "b79bca471a68b8c3742ec30d41a2b65ab1227152e81239faf00763188752c6ff",
     "county_groups.parquet": "7d7c150b5efd5596e0eaaed27abd6dc86137f08ff677c2606d402b9d165b87fa",
     "state.txt": "bea4e03f71a1fa0045ae732aabad11fa541e5932b071c2369bb0d325e8cba5a0",
 }
diff --git a/src/kintsugi/county_pop.py b/src/kintsugi/county_pop.py
deleted file mode 100644
index 284780e..0000000
--- a/src/kintsugi/county_pop.py
+++ /dev/null
@@ -1,176 +0,0 @@
-from typing import Literal, NamedTuple, overload
-
-import pandas as pd
-import polars as pl
-
-from ._data import get_dataset
-
-type VintageYear = Literal[
-    2016,
-    2017,
-    2018,
-    2019,
-    2020,
-    2021,
-    2022,
-    2023,
-    2024,
-]
-
-
-class Vintage(NamedTuple):
-    year_lb: int
-    year_ub: int
-    county_fips: set[str]
-
-
-def get_vintage(vintage_year: VintageYear) -> Vintage:
-    """
-    Get info like year bounds for a given vintage year
-    """
-    vintage_year_lb = 2016
-    vintage_year_ub = 2024
-    if not (vintage_year_lb <= vintage_year <= vintage_year_ub):
-        raise ValueError(
-            f"Must choose a vintage year between {vintage_year_lb} and {vintage_year_ub}"
-        )
-
-    data = get_dataset(f"pop/county_cc/county_pop_{vintage_year}.parquet")
-    county_fips = set(
-        pl.scan_parquet(data)
-        .select("county_fips")
-        .unique()
-        .collect()
-        .to_series()
-        .to_list()
-    )
-    if vintage_year <= 2020:
-        year_lb = 2010
-    else:
-        year_lb = 2020
-
-    return Vintage(year_lb, vintage_year, county_fips)
-
-
-@overload
-def county_pop(
-    year: int,
-    *,
-    vintage_year: VintageYear | None = ...,
-    as_pandas: Literal[False] = ...,
-) -> pl.LazyFrame: ...
-
-
-@overload
-def county_pop(
-    year: int, *, vintage_year: VintageYear | None = ..., as_pandas: Literal[True]
-) -> pd.DataFrame: ...
-
-
-def county_pop(
-    year: int, *, vintage_year: VintageYear | None = None, as_pandas: bool = False
-) -> pl.LazyFrame | pd.DataFrame:
-    """
-    County population estimates for select years. Uses county population
-    by characteristics data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-counties-detail.html
-    The raw files are not present in the kintsugi-data repo because of their large size.
-    Instead, we use parquet files containing a subset of columns.
-
-    It's recommended to use the latest possible vintage to get a given year's data. However,
-    you may specify a specific vintage year if, for example, you need a certain set of county
-    geographies. If `vintage_year` is `None` (by default), data for years in the range [2010, 2019]
-    are sourced from the 2020 vintage (2010-2020 data), while data for years in the range
-    [2020, 2024] are sourced from the 2024 vintage (2020-2024 data).
-
-    Source (2024 example): https://www2.census.gov/programs-surveys/popest/datasets/2020-2024/counties/asrh/cc-est2024-alldata.csv
-    """
-    if vintage_year is None:
-        if 2010 <= year <= 2019:
-            vintage_year = 2020
-        else:
-            vintage_year = 2024
-
-    vintage = get_vintage(vintage_year)
-    if not (vintage.year_lb <= year <= vintage.year_ub):
-        raise ValueError(
-            f"Must choose a year between {vintage.year_lb} and {vintage.year_ub}"
-        )
-
-    data = get_dataset(f"pop/county_cc/county_pop_{vintage_year}.parquet")
-    lf = (
-        pl.scan_parquet(data)
-        .filter(
-            pl.col("year") == year,
-            pl.col("age_grp") == "tot",
-        )
-        .select("state_name", "county_name", "county_fips", "year", "tot_pop")
-        .sort("county_fips")
-    )
-
-    if as_pandas:
-        return lf.collect().to_pandas()
-
-    return lf
-
-
-@overload
-def county_age_pop(
-    year: int,
-    *,
-    vintage_year: VintageYear | None = ...,
-    as_pandas: Literal[False] = ...,
-) -> pl.LazyFrame: ...
-
-
-@overload
-def county_age_pop(
-    year: int, *, vintage_year: VintageYear | None = ..., as_pandas: Literal[True]
-) -> pd.DataFrame: ...
-
-
-def county_age_pop(
-    year: int, *, vintage_year: VintageYear | None = None, as_pandas: bool = False
-) -> pl.LazyFrame | pd.DataFrame:
-    """
-    County-age population estimates for select years. Uses county population
-    by characteristics data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-counties-detail.html
-    The raw files are not present in the kintsugi-data repo because of their large size.
-    Instead, we use parquet files containing a subset of columns.
-
-    It's recommended to use the latest possible vintage to get a given year's data. However,
-    you may specify a specific vintage year if, for example, you need a certain set of county
-    geographies. If `vintage_year` is `None` (by default), data for years in the range [2010, 2019]
-    are sourced from the 2020 vintage (2010-2020 data), while data for years in the range
-    [2020, 2024] are sourced from the 2024 vintage (2020-2024 data).
-
-    Source (2024 example): https://www2.census.gov/programs-surveys/popest/datasets/2020-2024/counties/asrh/cc-est2024-alldata.csv
-    """
-    if vintage_year is None:
-        if 2010 <= year <= 2019:
-            vintage_year = 2020
-        else:
-            vintage_year = 2024
-
-    vintage = get_vintage(vintage_year)
-    if not (vintage.year_lb <= year <= vintage.year_ub):
-        raise ValueError(
-            f"Must choose a year between {vintage.year_lb} and {vintage.year_ub}"
-        )
-
-    data = get_dataset(f"pop/county_cc/county_pop_{vintage_year}.parquet")
-    lf = (
-        pl.scan_parquet(data)
-        .filter(
-            pl.col("year") == year,
-            pl.col("age_grp") != "tot",
-        )
-        .select(
-            "state_name", "county_name", "county_fips", "year", "age_grp", "tot_pop"
-        )
-        .sort("county_fips", "age_grp")
-    )
-
-    if as_pandas:
-        return lf.collect().to_pandas()
-
-    return lf
diff --git a/src/kintsugi/population.py b/src/kintsugi/population.py
new file mode 100644
index 0000000..0ce8ac9
--- /dev/null
+++ b/src/kintsugi/population.py
@@ -0,0 +1,678 @@
+from typing import Literal, NamedTuple, overload
+
+import pandas as pd
+import polars as pl
+
+from ._data import get_dataset
+
+type VintageYear = Literal[
+    2016,
+    2017,
+    2018,
+    2019,
+    2020,
+    2021,
+    2022,
+    2023,
+    2024,
+]
+
+
+class Vintage(NamedTuple):
+    year_lb: int
+    year_ub: int
+    county_fips: set[str]
+
+
+def get_vintage(vintage_year: VintageYear) -> Vintage:
+    """
+    Get info like year bounds for a given vintage year
+    """
+    vintage_year_lb = 2016
+    vintage_year_ub = 2024
+    if not (vintage_year_lb <= vintage_year <= vintage_year_ub):
+        raise ValueError(
+            f"Must choose a vintage year between {vintage_year_lb} and {vintage_year_ub}"
+        )
+
+    data = get_dataset(f"pop/county_cc/county_pop_{vintage_year}.parquet")
+    county_fips = set(
+        pl.scan_parquet(data)
+        .select("county_fips")
+        .unique()
+        .collect()
+        .to_series()
+        .to_list()
+    )
+    if vintage_year <= 2020:
+        year_lb = 2010
+    else:
+        year_lb = 2020
+
+    return Vintage(year_lb, vintage_year, county_fips)
+
+
+@overload
+def state_pop(
+    year: int,
+    *,
+    vintage_year: VintageYear | None = ...,
+    as_pandas: Literal[False] = ...,
+) -> pl.LazyFrame: ...
+
+
+@overload
+def state_pop(
+    year: int, *, vintage_year: VintageYear | None = ..., as_pandas: Literal[True]
+) -> pd.DataFrame: ...
+
+
+def state_pop(
+    year: int, *, vintage_year: VintageYear | None = None, as_pandas: bool = False
+) -> pl.LazyFrame | pd.DataFrame:
+    """
+    State population estimates for select years. Uses state population
+    by characteristics data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-state-detail.html
+    The raw files are not present in the kintsugi-data repo. Instead, we use parquet files containing a subset of columns.
+
+    It's recommended to use the latest possible vintage to get a given year's data. However,
+    you may specify a specific vintage year. If `vintage_year` is `None` (by default), data
+    for years in the range [2010, 2019] are sourced from the 2020 vintage (2010-2020 data),
+    while data for years in the range [2020, 2024] are sourced from the 2024 vintage (2020-2024 data).
+
+    Source (2024 example): https://www2.census.gov/programs-surveys/popest/datasets/2020-2024/state/asrh/sc-est2024-alldata5.csv
+    """
+    if vintage_year is None:
+        if 2010 <= year <= 2019:
+            vintage_year = 2020
+        else:
+            vintage_year = 2024
+
+    vintage = get_vintage(vintage_year)
+    if not (vintage.year_lb <= year <= vintage.year_ub):
+        raise ValueError(
+            f"Must choose a year between {vintage.year_lb} and {vintage.year_ub}"
+        )
+
+    data = get_dataset(f"pop/state/state_pop_{vintage_year}.parquet")
+    lf = (
+        pl.scan_parquet(data)
+        .filter(
+            pl.col("year") == year,
+            pl.col("sex") == "tot",
+            pl.col("hispanic_origin") == "tot",
+        )
+        .drop("sex", "hispanic_origin")
+        .group_by(["state_name", "state_fips", "year"])
+        .agg(tot_pop=pl.col("tot_pop").sum())
+        .sort("state_fips")
+    )
+
+    if as_pandas:
+        return lf.collect().to_pandas()
+
+    return lf
+
+
+@overload
+def state_age_pop(
+    year: int,
+    *,
+    vintage_year: VintageYear | None = ...,
+    as_pandas: Literal[False] = ...,
+) -> pl.LazyFrame: ...
+
+
+@overload
+def state_age_pop(
+    year: int, *, vintage_year: VintageYear | None = ..., as_pandas: Literal[True]
+) -> pd.DataFrame: ...
+
+
+def state_age_pop(
+    year: int, *, vintage_year: VintageYear | None = None, as_pandas: bool = False
+) -> pl.LazyFrame | pd.DataFrame:
+    """
+    State-age population estimates for select years. Age is given in years, not binned groups.
+    Note that an age value of `85` corresponds to >= 85 years old.
+    Uses state population by characteristics data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-state-detail.html
+    The raw files are not present in the kintsugi-data repo. Instead, we use parquet files containing a subset of columns.
+
+    It's recommended to use the latest possible vintage to get a given year's data. However,
+    you may specify a specific vintage year. If `vintage_year` is `None` (by default), data
+    for years in the range [2010, 2019] are sourced from the 2020 vintage (2010-2020 data),
+    while data for years in the range [2020, 2024] are sourced from the 2024 vintage (2020-2024 data).
+
+    Source (2024 example): https://www2.census.gov/programs-surveys/popest/datasets/2020-2024/state/asrh/sc-est2024-alldata5.csv
+    """
+    if vintage_year is None:
+        if 2010 <= year <= 2019:
+            vintage_year = 2020
+        else:
+            vintage_year = 2024
+
+    vintage = get_vintage(vintage_year)
+    if not (vintage.year_lb <= year <= vintage.year_ub):
+        raise ValueError(
+            f"Must choose a year between {vintage.year_lb} and {vintage.year_ub}"
+        )
+
+    data = get_dataset(f"pop/state/state_pop_{vintage_year}.parquet")
+    lf = (
+        pl.scan_parquet(data)
+        .filter(
+            pl.col("year") == year,
+            pl.col("sex") == "tot",
+            pl.col("hispanic_origin") == "tot",
+        )
+        .drop("sex", "hispanic_origin")
+        .group_by(["state_name", "state_fips", "year", "age"])
+        .agg(tot_pop=pl.col("tot_pop").sum())
+        .sort("state_fips", "age")
+    )
+
+    if as_pandas:
+        return lf.collect().to_pandas()
+
+    return lf
+
+
+@overload
+def state_sex_pop(
+    year: int,
+    *,
+    vintage_year: VintageYear | None = ...,
+    as_pandas: Literal[False] = ...,
+) -> pl.LazyFrame: ...
+
+
+@overload
+def state_sex_pop(
+    year: int, *, vintage_year: VintageYear | None = ..., as_pandas: Literal[True]
+) -> pd.DataFrame: ...
+
+
+def state_sex_pop(
+    year: int, *, vintage_year: VintageYear | None = None, as_pandas: bool = False
+) -> pl.LazyFrame | pd.DataFrame:
+    """
+    State-sex population estimates for select years. Uses state population by characteristics
+    data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-state-detail.html
+    The raw files are not present in the kintsugi-data repo. Instead, we use parquet files containing a subset of columns.
+
+    It's recommended to use the latest possible vintage to get a given year's data. However,
+    you may specify a specific vintage year. If `vintage_year` is `None` (by default), data
+    for years in the range [2010, 2019] are sourced from the 2020 vintage (2010-2020 data),
+    while data for years in the range [2020, 2024] are sourced from the 2024 vintage (2020-2024 data).
+
+    Source (2024 example): https://www2.census.gov/programs-surveys/popest/datasets/2020-2024/state/asrh/sc-est2024-alldata5.csv
+    """
+    if vintage_year is None:
+        if 2010 <= year <= 2019:
+            vintage_year = 2020
+        else:
+            vintage_year = 2024
+
+    vintage = get_vintage(vintage_year)
+    if not (vintage.year_lb <= year <= vintage.year_ub):
+        raise ValueError(
+            f"Must choose a year between {vintage.year_lb} and {vintage.year_ub}"
+        )
+
+    data = get_dataset(f"pop/state/state_pop_{vintage_year}.parquet")
+    lf = (
+        pl.scan_parquet(data)
+        .filter(
+            pl.col("year") == year,
+            pl.col("sex") != "tot",
+            pl.col("hispanic_origin") == "tot",
+        )
+        .drop("hispanic_origin")
+        .group_by(["state_name", "state_fips", "year", "sex"])
+        .agg(tot_pop=pl.col("tot_pop").sum())
+        .sort("state_fips", "sex")
+    )
+
+    if as_pandas:
+        return lf.collect().to_pandas()
+
+    return lf
+
+
+@overload
+def state_race_pop(
+    year: int,
+    *,
+    vintage_year: VintageYear | None = ...,
+    incl_hispanic_orig: bool = ...,
+    as_pandas: Literal[False] = ...,
+) -> pl.LazyFrame: ...
+
+
+@overload
+def state_race_pop(
+    year: int,
+    *,
+    vintage_year: VintageYear | None = ...,
+    incl_hispanic_orig: bool = ...,
+    as_pandas: Literal[True],
+) -> pd.DataFrame: ...
+
+
+def state_race_pop(
+    year: int,
+    *,
+    vintage_year: VintageYear | None = None,
+    incl_hispanic_orig: bool = False,
+    as_pandas: bool = False,
+) -> pl.LazyFrame | pd.DataFrame:
+    """
+    State-race population estimates for select years. Specify `incl_hispanic_orig=True` to also
+    stratify the counts by `hispanic_origin`. Uses state population by characteristics
+    data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-state-detail.html
+    The raw files are not present in the kintsugi-data repo. Instead, we use parquet files containing a subset of columns.
+
+    It's recommended to use the latest possible vintage to get a given year's data. However,
+    you may specify a specific vintage year. If `vintage_year` is `None` (by default), data
+    for years in the range [2010, 2019] are sourced from the 2020 vintage (2010-2020 data),
+    while data for years in the range [2020, 2024] are sourced from the 2024 vintage (2020-2024 data).
+
+    Source (2024 example): https://www2.census.gov/programs-surveys/popest/datasets/2020-2024/state/asrh/sc-est2024-alldata5.csv
+    """
+    if vintage_year is None:
+        if 2010 <= year <= 2019:
+            vintage_year = 2020
+        else:
+            vintage_year = 2024
+
+    vintage = get_vintage(vintage_year)
+    if not (vintage.year_lb <= year <= vintage.year_ub):
+        raise ValueError(
+            f"Must choose a year between {vintage.year_lb} and {vintage.year_ub}"
+        )
+
+    data = get_dataset(f"pop/state/state_pop_{vintage_year}.parquet")
+    lf = (
+        pl.scan_parquet(data)
+        .filter(
+            pl.col("year") == year,
+            pl.col("sex") == "tot",
+            pl.col("hispanic_origin") != "tot"
+            if incl_hispanic_orig
+            else pl.col("hispanic_origin") == "tot",
+        )
+        .drop("sex")
+        .group_by(
+            ["state_name", "state_fips", "year", "race", "hispanic_origin"]
+            if incl_hispanic_orig
+            else ["state_name", "state_fips", "year", "race"]
+        )
+        .agg(tot_pop=pl.col("tot_pop").sum())
+        .sort(
+            ["state_fips", "race", "hispanic_origin"]
+            if incl_hispanic_orig
+            else ["state_fips", "race"]
+        )
+    )
+
+    if as_pandas:
+        return lf.collect().to_pandas()
+
+    return lf
+
+
+@overload
+def state_age_sex_pop(
+    year: int,
+    *,
+    vintage_year: VintageYear | None = ...,
+    as_pandas: Literal[False] = ...,
+) -> pl.LazyFrame: ...
+
+
+@overload
+def state_age_sex_pop(
+    year: int, *, vintage_year: VintageYear | None = ..., as_pandas: Literal[True]
+) -> pd.DataFrame: ...
+
+
+def state_age_sex_pop(
+    year: int, *, vintage_year: VintageYear | None = None, as_pandas: bool = False
+) -> pl.LazyFrame | pd.DataFrame:
+    """
+    State-age-sex population estimates for select years. Age is given in years, not binned groups.
+    Note that an age value of `85` corresponds to >= 85 years old.
+    Uses state population by characteristics data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-state-detail.html
+    The raw files are not present in the kintsugi-data repo. Instead, we use parquet files containing a subset of columns.
+
+    It's recommended to use the latest possible vintage to get a given year's data. However,
+    you may specify a specific vintage year. If `vintage_year` is `None` (by default), data
+    for years in the range [2010, 2019] are sourced from the 2020 vintage (2010-2020 data),
+    while data for years in the range [2020, 2024] are sourced from the 2024 vintage (2020-2024 data).
+
+    Source (2024 example): https://www2.census.gov/programs-surveys/popest/datasets/2020-2024/state/asrh/sc-est2024-alldata5.csv
+    """
+    if vintage_year is None:
+        if 2010 <= year <= 2019:
+            vintage_year = 2020
+        else:
+            vintage_year = 2024
+
+    vintage = get_vintage(vintage_year)
+    if not (vintage.year_lb <= year <= vintage.year_ub):
+        raise ValueError(
+            f"Must choose a year between {vintage.year_lb} and {vintage.year_ub}"
+        )
+
+    data = get_dataset(f"pop/state/state_pop_{vintage_year}.parquet")
+    lf = (
+        pl.scan_parquet(data)
+        .filter(
+            pl.col("year") == year,
+            pl.col("sex") != "tot",
+            pl.col("hispanic_origin") == "tot",
+        )
+        .drop("hispanic_origin")
+        .group_by(["state_name", "state_fips", "year", "age", "sex"])
+        .agg(tot_pop=pl.col("tot_pop").sum())
+        .sort("state_fips", "age", "sex")
+    )
+
+    if as_pandas:
+        return lf.collect().to_pandas()
+
+    return lf
+
+
+@overload
+def county_pop(
+    year: int,
+    *,
+    vintage_year: VintageYear | None = ...,
+    as_pandas: Literal[False] = ...,
+) -> pl.LazyFrame: ...
+
+
+@overload
+def county_pop(
+    year: int, *, vintage_year: VintageYear | None = ..., as_pandas: Literal[True]
+) -> pd.DataFrame: ...
+
+
+def county_pop(
+    year: int, *, vintage_year: VintageYear | None = None, as_pandas: bool = False
+) -> pl.LazyFrame | pd.DataFrame:
+    """
+    County population estimates for select years. Uses county population
+    by characteristics data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-counties-detail.html
+    The raw files are not present in the kintsugi-data repo because of their large size.
+    Instead, we use parquet files containing a subset of columns.
+
+    It's recommended to use the latest possible vintage to get a given year's data. However,
+    you may specify a specific vintage year if, for example, you need a certain set of county
+    geographies. If `vintage_year` is `None` (by default), data for years in the range [2010, 2019]
+    are sourced from the 2020 vintage (2010-2020 data), while data for years in the range
+    [2020, 2024] are sourced from the 2024 vintage (2020-2024 data).
+
+    Source (2024 example): https://www2.census.gov/programs-surveys/popest/datasets/2020-2024/counties/asrh/cc-est2024-alldata.csv
+    """
+    if vintage_year is None:
+        if 2010 <= year <= 2019:
+            vintage_year = 2020
+        else:
+            vintage_year = 2024
+
+    vintage = get_vintage(vintage_year)
+    if not (vintage.year_lb <= year <= vintage.year_ub):
+        raise ValueError(
+            f"Must choose a year between {vintage.year_lb} and {vintage.year_ub}"
+        )
+
+    data = get_dataset(f"pop/county_cc/county_pop_{vintage_year}.parquet")
+    lf = (
+        pl.scan_parquet(data)
+        .filter(
+            pl.col("year") == year,
+            pl.col("age_grp") == "tot",
+        )
+        .select("state_name", "county_name", "county_fips", "year", "tot_pop")
+        .sort("county_fips")
+    )
+
+    if as_pandas:
+        return lf.collect().to_pandas()
+
+    return lf
+
+
+@overload
+def county_age_pop(
+    year: int,
+    *,
+    vintage_year: VintageYear | None = ...,
+    as_pandas: Literal[False] = ...,
+) -> pl.LazyFrame: ...
+
+
+@overload
+def county_age_pop(
+    year: int, *, vintage_year: VintageYear | None = ..., as_pandas: Literal[True]
+) -> pd.DataFrame: ...
+
+
+def county_age_pop(
+    year: int, *, vintage_year: VintageYear | None = None, as_pandas: bool = False
+) -> pl.LazyFrame | pd.DataFrame:
+    """
+    County-age population estimates for select years. Uses county population
+    by characteristics data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-counties-detail.html
+    The raw files are not present in the kintsugi-data repo because of their large size.
+    Instead, we use parquet files containing a subset of columns.
+
+    It's recommended to use the latest possible vintage to get a given year's data. However,
+    you may specify a specific vintage year if, for example, you need a certain set of county
+    geographies. If `vintage_year` is `None` (by default), data for years in the range [2010, 2019]
+    are sourced from the 2020 vintage (2010-2020 data), while data for years in the range
+    [2020, 2024] are sourced from the 2024 vintage (2020-2024 data).
+
+    Source (2024 example): https://www2.census.gov/programs-surveys/popest/datasets/2020-2024/counties/asrh/cc-est2024-alldata.csv
+    """
+    if vintage_year is None:
+        if 2010 <= year <= 2019:
+            vintage_year = 2020
+        else:
+            vintage_year = 2024
+
+    vintage = get_vintage(vintage_year)
+    if not (vintage.year_lb <= year <= vintage.year_ub):
+        raise ValueError(
+            f"Must choose a year between {vintage.year_lb} and {vintage.year_ub}"
+        )
+
+    data = get_dataset(f"pop/county_cc/county_pop_{vintage_year}.parquet")
+    lf = (
+        pl.scan_parquet(data)
+        .filter(
+            pl.col("year") == year,
+            pl.col("age_grp") != "tot",
+        )
+        .select(
+            "state_name", "county_name", "county_fips", "year", "age_grp", "tot_pop"
+        )
+        .sort("county_fips", "age_grp")
+    )
+
+    if as_pandas:
+        return lf.collect().to_pandas()
+
+    return lf
+
+
+@overload
+def county_sex_pop(
+    year: int,
+    *,
+    vintage_year: VintageYear | None = ...,
+    as_pandas: Literal[False] = ...,
+) -> pl.LazyFrame: ...
+
+
+@overload
+def county_sex_pop(
+    year: int, *, vintage_year: VintageYear | None = ..., as_pandas: Literal[True]
+) -> pd.DataFrame: ...
+
+
+def county_sex_pop(
+    year: int, *, vintage_year: VintageYear | None = None, as_pandas: bool = False
+) -> pl.LazyFrame | pd.DataFrame:
+    """
+    County-sex population estimates for select years. Uses county population
+    by characteristics data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-counties-detail.html
+    The raw files are not present in the kintsugi-data repo because of their large size.
+    Instead, we use parquet files containing a subset of columns.
+
+    It's recommended to use the latest possible vintage to get a given year's data. However,
+    you may specify a specific vintage year if, for example, you need a certain set of county
+    geographies. If `vintage_year` is `None` (by default), data for years in the range [2010, 2019]
+    are sourced from the 2020 vintage (2010-2020 data), while data for years in the range
+    [2020, 2024] are sourced from the 2024 vintage (2020-2024 data).
+
+    Source (2024 example): https://www2.census.gov/programs-surveys/popest/datasets/2020-2024/counties/asrh/cc-est2024-alldata.csv
+    """
+    if vintage_year is None:
+        if 2010 <= year <= 2019:
+            vintage_year = 2020
+        else:
+            vintage_year = 2024
+
+    vintage = get_vintage(vintage_year)
+    if not (vintage.year_lb <= year <= vintage.year_ub):
+        raise ValueError(
+            f"Must choose a year between {vintage.year_lb} and {vintage.year_ub}"
+        )
+
+    data = get_dataset(f"pop/county_cc/county_pop_{vintage_year}.parquet")
+    lf = (
+        pl.scan_parquet(data)
+        .filter(
+            pl.col("year") == year,
+            pl.col("age_grp") == "tot",
+        )
+        .select(
+            "state_name", "county_name", "county_fips", "year", "tot_male", "tot_female"
+        )
+        .sort("county_fips")
+    )
+
+    if as_pandas:
+        return lf.collect().to_pandas()
+
+    return lf
+
+
+@overload
+def county_race_pop(
+    year: int,
+    *,
+    vintage_year: VintageYear | None = ...,
+    incl_hispanic_orig: bool = ...,
+    as_pandas: Literal[False] = ...,
+) -> pl.LazyFrame: ...
+
+
+@overload
+def county_race_pop(
+    year: int,
+    *,
+    vintage_year: VintageYear | None = ...,
+    incl_hispanic_orig: bool = ...,
+    as_pandas: Literal[True],
+) -> pd.DataFrame: ...
+
+
+def county_race_pop(
+    year: int,
+    *,
+    vintage_year: VintageYear | None = None,
+    incl_hispanic_orig: bool = False,
+    as_pandas: bool = False,
+) -> pl.LazyFrame | pd.DataFrame:
+    """
+    County-race population estimates for select years. Specify `incl_hispanic_orig=True` to include
+    the `hispanic` counts column. Uses county population by characteristics
+    data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-counties-detail.html
+    The raw files are not present in the kintsugi-data repo because of their large size.
+    Instead, we use parquet files containing a subset of columns.
+
+    It's recommended to use the latest possible vintage to get a given year's data. However,
+    you may specify a specific vintage year if, for example, you need a certain set of county
+    geographies. If `vintage_year` is `None` (by default), data for years in the range [2010, 2019]
+    are sourced from the 2020 vintage (2010-2020 data), while data for years in the range
+    [2020, 2024] are sourced from the 2024 vintage (2020-2024 data).
+
+    Source (2024 example): https://www2.census.gov/programs-surveys/popest/datasets/2020-2024/counties/asrh/cc-est2024-alldata.csv
+    """
+    if vintage_year is None:
+        if 2010 <= year <= 2019:
+            vintage_year = 2020
+        else:
+            vintage_year = 2024
+
+    vintage = get_vintage(vintage_year)
+    if not (vintage.year_lb <= year <= vintage.year_ub):
+        raise ValueError(
+            f"Must choose a year between {vintage.year_lb} and {vintage.year_ub}"
+        )
+
+    data = get_dataset(f"pop/county_cc/county_pop_{vintage_year}.parquet")
+    lf = (
+        pl.scan_parquet(data)
+        .filter(
+            pl.col("year") == year,
+            pl.col("age_grp") == "tot",
+        )
+        .select(
+            "state_name",
+            "county_name",
+            "county_fips",
+            "year",
+            "white_male",
+            "white_female",
+            "black_male",
+            "black_female",
+            "aian_male",
+            "aian_female",
+            "asian_male",
+            "asian_female",
+            "nhpi_male",
+            "nhpi_female",
+            "hispanic_male",
+            "hispanic_female",
+        )
+        .with_columns(
+            (pl.col(f"{r}_male") + pl.col(f"{r}_female")).alias(r)
+            for r in ["white", "black", "aian", "asian", "nhpi", "hispanic"]
+        )
+        .select(
+            "state_name",
+            "county_name",
+            "county_fips",
+            "year",
+            "white",
+            "black",
+            "aian",
+            "asian",
+            "nhpi",
+            "hispanic",
+        )
+        .sort("county_fips")
+    )
+
+    if not incl_hispanic_orig:
+        lf = lf.drop("hispanic")
+
+    if as_pandas:
+        return lf.collect().to_pandas()
+
+    return lf
diff --git a/tests/county_pop_test.py b/tests/county_pop_test.py
deleted file mode 100644
index 68d1a48..0000000
--- a/tests/county_pop_test.py
+++ /dev/null
@@ -1,175 +0,0 @@
-import pandera.polars as pa
-import polars as pl
-import pytest
-from pandas import DataFrame
-from pandera.polars import PolarsData
-
-from kintsugi.county_pop import (
-    VintageYear,
-    county_age_pop,
-    county_pop,
-    get_vintage,
-)
-
-from .models import BasePolarsModel
-
-age_grps = {
-    0: "tot",
-    1: "0-4",
-    2: "5-9",
-    3: "10-14",
-    4: "15-19",
-    5: "20-24",
-    6: "25-29",
-    7: "30-34",
-    8: "35-39",
-    9: "40-44",
-    10: "45-49",
-    11: "50-54",
-    12: "55-59",
-    13: "60-64",
-    14: "65-69",
-    15: "70-74",
-    16: "75-79",
-    17: "80-84",
-    18: ">=85",
-}
-age_grp_enum = pl.Enum(age_grps.values())
-
-
-class CountyPopulation(BasePolarsModel):
-    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-    county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-    county_fips: pl.String = pa.Field(unique=True)  # pyright: ignore [reportAny]
-    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
-    tot_pop: pl.Int64 = pa.Field(gt=0)  # pyright: ignore [reportAny]
-
-    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
-        unique: list[str] = ["state_name", "county_name", "county_fips", "year"]
-
-    @pa.dataframe_check
-    def has_correct_states(cls, data: PolarsData) -> bool:
-        return (
-            data.lazyframe.select(
-                pl.col("county_fips")
-                .str.slice(0, 2)
-                .is_between(pl.lit("01"), pl.lit("56"))
-                .all()
-            )
-            .collect()
-            .item()
-            is True
-        )
-
-
-@pytest.mark.parametrize(
-    ("year"),
-    range(2010, 2025),
-)
-@pytest.mark.parametrize(
-    ("vintage_year"),
-    range(2016, 2025),
-)
-def test_county_pop(year: int, vintage_year: VintageYear) -> None:
-    if vintage_year <= 2020:
-        year_lb = 2010
-    else:
-        year_lb = 2020
-
-    if year_lb <= year <= vintage_year:
-        county_pop(year, vintage_year=vintage_year).collect().pipe(
-            CountyPopulation.validate, lazy=True
-        )
-    else:
-        with pytest.raises(ValueError, match="^Must choose a year between"):
-            county_pop(year, vintage_year=vintage_year)
-
-
-def test_county_pop_invalid_vintage_year_exception() -> None:
-    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
-        county_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
-
-
-def test_get_vintage_info() -> None:
-    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
-        get_vintage(2000)  # pyright: ignore [reportArgumentType]
-
-
-@pytest.mark.parametrize(
-    ("year"),
-    range(2010, 2025),
-)
-def test_county_pop_as_pandas(year: int) -> None:
-    df = county_pop(year, as_pandas=True)
-
-    assert isinstance(df, DataFrame)
-
-
-class CountyAgePopulation(BasePolarsModel):
-    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-    county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-    county_fips: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
-    age_grp: pl.Enum = pa.Field(dtype_kwargs={"categories": age_grp_enum.categories})  # pyright: ignore [reportAny]
-    tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
-
-    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
-        unique: list[str] = [
-            "state_name",
-            "county_name",
-            "county_fips",
-            "year",
-            "age_grp",
-        ]
-
-    @pa.dataframe_check
-    def has_correct_states(cls, data: PolarsData) -> bool:
-        return (
-            data.lazyframe.select(
-                pl.col("county_fips")
-                .str.slice(0, 2)
-                .is_between(pl.lit("01"), pl.lit("56"))
-                .all()
-            )
-            .collect()
-            .item()
-            is True
-        )
-
-
-@pytest.mark.parametrize(
-    ("year"),
-    range(2010, 2025),
-)
-@pytest.mark.parametrize(
-    ("vintage_year"),
-    range(2016, 2025),
-)
-def test_county_age_pop(year: int, vintage_year: VintageYear) -> None:
-    if vintage_year <= 2020:
-        year_lb = 2010
-    else:
-        year_lb = 2020
-
-    if year_lb <= year <= vintage_year:
-        county_age_pop(year, vintage_year=vintage_year).collect().pipe(
-            CountyAgePopulation.validate, lazy=True
-        )
-    else:
-        with pytest.raises(ValueError, match="^Must choose a year between"):
-            county_age_pop(year, vintage_year=vintage_year)
-
-
-@pytest.mark.parametrize(
-    ("year"),
-    range(2010, 2025),
-)
-def test_county_age_pop_as_pandas(year: int) -> None:
-    df = county_age_pop(year, as_pandas=True)
-
-    assert isinstance(df, DataFrame)
-
-
-def test_county_age_pop_invalid_vintage_year_exception() -> None:
-    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
-        county_age_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
diff --git a/tests/population_test.py b/tests/population_test.py
new file mode 100644
index 0000000..0ecaaa8
--- /dev/null
+++ b/tests/population_test.py
@@ -0,0 +1,681 @@
+import pandera.polars as pa
+import polars as pl
+import pytest
+from pandas import DataFrame
+from pandera.polars import PolarsData
+
+from kintsugi.population import (
+    VintageYear,
+    county_age_pop,
+    county_pop,
+    county_race_pop,
+    county_sex_pop,
+    get_vintage,
+    state_age_pop,
+    state_age_sex_pop,
+    state_pop,
+    state_race_pop,
+    state_sex_pop,
+)
+
+from .models import BasePolarsModel
+
+
+class StatePopulation(BasePolarsModel):
+    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    state_fips: pl.String = pa.Field(unique=True, in_range=("01", "56"))  # pyright: ignore [reportAny]
+    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+    tot_pop: pl.Int64 = pa.Field(gt=0)  # pyright: ignore [reportAny]
+
+    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+        unique: list[str] = ["state_name", "state_fips", "year"]
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+@pytest.mark.parametrize(
+    ("vintage_year"),
+    range(2016, 2025),
+)
+def test_state_pop(year: int, vintage_year: VintageYear) -> None:
+    if vintage_year <= 2020:
+        year_lb = 2010
+    else:
+        year_lb = 2020
+
+    if year_lb <= year <= vintage_year:
+        state_pop(year, vintage_year=vintage_year).collect().pipe(
+            StatePopulation.validate, lazy=True
+        )
+    else:
+        with pytest.raises(ValueError, match="^Must choose a year between"):
+            state_pop(year, vintage_year=vintage_year)
+
+
+def test_state_pop_invalid_vintage_year_exception() -> None:
+    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+        state_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+def test_state_pop_as_pandas(year: int) -> None:
+    df = state_pop(year, as_pandas=True)
+
+    assert isinstance(df, DataFrame)
+
+
+class StateAgePopulation(BasePolarsModel):
+    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    state_fips: pl.String = pa.Field(in_range=("01", "56"))  # pyright: ignore [reportAny]
+    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+    age: pl.Int64 = pa.Field(in_range=(0, 85))  # pyright: ignore [reportAny]
+    tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+
+    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+        unique: list[str] = [
+            "state_name",
+            "state_fips",
+            "year",
+            "age",
+        ]
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+@pytest.mark.parametrize(
+    ("vintage_year"),
+    range(2016, 2025),
+)
+def test_state_age_pop(year: int, vintage_year: VintageYear) -> None:
+    if vintage_year <= 2020:
+        year_lb = 2010
+    else:
+        year_lb = 2020
+
+    if year_lb <= year <= vintage_year:
+        state_age_pop(year, vintage_year=vintage_year).collect().pipe(
+            StateAgePopulation.validate, lazy=True
+        )
+    else:
+        with pytest.raises(ValueError, match="^Must choose a year between"):
+            state_age_pop(year, vintage_year=vintage_year)
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+def test_state_age_pop_as_pandas(year: int) -> None:
+    df = state_age_pop(year, as_pandas=True)
+
+    assert isinstance(df, DataFrame)
+
+
+def test_state_age_pop_invalid_vintage_year_exception() -> None:
+    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+        state_age_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
+
+
+class StateSexPopulation(BasePolarsModel):
+    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    state_fips: pl.String = pa.Field(in_range=("01", "56"))  # pyright: ignore [reportAny]
+    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+    sex: pl.Enum = pa.Field(dtype_kwargs={"categories": ["tot", "male", "female"]})  # pyright: ignore [reportAny]
+    tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+
+    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+        unique: list[str] = [
+            "state_name",
+            "state_fips",
+            "year",
+            "sex",
+        ]
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+@pytest.mark.parametrize(
+    ("vintage_year"),
+    range(2016, 2025),
+)
+def test_state_sex_pop(year: int, vintage_year: VintageYear) -> None:
+    if vintage_year <= 2020:
+        year_lb = 2010
+    else:
+        year_lb = 2020
+
+    if year_lb <= year <= vintage_year:
+        state_sex_pop(year, vintage_year=vintage_year).collect().pipe(
+            StateSexPopulation.validate, lazy=True
+        )
+    else:
+        with pytest.raises(ValueError, match="^Must choose a year between"):
+            state_sex_pop(year, vintage_year=vintage_year)
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+def test_state_sex_pop_as_pandas(year: int) -> None:
+    df = state_sex_pop(year, as_pandas=True)
+
+    assert isinstance(df, DataFrame)
+
+
+def test_state_sex_pop_invalid_vintage_year_exception() -> None:
+    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+        state_sex_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
+
+
+class StateRacePopulation(BasePolarsModel):
+    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    state_fips: pl.String = pa.Field(in_range=("01", "56"))  # pyright: ignore [reportAny]
+    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+    race: pl.Enum = pa.Field(  # pyright: ignore [reportAny]
+        dtype_kwargs={"categories": ["white", "black", "aian", "asian", "nhpi"]}
+    )
+    tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+
+    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+        unique: list[str] = [
+            "state_name",
+            "state_fips",
+            "year",
+            "race",
+        ]
+
+
+class StateRaceHispanicPopulation(BasePolarsModel):
+    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    state_fips: pl.String = pa.Field(in_range=("01", "56"))  # pyright: ignore [reportAny]
+    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+    race: pl.Enum = pa.Field(  # pyright: ignore [reportAny]
+        dtype_kwargs={"categories": ["white", "black", "aian", "asian", "nhpi"]}
+    )
+    hispanic_origin: pl.Enum = pa.Field(  # pyright: ignore [reportAny]
+        dtype_kwargs={"categories": ["tot", "not_hispanic", "hispanic"]}
+    )
+    tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+
+    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+        unique: list[str] = [
+            "state_name",
+            "state_fips",
+            "year",
+            "race",
+            "hispanic_origin",
+        ]
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+@pytest.mark.parametrize(
+    ("vintage_year"),
+    range(2016, 2025),
+)
+def test_state_race_hispanic_pop(year: int, vintage_year: VintageYear) -> None:
+    if vintage_year <= 2020:
+        year_lb = 2010
+    else:
+        year_lb = 2020
+
+    if year_lb <= year <= vintage_year:
+        state_race_pop(
+            year, vintage_year=vintage_year, incl_hispanic_orig=True
+        ).collect().pipe(StateRaceHispanicPopulation.validate, lazy=True)
+    else:
+        with pytest.raises(ValueError, match="^Must choose a year between"):
+            state_race_pop(year, vintage_year=vintage_year, incl_hispanic_orig=True)
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+def test_state_race_hispanic_pop_as_pandas(year: int) -> None:
+    df = state_race_pop(year, as_pandas=True, incl_hispanic_orig=True)
+
+    assert isinstance(df, DataFrame)
+
+
+def test_state_race_pop_invalid_vintage_year_exception() -> None:
+    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+        state_race_pop(2023, vintage_year=2000, incl_hispanic_orig=True)  # pyright: ignore [reportArgumentType]
+
+
+class StateAgeSexPopulation(BasePolarsModel):
+    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    state_fips: pl.String = pa.Field(in_range=("01", "56"))  # pyright: ignore [reportAny]
+    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+    age: pl.Int64 = pa.Field(in_range=(0, 85))  # pyright: ignore [reportAny]
+    sex: pl.Enum = pa.Field(dtype_kwargs={"categories": ["tot", "male", "female"]})  # pyright: ignore [reportAny]
+    tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+
+    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+        unique: list[str] = ["state_name", "state_fips", "year", "age", "sex"]
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+@pytest.mark.parametrize(
+    ("vintage_year"),
+    range(2016, 2025),
+)
+def test_state_age_sex_pop(year: int, vintage_year: VintageYear) -> None:
+    if vintage_year <= 2020:
+        year_lb = 2010
+    else:
+        year_lb = 2020
+
+    if year_lb <= year <= vintage_year:
+        state_age_sex_pop(year, vintage_year=vintage_year).collect().pipe(
+            StateAgeSexPopulation.validate, lazy=True
+        )
+    else:
+        with pytest.raises(ValueError, match="^Must choose a year between"):
+            state_age_sex_pop(year, vintage_year=vintage_year)
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+def test_state_age_sex_pop_as_pandas(year: int) -> None:
+    df = state_age_sex_pop(year, as_pandas=True)
+
+    assert isinstance(df, DataFrame)
+
+
+def test_state_age_sex_pop_invalid_vintage_year_exception() -> None:
+    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+        state_age_sex_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
+
+
+age_grps = {
+    0: "tot",
+    1: "0-4",
+    2: "5-9",
+    3: "10-14",
+    4: "15-19",
+    5: "20-24",
+    6: "25-29",
+    7: "30-34",
+    8: "35-39",
+    9: "40-44",
+    10: "45-49",
+    11: "50-54",
+    12: "55-59",
+    13: "60-64",
+    14: "65-69",
+    15: "70-74",
+    16: "75-79",
+    17: "80-84",
+    18: ">=85",
+}
+age_grp_enum = pl.Enum(age_grps.values())
+
+
+class CountyPopulation(BasePolarsModel):
+    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_fips: pl.String = pa.Field(unique=True)  # pyright: ignore [reportAny]
+    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+    tot_pop: pl.Int64 = pa.Field(gt=0)  # pyright: ignore [reportAny]
+
+    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+        unique: list[str] = ["state_name", "county_name", "county_fips", "year"]
+
+    @pa.dataframe_check
+    def has_correct_states(cls, data: PolarsData) -> bool:
+        return (
+            data.lazyframe.select(
+                pl.col("county_fips")
+                .str.slice(0, 2)
+                .is_between(pl.lit("01"), pl.lit("56"))
+                .all()
+            )
+            .collect()
+            .item()
+            is True
+        )
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+@pytest.mark.parametrize(
+    ("vintage_year"),
+    range(2016, 2025),
+)
+def test_county_pop(year: int, vintage_year: VintageYear) -> None:
+    if vintage_year <= 2020:
+        year_lb = 2010
+    else:
+        year_lb = 2020
+
+    if year_lb <= year <= vintage_year:
+        county_pop(year, vintage_year=vintage_year).collect().pipe(
+            CountyPopulation.validate, lazy=True
+        )
+    else:
+        with pytest.raises(ValueError, match="^Must choose a year between"):
+            county_pop(year, vintage_year=vintage_year)
+
+
+def test_county_pop_invalid_vintage_year_exception() -> None:
+    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+        county_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
+
+
+def test_get_vintage_info() -> None:
+    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+        get_vintage(2000)  # pyright: ignore [reportArgumentType]
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+def test_county_pop_as_pandas(year: int) -> None:
+    df = county_pop(year, as_pandas=True)
+
+    assert isinstance(df, DataFrame)
+
+
+class CountyAgePopulation(BasePolarsModel):
+    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_fips: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+    age_grp: pl.Enum = pa.Field(dtype_kwargs={"categories": age_grp_enum.categories})  # pyright: ignore [reportAny]
+    tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+
+    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+        unique: list[str] = [
+            "state_name",
+            "county_name",
+            "county_fips",
+            "year",
+            "age_grp",
+        ]
+
+    @pa.dataframe_check
+    def has_correct_states(cls, data: PolarsData) -> bool:
+        return (
+            data.lazyframe.select(
+                pl.col("county_fips")
+                .str.slice(0, 2)
+                .is_between(pl.lit("01"), pl.lit("56"))
+                .all()
+            )
+            .collect()
+            .item()
+            is True
+        )
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+@pytest.mark.parametrize(
+    ("vintage_year"),
+    range(2016, 2025),
+)
+def test_county_age_pop(year: int, vintage_year: VintageYear) -> None:
+    if vintage_year <= 2020:
+        year_lb = 2010
+    else:
+        year_lb = 2020
+
+    if year_lb <= year <= vintage_year:
+        county_age_pop(year, vintage_year=vintage_year).collect().pipe(
+            CountyAgePopulation.validate, lazy=True
+        )
+    else:
+        with pytest.raises(ValueError, match="^Must choose a year between"):
+            county_age_pop(year, vintage_year=vintage_year)
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+def test_county_age_pop_as_pandas(year: int) -> None:
+    df = county_age_pop(year, as_pandas=True)
+
+    assert isinstance(df, DataFrame)
+
+
+def test_county_age_pop_invalid_vintage_year_exception() -> None:
+    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+        county_age_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
+
+
+class CountySexPopulation(BasePolarsModel):
+    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_fips: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+    tot_male: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+    tot_female: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+
+    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+        unique: list[str] = [
+            "state_name",
+            "county_name",
+            "county_fips",
+            "year",
+        ]
+
+    @pa.dataframe_check
+    def has_correct_states(cls, data: PolarsData) -> bool:
+        return (
+            data.lazyframe.select(
+                pl.col("county_fips")
+                .str.slice(0, 2)
+                .is_between(pl.lit("01"), pl.lit("56"))
+                .all()
+            )
+            .collect()
+            .item()
+            is True
+        )
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+@pytest.mark.parametrize(
+    ("vintage_year"),
+    range(2016, 2025),
+)
+def test_county_sex_pop(year: int, vintage_year: VintageYear) -> None:
+    if vintage_year <= 2020:
+        year_lb = 2010
+    else:
+        year_lb = 2020
+
+    if year_lb <= year <= vintage_year:
+        county_sex_pop(year, vintage_year=vintage_year).collect().pipe(
+            CountySexPopulation.validate, lazy=True
+        )
+    else:
+        with pytest.raises(ValueError, match="^Must choose a year between"):
+            county_sex_pop(year, vintage_year=vintage_year)
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+def test_county_sex_pop_as_pandas(year: int) -> None:
+    df = county_sex_pop(year, as_pandas=True)
+
+    assert isinstance(df, DataFrame)
+
+
+def test_county_sex_pop_invalid_vintage_year_exception() -> None:
+    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+        county_sex_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
+
+
+class CountyRacePopulation(BasePolarsModel):
+    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_fips: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+    white: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+    black: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+    aian: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+    asian: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+    nhpi: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+
+    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+        unique: list[str] = [
+            "state_name",
+            "county_name",
+            "county_fips",
+            "year",
+        ]
+
+    @pa.dataframe_check
+    def has_correct_states(cls, data: PolarsData) -> bool:
+        return (
+            data.lazyframe.select(
+                pl.col("county_fips")
+                .str.slice(0, 2)
+                .is_between(pl.lit("01"), pl.lit("56"))
+                .all()
+            )
+            .collect()
+            .item()
+            is True
+        )
+
+
+class CountyRaceHispanicPopulation(BasePolarsModel):
+    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_fips: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+    white: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+    black: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+    aian: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+    asian: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+    nhpi: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+    hispanic: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+
+    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+        unique: list[str] = [
+            "state_name",
+            "county_name",
+            "county_fips",
+            "year",
+        ]
+
+    @pa.dataframe_check
+    def has_correct_states(cls, data: PolarsData) -> bool:
+        return (
+            data.lazyframe.select(
+                pl.col("county_fips")
+                .str.slice(0, 2)
+                .is_between(pl.lit("01"), pl.lit("56"))
+                .all()
+            )
+            .collect()
+            .item()
+            is True
+        )
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+@pytest.mark.parametrize(
+    ("vintage_year"),
+    range(2016, 2025),
+)
+def test_county_race_pop(year: int, vintage_year: VintageYear) -> None:
+    if vintage_year <= 2020:
+        year_lb = 2010
+    else:
+        year_lb = 2020
+
+    if year_lb <= year <= vintage_year:
+        county_race_pop(year, vintage_year=vintage_year).collect().pipe(
+            CountyRacePopulation.validate, lazy=True
+        )
+    else:
+        with pytest.raises(ValueError, match="^Must choose a year between"):
+            county_race_pop(year, vintage_year=vintage_year)
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+def test_county_race_pop_as_pandas(year: int) -> None:
+    df = county_race_pop(year, as_pandas=True)
+
+    assert isinstance(df, DataFrame)
+
+
+def test_county_race_pop_invalid_vintage_year_exception() -> None:
+    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+        county_race_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+@pytest.mark.parametrize(
+    ("vintage_year"),
+    range(2016, 2025),
+)
+def test_county_race_hispanic_pop(year: int, vintage_year: VintageYear) -> None:
+    if vintage_year <= 2020:
+        year_lb = 2010
+    else:
+        year_lb = 2020
+
+    if year_lb <= year <= vintage_year:
+        county_race_pop(
+            year, vintage_year=vintage_year, incl_hispanic_orig=True
+        ).collect().pipe(CountyRaceHispanicPopulation.validate, lazy=True)
+    else:
+        with pytest.raises(ValueError, match="^Must choose a year between"):
+            county_race_pop(year, vintage_year=vintage_year, incl_hispanic_orig=True)
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+def test_county_race_hispanic_pop_as_pandas(year: int) -> None:
+    df = county_race_pop(year, as_pandas=True, incl_hispanic_orig=True)
+
+    assert isinstance(df, DataFrame)
+
+
+def test_county_race_hispanic_pop_invalid_vintage_year_exception() -> None:
+    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+        county_race_pop(2023, vintage_year=2000, incl_hispanic_orig=True)  # pyright: ignore [reportArgumentType]

From 33486f072bba453954c4994477563018afba2c54 Mon Sep 17 00:00:00 2001
From: winter-again <63322884+winter-again@users.noreply.github.com>
Date: Wed, 11 Mar 2026 17:18:37 -0400
Subject: [PATCH 2/4] Validate year against vintage year in a separate function.
 Refine state population functions and their tests.

---
 src/kintsugi/population.py | 206 ++++++---
 tests/population_test.py   | 840 ++++++++++++++++++++-----------------
 2 files changed, 588 insertions(+), 458 deletions(-)

diff --git a/src/kintsugi/population.py b/src/kintsugi/population.py
index 0ce8ac9..a98a610 100644
--- a/src/kintsugi/population.py
+++ b/src/kintsugi/population.py
@@ -18,16 +18,14 @@
 ]
 
 
-class Vintage(NamedTuple):
-    year_lb: int
-    year_ub: int
-    county_fips: set[str]
+# class Vintage(NamedTuple):
+#     year_lb: int
+#     year_ub: int
+#     county_fips: set[str]
 
 
-def get_vintage(vintage_year: VintageYear) -> Vintage:
-    """
-    Get info like year bounds for a given vintage year
-    """
+def validate_vintage_year(year: int, vintage_year: VintageYear) -> None:
+    """Validate year against vintage_year"""
     vintage_year_lb = 2016
     vintage_year_ub = 2024
     if not (vintage_year_lb <= vintage_year <= vintage_year_ub):
@@ -35,21 +33,47 @@ def get_vintage(vintage_year: VintageYear) -> Vintage:
             f"Must choose a vintage year between {vintage_year_lb} and {vintage_year_ub}"
         )
 
-    data = get_dataset(f"pop/county_cc/county_pop_{vintage_year}.parquet")
-    county_fips = set(
-        pl.scan_parquet(data)
-        .select("county_fips")
-        .unique()
-        .collect()
-        .to_series()
-        .to_list()
-    )
     if vintage_year <= 2020:
         year_lb = 2010
     else:
         year_lb = 2020
 
-    return Vintage(year_lb, vintage_year, county_fips)
+    if not (year_lb <= year <= vintage_year):
+        raise ValueError(f"Must choose a year between {year_lb} and {vintage_year}")
+
+
+# def _get_vintage(vintage_year: VintageYear) -> Vintage:
+#     """Get info like year bounds for a given vintage year."""
+#     vintage_year_lb = 2016
+#     vintage_year_ub = 2024
+#     if not (vintage_year_lb <= vintage_year <= vintage_year_ub):
+#         raise ValueError(
+#             f"Must choose a vintage year between {vintage_year_lb} and {vintage_year_ub}"
+#         )
+#
+#     data = get_dataset(f"pop/county_cc/county_pop_{vintage_year}.parquet")
+#     county_fips = set(
+#         pl.scan_parquet(data)
+#         .select("county_fips")
+#         .unique()
+#         .collect()
+#         .to_series()
+#         .to_list()
+#     )
+#     if vintage_year <= 2020:
+#         year_lb = 2010
+#     else:
+#         year_lb = 2020
+#
+#     return Vintage(year_lb, vintage_year, county_fips)
+
+
+# TODO: should docstrings have info on the schema?
+
+# match conventions in kintsugi-data processing script
+sex_enum = pl.Enum(["tot", "male", "female"])
+race_enum = pl.Enum(["white", "black", "aian", "asian", "nhpi"])
+hispanic_enum = pl.Enum(["tot", "not_hispanic", "hispanic"])
 
 
 @overload
@@ -71,12 +95,13 @@ def state_pop(
     year: int, *, vintage_year: VintageYear | None = None, as_pandas: bool = False
 ) -> pl.LazyFrame | pd.DataFrame:
     """
-    State population estimates for select years. Uses state population
-    by characteristics data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-state-detail.html
-    The raw files are not present in the kintsugi-data repo. Instead, we use parquet files containing a subset of columns.
+    State population estimates for select years.
+
+    Uses state population by characteristics data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-state-detail.html.
+    The raw files are not present in the kintsugi-data repo. Instead, parquet files containing a subset of columns are used.
 
     It's recommended to use the latest possible vintage to get a given year's data. However,
-    you may specify a specific vintage year. If `vintage_year` is `None` (by default), data
+    a specific vintage year may be provided. If `vintage_year` is `None` (the default), data
     for years in the range [2010, 2019] are sourced from the 2020 vintage (2010-2020 data),
     while data for years in the range [2020, 2024] are sourced from the 2024 vintage (2020-2024 data).
 
@@ -88,12 +113,7 @@ def state_pop(
         else:
             vintage_year = 2024
 
-    vintage = get_vintage(vintage_year)
-    if not (vintage.year_lb <= year <= vintage.year_ub):
-        raise ValueError(
-            f"Must choose a year between {vintage.year_lb} and {vintage.year_ub}"
-        )
-
+    validate_vintage_year(year, vintage_year)
     data = get_dataset(f"pop/state/state_pop_{vintage_year}.parquet")
     lf = (
         pl.scan_parquet(data)
@@ -102,7 +122,6 @@ def state_pop(
             pl.col("sex") == "tot",
             pl.col("hispanic_origin") == "tot",
         )
-        .drop("sex", "hispanic_origin")
         .group_by(["state_name", "state_fips", "year"])
         .agg(tot_pop=pl.col("tot_pop").sum())
         .sort("state_fips")
@@ -133,13 +152,14 @@ def state_age_pop(
     year: int, *, vintage_year: VintageYear | None = None, as_pandas: bool = False
 ) -> pl.LazyFrame | pd.DataFrame:
     """
-    State-age population estimates for select years. Age is given in years, not binned groups.
-    Note that an age value of `85` corresponds to >= 85 years old.
-    Uses state population by characteristics data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-state-detail.html
-    The raw files are not present in the kintsugi-data repo. Instead, we use parquet files containing a subset of columns.
+    State-age population estimates for select years.
+
+    Age is given in years, not binned groups. Note that an age value of `85` corresponds to >= 85 years old.
+    Uses state population by characteristics data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-state-detail.html.
+    The raw files are not present in the kintsugi-data repo. Instead, parquet files containing a subset of columns are used.
 
     It's recommended to use the latest possible vintage to get a given year's data. However,
-    you may specify a specific vintage year. If `vintage_year` is `None` (by default), data
+    a specific vintage year may be provided. If `vintage_year` is `None` (the default), data
     for years in the range [2010, 2019] are sourced from the 2020 vintage (2010-2020 data),
     while data for years in the range [2020, 2024] are sourced from the 2024 vintage (2020-2024 data).
 
@@ -151,12 +171,7 @@ def state_age_pop(
         else:
             vintage_year = 2024
 
-    vintage = get_vintage(vintage_year)
-    if not (vintage.year_lb <= year <= vintage.year_ub):
-        raise ValueError(
-            f"Must choose a year between {vintage.year_lb} and {vintage.year_ub}"
-        )
-
+    validate_vintage_year(year, vintage_year)
     data = get_dataset(f"pop/state/state_pop_{vintage_year}.parquet")
     lf = (
         pl.scan_parquet(data)
@@ -165,7 +180,6 @@ def state_age_pop(
             pl.col("sex") == "tot",
             pl.col("hispanic_origin") == "tot",
         )
-        .drop("sex", "hispanic_origin")
         .group_by(["state_name", "state_fips", "year", "age"])
         .agg(tot_pop=pl.col("tot_pop").sum())
         .sort("state_fips", "age")
@@ -213,12 +227,7 @@ def state_sex_pop(
         else:
             vintage_year = 2024
 
-    vintage = get_vintage(vintage_year)
-    if not (vintage.year_lb <= year <= vintage.year_ub):
-        raise ValueError(
-            f"Must choose a year between {vintage.year_lb} and {vintage.year_ub}"
-        )
-
+    validate_vintage_year(year, vintage_year)
     data = get_dataset(f"pop/state/state_pop_{vintage_year}.parquet")
     lf = (
         pl.scan_parquet(data)
@@ -227,7 +236,6 @@ def state_sex_pop(
             pl.col("sex") != "tot",
             pl.col("hispanic_origin") == "tot",
         )
-        .drop("hispanic_origin")
         .group_by(["state_name", "state_fips", "year", "sex"])
         .agg(tot_pop=pl.col("tot_pop").sum())
         .sort("state_fips", "sex")
@@ -285,12 +293,7 @@ def state_race_pop(
         else:
             vintage_year = 2024
 
-    vintage = get_vintage(vintage_year)
-    if not (vintage.year_lb <= year <= vintage.year_ub):
-        raise ValueError(
-            f"Must choose a year between {vintage.year_lb} and {vintage.year_ub}"
-        )
-
+    validate_vintage_year(year, vintage_year)
     data = get_dataset(f"pop/state/state_pop_{vintage_year}.parquet")
     lf = (
         pl.scan_parquet(data)
@@ -301,7 +304,6 @@ def state_race_pop(
             if incl_hispanic_orig
             else pl.col("hispanic_origin") == "tot",
         )
-        .drop("sex")
         .group_by(
             ["state_name", "state_fips", "year", "race", "hispanic_origin"]
             if incl_hispanic_orig
@@ -340,13 +342,14 @@ def state_age_sex_pop(
     year: int, *, vintage_year: VintageYear | None = None, as_pandas: bool = False
 ) -> pl.LazyFrame | pd.DataFrame:
     """
-    State-age-sex population estimates for select years. Age is given in years, not binned groups.
-    Note that an age value of `85` corresponds to >= 85 years old.
-    Uses state population by characteristics data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-state-detail.html
-    The raw files are not present in the kintsugi-data repo. Instead, we use parquet files containing a subset of columns.
+    State-age-sex population estimates for select years.
+
+    Age is given in years, not binned groups. Note that an age value of `85` corresponds to >= 85 years old.
+    Uses state population by characteristics data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-state-detail.html.
+    The raw files are not present in the kintsugi-data repo. Instead, parquet files containing a subset of columns are used.
 
     It's recommended to use the latest possible vintage to get a given year's data. However,
-    you may specify a specific vintage year. If `vintage_year` is `None` (by default), data
+    a specific vintage year may be provided. If `vintage_year` is `None` (the default), data
     for years in the range [2010, 2019] are sourced from the 2020 vintage (2010-2020 data),
     while data for years in the range [2020, 2024] are sourced from the 2024 vintage (2020-2024 data).
 
@@ -358,12 +361,7 @@ def state_age_sex_pop(
         else:
             vintage_year = 2024
 
-    vintage = get_vintage(vintage_year)
-    if not (vintage.year_lb <= year <= vintage.year_ub):
-        raise ValueError(
-            f"Must choose a year between {vintage.year_lb} and {vintage.year_ub}"
-        )
-
+    validate_vintage_year(year, vintage_year)
     data = get_dataset(f"pop/state/state_pop_{vintage_year}.parquet")
     lf = (
         pl.scan_parquet(data)
@@ -372,7 +370,6 @@ def state_age_sex_pop(
             pl.col("sex") != "tot",
             pl.col("hispanic_origin") == "tot",
         )
-        .drop("hispanic_origin")
         .group_by(["state_name", "state_fips", "year", "age", "sex"])
         .agg(tot_pop=pl.col("tot_pop").sum())
         .sort("state_fips", "age", "sex")
@@ -676,3 +673,78 @@ def county_race_pop(
         return lf.collect().to_pandas()
 
     return lf
+
+
+@overload
+def county_age_sex_pop(
+    year: int,
+    *,
+    vintage_year: VintageYear | None = ...,
+    as_pandas: Literal[False] = ...,
+) -> pl.LazyFrame: ...
+
+
+@overload
+def county_age_sex_pop(
+    year: int, *, vintage_year: VintageYear | None = ..., as_pandas: Literal[True]
+) -> pd.DataFrame: ...
+
+
+def county_age_sex_pop(
+    year: int, *, vintage_year: VintageYear | None = None, as_pandas: bool = False
+) -> pl.LazyFrame | pd.DataFrame:
+    """
+    County-age-sex population estimates for select years.
+
+    Age is given as binned groups (`age_grp`); rows for the `tot` group are excluded.
+    Uses county population by characteristics data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-counties-detail.html
+    The raw files are not present in the kintsugi-data repo because of their large size.
+    Instead, parquet files containing a subset of columns are used.
+
+    It's recommended to use the latest possible vintage to get a given year's data. However,
+    a specific vintage year may be provided if, for example, a certain set of county
+    geographies is needed. If `vintage_year` is `None` (the default), data for years in the
+    range [2010, 2019] are sourced from the 2020 vintage (2010-2020 data), while data for
+    years in the range [2020, 2024] are sourced from the 2024 vintage (2020-2024 data).
+
+    Source (2024 example): https://www2.census.gov/programs-surveys/popest/datasets/2020-2024/counties/asrh/cc-est2024-alldata.csv
+    """
+    if vintage_year is None:
+        if 2010 <= year <= 2019:
+            vintage_year = 2020
+        else:
+            vintage_year = 2024
+
+    # Use the same shared year/vintage check as the state-level functions.
+    validate_vintage_year(year, vintage_year)
+    data = get_dataset(f"pop/county_cc/county_pop_{vintage_year}.parquet")
+    lf = (
+        pl.scan_parquet(data)
+        .filter(
+            pl.col("year") == year,
+            pl.col("age_grp") != "tot",
+        )
+        .select(
+            "state_name",
+            "county_name",
+            "county_fips",
+            "year",
+            "age_grp",
+            "tot_male",
+            "tot_female",
+        )
+        # Wide -> long: `tot_male`/`tot_female` become rows keyed by `sex`.
+        .unpivot(
+            index=["state_name", "county_name", "county_fips", "year", "age_grp"],
+            variable_name="sex",
+            value_name="tot_pop",
+        )
+        # Strip the `tot_` prefix so values match the `sex_enum` categories.
+        .with_columns(sex=pl.col("sex").str.replace("tot_", "").cast(sex_enum))
+        .sort("county_fips", "age_grp", "sex")
+    )
+
+    if as_pandas:
+        return lf.collect().to_pandas()
+
+    return lf
diff --git a/tests/population_test.py b/tests/population_test.py
index 0ecaaa8..96ced8a 100644
--- a/tests/population_test.py
+++ b/tests/population_test.py
@@ -10,7 +10,9 @@
     county_pop,
     county_race_pop,
     county_sex_pop,
-    get_vintage,
+    hispanic_enum,
+    race_enum,
+    sex_enum,
     state_age_pop,
     state_age_sex_pop,
     state_pop,
@@ -22,7 +24,7 @@
 
 
 class StatePopulation(BasePolarsModel):
-    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    state_name: pl.String = pa.Field(unique=True)  # pyright: ignore [reportAny]
     state_fips: pl.String = pa.Field(unique=True, in_range=("01", "56"))  # pyright: ignore [reportAny]
     year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
     tot_pop: pl.Int64 = pa.Field(gt=0)  # pyright: ignore [reportAny]
@@ -30,6 +32,14 @@ class StatePopulation(BasePolarsModel):
     class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
         unique: list[str] = ["state_name", "state_fips", "year"]
 
+    @pa.check("year")
+    def all_identical(cls, data: PolarsData) -> pl.LazyFrame:
+        return data.lazyframe.select((pl.col(data.key).n_unique() == 1).all())
+
+    @pa.dataframe_check
+    def has_correct_height(cls, data: PolarsData) -> bool:
+        return data.lazyframe.select(pl.len()).collect().item() == 51  # pyright: ignore [reportAny]
+
 
 @pytest.mark.parametrize(
     ("year"),
@@ -74,15 +84,14 @@ class StateAgePopulation(BasePolarsModel):
     state_fips: pl.String = pa.Field(in_range=("01", "56"))  # pyright: ignore [reportAny]
     year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
     age: pl.Int64 = pa.Field(in_range=(0, 85))  # pyright: ignore [reportAny]
-    tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+    tot_pop: pl.Int64 = pa.Field(gt=0)  # pyright: ignore [reportAny]
 
     class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
-        unique: list[str] = [
-            "state_name",
-            "state_fips",
-            "year",
-            "age",
-        ]
+        unique: list[str] = ["state_name", "state_fips", "year", "age"]
+
+    @pa.check("year")
+    def all_identical(cls, data: PolarsData) -> pl.LazyFrame:
+        return data.lazyframe.select((pl.col(data.key).n_unique() == 1).all())
 
 
 @pytest.mark.parametrize(
@@ -127,16 +136,15 @@ class StateSexPopulation(BasePolarsModel):
     state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
     state_fips: pl.String = pa.Field(in_range=("01", "56"))  # pyright: ignore [reportAny]
     year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
-    sex: pl.Enum = pa.Field(dtype_kwargs={"categories": ["tot", "male", "female"]})  # pyright: ignore [reportAny]
+    sex: pl.Enum = pa.Field(dtype_kwargs={"categories": sex_enum.categories})  # pyright: ignore [reportAny]
     tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
 
     class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
-        unique: list[str] = [
-            "state_name",
-            "state_fips",
-            "year",
-            "sex",
-        ]
+        unique: list[str] = ["state_name", "state_fips", "year", "sex"]
+
+    @pa.check("year")
+    def all_identical(cls, data: PolarsData) -> pl.LazyFrame:
+        return data.lazyframe.select((pl.col(data.key).n_unique() == 1).all())
 
 
 @pytest.mark.parametrize(
@@ -182,7 +190,7 @@ class StateRacePopulation(BasePolarsModel):
     state_fips: pl.String = pa.Field(in_range=("01", "56"))  # pyright: ignore [reportAny]
     year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
     race: pl.Enum = pa.Field(  # pyright: ignore [reportAny]
-        dtype_kwargs={"categories": ["white", "black", "aian", "asian", "nhpi"]}
+        dtype_kwargs={"categories": race_enum.categories}
     )
     tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
 
@@ -194,27 +202,9 @@ class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
             "race",
         ]
 
-
-class StateRaceHispanicPopulation(BasePolarsModel):
-    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-    state_fips: pl.String = pa.Field(in_range=("01", "56"))  # pyright: ignore [reportAny]
-    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
-    race: pl.Enum = pa.Field(  # pyright: ignore [reportAny]
-        dtype_kwargs={"categories": ["white", "black", "aian", "asian", "nhpi"]}
-    )
-    hispanic_origin: pl.Enum = pa.Field(  # pyright: ignore [reportAny]
-        dtype_kwargs={"categories": ["tot", "not_hispanic", "hispanic"]}
-    )
-    tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
-
-    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
-        unique: list[str] = [
-            "state_name",
-            "state_fips",
-            "year",
-            "race",
-            "hispanic_origin",
-        ]
+    @pa.check("year")
+    def all_identical(cls, data: PolarsData) -> pl.LazyFrame:
+        return data.lazyframe.select((pl.col(data.key).n_unique() == 1).all())
 
 
 @pytest.mark.parametrize(
@@ -225,277 +215,60 @@ class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
     ("vintage_year"),
     range(2016, 2025),
 )
-def test_state_race_hispanic_pop(year: int, vintage_year: VintageYear) -> None:
+def test_state_race_pop(year: int, vintage_year: VintageYear) -> None:
     if vintage_year <= 2020:
         year_lb = 2010
     else:
         year_lb = 2020
 
     if year_lb <= year <= vintage_year:
-        state_race_pop(
-            year, vintage_year=vintage_year, incl_hispanic_orig=True
-        ).collect().pipe(StateRaceHispanicPopulation.validate, lazy=True)
+        state_race_pop(year, vintage_year=vintage_year).collect().pipe(
+            StateRacePopulation.validate, lazy=True
+        )
     else:
         with pytest.raises(ValueError, match="^Must choose a year between"):
-            state_race_pop(year, vintage_year=vintage_year, incl_hispanic_orig=True)
+            state_race_pop(year, vintage_year=vintage_year)
 
 
 @pytest.mark.parametrize(
     ("year"),
     range(2010, 2025),
 )
-def test_state_race_hispanic_pop_as_pandas(year: int) -> None:
-    df = state_race_pop(year, as_pandas=True, incl_hispanic_orig=True)
+def test_state_race_pop_as_pandas(year: int) -> None:
+    df = state_race_pop(year, as_pandas=True)
 
     assert isinstance(df, DataFrame)
 
 
 def test_state_race_pop_invalid_vintage_year_exception() -> None:
     with pytest.raises(ValueError, match="^Must choose a vintage year between"):
-        state_race_pop(2023, vintage_year=2000, incl_hispanic_orig=True)  # pyright: ignore [reportArgumentType]
+        state_race_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
 
 
-class StateAgeSexPopulation(BasePolarsModel):
+class StateRaceHispanicPopulation(BasePolarsModel):
     state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
     state_fips: pl.String = pa.Field(in_range=("01", "56"))  # pyright: ignore [reportAny]
     year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
-    age: pl.Int64 = pa.Field(in_range=(0, 85))  # pyright: ignore [reportAny]
-    sex: pl.Enum = pa.Field(dtype_kwargs={"categories": ["tot", "male", "female"]})  # pyright: ignore [reportAny]
-    tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
-
-    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
-        unique: list[str] = ["state_name", "state_fips", "year", "age", "sex"]
-
-
-@pytest.mark.parametrize(
-    ("year"),
-    range(2010, 2025),
-)
-@pytest.mark.parametrize(
-    ("vintage_year"),
-    range(2016, 2025),
-)
-def test_state_age_sex_pop(year: int, vintage_year: VintageYear) -> None:
-    if vintage_year <= 2020:
-        year_lb = 2010
-    else:
-        year_lb = 2020
-
-    if year_lb <= year <= vintage_year:
-        state_age_sex_pop(year, vintage_year=vintage_year).collect().pipe(
-            StateAgeSexPopulation.validate, lazy=True
-        )
-    else:
-        with pytest.raises(ValueError, match="^Must choose a year between"):
-            state_age_sex_pop(year, vintage_year=vintage_year)
-
-
-@pytest.mark.parametrize(
-    ("year"),
-    range(2010, 2025),
-)
-def test_state_age_sex_pop_as_pandas(year: int) -> None:
-    df = state_age_sex_pop(year, as_pandas=True)
-
-    assert isinstance(df, DataFrame)
-
-
-def test_state_age_sex_pop_invalid_vintage_year_exception() -> None:
-    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
-        state_age_sex_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
-
-
-age_grps = {
-    0: "tot",
-    1: "0-4",
-    2: "5-9",
-    3: "10-14",
-    4: "15-19",
-    5: "20-24",
-    6: "25-29",
-    7: "30-34",
-    8: "35-39",
-    9: "40-44",
-    10: "45-49",
-    11: "50-54",
-    12: "55-59",
-    13: "60-64",
-    14: "65-69",
-    15: "70-74",
-    16: "75-79",
-    17: "80-84",
-    18: ">=85",
-}
-age_grp_enum = pl.Enum(age_grps.values())
-
-
-class CountyPopulation(BasePolarsModel):
-    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-    county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-    county_fips: pl.String = pa.Field(unique=True)  # pyright: ignore [reportAny]
-    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
-    tot_pop: pl.Int64 = pa.Field(gt=0)  # pyright: ignore [reportAny]
-
-    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
-        unique: list[str] = ["state_name", "county_name", "county_fips", "year"]
-
-    @pa.dataframe_check
-    def has_correct_states(cls, data: PolarsData) -> bool:
-        return (
-            data.lazyframe.select(
-                pl.col("county_fips")
-                .str.slice(0, 2)
-                .is_between(pl.lit("01"), pl.lit("56"))
-                .all()
-            )
-            .collect()
-            .item()
-            is True
-        )
-
-
-@pytest.mark.parametrize(
-    ("year"),
-    range(2010, 2025),
-)
-@pytest.mark.parametrize(
-    ("vintage_year"),
-    range(2016, 2025),
-)
-def test_county_pop(year: int, vintage_year: VintageYear) -> None:
-    if vintage_year <= 2020:
-        year_lb = 2010
-    else:
-        year_lb = 2020
-
-    if year_lb <= year <= vintage_year:
-        county_pop(year, vintage_year=vintage_year).collect().pipe(
-            CountyPopulation.validate, lazy=True
-        )
-    else:
-        with pytest.raises(ValueError, match="^Must choose a year between"):
-            county_pop(year, vintage_year=vintage_year)
-
-
-def test_county_pop_invalid_vintage_year_exception() -> None:
-    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
-        county_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
-
-
-def test_get_vintage_info() -> None:
-    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
-        get_vintage(2000)  # pyright: ignore [reportArgumentType]
-
-
-@pytest.mark.parametrize(
-    ("year"),
-    range(2010, 2025),
-)
-def test_county_pop_as_pandas(year: int) -> None:
-    df = county_pop(year, as_pandas=True)
-
-    assert isinstance(df, DataFrame)
-
-
-class CountyAgePopulation(BasePolarsModel):
-    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-    county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-    county_fips: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
-    age_grp: pl.Enum = pa.Field(dtype_kwargs={"categories": age_grp_enum.categories})  # pyright: ignore [reportAny]
+    race: pl.Enum = pa.Field(  # pyright: ignore [reportAny]
+        dtype_kwargs={"categories": race_enum.categories}
+    )
+    hispanic_origin: pl.Enum = pa.Field(  # pyright: ignore [reportAny]
+        dtype_kwargs={"categories": hispanic_enum.categories}
+    )
     tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
 
     class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
         unique: list[str] = [
             "state_name",
-            "county_name",
-            "county_fips",
-            "year",
-            "age_grp",
-        ]
-
-    @pa.dataframe_check
-    def has_correct_states(cls, data: PolarsData) -> bool:
-        return (
-            data.lazyframe.select(
-                pl.col("county_fips")
-                .str.slice(0, 2)
-                .is_between(pl.lit("01"), pl.lit("56"))
-                .all()
-            )
-            .collect()
-            .item()
-            is True
-        )
-
-
-@pytest.mark.parametrize(
-    ("year"),
-    range(2010, 2025),
-)
-@pytest.mark.parametrize(
-    ("vintage_year"),
-    range(2016, 2025),
-)
-def test_county_age_pop(year: int, vintage_year: VintageYear) -> None:
-    if vintage_year <= 2020:
-        year_lb = 2010
-    else:
-        year_lb = 2020
-
-    if year_lb <= year <= vintage_year:
-        county_age_pop(year, vintage_year=vintage_year).collect().pipe(
-            CountyAgePopulation.validate, lazy=True
-        )
-    else:
-        with pytest.raises(ValueError, match="^Must choose a year between"):
-            county_age_pop(year, vintage_year=vintage_year)
-
-
-@pytest.mark.parametrize(
-    ("year"),
-    range(2010, 2025),
-)
-def test_county_age_pop_as_pandas(year: int) -> None:
-    df = county_age_pop(year, as_pandas=True)
-
-    assert isinstance(df, DataFrame)
-
-
-def test_county_age_pop_invalid_vintage_year_exception() -> None:
-    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
-        county_age_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
-
-
-class CountySexPopulation(BasePolarsModel):
-    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-    county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-    county_fips: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
-    tot_male: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
-    tot_female: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
-
-    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
-        unique: list[str] = [
-            "state_name",
-            "county_name",
-            "county_fips",
+            "state_fips",
             "year",
+            "race",
+            "hispanic_origin",
         ]
 
-    @pa.dataframe_check
-    def has_correct_states(cls, data: PolarsData) -> bool:
-        return (
-            data.lazyframe.select(
-                pl.col("county_fips")
-                .str.slice(0, 2)
-                .is_between(pl.lit("01"), pl.lit("56"))
-                .all()
-            )
-            .collect()
-            .item()
-            is True
-        )
+    @pa.check("year")
+    def all_identical(cls, data: PolarsData) -> pl.LazyFrame:
+        return data.lazyframe.select((pl.col(data.key).n_unique() == 1).all())
 
 
 @pytest.mark.parametrize(
@@ -506,103 +279,50 @@ def has_correct_states(cls, data: PolarsData) -> bool:
     ("vintage_year"),
     range(2016, 2025),
 )
-def test_county_sex_pop(year: int, vintage_year: VintageYear) -> None:
+def test_state_race_hispanic_pop(year: int, vintage_year: VintageYear) -> None:
     if vintage_year <= 2020:
         year_lb = 2010
     else:
         year_lb = 2020
 
     if year_lb <= year <= vintage_year:
-        county_sex_pop(year, vintage_year=vintage_year).collect().pipe(
-            CountySexPopulation.validate, lazy=True
-        )
+        state_race_pop(
+            year, vintage_year=vintage_year, incl_hispanic_orig=True
+        ).collect().pipe(StateRaceHispanicPopulation.validate, lazy=True)
     else:
         with pytest.raises(ValueError, match="^Must choose a year between"):
-            county_sex_pop(year, vintage_year=vintage_year)
+            state_race_pop(year, vintage_year=vintage_year, incl_hispanic_orig=True)
 
 
 @pytest.mark.parametrize(
     ("year"),
     range(2010, 2025),
 )
-def test_county_sex_pop_as_pandas(year: int) -> None:
-    df = county_sex_pop(year, as_pandas=True)
+def test_state_race_hispanic_pop_as_pandas(year: int) -> None:
+    df = state_race_pop(year, as_pandas=True, incl_hispanic_orig=True)
 
     assert isinstance(df, DataFrame)
 
 
-def test_county_sex_pop_invalid_vintage_year_exception() -> None:
+def test_state_race_hispanic_pop_invalid_vintage_year_exception() -> None:
     with pytest.raises(ValueError, match="^Must choose a vintage year between"):
-        county_sex_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
-
-
-class CountyRacePopulation(BasePolarsModel):
-    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-    county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-    county_fips: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
-    white: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
-    black: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
-    aian: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
-    asian: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
-    nhpi: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
-
-    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
-        unique: list[str] = [
-            "state_name",
-            "county_name",
-            "county_fips",
-            "year",
-        ]
-
-    @pa.dataframe_check
-    def has_correct_states(cls, data: PolarsData) -> bool:
-        return (
-            data.lazyframe.select(
-                pl.col("county_fips")
-                .str.slice(0, 2)
-                .is_between(pl.lit("01"), pl.lit("56"))
-                .all()
-            )
-            .collect()
-            .item()
-            is True
-        )
+        state_race_pop(2023, vintage_year=2000, incl_hispanic_orig=True)  # pyright: ignore [reportArgumentType]
 
 
-class CountyRaceHispanicPopulation(BasePolarsModel):
+class StateAgeSexPopulation(BasePolarsModel):
     state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-    county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-    county_fips: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    state_fips: pl.String = pa.Field(in_range=("01", "56"))  # pyright: ignore [reportAny]
     year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
-    white: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
-    black: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
-    aian: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
-    asian: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
-    nhpi: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
-    hispanic: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+    age: pl.Int64 = pa.Field(in_range=(0, 85))  # pyright: ignore [reportAny]
+    sex: pl.Enum = pa.Field(dtype_kwargs={"categories": sex_enum.categories})  # pyright: ignore [reportAny]
+    tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
 
     class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
-        unique: list[str] = [
-            "state_name",
-            "county_name",
-            "county_fips",
-            "year",
-        ]
+        unique: list[str] = ["state_name", "state_fips", "year", "age", "sex"]
 
-    @pa.dataframe_check
-    def has_correct_states(cls, data: PolarsData) -> bool:
-        return (
-            data.lazyframe.select(
-                pl.col("county_fips")
-                .str.slice(0, 2)
-                .is_between(pl.lit("01"), pl.lit("56"))
-                .all()
-            )
-            .collect()
-            .item()
-            is True
-        )
+    @pa.check("year")
+    def all_identical(cls, data: PolarsData) -> pl.LazyFrame:
+        return data.lazyframe.select((pl.col(data.key).n_unique() == 1).all())
 
 
 @pytest.mark.parametrize(
@@ -613,69 +333,407 @@ def has_correct_states(cls, data: PolarsData) -> bool:
     ("vintage_year"),
     range(2016, 2025),
 )
-def test_county_race_pop(year: int, vintage_year: VintageYear) -> None:
+def test_state_age_sex_pop(year: int, vintage_year: VintageYear) -> None:
     if vintage_year <= 2020:
         year_lb = 2010
     else:
         year_lb = 2020
 
     if year_lb <= year <= vintage_year:
-        county_race_pop(year, vintage_year=vintage_year).collect().pipe(
-            CountyRacePopulation.validate, lazy=True
+        state_age_sex_pop(year, vintage_year=vintage_year).collect().pipe(
+            StateAgeSexPopulation.validate, lazy=True
         )
     else:
         with pytest.raises(ValueError, match="^Must choose a year between"):
-            county_race_pop(year, vintage_year=vintage_year)
+            state_age_sex_pop(year, vintage_year=vintage_year)
 
 
 @pytest.mark.parametrize(
     ("year"),
     range(2010, 2025),
 )
-def test_county_race_pop_as_pandas(year: int) -> None:
-    df = county_race_pop(year, as_pandas=True)
+def test_state_age_sex_pop_as_pandas(year: int) -> None:
+    df = state_age_sex_pop(year, as_pandas=True)
 
     assert isinstance(df, DataFrame)
 
 
-def test_county_race_pop_invalid_vintage_year_exception() -> None:
+def test_state_age_sex_pop_invalid_vintage_year_exception() -> None:
     with pytest.raises(ValueError, match="^Must choose a vintage year between"):
-        county_race_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
-
-
-@pytest.mark.parametrize(
-    ("year"),
-    range(2010, 2025),
-)
-@pytest.mark.parametrize(
-    ("vintage_year"),
-    range(2016, 2025),
-)
-def test_county_race_hispanic_pop(year: int, vintage_year: VintageYear) -> None:
-    if vintage_year <= 2020:
-        year_lb = 2010
-    else:
-        year_lb = 2020
-
-    if year_lb <= year <= vintage_year:
-        county_race_pop(
-            year, vintage_year=vintage_year, incl_hispanic_orig=True
-        ).collect().pipe(CountyRaceHispanicPopulation.validate, lazy=True)
-    else:
-        with pytest.raises(ValueError, match="^Must choose a year between"):
-            county_race_pop(year, vintage_year=vintage_year, incl_hispanic_orig=True)
-
-
-@pytest.mark.parametrize(
-    ("year"),
-    range(2010, 2025),
-)
-def test_county_race_hispanic_pop_as_pandas(year: int) -> None:
-    df = county_race_pop(year, as_pandas=True, incl_hispanic_orig=True)
-
-    assert isinstance(df, DataFrame)
+        state_age_sex_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
 
 
-def test_county_race_hispanic_pop_invalid_vintage_year_exception() -> None:
-    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
-        county_race_pop(2023, vintage_year=2000, incl_hispanic_orig=True)  # pyright: ignore [reportArgumentType]
+# age_grps = {
+#     0: "tot",
+#     1: "0-4",
+#     2: "5-9",
+#     3: "10-14",
+#     4: "15-19",
+#     5: "20-24",
+#     6: "25-29",
+#     7: "30-34",
+#     8: "35-39",
+#     9: "40-44",
+#     10: "45-49",
+#     11: "50-54",
+#     12: "55-59",
+#     13: "60-64",
+#     14: "65-69",
+#     15: "70-74",
+#     16: "75-79",
+#     17: "80-84",
+#     18: ">=85",
+# }
+# age_grp_enum = pl.Enum(age_grps.values())
+#
+#
+# class CountyPopulation(BasePolarsModel):
+#     state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+#     county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+#     county_fips: pl.String = pa.Field(unique=True)  # pyright: ignore [reportAny]
+#     year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+#     tot_pop: pl.Int64 = pa.Field(gt=0)  # pyright: ignore [reportAny]
+#
+#     class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+#         unique: list[str] = ["state_name", "county_name", "county_fips", "year"]
+#
+#     @pa.dataframe_check
+#     def has_correct_states(cls, data: PolarsData) -> bool:
+#         return (
+#             data.lazyframe.select(
+#                 pl.col("county_fips")
+#                 .str.slice(0, 2)
+#                 .is_between(pl.lit("01"), pl.lit("56"))
+#                 .all()
+#             )
+#             .collect()
+#             .item()
+#             is True
+#         )
+#
+#
+# @pytest.mark.parametrize(
+#     ("year"),
+#     range(2010, 2025),
+# )
+# @pytest.mark.parametrize(
+#     ("vintage_year"),
+#     range(2016, 2025),
+# )
+# def test_county_pop(year: int, vintage_year: VintageYear) -> None:
+#     if vintage_year <= 2020:
+#         year_lb = 2010
+#     else:
+#         year_lb = 2020
+#
+#     if year_lb <= year <= vintage_year:
+#         county_pop(year, vintage_year=vintage_year).collect().pipe(
+#             CountyPopulation.validate, lazy=True
+#         )
+#     else:
+#         with pytest.raises(ValueError, match="^Must choose a year between"):
+#             county_pop(year, vintage_year=vintage_year)
+#
+#
+# def test_county_pop_invalid_vintage_year_exception() -> None:
+#     with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+#         county_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
+#
+#
+# # def test_get_vintage_info() -> None:
+# #     with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+# #         get_vintage(2000)  # pyright: ignore [reportArgumentType]
+#
+#
+# @pytest.mark.parametrize(
+#     ("year"),
+#     range(2010, 2025),
+# )
+# def test_county_pop_as_pandas(year: int) -> None:
+#     df = county_pop(year, as_pandas=True)
+#
+#     assert isinstance(df, DataFrame)
+#
+#
+# class CountyAgePopulation(BasePolarsModel):
+#     state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+#     county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+#     county_fips: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+#     year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+#     age_grp: pl.Enum = pa.Field(dtype_kwargs={"categories": age_grp_enum.categories})  # pyright: ignore [reportAny]
+#     tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+#
+#     class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+#         unique: list[str] = [
+#             "state_name",
+#             "county_name",
+#             "county_fips",
+#             "year",
+#             "age_grp",
+#         ]
+#
+#     @pa.dataframe_check
+#     def has_correct_states(cls, data: PolarsData) -> bool:
+#         return (
+#             data.lazyframe.select(
+#                 pl.col("county_fips")
+#                 .str.slice(0, 2)
+#                 .is_between(pl.lit("01"), pl.lit("56"))
+#                 .all()
+#             )
+#             .collect()
+#             .item()
+#             is True
+#         )
+#
+#
+# @pytest.mark.parametrize(
+#     ("year"),
+#     range(2010, 2025),
+# )
+# @pytest.mark.parametrize(
+#     ("vintage_year"),
+#     range(2016, 2025),
+# )
+# def test_county_age_pop(year: int, vintage_year: VintageYear) -> None:
+#     if vintage_year <= 2020:
+#         year_lb = 2010
+#     else:
+#         year_lb = 2020
+#
+#     if year_lb <= year <= vintage_year:
+#         county_age_pop(year, vintage_year=vintage_year).collect().pipe(
+#             CountyAgePopulation.validate, lazy=True
+#         )
+#     else:
+#         with pytest.raises(ValueError, match="^Must choose a year between"):
+#             county_age_pop(year, vintage_year=vintage_year)
+#
+#
+# @pytest.mark.parametrize(
+#     ("year"),
+#     range(2010, 2025),
+# )
+# def test_county_age_pop_as_pandas(year: int) -> None:
+#     df = county_age_pop(year, as_pandas=True)
+#
+#     assert isinstance(df, DataFrame)
+#
+#
+# def test_county_age_pop_invalid_vintage_year_exception() -> None:
+#     with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+#         county_age_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
+#
+#
+# class CountySexPopulation(BasePolarsModel):
+#     state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+#     county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+#     county_fips: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+#     year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+#     tot_male: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+#     tot_female: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+#
+#     class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+#         unique: list[str] = [
+#             "state_name",
+#             "county_name",
+#             "county_fips",
+#             "year",
+#         ]
+#
+#     @pa.dataframe_check
+#     def has_correct_states(cls, data: PolarsData) -> bool:
+#         return (
+#             data.lazyframe.select(
+#                 pl.col("county_fips")
+#                 .str.slice(0, 2)
+#                 .is_between(pl.lit("01"), pl.lit("56"))
+#                 .all()
+#             )
+#             .collect()
+#             .item()
+#             is True
+#         )
+#
+#
+# @pytest.mark.parametrize(
+#     ("year"),
+#     range(2010, 2025),
+# )
+# @pytest.mark.parametrize(
+#     ("vintage_year"),
+#     range(2016, 2025),
+# )
+# def test_county_sex_pop(year: int, vintage_year: VintageYear) -> None:
+#     if vintage_year <= 2020:
+#         year_lb = 2010
+#     else:
+#         year_lb = 2020
+#
+#     if year_lb <= year <= vintage_year:
+#         county_sex_pop(year, vintage_year=vintage_year).collect().pipe(
+#             CountySexPopulation.validate, lazy=True
+#         )
+#     else:
+#         with pytest.raises(ValueError, match="^Must choose a year between"):
+#             county_sex_pop(year, vintage_year=vintage_year)
+#
+#
+# @pytest.mark.parametrize(
+#     ("year"),
+#     range(2010, 2025),
+# )
+# def test_county_sex_pop_as_pandas(year: int) -> None:
+#     df = county_sex_pop(year, as_pandas=True)
+#
+#     assert isinstance(df, DataFrame)
+#
+#
+# def test_county_sex_pop_invalid_vintage_year_exception() -> None:
+#     with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+#         county_sex_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
+#
+#
+# class CountyRacePopulation(BasePolarsModel):
+#     state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+#     county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+#     county_fips: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+#     year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+#     white: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+#     black: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+#     aian: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+#     asian: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+#     nhpi: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+#
+#     class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+#         unique: list[str] = [
+#             "state_name",
+#             "county_name",
+#             "county_fips",
+#             "year",
+#         ]
+#
+#     @pa.dataframe_check
+#     def has_correct_states(cls, data: PolarsData) -> bool:
+#         return (
+#             data.lazyframe.select(
+#                 pl.col("county_fips")
+#                 .str.slice(0, 2)
+#                 .is_between(pl.lit("01"), pl.lit("56"))
+#                 .all()
+#             )
+#             .collect()
+#             .item()
+#             is True
+#         )
+#
+#
+# class CountyRaceHispanicPopulation(BasePolarsModel):
+#     state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+#     county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+#     county_fips: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+#     year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+#     white: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+#     black: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+#     aian: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+#     asian: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+#     nhpi: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+#     hispanic: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+#
+#     class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+#         unique: list[str] = [
+#             "state_name",
+#             "county_name",
+#             "county_fips",
+#             "year",
+#         ]
+#
+#     @pa.dataframe_check
+#     def has_correct_states(cls, data: PolarsData) -> bool:
+#         return (
+#             data.lazyframe.select(
+#                 pl.col("county_fips")
+#                 .str.slice(0, 2)
+#                 .is_between(pl.lit("01"), pl.lit("56"))
+#                 .all()
+#             )
+#             .collect()
+#             .item()
+#             is True
+#         )
+#
+#
+# @pytest.mark.parametrize(
+#     ("year"),
+#     range(2010, 2025),
+# )
+# @pytest.mark.parametrize(
+#     ("vintage_year"),
+#     range(2016, 2025),
+# )
+# def test_county_race_pop(year: int, vintage_year: VintageYear) -> None:
+#     if vintage_year <= 2020:
+#         year_lb = 2010
+#     else:
+#         year_lb = 2020
+#
+#     if year_lb <= year <= vintage_year:
+#         county_race_pop(year, vintage_year=vintage_year).collect().pipe(
+#             CountyRacePopulation.validate, lazy=True
+#         )
+#     else:
+#         with pytest.raises(ValueError, match="^Must choose a year between"):
+#             county_race_pop(year, vintage_year=vintage_year)
+#
+#
+# @pytest.mark.parametrize(
+#     ("year"),
+#     range(2010, 2025),
+# )
+# def test_county_race_pop_as_pandas(year: int) -> None:
+#     df = county_race_pop(year, as_pandas=True)
+#
+#     assert isinstance(df, DataFrame)
+#
+#
+# def test_county_race_pop_invalid_vintage_year_exception() -> None:
+#     with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+#         county_race_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
+#
+#
+# @pytest.mark.parametrize(
+#     ("year"),
+#     range(2010, 2025),
+# )
+# @pytest.mark.parametrize(
+#     ("vintage_year"),
+#     range(2016, 2025),
+# )
+# def test_county_race_hispanic_pop(year: int, vintage_year: VintageYear) -> None:
+#     if vintage_year <= 2020:
+#         year_lb = 2010
+#     else:
+#         year_lb = 2020
+#
+#     if year_lb <= year <= vintage_year:
+#         county_race_pop(
+#             year, vintage_year=vintage_year, incl_hispanic_orig=True
+#         ).collect().pipe(CountyRaceHispanicPopulation.validate, lazy=True)
+#     else:
+#         with pytest.raises(ValueError, match="^Must choose a year between"):
+#             county_race_pop(year, vintage_year=vintage_year, incl_hispanic_orig=True)
+#
+#
+# @pytest.mark.parametrize(
+#     ("year"),
+#     range(2010, 2025),
+# )
+# def test_county_race_hispanic_pop_as_pandas(year: int) -> None:
+#     df = county_race_pop(year, as_pandas=True, incl_hispanic_orig=True)
+#
+#     assert isinstance(df, DataFrame)
+#
+#
+# def test_county_race_hispanic_pop_invalid_vintage_year_exception() -> None:
+#     with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+#         county_race_pop(2023, vintage_year=2000, incl_hispanic_orig=True)  # pyright: ignore [reportArgumentType]

From 16f7a6711eb67e468f0accaa8c4072397add81d2 Mon Sep 17 00:00:00 2001
From: winter-again <63322884+winter-again@users.noreply.github.com>
Date: Wed, 11 Mar 2026 18:03:47 -0400
Subject: [PATCH 3/4] Finalize code and tests for county population functions

---
 src/kintsugi/population.py | 132 +++---
 tests/population_test.py   | 813 ++++++++++++++++++++-----------------
 2 files changed, 502 insertions(+), 443 deletions(-)

diff --git a/src/kintsugi/population.py b/src/kintsugi/population.py
index a98a610..ff5ddef 100644
--- a/src/kintsugi/population.py
+++ b/src/kintsugi/population.py
@@ -72,7 +72,10 @@ def validate_vintage_year(year: int, vintage_year: VintageYear) -> None:
 
 # match conventions in kintsugi-data processing script
 sex_enum = pl.Enum(["tot", "male", "female"])
-race_enum = pl.Enum(["white", "black", "aian", "asian", "nhpi"])
+race_enum_no_hispanic = pl.Enum(["white", "black", "aian", "asian", "nhpi"])
+race_enum_incl_hispanic = pl.Enum(
+    ["white", "black", "aian", "asian", "nhpi", "hispanic"]
+)
 hispanic_enum = pl.Enum(["tot", "not_hispanic", "hispanic"])
 
 
@@ -275,7 +278,7 @@ def state_race_pop(
     as_pandas: bool = False,
 ) -> pl.LazyFrame | pd.DataFrame:
     """
-    State-race population estimates for select years. Specify `incl_hispanic=True` to include
+    State-race population estimates for select years. Specify `incl_hispanic_orig=True` to include
     Hispanic counts column. Uses state population by characteristics
     data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-state-detail.html
     The raw files are not present in the kintsugi-data repo. Instead, we use parquet files containing a subset of columns.
@@ -381,6 +384,30 @@ def state_age_sex_pop(
     return lf
 
 
+age_grps = [
+    "tot",
+    "0-4",
+    "5-9",
+    "10-14",
+    "15-19",
+    "20-24",
+    "25-29",
+    "30-34",
+    "35-39",
+    "40-44",
+    "45-49",
+    "50-54",
+    "55-59",
+    "60-64",
+    "65-69",
+    "70-74",
+    "75-79",
+    "80-84",
+    ">=85",
+]
+age_grp_enum = pl.Enum(age_grps)
+
+
 @overload
 def county_pop(
     year: int,
@@ -403,13 +430,12 @@ def county_pop(
     County population estimates for select years. Uses county population
     by characteristics data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-counties-detail.html
     The raw files are not present in the kintsugi-data repo because of their large size.
-    Instead, we use parquet files containing a subset of columns.
+    Instead, parquet files containing a subset of columns are used.
 
     It's recommended to use the latest possible vintage to get a given year's data. However,
-    you may specify a specific vintage year if, for example, you need a certain set of county
-    geographies. If `vintage_year` is `None` (by default), data for years in the range [2010, 2019]
-    are sourced from the 2020 vintage (2010-2020 data), while data for years in the range
-    [2020, 2024] are sourced from the 2024 vintage (2020-2024 data).
+    a specific vintage year may be provided. If `vintage_year` is `None` (the default), data
+    for years in the range [2010, 2019] are sourced from the 2020 vintage (2010-2020 data),
+    while data for years in the range [2020, 2024] are sourced from the 2024 vintage (2020-2024 data).
 
     Source (2024 example): https://www2.census.gov/programs-surveys/popest/datasets/2020-2024/counties/asrh/cc-est2024-alldata.csv
     """
@@ -419,12 +445,7 @@ def county_pop(
         else:
             vintage_year = 2024
 
-    vintage = get_vintage(vintage_year)
-    if not (vintage.year_lb <= year <= vintage.year_ub):
-        raise ValueError(
-            f"Must choose a year between {vintage.year_lb} and {vintage.year_ub}"
-        )
-
+    validate_vintage_year(year, vintage_year)
     data = get_dataset(f"pop/county_cc/county_pop_{vintage_year}.parquet")
     lf = (
         pl.scan_parquet(data)
@@ -464,13 +485,12 @@ def county_age_pop(
     County-age population estimates for select years. Uses county population
     by characteristics data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-counties-detail.html
     The raw files are not present in the kintsugi-data repo because of their large size.
-    Instead, we use parquet files containing a subset of columns.
+    Instead, parquet files containing a subset of columns are used.
 
     It's recommended to use the latest possible vintage to get a given year's data. However,
-    you may specify a specific vintage year if, for example, you need a certain set of county
-    geographies. If `vintage_year` is `None` (by default), data for years in the range [2010, 2019]
-    are sourced from the 2020 vintage (2010-2020 data), while data for years in the range
-    [2020, 2024] are sourced from the 2024 vintage (2020-2024 data).
+    a specific vintage year may be provided. If `vintage_year` is `None` (the default), data
+    for years in the range [2010, 2019] are sourced from the 2020 vintage (2010-2020 data),
+    while data for years in the range [2020, 2024] are sourced from the 2024 vintage (2020-2024 data).
 
     Source (2024 example): https://www2.census.gov/programs-surveys/popest/datasets/2020-2024/counties/asrh/cc-est2024-alldata.csv
     """
@@ -480,12 +500,7 @@ def county_age_pop(
         else:
             vintage_year = 2024
 
-    vintage = get_vintage(vintage_year)
-    if not (vintage.year_lb <= year <= vintage.year_ub):
-        raise ValueError(
-            f"Must choose a year between {vintage.year_lb} and {vintage.year_ub}"
-        )
-
+    validate_vintage_year(year, vintage_year)
     data = get_dataset(f"pop/county_cc/county_pop_{vintage_year}.parquet")
     lf = (
         pl.scan_parquet(data)
@@ -527,13 +542,12 @@ def county_sex_pop(
     County-sex population estimates for select years. Uses county population
     by characteristics data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-counties-detail.html
     The raw files are not present in the kintsugi-data repo because of their large size.
-    Instead, we use parquet files containing a subset of columns.
+    Instead, parquet files containing a subset of columns are used.
 
     It's recommended to use the latest possible vintage to get a given year's data. However,
-    you may specify a specific vintage year if, for example, you need a certain set of county
-    geographies. If `vintage_year` is `None` (by default), data for years in the range [2010, 2019]
-    are sourced from the 2020 vintage (2010-2020 data), while data for years in the range
-    [2020, 2024] are sourced from the 2024 vintage (2020-2024 data).
+    a specific vintage year may be provided. If `vintage_year` is `None` (the default), data
+    for years in the range [2010, 2019] are sourced from the 2020 vintage (2010-2020 data),
+    while data for years in the range [2020, 2024] are sourced from the 2024 vintage (2020-2024 data).
 
     Source (2024 example): https://www2.census.gov/programs-surveys/popest/datasets/2020-2024/counties/asrh/cc-est2024-alldata.csv
     """
@@ -543,12 +557,7 @@ def county_sex_pop(
         else:
             vintage_year = 2024
 
-    vintage = get_vintage(vintage_year)
-    if not (vintage.year_lb <= year <= vintage.year_ub):
-        raise ValueError(
-            f"Must choose a year between {vintage.year_lb} and {vintage.year_ub}"
-        )
-
+    validate_vintage_year(year, vintage_year)
     data = get_dataset(f"pop/county_cc/county_pop_{vintage_year}.parquet")
     lf = (
         pl.scan_parquet(data)
@@ -559,7 +568,13 @@ def county_sex_pop(
         .select(
             "state_name", "county_name", "county_fips", "year", "tot_male", "tot_female"
         )
-        .sort("county_fips")
+        .unpivot(
+            index=["state_name", "county_name", "county_fips", "year"],
+            variable_name="sex",
+            value_name="tot_pop",
+        )
+        .with_columns(sex=pl.col("sex").str.replace("tot_", "").cast(sex_enum))
+        .sort("county_fips", "sex")
     )
 
     if as_pandas:
@@ -596,17 +611,16 @@ def county_race_pop(
     as_pandas: bool = False,
 ) -> pl.LazyFrame | pd.DataFrame:
     """
-    County-race population estimates for select years. Specify `incl_hispanic=True` to include
-    Hispanic counts column. Uses county population by characteristics
+    County-race population estimates for select years, in long format (`race`, `tot_pop`).
+    Specify `incl_hispanic_orig=True` to add `hispanic` rows. Uses county population by characteristics
     data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-counties-detail.html
     The raw files are not present in the kintsugi-data repo because of their large size.
-    Instead, we use parquet files containing a subset of columns.
+    Instead, parquet files containing a subset of columns are used.
 
     It's recommended to use the latest possible vintage to get a given year's data. However,
-    you may specify a specific vintage year if, for example, you need a certain set of county
-    geographies. If `vintage_year` is `None` (by default), data for years in the range [2010, 2019]
-    are sourced from the 2020 vintage (2010-2020 data), while data for years in the range
-    [2020, 2024] are sourced from the 2024 vintage (2020-2024 data).
+    a specific vintage year may be provided. If `vintage_year` is `None` (the default), data
+    for years in the range [2010, 2019] are sourced from the 2020 vintage (2010-2020 data),
+    while data for years in the range [2020, 2024] are sourced from the 2024 vintage (2020-2024 data).
 
     Source (2024 example): https://www2.census.gov/programs-surveys/popest/datasets/2020-2024/counties/asrh/cc-est2024-alldata.csv
     """
@@ -616,12 +630,7 @@ def county_race_pop(
         else:
             vintage_year = 2024
 
-    vintage = get_vintage(vintage_year)
-    if not (vintage.year_lb <= year <= vintage.year_ub):
-        raise ValueError(
-            f"Must choose a year between {vintage.year_lb} and {vintage.year_ub}"
-        )
-
+    validate_vintage_year(year, vintage_year)
     data = get_dataset(f"pop/county_cc/county_pop_{vintage_year}.parquet")
     lf = (
         pl.scan_parquet(data)
@@ -663,12 +672,21 @@ def county_race_pop(
             "nhpi",
             "hispanic",
         )
-        .sort("county_fips")
     )
 
     if not incl_hispanic_orig:
         lf = lf.drop("hispanic")
 
+    lf = (
+        lf.unpivot(
+            index=["state_name", "county_name", "county_fips", "year"],
+            variable_name="race",
+            value_name="tot_pop",
+        )
+        .cast({"race": race_enum_incl_hispanic})
+        .sort("county_fips", "race")
+    )
+
     if as_pandas:
         return lf.collect().to_pandas()
 
@@ -697,13 +715,12 @@ def county_age_sex_pop(
     County-age-sex population estimates for select years. Uses county population
     by characteristics data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-counties-detail.html
     The raw files are not present in the kintsugi-data repo because of their large size.
-    Instead, we use parquet files containing a subset of columns.
+    Instead, parquet files containing a subset of columns are used.
 
     It's recommended to use the latest possible vintage to get a given year's data. However,
-    you may specify a specific vintage year if, for example, you need a certain set of county
-    geographies. If `vintage_year` is `None` (by default), data for years in the range [2010, 2019]
-    are sourced from the 2020 vintage (2010-2020 data), while data for years in the range
-    [2020, 2024] are sourced from the 2024 vintage (2020-2024 data).
+    a specific vintage year may be provided. If `vintage_year` is `None` (the default), data
+    for years in the range [2010, 2019] are sourced from the 2020 vintage (2010-2020 data),
+    while data for years in the range [2020, 2024] are sourced from the 2024 vintage (2020-2024 data).
 
     Source (2024 example): https://www2.census.gov/programs-surveys/popest/datasets/2020-2024/counties/asrh/cc-est2024-alldata.csv
     """
@@ -713,12 +730,7 @@ def county_age_sex_pop(
         else:
             vintage_year = 2024
 
-    vintage = get_vintage(vintage_year)
-    if not (vintage.year_lb <= year <= vintage.year_ub):
-        raise ValueError(
-            f"Must choose a year between {vintage.year_lb} and {vintage.year_ub}"
-        )
-
+    validate_vintage_year(year, vintage_year)
     data = get_dataset(f"pop/county_cc/county_pop_{vintage_year}.parquet")
     lf = (
         pl.scan_parquet(data)
diff --git a/tests/population_test.py b/tests/population_test.py
index 96ced8a..aef3185 100644
--- a/tests/population_test.py
+++ b/tests/population_test.py
@@ -6,12 +6,15 @@
 
 from kintsugi.population import (
     VintageYear,
+    age_grp_enum,
     county_age_pop,
+    county_age_sex_pop,
     county_pop,
     county_race_pop,
     county_sex_pop,
     hispanic_enum,
-    race_enum,
+    race_enum_incl_hispanic,
+    race_enum_no_hispanic,
     sex_enum,
     state_age_pop,
     state_age_sex_pop,
@@ -190,17 +193,12 @@ class StateRacePopulation(BasePolarsModel):
     state_fips: pl.String = pa.Field(in_range=("01", "56"))  # pyright: ignore [reportAny]
     year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
     race: pl.Enum = pa.Field(  # pyright: ignore [reportAny]
-        dtype_kwargs={"categories": race_enum.categories}
+        dtype_kwargs={"categories": race_enum_no_hispanic.categories}
     )
     tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
 
     class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
-        unique: list[str] = [
-            "state_name",
-            "state_fips",
-            "year",
-            "race",
-        ]
+        unique: list[str] = ["state_name", "state_fips", "year", "race"]
 
     @pa.check("year")
     def all_identical(cls, data: PolarsData) -> pl.LazyFrame:
@@ -250,7 +248,7 @@ class StateRaceHispanicPopulation(BasePolarsModel):
     state_fips: pl.String = pa.Field(in_range=("01", "56"))  # pyright: ignore [reportAny]
     year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
     race: pl.Enum = pa.Field(  # pyright: ignore [reportAny]
-        dtype_kwargs={"categories": race_enum.categories}
+        dtype_kwargs={"categories": race_enum_no_hispanic.categories}
     )
     hispanic_origin: pl.Enum = pa.Field(  # pyright: ignore [reportAny]
         dtype_kwargs={"categories": hispanic_enum.categories}
@@ -363,377 +361,426 @@ def test_state_age_sex_pop_invalid_vintage_year_exception() -> None:
         state_age_sex_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
 
 
-# age_grps = {
-#     0: "tot",
-#     1: "0-4",
-#     2: "5-9",
-#     3: "10-14",
-#     4: "15-19",
-#     5: "20-24",
-#     6: "25-29",
-#     7: "30-34",
-#     8: "35-39",
-#     9: "40-44",
-#     10: "45-49",
-#     11: "50-54",
-#     12: "55-59",
-#     13: "60-64",
-#     14: "65-69",
-#     15: "70-74",
-#     16: "75-79",
-#     17: "80-84",
-#     18: ">=85",
-# }
-# age_grp_enum = pl.Enum(age_grps.values())
-#
-#
-# class CountyPopulation(BasePolarsModel):
-#     state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-#     county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-#     county_fips: pl.String = pa.Field(unique=True)  # pyright: ignore [reportAny]
-#     year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
-#     tot_pop: pl.Int64 = pa.Field(gt=0)  # pyright: ignore [reportAny]
-#
-#     class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
-#         unique: list[str] = ["state_name", "county_name", "county_fips", "year"]
-#
-#     @pa.dataframe_check
-#     def has_correct_states(cls, data: PolarsData) -> bool:
-#         return (
-#             data.lazyframe.select(
-#                 pl.col("county_fips")
-#                 .str.slice(0, 2)
-#                 .is_between(pl.lit("01"), pl.lit("56"))
-#                 .all()
-#             )
-#             .collect()
-#             .item()
-#             is True
-#         )
-#
-#
-# @pytest.mark.parametrize(
-#     ("year"),
-#     range(2010, 2025),
-# )
-# @pytest.mark.parametrize(
-#     ("vintage_year"),
-#     range(2016, 2025),
-# )
-# def test_county_pop(year: int, vintage_year: VintageYear) -> None:
-#     if vintage_year <= 2020:
-#         year_lb = 2010
-#     else:
-#         year_lb = 2020
-#
-#     if year_lb <= year <= vintage_year:
-#         county_pop(year, vintage_year=vintage_year).collect().pipe(
-#             CountyPopulation.validate, lazy=True
-#         )
-#     else:
-#         with pytest.raises(ValueError, match="^Must choose a year between"):
-#             county_pop(year, vintage_year=vintage_year)
-#
-#
-# def test_county_pop_invalid_vintage_year_exception() -> None:
-#     with pytest.raises(ValueError, match="^Must choose a vintage year between"):
-#         county_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
-#
-#
-# # def test_get_vintage_info() -> None:
-# #     with pytest.raises(ValueError, match="^Must choose a vintage year between"):
-# #         get_vintage(2000)  # pyright: ignore [reportArgumentType]
-#
-#
-# @pytest.mark.parametrize(
-#     ("year"),
-#     range(2010, 2025),
-# )
-# def test_county_pop_as_pandas(year: int) -> None:
-#     df = county_pop(year, as_pandas=True)
-#
-#     assert isinstance(df, DataFrame)
-#
-#
-# class CountyAgePopulation(BasePolarsModel):
-#     state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-#     county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-#     county_fips: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-#     year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
-#     age_grp: pl.Enum = pa.Field(dtype_kwargs={"categories": age_grp_enum.categories})  # pyright: ignore [reportAny]
-#     tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
-#
-#     class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
-#         unique: list[str] = [
-#             "state_name",
-#             "county_name",
-#             "county_fips",
-#             "year",
-#             "age_grp",
-#         ]
-#
-#     @pa.dataframe_check
-#     def has_correct_states(cls, data: PolarsData) -> bool:
-#         return (
-#             data.lazyframe.select(
-#                 pl.col("county_fips")
-#                 .str.slice(0, 2)
-#                 .is_between(pl.lit("01"), pl.lit("56"))
-#                 .all()
-#             )
-#             .collect()
-#             .item()
-#             is True
-#         )
-#
-#
-# @pytest.mark.parametrize(
-#     ("year"),
-#     range(2010, 2025),
-# )
-# @pytest.mark.parametrize(
-#     ("vintage_year"),
-#     range(2016, 2025),
-# )
-# def test_county_age_pop(year: int, vintage_year: VintageYear) -> None:
-#     if vintage_year <= 2020:
-#         year_lb = 2010
-#     else:
-#         year_lb = 2020
-#
-#     if year_lb <= year <= vintage_year:
-#         county_age_pop(year, vintage_year=vintage_year).collect().pipe(
-#             CountyAgePopulation.validate, lazy=True
-#         )
-#     else:
-#         with pytest.raises(ValueError, match="^Must choose a year between"):
-#             county_age_pop(year, vintage_year=vintage_year)
-#
-#
-# @pytest.mark.parametrize(
-#     ("year"),
-#     range(2010, 2025),
-# )
-# def test_county_age_pop_as_pandas(year: int) -> None:
-#     df = county_age_pop(year, as_pandas=True)
-#
-#     assert isinstance(df, DataFrame)
-#
-#
-# def test_county_age_pop_invalid_vintage_year_exception() -> None:
-#     with pytest.raises(ValueError, match="^Must choose a vintage year between"):
-#         county_age_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
-#
-#
-# class CountySexPopulation(BasePolarsModel):
-#     state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-#     county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-#     county_fips: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-#     year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
-#     tot_male: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
-#     tot_female: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
-#
-#     class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
-#         unique: list[str] = [
-#             "state_name",
-#             "county_name",
-#             "county_fips",
-#             "year",
-#         ]
-#
-#     @pa.dataframe_check
-#     def has_correct_states(cls, data: PolarsData) -> bool:
-#         return (
-#             data.lazyframe.select(
-#                 pl.col("county_fips")
-#                 .str.slice(0, 2)
-#                 .is_between(pl.lit("01"), pl.lit("56"))
-#                 .all()
-#             )
-#             .collect()
-#             .item()
-#             is True
-#         )
-#
-#
-# @pytest.mark.parametrize(
-#     ("year"),
-#     range(2010, 2025),
-# )
-# @pytest.mark.parametrize(
-#     ("vintage_year"),
-#     range(2016, 2025),
-# )
-# def test_county_sex_pop(year: int, vintage_year: VintageYear) -> None:
-#     if vintage_year <= 2020:
-#         year_lb = 2010
-#     else:
-#         year_lb = 2020
-#
-#     if year_lb <= year <= vintage_year:
-#         county_sex_pop(year, vintage_year=vintage_year).collect().pipe(
-#             CountySexPopulation.validate, lazy=True
-#         )
-#     else:
-#         with pytest.raises(ValueError, match="^Must choose a year between"):
-#             county_sex_pop(year, vintage_year=vintage_year)
-#
-#
-# @pytest.mark.parametrize(
-#     ("year"),
-#     range(2010, 2025),
-# )
-# def test_county_sex_pop_as_pandas(year: int) -> None:
-#     df = county_sex_pop(year, as_pandas=True)
-#
-#     assert isinstance(df, DataFrame)
-#
-#
-# def test_county_sex_pop_invalid_vintage_year_exception() -> None:
-#     with pytest.raises(ValueError, match="^Must choose a vintage year between"):
-#         county_sex_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
-#
-#
-# class CountyRacePopulation(BasePolarsModel):
-#     state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-#     county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-#     county_fips: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-#     year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
-#     white: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
-#     black: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
-#     aian: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
-#     asian: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
-#     nhpi: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
-#
-#     class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
-#         unique: list[str] = [
-#             "state_name",
-#             "county_name",
-#             "county_fips",
-#             "year",
-#         ]
-#
-#     @pa.dataframe_check
-#     def has_correct_states(cls, data: PolarsData) -> bool:
-#         return (
-#             data.lazyframe.select(
-#                 pl.col("county_fips")
-#                 .str.slice(0, 2)
-#                 .is_between(pl.lit("01"), pl.lit("56"))
-#                 .all()
-#             )
-#             .collect()
-#             .item()
-#             is True
-#         )
-#
-#
-# class CountyRaceHispanicPopulation(BasePolarsModel):
-#     state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-#     county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-#     county_fips: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-#     year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
-#     white: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
-#     black: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
-#     aian: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
-#     asian: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
-#     nhpi: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
-#     hispanic: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
-#
-#     class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
-#         unique: list[str] = [
-#             "state_name",
-#             "county_name",
-#             "county_fips",
-#             "year",
-#         ]
-#
-#     @pa.dataframe_check
-#     def has_correct_states(cls, data: PolarsData) -> bool:
-#         return (
-#             data.lazyframe.select(
-#                 pl.col("county_fips")
-#                 .str.slice(0, 2)
-#                 .is_between(pl.lit("01"), pl.lit("56"))
-#                 .all()
-#             )
-#             .collect()
-#             .item()
-#             is True
-#         )
-#
-#
-# @pytest.mark.parametrize(
-#     ("year"),
-#     range(2010, 2025),
-# )
-# @pytest.mark.parametrize(
-#     ("vintage_year"),
-#     range(2016, 2025),
-# )
-# def test_county_race_pop(year: int, vintage_year: VintageYear) -> None:
-#     if vintage_year <= 2020:
-#         year_lb = 2010
-#     else:
-#         year_lb = 2020
-#
-#     if year_lb <= year <= vintage_year:
-#         county_race_pop(year, vintage_year=vintage_year).collect().pipe(
-#             CountyRacePopulation.validate, lazy=True
-#         )
-#     else:
-#         with pytest.raises(ValueError, match="^Must choose a year between"):
-#             county_race_pop(year, vintage_year=vintage_year)
-#
-#
-# @pytest.mark.parametrize(
-#     ("year"),
-#     range(2010, 2025),
-# )
-# def test_county_race_pop_as_pandas(year: int) -> None:
-#     df = county_race_pop(year, as_pandas=True)
-#
-#     assert isinstance(df, DataFrame)
-#
-#
-# def test_county_race_pop_invalid_vintage_year_exception() -> None:
-#     with pytest.raises(ValueError, match="^Must choose a vintage year between"):
-#         county_race_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
-#
-#
-# @pytest.mark.parametrize(
-#     ("year"),
-#     range(2010, 2025),
-# )
-# @pytest.mark.parametrize(
-#     ("vintage_year"),
-#     range(2016, 2025),
-# )
-# def test_county_race_hispanic_pop(year: int, vintage_year: VintageYear) -> None:
-#     if vintage_year <= 2020:
-#         year_lb = 2010
-#     else:
-#         year_lb = 2020
-#
-#     if year_lb <= year <= vintage_year:
-#         county_race_pop(
-#             year, vintage_year=vintage_year, incl_hispanic_orig=True
-#         ).collect().pipe(CountyRaceHispanicPopulation.validate, lazy=True)
-#     else:
-#         with pytest.raises(ValueError, match="^Must choose a year between"):
-#             county_race_pop(year, vintage_year=vintage_year, incl_hispanic_orig=True)
-#
-#
-# @pytest.mark.parametrize(
-#     ("year"),
-#     range(2010, 2025),
-# )
-# def test_county_race_hispanic_pop_as_pandas(year: int) -> None:
-#     df = county_race_pop(year, as_pandas=True, incl_hispanic_orig=True)
-#
-#     assert isinstance(df, DataFrame)
-#
-#
-# def test_county_race_hispanic_pop_invalid_vintage_year_exception() -> None:
-#     with pytest.raises(ValueError, match="^Must choose a vintage year between"):
-#         county_race_pop(2023, vintage_year=2000, incl_hispanic_orig=True)  # pyright: ignore [reportArgumentType]
+class CountyPopulation(BasePolarsModel):
+    """Schema that `test_county_pop` validates `county_pop` output against.
+
+    One row per county: `county_fips` may not repeat and `year` is single-valued.
+    """
+
+    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_fips: pl.String = pa.Field(unique=True)  # pyright: ignore [reportAny]
+    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+    tot_pop: pl.Int64 = pa.Field(gt=0)  # pyright: ignore [reportAny]
+
+    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+        unique: list[str] = ["state_name", "county_name", "county_fips", "year"]
+
+    @pa.check("year")
+    def all_identical(cls, data: PolarsData) -> pl.LazyFrame:
+        """Pass only when the checked column holds a single distinct value."""
+        single_valued = (pl.col(data.key).n_unique() == 1).all()
+        return data.lazyframe.select(single_valued)
+
+    @pa.dataframe_check
+    def has_correct_states(cls, data: PolarsData) -> bool:
+        """Pass only when every FIPS prefix (first 2 chars) is in '01'..'56'."""
+        prefix = pl.col("county_fips").str.slice(0, 2)
+        in_range = prefix.is_between(pl.lit("01"), pl.lit("56")).all()
+        return data.lazyframe.select(in_range).collect().item() is True
+
+
+@pytest.mark.parametrize("year", range(2010, 2025))
+@pytest.mark.parametrize("vintage_year", range(2016, 2025))
+def test_county_pop(year: int, vintage_year: VintageYear) -> None:
+    """Check `county_pop` for every pairing of year and vintage year.
+
+    Vintages up to 2020 are expected to serve 2010 through the vintage year, and
+    later vintages 2020 through the vintage year; anything else must raise.
+    """
+    # Earliest year the requested vintage is expected to serve.
+    earliest_year = 2010 if vintage_year <= 2020 else 2020
+
+    if not (earliest_year <= year <= vintage_year):
+        # The error must surface from the call itself, before any collect().
+        with pytest.raises(ValueError, match="^Must choose a year between"):
+            county_pop(year, vintage_year=vintage_year)
+        return
+
+    # A supported pairing must produce a frame that satisfies the schema.
+    lf = county_pop(year, vintage_year=vintage_year)
+    frame = lf.collect()
+    CountyPopulation.validate(frame, lazy=True)
+
+
+def test_county_pop_invalid_vintage_year_exception() -> None:  # vintage_year=2000 must be rejected with ValueError
+    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+        county_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
+
+
+@pytest.mark.parametrize("year", range(2010, 2025))
+def test_county_pop_as_pandas(year: int) -> None:
+    """Check that `as_pandas=True` makes `county_pop` return a `DataFrame`.
+
+    Exercised for each year from 2010 through 2024 without passing a vintage
+    year.
+    """
+    assert isinstance(county_pop(year, as_pandas=True), DataFrame)
+
+
+class CountyAgePopulation(BasePolarsModel):
+    """Schema that `test_county_age_pop` validates `county_age_pop` output against.
+
+    One row per county and age group; `year` must be single-valued.
+    """
+
+    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_fips: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+    age_grp: pl.Enum = pa.Field(dtype_kwargs={"categories": age_grp_enum.categories})  # pyright: ignore [reportAny]
+    tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+
+    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+        unique: list[str] = [
+            "state_name",
+            "county_name",
+            "county_fips",
+            "year",
+            "age_grp",
+        ]
+
+    @pa.check("year")
+    def all_identical(cls, data: PolarsData) -> pl.LazyFrame:
+        """Pass only when the checked column holds a single distinct value."""
+        single_valued = (pl.col(data.key).n_unique() == 1).all()
+        return data.lazyframe.select(single_valued)
+
+    @pa.dataframe_check
+    def has_correct_states(cls, data: PolarsData) -> bool:
+        """Pass only when every FIPS prefix (first 2 chars) is in '01'..'56'."""
+        prefix = pl.col("county_fips").str.slice(0, 2)
+        in_range = prefix.is_between(pl.lit("01"), pl.lit("56")).all()
+        return data.lazyframe.select(in_range).collect().item() is True
+
+
+@pytest.mark.parametrize("year", range(2010, 2025))
+@pytest.mark.parametrize("vintage_year", range(2016, 2025))
+def test_county_age_pop(year: int, vintage_year: VintageYear) -> None:
+    """Check `county_age_pop` for every pairing of year and vintage year.
+
+    Vintages up to 2020 are expected to serve 2010 through the vintage year, and
+    later vintages 2020 through the vintage year; anything else must raise.
+    """
+    # Earliest year the requested vintage is expected to serve.
+    earliest_year = 2010 if vintage_year <= 2020 else 2020
+
+    if not (earliest_year <= year <= vintage_year):
+        # The error must surface from the call itself, before any collect().
+        with pytest.raises(ValueError, match="^Must choose a year between"):
+            county_age_pop(year, vintage_year=vintage_year)
+        return
+
+    # A supported pairing must produce a frame that satisfies the schema.
+    lf = county_age_pop(year, vintage_year=vintage_year)
+    frame = lf.collect()
+    CountyAgePopulation.validate(frame, lazy=True)
+
+
+@pytest.mark.parametrize("year", range(2010, 2025))
+def test_county_age_pop_as_pandas(year: int) -> None:
+    """Check that `as_pandas=True` makes `county_age_pop` return a `DataFrame`.
+
+    Exercised for each year from 2010 through 2024 without passing a vintage
+    year.
+    """
+    assert isinstance(county_age_pop(year, as_pandas=True), DataFrame)
+
+
+def test_county_age_pop_invalid_vintage_year_exception() -> None:  # vintage_year=2000 must be rejected with ValueError
+    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+        county_age_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
+
+
+class CountySexPopulation(BasePolarsModel):
+    """Schema that `test_county_sex_pop` validates `county_sex_pop` output against.
+
+    One row per county and sex; `year` must be single-valued.
+    """
+
+    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_fips: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+    sex: pl.Enum = pa.Field(dtype_kwargs={"categories": sex_enum.categories})  # pyright: ignore [reportAny]
+    tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+
+    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+        unique: list[str] = ["state_name", "county_name", "county_fips", "year", "sex"]
+
+    @pa.check("year")
+    def all_identical(cls, data: PolarsData) -> pl.LazyFrame:
+        """Pass only when the checked column holds a single distinct value."""
+        single_valued = (pl.col(data.key).n_unique() == 1).all()
+        return data.lazyframe.select(single_valued)
+
+    @pa.dataframe_check
+    def has_correct_states(cls, data: PolarsData) -> bool:
+        """Pass only when every FIPS prefix (first 2 chars) is in '01'..'56'."""
+        prefix = pl.col("county_fips").str.slice(0, 2)
+        in_range = prefix.is_between(pl.lit("01"), pl.lit("56")).all()
+        return data.lazyframe.select(in_range).collect().item() is True
+
+
+@pytest.mark.parametrize("year", range(2010, 2025))
+@pytest.mark.parametrize("vintage_year", range(2016, 2025))
+def test_county_sex_pop(year: int, vintage_year: VintageYear) -> None:
+    """Check `county_sex_pop` for every pairing of year and vintage year.
+
+    Vintages up to 2020 are expected to serve 2010 through the vintage year, and
+    later vintages 2020 through the vintage year; anything else must raise.
+    """
+    # Earliest year the requested vintage is expected to serve.
+    earliest_year = 2010 if vintage_year <= 2020 else 2020
+
+    if not (earliest_year <= year <= vintage_year):
+        # The error must surface from the call itself, before any collect().
+        with pytest.raises(ValueError, match="^Must choose a year between"):
+            county_sex_pop(year, vintage_year=vintage_year)
+        return
+
+    # A supported pairing must produce a frame that satisfies the schema.
+    lf = county_sex_pop(year, vintage_year=vintage_year)
+    frame = lf.collect()
+    CountySexPopulation.validate(frame, lazy=True)
+
+
+@pytest.mark.parametrize("year", range(2010, 2025))
+def test_county_sex_pop_as_pandas(year: int) -> None:
+    """Check that `as_pandas=True` makes `county_sex_pop` return a `DataFrame`.
+
+    Exercised for each year from 2010 through 2024 without passing a vintage
+    year.
+    """
+    assert isinstance(county_sex_pop(year, as_pandas=True), DataFrame)
+
+
+def test_county_sex_pop_invalid_vintage_year_exception() -> None:  # vintage_year=2000 must be rejected with ValueError
+    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+        county_sex_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
+
+
+class CountyRacePopulation(BasePolarsModel):
+    """Schema that `test_county_race_pop` validates `county_race_pop` output against.
+
+    One row per county and race category; `year` must be single-valued.
+    """
+
+    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_fips: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+    race: pl.Enum = pa.Field(  # pyright: ignore [reportAny]
+        dtype_kwargs={"categories": race_enum_incl_hispanic.categories}
+    )
+    tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+
+    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+        unique: list[str] = ["state_name", "county_name", "county_fips", "year", "race"]
+
+    @pa.check("year")
+    def all_identical(cls, data: PolarsData) -> pl.LazyFrame:
+        """Pass only when the checked column holds a single distinct value."""
+        single_valued = (pl.col(data.key).n_unique() == 1).all()
+        return data.lazyframe.select(single_valued)
+
+    @pa.dataframe_check
+    def has_correct_states(cls, data: PolarsData) -> bool:
+        """Pass only when every FIPS prefix (first 2 chars) is in '01'..'56'."""
+        prefix = pl.col("county_fips").str.slice(0, 2)
+        in_range = prefix.is_between(pl.lit("01"), pl.lit("56")).all()
+        return data.lazyframe.select(in_range).collect().item() is True
+
+
+@pytest.mark.parametrize("year", range(2010, 2025))
+@pytest.mark.parametrize("vintage_year", range(2016, 2025))
+def test_county_race_pop(year: int, vintage_year: VintageYear) -> None:
+    """Check `county_race_pop` for every pairing of year and vintage year.
+
+    Vintages up to 2020 are expected to serve 2010 through the vintage year, and
+    later vintages 2020 through the vintage year; anything else must raise.
+    """
+    # Earliest year the requested vintage is expected to serve.
+    earliest_year = 2010 if vintage_year <= 2020 else 2020
+
+    if not (earliest_year <= year <= vintage_year):
+        # The error must surface from the call itself, before any collect().
+        with pytest.raises(ValueError, match="^Must choose a year between"):
+            county_race_pop(year, vintage_year=vintage_year)
+        return
+
+    # A supported pairing must produce a frame that satisfies the schema.
+    lf = county_race_pop(year, vintage_year=vintage_year)
+    frame = lf.collect()
+    CountyRacePopulation.validate(frame, lazy=True)
+
+
+@pytest.mark.parametrize("year", range(2010, 2025))
+def test_county_race_pop_as_pandas(year: int) -> None:
+    """Check that `as_pandas=True` makes `county_race_pop` return a `DataFrame`.
+
+    Exercised for each year from 2010 through 2024 without passing a vintage
+    year.
+    """
+    assert isinstance(county_race_pop(year, as_pandas=True), DataFrame)
+
+
+def test_county_race_pop_invalid_vintage_year_exception() -> None:  # vintage_year=2000 must be rejected with ValueError
+    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+        county_race_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
+
+
+class CountyRaceHispanicPopulation(BasePolarsModel):
+    """Schema for `county_race_pop(..., incl_hispanic_orig=True)` output.
+
+    One row per county and race category; `year` must be single-valued.
+    """
+
+    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_fips: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+    race: pl.Enum = pa.Field(  # pyright: ignore [reportAny]
+        dtype_kwargs={"categories": race_enum_incl_hispanic.categories}
+    )
+    tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+
+    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+        unique: list[str] = ["state_name", "county_name", "county_fips", "year", "race"]
+
+    @pa.check("year")
+    def all_identical(cls, data: PolarsData) -> pl.LazyFrame:
+        """Pass only when the checked column holds a single distinct value."""
+        single_valued = (pl.col(data.key).n_unique() == 1).all()
+        return data.lazyframe.select(single_valued)
+
+    @pa.dataframe_check
+    def has_correct_states(cls, data: PolarsData) -> bool:
+        """Pass only when every FIPS prefix (first 2 chars) is in '01'..'56'."""
+        prefix = pl.col("county_fips").str.slice(0, 2)
+        in_range = prefix.is_between(pl.lit("01"), pl.lit("56")).all()
+        return data.lazyframe.select(in_range).collect().item() is True
+
+
+@pytest.mark.parametrize("year", range(2010, 2025))
+@pytest.mark.parametrize("vintage_year", range(2016, 2025))
+def test_county_race_hispanic_pop(year: int, vintage_year: VintageYear) -> None:
+    """Check `county_race_pop` with `incl_hispanic_orig=True` for every pairing.
+
+    Vintages up to 2020 are expected to serve 2010 through the vintage year, and
+    later vintages 2020 through the vintage year; anything else must raise.
+    """
+    # Earliest year the requested vintage is expected to serve.
+    earliest_year = 2010 if vintage_year <= 2020 else 2020
+
+    if not (earliest_year <= year <= vintage_year):
+        # The error must surface from the call itself, before any collect().
+        with pytest.raises(ValueError, match="^Must choose a year between"):
+            county_race_pop(year, vintage_year=vintage_year, incl_hispanic_orig=True)
+        return
+
+    # A supported pairing must produce a frame that satisfies the schema.
+    lf = county_race_pop(year, vintage_year=vintage_year, incl_hispanic_orig=True)
+    frame = lf.collect()
+    CountyRaceHispanicPopulation.validate(frame, lazy=True)
+
+
+@pytest.mark.parametrize("year", range(2010, 2025))
+def test_county_race_hispanic_pop_as_pandas(year: int) -> None:
+    """Check that `as_pandas=True` with `incl_hispanic_orig=True` returns a `DataFrame`.
+
+    Exercised for each year from 2010 through 2024 without a vintage year.
+    """
+    result = county_race_pop(year, as_pandas=True, incl_hispanic_orig=True)
+    assert isinstance(result, DataFrame)
+
+
+def test_county_race_hispanic_pop_invalid_vintage_year_exception() -> None:  # vintage_year=2000 must be rejected with ValueError
+    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+        county_race_pop(2023, vintage_year=2000, incl_hispanic_orig=True)  # pyright: ignore [reportArgumentType]
+
+
+class CountyAgeSexPopulation(BasePolarsModel):
+    """Schema that `test_county_age_sex_pop` validates `county_age_sex_pop` against.
+
+    One row per county, age group and sex; `year` must be single-valued.
+    """
+
+    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_fips: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+    age_grp: pl.Enum = pa.Field(dtype_kwargs={"categories": age_grp_enum.categories})  # pyright: ignore [reportAny]
+    sex: pl.Enum = pa.Field(dtype_kwargs={"categories": sex_enum.categories})  # pyright: ignore [reportAny]
+    tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+
+    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+        unique: list[str] = [
+            "state_name",
+            "county_name",
+            "county_fips",
+            "year",
+            "age_grp",
+            "sex",
+        ]
+
+    @pa.check("year")
+    def all_identical(cls, data: PolarsData) -> pl.LazyFrame:
+        """Pass only when the checked column holds a single distinct value."""
+        single_valued = (pl.col(data.key).n_unique() == 1).all()
+        return data.lazyframe.select(single_valued)
+
+    @pa.dataframe_check
+    def has_correct_states(cls, data: PolarsData) -> bool:
+        """Pass only when every FIPS prefix (first 2 chars) is in '01'..'56'."""
+        prefix = pl.col("county_fips").str.slice(0, 2)
+        in_range = prefix.is_between(pl.lit("01"), pl.lit("56")).all()
+        return data.lazyframe.select(in_range).collect().item() is True
+
+
+@pytest.mark.parametrize("year", range(2010, 2025))
+@pytest.mark.parametrize("vintage_year", range(2016, 2025))
+def test_county_age_sex_pop(year: int, vintage_year: VintageYear) -> None:
+    """Check `county_age_sex_pop` for every pairing of year and vintage year.
+
+    Vintages up to 2020 are expected to serve 2010 through the vintage year, and
+    later vintages 2020 through the vintage year; anything else must raise.
+    """
+    # Earliest year the requested vintage is expected to serve.
+    earliest_year = 2010 if vintage_year <= 2020 else 2020
+
+    if not (earliest_year <= year <= vintage_year):
+        # The error must surface from the call itself, before any collect().
+        with pytest.raises(ValueError, match="^Must choose a year between"):
+            county_age_sex_pop(year, vintage_year=vintage_year)
+        return
+
+    # A supported pairing must produce a frame that satisfies the schema.
+    lf = county_age_sex_pop(year, vintage_year=vintage_year)
+    frame = lf.collect()
+    CountyAgeSexPopulation.validate(frame, lazy=True)
+
+
+@pytest.mark.parametrize("year", range(2010, 2025))
+def test_county_age_sex_pop_as_pandas(year: int) -> None:
+    """Check that `as_pandas=True` makes `county_age_sex_pop` return a `DataFrame`.
+
+    Exercised for each year from 2010 through 2024 without passing a vintage
+    year.
+    """
+    assert isinstance(county_age_sex_pop(year, as_pandas=True), DataFrame)
+
+
+def test_county_age_sex_pop_invalid_vintage_year_exception() -> None:  # vintage_year=2000 must be rejected with ValueError
+    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+        county_age_sex_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]

From 2c7371d3a5ce78b675ab819ca2d2c85afd7f6b71 Mon Sep 17 00:00:00 2001
From: winter-again <63322884+winter-again@users.noreply.github.com>
Date: Wed, 11 Mar 2026 19:02:21 -0400
Subject: [PATCH 4/4] Fix import

---
 src/kintsugi/county_groups.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/kintsugi/county_groups.py b/src/kintsugi/county_groups.py
index 285abc4..f8a9a42 100644
--- a/src/kintsugi/county_groups.py
+++ b/src/kintsugi/county_groups.py
@@ -4,7 +4,7 @@
 import polars as pl
 
 from ._data import get_dataset
-from .county_pop import county_pop
+from .population import county_pop
 
 
 @overload