diff --git a/README.md b/README.md
index c5e5ca0..d1a2131 100644
--- a/README.md
+++ b/README.md
@@ -39,13 +39,13 @@ counties = county_geo(2024)
 states = state_geo(2024)
 ```
 
-County and county-age population counts
+State and county population data, stratified by several different variables:
 
 ```python
-from kintsugi.county_pop import county_pop, county_age_pop
+from kintsugi.population import county_pop, state_age_pop
 
 lf_county_pop = county_pop(2024)
-lf_county_age_pop = county_age_pop(2024)
+lf_state_age_pop = state_age_pop(2024)
 ```
 
 Low-population county groups
diff --git a/src/kintsugi/_data.py b/src/kintsugi/_data.py
index ab4f6ff..69d334e 100644
--- a/src/kintsugi/_data.py
+++ b/src/kintsugi/_data.py
@@ -22,15 +22,24 @@
     "geo/cb_2020_us_state_5m.zip": "aedc60e0d1924a9030ee6d39ff0ed27ad7d1b0bc86807ea809391a6b9008ffb3",
     "geo/cb_2024_us_county_5m.zip": "a867f8734059b45d1d54a0ba56189dd7e73c42eb451418fa56de44c35232614b",
     "geo/cb_2024_us_state_5m.zip": "c9db0e395c11a1f94a8017fde4f4c7cbee1dca6eb37ba8f1ccaab927df70885f",
-    "pop/county_cc/county_pop_2016.parquet": "1d337d32b401b1d101f643e4f734dc62f6fc4659d9c168cab025fcfacdc930ec",
-    "pop/county_cc/county_pop_2017.parquet": "7f3d834d37d505baee184352cc3c2144cb5dde1745a51356c8b5debc0fddc768",
-    "pop/county_cc/county_pop_2018.parquet": "45e476c3bbe375b2de44b261bccec032609320de311cc95c02b1125216d8c748",
-    "pop/county_cc/county_pop_2019.parquet": "06081711d88339c4e2af398e3e7345d336b26b5c3a6148b00f0c4273b51a7f4b",
-    "pop/county_cc/county_pop_2020.parquet": "4ba406a680041dd3cb4025733fffe852383a9171dcd7ceaaa3fa4e551573dc57",
-    "pop/county_cc/county_pop_2021.parquet": "527c058c14b8de7826748bb883969bed8960a9e060e2aa010bd6367f41458306",
-    "pop/county_cc/county_pop_2022.parquet": "bffccaf83d23245378cbc900f5f7bc1740c7dd2c5085570b20d44649d5afcbc1",
-    "pop/county_cc/county_pop_2023.parquet": "dc5941017a40488424faae38fcca8b7032024523e823af17c0d539b657ee239a",
-    "pop/county_cc/county_pop_2024.parquet": "cae4e9e5d956dfdd60a68a06887e0c4a1f8918f81e09c8fe2015f3b1feb85d82",
+    "pop/county_cc/county_pop_2016.parquet": "74caad19bf5eed856ad9b6f63c65f7fceca612dec680d0768890de2265116607",
+    "pop/county_cc/county_pop_2017.parquet": "d93d027929861e115cf34b15f1ff7c697c8eaa327b73cd8132710a11860a63d5",
+    "pop/county_cc/county_pop_2018.parquet": "be3d3bab642a9f6f111c792a431f940b1753373194993885e4d47c136feed91a",
+    "pop/county_cc/county_pop_2019.parquet": "98801f118cd795c026a8269d5ac6674f98b9d47e0207c6a2721a5b7f4b6e5c08",
+    "pop/county_cc/county_pop_2020.parquet": "f1e4f282d297dc5498b6f839412c0815ca6f9e0a15d83d5d3867f2d70aa8413d",
+    "pop/county_cc/county_pop_2021.parquet": "3af369564ebb0e1fda25b440e5bf133ecb2d2eab60ab40f5db1f0a0955db713b",
+    "pop/county_cc/county_pop_2022.parquet": "977856eb5fffd508442ccedaa54c92e338b037135e5a9be55a03c7132863d9ca",
+    "pop/county_cc/county_pop_2023.parquet": "a4d66c302a557c1565ec9f43bad5ea9d4267576d1fbd17d8939e5a858a3d73e7",
+    "pop/county_cc/county_pop_2024.parquet": "12b16c7c20329a3df2f4120f6ec9a9a7313147fad0fd03bc360b1de5769c8abd",
+    "pop/state/state_pop_2016.parquet": "bac51c5ba4a9ff7305e92b3b2804c854fc20b9cbcf01156e5439d92668c0c81e",
+    "pop/state/state_pop_2017.parquet": "6fb950b1b78409af8130317b08b437b742c0906ff9d5c38655c1189103b8dddc",
+    "pop/state/state_pop_2018.parquet": "913fca35299028a842325000e58e33cd3912c1e900d480f00b468095398e57f8",
+    "pop/state/state_pop_2019.parquet": "7ca2c87065f24857178bb33a7512cb799a92890596bac6fff1cbeb3c69f6fc36",
+    "pop/state/state_pop_2020.parquet": "275b861e07f1c2327fb5382a28e84a5fb7ac4f896ae9f91b06612f6197af9611",
+    "pop/state/state_pop_2021.parquet": "8b47a5c9fdca838954c8ddac8265ad00d590281c7b444019070c81b9942a727e",
+    "pop/state/state_pop_2022.parquet": "ea113b3766c44bbf250e01b0b9509e810590119b3b9470b13dc347d43aed042b",
+    "pop/state/state_pop_2023.parquet": "e96a982342510fe6a1ba90fc85a9bd6fbdd8687bceaf76e6e117606429d2d160",
+    "pop/state/state_pop_2024.parquet": "b79bca471a68b8c3742ec30d41a2b65ab1227152e81239faf00763188752c6ff",
     "county_groups.parquet": "7d7c150b5efd5596e0eaaed27abd6dc86137f08ff677c2606d402b9d165b87fa",
     "state.txt": "bea4e03f71a1fa0045ae732aabad11fa541e5932b071c2369bb0d325e8cba5a0",
 }
diff --git a/src/kintsugi/county_groups.py b/src/kintsugi/county_groups.py
index 285abc4..f8a9a42 100644
--- a/src/kintsugi/county_groups.py
+++ b/src/kintsugi/county_groups.py
@@ -4,7 +4,7 @@
 import polars as pl
 
 from ._data import get_dataset
-from .county_pop import county_pop
+from .population import county_pop
 
 
 @overload
diff --git a/src/kintsugi/county_pop.py b/src/kintsugi/county_pop.py
deleted file mode 100644
index 284780e..0000000
--- a/src/kintsugi/county_pop.py
+++ /dev/null
@@ -1,176 +0,0 @@
-from typing import Literal, NamedTuple, overload
-
-import pandas as pd
-import polars as pl
-
-from ._data import get_dataset
-
-type VintageYear = Literal[
-    2016,
-    2017,
-    2018,
-    2019,
-    2020,
-    2021,
-    2022,
-    2023,
-    2024,
-]
-
-
-class Vintage(NamedTuple):
-    year_lb: int
-    year_ub: int
-    county_fips: set[str]
-
-
-def get_vintage(vintage_year: VintageYear) -> Vintage:
-    """
-    Get info like year bounds for a given vintage year
-    """
-    vintage_year_lb = 2016
-    vintage_year_ub = 2024
-    if not (vintage_year_lb <= vintage_year <= vintage_year_ub):
-        raise ValueError(
-            f"Must choose a vintage year between {vintage_year_lb} and {vintage_year_ub}"
-        )
-
-    data = get_dataset(f"pop/county_cc/county_pop_{vintage_year}.parquet")
-    county_fips = set(
-        pl.scan_parquet(data)
-        .select("county_fips")
-        .unique()
-        .collect()
-        .to_series()
-        .to_list()
-    )
-    if vintage_year <= 2020:
-        year_lb = 2010
-    else:
-        year_lb = 2020
-
-    return Vintage(year_lb, vintage_year, county_fips)
-
-
-@overload
-def county_pop(
-    year: int,
-    *,
-    vintage_year: VintageYear | None = ...,
-    as_pandas: Literal[False] = ...,
-) -> pl.LazyFrame: ...
-
-
-@overload
-def county_pop(
-    year: int, *, vintage_year: VintageYear | None = ..., as_pandas: Literal[True]
-) -> pd.DataFrame: ...
-
-
-def county_pop(
-    year: int, *, vintage_year: VintageYear | None = None, as_pandas: bool = False
-) -> pl.LazyFrame | pd.DataFrame:
-    """
-    County population estimates for select years. Uses county population
-    by characteristics data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-counties-detail.html
-    The raw files are not present in the kintsugi-data repo because of their large size.
-    Instead, we use parquet files containing a subset of columns.
-
-    It's recommended to use the latest possible vintage to get a given year's data. However,
-    you may specify a specific vintage year if, for example, you need a certain set of county
-    geographies. If `vintage_year` is `None` (by default), data for years in the range [2010, 2019]
-    are sourced from the 2020 vintage (2010-2020 data), while data for years in the range
-    [2020, 2024] are sourced from the 2024 vintage (2020-2024 data).
-
-    Source (2024 example): https://www2.census.gov/programs-surveys/popest/datasets/2020-2024/counties/asrh/cc-est2024-alldata.csv
-    """
-    if vintage_year is None:
-        if 2010 <= year <= 2019:
-            vintage_year = 2020
-        else:
-            vintage_year = 2024
-
-    vintage = get_vintage(vintage_year)
-    if not (vintage.year_lb <= year <= vintage.year_ub):
-        raise ValueError(
-            f"Must choose a year between {vintage.year_lb} and {vintage.year_ub}"
-        )
-
-    data = get_dataset(f"pop/county_cc/county_pop_{vintage_year}.parquet")
-    lf = (
-        pl.scan_parquet(data)
-        .filter(
-            pl.col("year") == year,
-            pl.col("age_grp") == "tot",
-        )
-        .select("state_name", "county_name", "county_fips", "year", "tot_pop")
-        .sort("county_fips")
-    )
-
-    if as_pandas:
-        return lf.collect().to_pandas()
-
-    return lf
-
-
-@overload
-def county_age_pop(
-    year: int,
-    *,
-    vintage_year: VintageYear | None = ...,
-    as_pandas: Literal[False] = ...,
-) -> pl.LazyFrame: ...
-
-
-@overload
-def county_age_pop(
-    year: int, *, vintage_year: VintageYear | None = ..., as_pandas: Literal[True]
-) -> pd.DataFrame: ...
-
-
-def county_age_pop(
-    year: int, *, vintage_year: VintageYear | None = None, as_pandas: bool = False
-) -> pl.LazyFrame | pd.DataFrame:
-    """
-    County-age population estimates for select years. Uses county population
-    by characteristics data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-counties-detail.html
-    The raw files are not present in the kintsugi-data repo because of their large size.
-    Instead, we use parquet files containing a subset of columns.
-
-    It's recommended to use the latest possible vintage to get a given year's data. However,
-    you may specify a specific vintage year if, for example, you need a certain set of county
-    geographies. If `vintage_year` is `None` (by default), data for years in the range [2010, 2019]
-    are sourced from the 2020 vintage (2010-2020 data), while data for years in the range
-    [2020, 2024] are sourced from the 2024 vintage (2020-2024 data).
-
-    Source (2024 example): https://www2.census.gov/programs-surveys/popest/datasets/2020-2024/counties/asrh/cc-est2024-alldata.csv
-    """
-    if vintage_year is None:
-        if 2010 <= year <= 2019:
-            vintage_year = 2020
-        else:
-            vintage_year = 2024
-
-    vintage = get_vintage(vintage_year)
-    if not (vintage.year_lb <= year <= vintage.year_ub):
-        raise ValueError(
-            f"Must choose a year between {vintage.year_lb} and {vintage.year_ub}"
-        )
-
-    data = get_dataset(f"pop/county_cc/county_pop_{vintage_year}.parquet")
-    lf = (
-        pl.scan_parquet(data)
-        .filter(
-            pl.col("year") == year,
-            pl.col("age_grp") != "tot",
-        )
-        .select(
-            "state_name", "county_name", "county_fips", "year", "age_grp", "tot_pop"
-        )
-        .sort("county_fips", "age_grp")
-    )
-
-    if as_pandas:
-        return lf.collect().to_pandas()
-
-    return lf
diff --git a/src/kintsugi/population.py b/src/kintsugi/population.py
new file mode 100644
index 0000000..ff5ddef
--- /dev/null
+++ b/src/kintsugi/population.py
@@ -0,0 +1,762 @@
+from typing import Literal, NamedTuple, overload
+
+import pandas as pd
+import polars as pl
+
+from ._data import get_dataset
+
+type VintageYear = Literal[  # vintage years accepted by validate_vintage_year
+    2016,
+    2017,
+    2018,
+    2019,
+    2020,
+    2021,
+    2022,
+    2023,
+    2024,
+]
+
+
+# class Vintage(NamedTuple):
+#     year_lb: int
+#     year_ub: int
+#     county_fips: set[str]
+
+
+def validate_vintage_year(year: int, vintage_year: VintageYear) -> None:
+    """Raise ValueError unless `vintage_year` is supported and covers `year`."""
+    vintage_year_lb, vintage_year_ub = 2016, 2024
+    vintage_supported = vintage_year_lb <= vintage_year <= vintage_year_ub
+    if not vintage_supported:
+        raise ValueError(
+            f"Must choose a vintage year between {vintage_year_lb} and {vintage_year_ub}"
+        )
+
+    # Vintages through 2020 start at 2010; later vintages start at 2020.
+    # Every vintage ends at its own vintage year.
+    year_lb = 2010 if vintage_year <= 2020 else 2020
+
+    year_covered = year_lb <= year <= vintage_year
+    if not year_covered:
+        raise ValueError(f"Must choose a year between {year_lb} and {vintage_year}")
+
+
+# def _get_vintage(vintage_year: VintageYear) -> Vintage:
+#     """Get info like year bounds for a given vintage year."""
+#     vintage_year_lb = 2016
+#     vintage_year_ub = 2024
+#     if not (vintage_year_lb <= vintage_year <= vintage_year_ub):
+#         raise ValueError(
+#             f"Must choose a vintage year between {vintage_year_lb} and {vintage_year_ub}"
+#         )
+#
+#     data = get_dataset(f"pop/county_cc/county_pop_{vintage_year}.parquet")
+#     county_fips = set(
+#         pl.scan_parquet(data)
+#         .select("county_fips")
+#         .unique()
+#         .collect()
+#         .to_series()
+#         .to_list()
+#     )
+#     if vintage_year <= 2020:
+#         year_lb = 2010
+#     else:
+#         year_lb = 2020
+#
+#     return Vintage(year_lb, vintage_year, county_fips)
+
+
+# TODO: should docstrings have info on the schema?
+
+# Enum dtypes; categories follow the conventions of the kintsugi-data processing script
+sex_enum = pl.Enum(["tot", "male", "female"])
+race_enum_no_hispanic = pl.Enum(["white", "black", "aian", "asian", "nhpi"])
+race_enum_incl_hispanic = pl.Enum(
+    ["white", "black", "aian", "asian", "nhpi", "hispanic"]
+)
+hispanic_enum = pl.Enum(["tot", "not_hispanic", "hispanic"])
+
+
+@overload
+def state_pop(
+    year: int,
+    *,
+    vintage_year: VintageYear | None = ...,
+    as_pandas: Literal[False] = ...,
+) -> pl.LazyFrame: ...
+
+
+@overload
+def state_pop(
+    year: int, *, vintage_year: VintageYear | None = ..., as_pandas: Literal[True]
+) -> pd.DataFrame: ...
+
+
+def state_pop(
+    year: int, *, vintage_year: VintageYear | None = None, as_pandas: bool = False
+) -> pl.LazyFrame | pd.DataFrame:
+    """
+    State population estimates for a single year (columns: state_name, state_fips, year, tot_pop).
+
+    Uses state population by characteristics data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-state-detail.html.
+    The raw files are not present in the kintsugi-data repo. Instead, parquet files containing a subset of columns are used.
+
+    It's recommended to use the latest possible vintage to get a given year's data. However,
+    a specific vintage year may be provided. If `vintage_year` is `None` (the default), years in
+    [2010, 2019] are read from the 2020 vintage (2010-2020 data) and every other year from the 2024
+    vintage (2020-2024 data). `validate_vintage_year` raises `ValueError` for uncovered years.
+
+    Source (2024 example): https://www2.census.gov/programs-surveys/popest/datasets/2020-2024/state/asrh/sc-est2024-alldata5.csv
+    """
+    if vintage_year is None:
+        if 2010 <= year <= 2019:
+            vintage_year = 2020
+        else:
+            vintage_year = 2024
+
+    validate_vintage_year(year, vintage_year)
+    data = get_dataset(f"pop/state/state_pop_{vintage_year}.parquet")
+    lf = (
+        pl.scan_parquet(data)
+        .filter(
+            pl.col("year") == year,
+            pl.col("sex") == "tot",  # keep only the sex-total rows
+            pl.col("hispanic_origin") == "tot",  # keep only the origin-total rows
+        )
+        .group_by(["state_name", "state_fips", "year"])
+        .agg(tot_pop=pl.col("tot_pop").sum())  # sum across age and race rows
+        .sort("state_fips")
+    )
+
+    if as_pandas:
+        return lf.collect().to_pandas()  # run the lazy query, then convert
+
+    return lf
+
+
+@overload
+def state_age_pop(
+    year: int,
+    *,
+    vintage_year: VintageYear | None = ...,
+    as_pandas: Literal[False] = ...,
+) -> pl.LazyFrame: ...
+
+
+@overload
+def state_age_pop(
+    year: int, *, vintage_year: VintageYear | None = ..., as_pandas: Literal[True]
+) -> pd.DataFrame: ...
+
+
+def state_age_pop(
+    year: int, *, vintage_year: VintageYear | None = None, as_pandas: bool = False
+) -> pl.LazyFrame | pd.DataFrame:
+    """
+    State-age population estimates for a single year (columns: state_name, state_fips, year, age, tot_pop).
+
+    Age is given in years, not binned groups. Note that an age value of `85` corresponds to >= 85 years old.
+    Uses state population by characteristics data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-state-detail.html.
+    The raw files are not present in the kintsugi-data repo. Instead, parquet files containing a subset of columns are used.
+
+    It's recommended to use the latest possible vintage to get a given year's data. However,
+    a specific vintage year may be provided. If `vintage_year` is `None` (the default), years in
+    [2010, 2019] are read from the 2020 vintage (2010-2020 data) and every other year from the 2024
+    vintage (2020-2024 data). `validate_vintage_year` raises `ValueError` for uncovered years.
+
+    Source (2024 example): https://www2.census.gov/programs-surveys/popest/datasets/2020-2024/state/asrh/sc-est2024-alldata5.csv
+    """
+    if vintage_year is None:
+        if 2010 <= year <= 2019:
+            vintage_year = 2020
+        else:
+            vintage_year = 2024
+
+    validate_vintage_year(year, vintage_year)
+    data = get_dataset(f"pop/state/state_pop_{vintage_year}.parquet")
+    lf = (
+        pl.scan_parquet(data)
+        .filter(
+            pl.col("year") == year,
+            pl.col("sex") == "tot",  # keep only the sex-total rows
+            pl.col("hispanic_origin") == "tot",  # keep only the origin-total rows
+        )
+        .group_by(["state_name", "state_fips", "year", "age"])
+        .agg(tot_pop=pl.col("tot_pop").sum())  # sum across race rows
+        .sort("state_fips", "age")
+    )
+
+    if as_pandas:
+        return lf.collect().to_pandas()  # run the lazy query, then convert
+
+    return lf
+
+
+@overload
+def state_sex_pop(
+    year: int,
+    *,
+    vintage_year: VintageYear | None = ...,
+    as_pandas: Literal[False] = ...,
+) -> pl.LazyFrame: ...
+
+
+@overload
+def state_sex_pop(
+    year: int, *, vintage_year: VintageYear | None = ..., as_pandas: Literal[True]
+) -> pd.DataFrame: ...
+
+
+def state_sex_pop(
+    year: int, *, vintage_year: VintageYear | None = None, as_pandas: bool = False
+) -> pl.LazyFrame | pd.DataFrame:
+    """
+    State-sex population estimates for a single year (columns: state_name, state_fips, year, sex, tot_pop).
+    Uses state population by characteristics data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-state-detail.html.
+    The raw files are not present in the kintsugi-data repo. Instead, parquet files containing a subset of columns are used.
+
+    It's recommended to use the latest possible vintage to get a given year's data. However,
+    a specific vintage year may be provided. If `vintage_year` is `None` (the default), years in
+    [2010, 2019] are read from the 2020 vintage (2010-2020 data) and every other year from the 2024
+    vintage (2020-2024 data). `validate_vintage_year` raises `ValueError` for uncovered years.
+
+    Source (2024 example): https://www2.census.gov/programs-surveys/popest/datasets/2020-2024/state/asrh/sc-est2024-alldata5.csv
+    """
+    if vintage_year is None:
+        if 2010 <= year <= 2019:
+            vintage_year = 2020
+        else:
+            vintage_year = 2024
+
+    validate_vintage_year(year, vintage_year)
+    data = get_dataset(f"pop/state/state_pop_{vintage_year}.parquet")
+    lf = (
+        pl.scan_parquet(data)
+        .filter(
+            pl.col("year") == year,
+            pl.col("sex") != "tot",  # drop the sex-total rows
+            pl.col("hispanic_origin") == "tot",  # keep only the origin-total rows
+        )
+        .group_by(["state_name", "state_fips", "year", "sex"])
+        .agg(tot_pop=pl.col("tot_pop").sum())  # sum across age and race rows
+        .sort("state_fips", "sex")
+    )
+
+    if as_pandas:
+        return lf.collect().to_pandas()  # run the lazy query, then convert
+
+    return lf
+
+
+@overload
+def state_race_pop(
+    year: int,
+    *,
+    vintage_year: VintageYear | None = ...,
+    incl_hispanic_orig: bool = ...,
+    as_pandas: Literal[False] = ...,
+) -> pl.LazyFrame: ...
+
+
+@overload
+def state_race_pop(
+    year: int,
+    *,
+    vintage_year: VintageYear | None = ...,
+    incl_hispanic_orig: bool = ...,
+    as_pandas: Literal[True],
+) -> pd.DataFrame: ...
+
+
+def state_race_pop(
+    year: int,
+    *,
+    vintage_year: VintageYear | None = None,
+    incl_hispanic_orig: bool = False,
+    as_pandas: bool = False,
+) -> pl.LazyFrame | pd.DataFrame:
+    """
+    State-race population estimates for a single year. Specify `incl_hispanic_orig=True` to also
+    break the counts down by a `hispanic_origin` column. Uses state population by characteristics
+    data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-state-detail.html.
+    The raw files are not present in the kintsugi-data repo. Instead, parquet files containing a subset of columns are used.
+
+    It's recommended to use the latest possible vintage to get a given year's data. However,
+    a specific vintage year may be provided. If `vintage_year` is `None` (the default), years in
+    [2010, 2019] are read from the 2020 vintage (2010-2020 data) and every other year from the 2024
+    vintage (2020-2024 data). `validate_vintage_year` raises `ValueError` for uncovered years.
+
+    Source (2024 example): https://www2.census.gov/programs-surveys/popest/datasets/2020-2024/state/asrh/sc-est2024-alldata5.csv
+    """
+    if vintage_year is None:
+        if 2010 <= year <= 2019:
+            vintage_year = 2020
+        else:
+            vintage_year = 2024
+
+    validate_vintage_year(year, vintage_year)
+    data = get_dataset(f"pop/state/state_pop_{vintage_year}.parquet")
+    lf = (
+        pl.scan_parquet(data)
+        .filter(
+            pl.col("year") == year,
+            pl.col("sex") == "tot",  # keep only the sex-total rows
+            pl.col("hispanic_origin") != "tot"
+            if incl_hispanic_orig
+            else pl.col("hispanic_origin") == "tot",
+        )
+        .group_by(
+            ["state_name", "state_fips", "year", "race", "hispanic_origin"]
+            if incl_hispanic_orig
+            else ["state_name", "state_fips", "year", "race"]
+        )
+        .agg(tot_pop=pl.col("tot_pop").sum())  # sum across age rows
+        .sort(
+            ["state_fips", "race", "hispanic_origin"]
+            if incl_hispanic_orig
+            else ["state_fips", "race"]
+        )
+    )
+
+    if as_pandas:
+        return lf.collect().to_pandas()  # run the lazy query, then convert
+
+    return lf
+
+
+@overload
+def state_age_sex_pop(
+    year: int,
+    *,
+    vintage_year: VintageYear | None = ...,
+    as_pandas: Literal[False] = ...,
+) -> pl.LazyFrame: ...
+
+
+@overload
+def state_age_sex_pop(
+    year: int, *, vintage_year: VintageYear | None = ..., as_pandas: Literal[True]
+) -> pd.DataFrame: ...
+
+
+def state_age_sex_pop(
+    year: int, *, vintage_year: VintageYear | None = None, as_pandas: bool = False
+) -> pl.LazyFrame | pd.DataFrame:
+    """
+    State-age-sex population estimates for a single year (columns: state_name, state_fips, year, age, sex, tot_pop).
+
+    Age is given in years, not binned groups. Note that an age value of `85` corresponds to >= 85 years old.
+    Uses state population by characteristics data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-state-detail.html.
+    The raw files are not present in the kintsugi-data repo. Instead, parquet files containing a subset of columns are used.
+
+    It's recommended to use the latest possible vintage to get a given year's data. However,
+    a specific vintage year may be provided. If `vintage_year` is `None` (the default), years in
+    [2010, 2019] are read from the 2020 vintage (2010-2020 data) and every other year from the 2024
+    vintage (2020-2024 data). `validate_vintage_year` raises `ValueError` for uncovered years.
+
+    Source (2024 example): https://www2.census.gov/programs-surveys/popest/datasets/2020-2024/state/asrh/sc-est2024-alldata5.csv
+    """
+    if vintage_year is None:
+        if 2010 <= year <= 2019:
+            vintage_year = 2020
+        else:
+            vintage_year = 2024
+
+    validate_vintage_year(year, vintage_year)
+    data = get_dataset(f"pop/state/state_pop_{vintage_year}.parquet")
+    lf = (
+        pl.scan_parquet(data)
+        .filter(
+            pl.col("year") == year,
+            pl.col("sex") != "tot",  # drop the sex-total rows
+            pl.col("hispanic_origin") == "tot",  # keep only the origin-total rows
+        )
+        .group_by(["state_name", "state_fips", "year", "age", "sex"])
+        .agg(tot_pop=pl.col("tot_pop").sum())  # sum across race rows
+        .sort("state_fips", "age", "sex")
+    )
+
+    if as_pandas:
+        return lf.collect().to_pandas()  # run the lazy query, then convert
+
+    return lf
+
+
+age_grps = [  # age-group labels; county_pop selects the "tot" rows
+    "tot",
+    "0-4",
+    "5-9",
+    "10-14",
+    "15-19",
+    "20-24",
+    "25-29",
+    "30-34",
+    "35-39",
+    "40-44",
+    "45-49",
+    "50-54",
+    "55-59",
+    "60-64",
+    "65-69",
+    "70-74",
+    "75-79",
+    "80-84",
+    ">=85",
+]
+age_grp_enum = pl.Enum(age_grps)  # not referenced elsewhere in this module
+
+
+@overload
+def county_pop(
+    year: int,
+    *,
+    vintage_year: VintageYear | None = ...,
+    as_pandas: Literal[False] = ...,
+) -> pl.LazyFrame: ...
+
+
+@overload
+def county_pop(
+    year: int, *, vintage_year: VintageYear | None = ..., as_pandas: Literal[True]
+) -> pd.DataFrame: ...
+
+
+def county_pop(
+    year: int, *, vintage_year: VintageYear | None = None, as_pandas: bool = False
+) -> pl.LazyFrame | pd.DataFrame:
+    """
+    County population estimates for a single year (columns: state_name, county_name, county_fips, year, tot_pop).
+    Uses county population by characteristics data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-counties-detail.html
+    The raw files are not present in the kintsugi-data repo because of their large size.
+    Instead, parquet files containing a subset of columns are used.
+
+    It's recommended to use the latest possible vintage to get a given year's data. However,
+    a specific vintage year may be provided. If `vintage_year` is `None` (the default), years in
+    [2010, 2019] are read from the 2020 vintage (2010-2020 data) and every other year from the 2024
+    vintage (2020-2024 data). `validate_vintage_year` raises `ValueError` for uncovered years.
+
+    Source (2024 example): https://www2.census.gov/programs-surveys/popest/datasets/2020-2024/counties/asrh/cc-est2024-alldata.csv
+    """
+    if vintage_year is None:
+        if 2010 <= year <= 2019:
+            vintage_year = 2020
+        else:
+            vintage_year = 2024
+
+    validate_vintage_year(year, vintage_year)
+    data = get_dataset(f"pop/county_cc/county_pop_{vintage_year}.parquet")
+    lf = (
+        pl.scan_parquet(data)
+        .filter(
+            pl.col("year") == year,
+            pl.col("age_grp") == "tot",  # keep only the age-total rows
+        )
+        .select("state_name", "county_name", "county_fips", "year", "tot_pop")
+        .sort("county_fips")
+    )
+
+    if as_pandas:
+        return lf.collect().to_pandas()  # run the lazy query, then convert
+
+    return lf
+
+
+@overload
+def county_age_pop(
+    year: int,
+    *,
+    vintage_year: VintageYear | None = ...,
+    as_pandas: Literal[False] = ...,
+) -> pl.LazyFrame: ...
+
+
+@overload
+def county_age_pop(
+    year: int, *, vintage_year: VintageYear | None = ..., as_pandas: Literal[True]
+) -> pd.DataFrame: ...
+
+
+def county_age_pop(
+    year: int, *, vintage_year: VintageYear | None = None, as_pandas: bool = False
+) -> pl.LazyFrame | pd.DataFrame:
+    """
+    County-age population estimates for a single year, one row per county and `age_grp` value.
+    Uses county population by characteristics data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-counties-detail.html
+    The raw files are not present in the kintsugi-data repo because of their large size.
+    Instead, parquet files containing a subset of columns are used.
+
+    It's recommended to use the latest possible vintage to get a given year's data. However,
+    a specific vintage year may be provided. If `vintage_year` is `None` (the default), years in
+    [2010, 2019] are read from the 2020 vintage (2010-2020 data) and every other year from the 2024
+    vintage (2020-2024 data). `validate_vintage_year` raises `ValueError` for uncovered years.
+
+    Source (2024 example): https://www2.census.gov/programs-surveys/popest/datasets/2020-2024/counties/asrh/cc-est2024-alldata.csv
+    """
+    if vintage_year is None:
+        if 2010 <= year <= 2019:
+            vintage_year = 2020
+        else:
+            vintage_year = 2024
+
+    validate_vintage_year(year, vintage_year)
+    data = get_dataset(f"pop/county_cc/county_pop_{vintage_year}.parquet")
+    lf = (
+        pl.scan_parquet(data)
+        .filter(
+            pl.col("year") == year,
+            pl.col("age_grp") != "tot",  # drop the age-total rows
+        )
+        .select(
+            "state_name", "county_name", "county_fips", "year", "age_grp", "tot_pop"
+        )
+        .sort("county_fips", "age_grp")
+    )
+
+    if as_pandas:
+        return lf.collect().to_pandas()  # run the lazy query, then convert
+
+    return lf
+
+
+@overload
+def county_sex_pop(
+    year: int,
+    *,
+    vintage_year: VintageYear | None = ...,
+    as_pandas: Literal[False] = ...,
+) -> pl.LazyFrame: ...
+
+
+@overload
+def county_sex_pop(
+    year: int, *, vintage_year: VintageYear | None = ..., as_pandas: Literal[True]
+) -> pd.DataFrame: ...
+
+
+def county_sex_pop(
+    year: int, *, vintage_year: VintageYear | None = None, as_pandas: bool = False
+) -> pl.LazyFrame | pd.DataFrame:
+    """
+    County-sex population estimates for a single year, in long format: one row per county and `sex`.
+    Uses county population by characteristics data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-counties-detail.html
+    The raw files are not present in the kintsugi-data repo because of their large size.
+    Instead, parquet files containing a subset of columns are used.
+
+    It's recommended to use the latest possible vintage to get a given year's data. However,
+    a specific vintage year may be provided. If `vintage_year` is `None` (the default), years in
+    [2010, 2019] are read from the 2020 vintage (2010-2020 data) and every other year from the 2024
+    vintage (2020-2024 data). `validate_vintage_year` raises `ValueError` for uncovered years.
+
+    Source (2024 example): https://www2.census.gov/programs-surveys/popest/datasets/2020-2024/counties/asrh/cc-est2024-alldata.csv
+    """
+    if vintage_year is None:
+        if 2010 <= year <= 2019:
+            vintage_year = 2020
+        else:
+            vintage_year = 2024
+
+    validate_vintage_year(year, vintage_year)
+    data = get_dataset(f"pop/county_cc/county_pop_{vintage_year}.parquet")
+    lf = (
+        pl.scan_parquet(data)
+        .filter(
+            pl.col("year") == year,
+            pl.col("age_grp") == "tot",  # keep only the age-total rows
+        )
+        .select(
+            "state_name", "county_name", "county_fips", "year", "tot_male", "tot_female"
+        )
+        .unpivot(  # tot_male / tot_female columns become rows
+            index=["state_name", "county_name", "county_fips", "year"],
+            variable_name="sex",
+            value_name="tot_pop",
+        )
+        .with_columns(sex=pl.col("sex").str.replace("tot_", "").cast(sex_enum))
+        .sort("county_fips", "sex")
+    )
+
+    if as_pandas:
+        return lf.collect().to_pandas()  # run the lazy query, then convert
+
+    return lf
+
+
+@overload
+def county_race_pop(
+    year: int,
+    *,
+    vintage_year: VintageYear | None = ...,
+    incl_hispanic_orig: bool = ...,
+    as_pandas: Literal[False] = ...,
+) -> pl.LazyFrame: ...
+
+
+@overload
+def county_race_pop(
+    year: int,
+    *,
+    vintage_year: VintageYear | None = ...,
+    incl_hispanic_orig: bool = ...,
+    as_pandas: Literal[True],
+) -> pd.DataFrame: ...
+
+
+def county_race_pop(
+    year: int,
+    *,
+    vintage_year: VintageYear | None = None,
+    incl_hispanic_orig: bool = False,
+    as_pandas: bool = False,
+) -> pl.LazyFrame | pd.DataFrame:
+    """
+    County-race population estimates for a single year, in long format: one row per county and `race`.
+    With `incl_hispanic_orig=True`, "hispanic" is kept as an extra value of the `race` column. Uses
+    county population by characteristics data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-counties-detail.html
+    The raw files are not present in the kintsugi-data repo because of their large size.
+    Instead, parquet files containing a subset of columns are used. `race` is always cast to `race_enum_incl_hispanic`.
+
+    It's recommended to use the latest possible vintage to get a given year's data. However,
+    a specific vintage year may be provided. If `vintage_year` is `None` (the default), years in
+    [2010, 2019] are read from the 2020 vintage (2010-2020 data) and every other year from the 2024
+    vintage (2020-2024 data). `validate_vintage_year` raises `ValueError` for uncovered years.
+
+    Source (2024 example): https://www2.census.gov/programs-surveys/popest/datasets/2020-2024/counties/asrh/cc-est2024-alldata.csv
+    """
+    if vintage_year is None:
+        if 2010 <= year <= 2019:
+            vintage_year = 2020
+        else:
+            vintage_year = 2024
+
+    validate_vintage_year(year, vintage_year)
+    data = get_dataset(f"pop/county_cc/county_pop_{vintage_year}.parquet")
+    lf = (
+        pl.scan_parquet(data)
+        .filter(
+            pl.col("year") == year,
+            pl.col("age_grp") == "tot",  # keep only the age-total rows
+        )
+        .select(
+            "state_name",
+            "county_name",
+            "county_fips",
+            "year",
+            "white_male",
+            "white_female",
+            "black_male",
+            "black_female",
+            "aian_male",
+            "aian_female",
+            "asian_male",
+            "asian_female",
+            "nhpi_male",
+            "nhpi_female",
+            "hispanic_male",
+            "hispanic_female",
+        )
+        .with_columns(  # per-race total = male column + female column
+            (pl.col(f"{r}_male") + pl.col(f"{r}_female")).alias(r)
+            for r in ["white", "black", "aian", "asian", "nhpi", "hispanic"]
+        )
+        .select(
+            "state_name",
+            "county_name",
+            "county_fips",
+            "year",
+            "white",
+            "black",
+            "aian",
+            "asian",
+            "nhpi",
+            "hispanic",
+        )
+    )
+
+    if not incl_hispanic_orig:
+        lf = lf.drop("hispanic")  # dropped before unpivot, so no "hispanic" rows
+
+    lf = (
+        lf.unpivot(  # one row per county and race column
+            index=["state_name", "county_name", "county_fips", "year"],
+            variable_name="race",
+            value_name="tot_pop",
+        )
+        # NOTE(review): race_enum_no_hispanic is never used; confirm this is intended
+        .cast({"race": race_enum_incl_hispanic})
+        .sort("county_fips", "race")
+    )
+
+    if as_pandas:
+        return lf.collect().to_pandas()  # run the lazy query, then convert
+
+    return lf
+
+
+@overload
+def county_age_sex_pop(
+    year: int,
+    *,
+    vintage_year: VintageYear | None = ...,
+    as_pandas: Literal[False] = ...,
+) -> pl.LazyFrame: ...
+
+
+@overload
+def county_age_sex_pop(
+    year: int, *, vintage_year: VintageYear | None = ..., as_pandas: Literal[True]
+) -> pd.DataFrame: ...
+
+
+def county_age_sex_pop(
+    year: int, *, vintage_year: VintageYear | None = None, as_pandas: bool = False
+) -> pl.LazyFrame | pd.DataFrame:
+    """
+    County-age-sex population estimates for a single year, one row per county, `age_grp` and `sex`.
+    Uses county population by characteristics data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-counties-detail.html
+    The raw files are not present in the kintsugi-data repo because of their large size.
+    Instead, parquet files containing a subset of columns are used.
+
+    It's recommended to use the latest possible vintage to get a given year's data. However,
+    a specific vintage year may be provided. If `vintage_year` is `None` (the default), years in
+    [2010, 2019] are read from the 2020 vintage (2010-2020 data) and every other year from the 2024
+    vintage (2020-2024 data). `validate_vintage_year` raises `ValueError` for uncovered years.
+
+    Source (2024 example): https://www2.census.gov/programs-surveys/popest/datasets/2020-2024/counties/asrh/cc-est2024-alldata.csv
+    """
+    if vintage_year is None:
+        if 2010 <= year <= 2019:
+            vintage_year = 2020
+        else:
+            vintage_year = 2024
+
+    validate_vintage_year(year, vintage_year)
+    data = get_dataset(f"pop/county_cc/county_pop_{vintage_year}.parquet")
+    lf = (
+        pl.scan_parquet(data)
+        .filter(
+            pl.col("year") == year,
+            pl.col("age_grp") != "tot",  # drop the age-total rows
+        )
+        .select(
+            "state_name",
+            "county_name",
+            "county_fips",
+            "year",
+            "age_grp",
+            "tot_male",
+            "tot_female",
+        )
+        .unpivot(  # tot_male / tot_female columns become rows
+            index=["state_name", "county_name", "county_fips", "year", "age_grp"],
+            variable_name="sex",
+            value_name="tot_pop",
+        )
+        .with_columns(sex=pl.col("sex").str.replace("tot_", "").cast(sex_enum))
+        .sort("county_fips", "age_grp", "sex")
+    )
+
+    if as_pandas:
+        return lf.collect().to_pandas()  # run the lazy query, then convert
+
+    return lf
diff --git a/tests/county_pop_test.py b/tests/county_pop_test.py
deleted file mode 100644
index 68d1a48..0000000
--- a/tests/county_pop_test.py
+++ /dev/null
@@ -1,175 +0,0 @@
-import pandera.polars as pa
-import polars as pl
-import pytest
-from pandas import DataFrame
-from pandera.polars import PolarsData
-
-from kintsugi.county_pop import (
-    VintageYear,
-    county_age_pop,
-    county_pop,
-    get_vintage,
-)
-
-from .models import BasePolarsModel
-
-age_grps = {
-    0: "tot",
-    1: "0-4",
-    2: "5-9",
-    3: "10-14",
-    4: "15-19",
-    5: "20-24",
-    6: "25-29",
-    7: "30-34",
-    8: "35-39",
-    9: "40-44",
-    10: "45-49",
-    11: "50-54",
-    12: "55-59",
-    13: "60-64",
-    14: "65-69",
-    15: "70-74",
-    16: "75-79",
-    17: "80-84",
-    18: ">=85",
-}
-age_grp_enum = pl.Enum(age_grps.values())
-
-
-class CountyPopulation(BasePolarsModel):
-    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-    county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-    county_fips: pl.String = pa.Field(unique=True)  # pyright: ignore [reportAny]
-    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
-    tot_pop: pl.Int64 = pa.Field(gt=0)  # pyright: ignore [reportAny]
-
-    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
-        unique: list[str] = ["state_name", "county_name", "county_fips", "year"]
-
-    @pa.dataframe_check
-    def has_correct_states(cls, data: PolarsData) -> bool:
-        return (
-            data.lazyframe.select(
-                pl.col("county_fips")
-                .str.slice(0, 2)
-                .is_between(pl.lit("01"), pl.lit("56"))
-                .all()
-            )
-            .collect()
-            .item()
-            is True
-        )
-
-
-@pytest.mark.parametrize(
-    ("year"),
-    range(2010, 2025),
-)
-@pytest.mark.parametrize(
-    ("vintage_year"),
-    range(2016, 2025),
-)
-def test_county_pop(year: int, vintage_year: VintageYear) -> None:
-    if vintage_year <= 2020:
-        year_lb = 2010
-    else:
-        year_lb = 2020
-
-    if year_lb <= year <= vintage_year:
-        county_pop(year, vintage_year=vintage_year).collect().pipe(
-            CountyPopulation.validate, lazy=True
-        )
-    else:
-        with pytest.raises(ValueError, match="^Must choose a year between"):
-            county_pop(year, vintage_year=vintage_year)
-
-
-def test_county_pop_invalid_vintage_year_exception() -> None:
-    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
-        county_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
-
-
-def test_get_vintage_info() -> None:
-    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
-        get_vintage(2000)  # pyright: ignore [reportArgumentType]
-
-
-@pytest.mark.parametrize(
-    ("year"),
-    range(2010, 2025),
-)
-def test_county_pop_as_pandas(year: int) -> None:
-    df = county_pop(year, as_pandas=True)
-
-    assert isinstance(df, DataFrame)
-
-
-class CountyAgePopulation(BasePolarsModel):
-    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-    county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-    county_fips: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
-    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
-    age_grp: pl.Enum = pa.Field(dtype_kwargs={"categories": age_grp_enum.categories})  # pyright: ignore [reportAny]
-    tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
-
-    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
-        unique: list[str] = [
-            "state_name",
-            "county_name",
-            "county_fips",
-            "year",
-            "age_grp",
-        ]
-
-    @pa.dataframe_check
-    def has_correct_states(cls, data: PolarsData) -> bool:
-        return (
-            data.lazyframe.select(
-                pl.col("county_fips")
-                .str.slice(0, 2)
-                .is_between(pl.lit("01"), pl.lit("56"))
-                .all()
-            )
-            .collect()
-            .item()
-            is True
-        )
-
-
-@pytest.mark.parametrize(
-    ("year"),
-    range(2010, 2025),
-)
-@pytest.mark.parametrize(
-    ("vintage_year"),
-    range(2016, 2025),
-)
-def test_county_age_pop(year: int, vintage_year: VintageYear) -> None:
-    if vintage_year <= 2020:
-        year_lb = 2010
-    else:
-        year_lb = 2020
-
-    if year_lb <= year <= vintage_year:
-        county_age_pop(year, vintage_year=vintage_year).collect().pipe(
-            CountyAgePopulation.validate, lazy=True
-        )
-    else:
-        with pytest.raises(ValueError, match="^Must choose a year between"):
-            county_age_pop(year, vintage_year=vintage_year)
-
-
-@pytest.mark.parametrize(
-    ("year"),
-    range(2010, 2025),
-)
-def test_county_age_pop_as_pandas(year: int) -> None:
-    df = county_age_pop(year, as_pandas=True)
-
-    assert isinstance(df, DataFrame)
-
-
-def test_county_age_pop_invalid_vintage_year_exception() -> None:
-    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
-        county_age_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
diff --git a/tests/population_test.py b/tests/population_test.py
new file mode 100644
index 0000000..aef3185
--- /dev/null
+++ b/tests/population_test.py
@@ -0,0 +1,786 @@
+import pandera.polars as pa
+import polars as pl
+import pytest
+from pandas import DataFrame
+from pandera.polars import PolarsData
+
+from kintsugi.population import (
+    VintageYear,
+    age_grp_enum,
+    county_age_pop,
+    county_age_sex_pop,
+    county_pop,
+    county_race_pop,
+    county_sex_pop,
+    hispanic_enum,
+    race_enum_incl_hispanic,
+    race_enum_no_hispanic,
+    sex_enum,
+    state_age_pop,
+    state_age_sex_pop,
+    state_pop,
+    state_race_pop,
+    state_sex_pop,
+)
+
+from .models import BasePolarsModel
+
+
+class StatePopulation(BasePolarsModel):
+    state_name: pl.String = pa.Field(unique=True)  # pyright: ignore [reportAny]
+    state_fips: pl.String = pa.Field(unique=True, in_range=("01", "56"))  # pyright: ignore [reportAny]
+    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+    tot_pop: pl.Int64 = pa.Field(gt=0)  # pyright: ignore [reportAny]
+
+    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+        unique: list[str] = ["state_name", "state_fips", "year"]
+
+    @pa.check("year")
+    def all_identical(cls, data: PolarsData) -> pl.LazyFrame:
+        return data.lazyframe.select((pl.col(data.key).n_unique() == 1).all())
+
+    @pa.dataframe_check
+    def has_correct_height(cls, data: PolarsData) -> bool:
+        return data.lazyframe.select(pl.len()).collect().item() == 51  # pyright: ignore [reportAny]
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+@pytest.mark.parametrize(
+    ("vintage_year"),
+    range(2016, 2025),
+)
+def test_state_pop(year: int, vintage_year: VintageYear) -> None:
+    if vintage_year <= 2020:
+        year_lb = 2010
+    else:
+        year_lb = 2020
+
+    if year_lb <= year <= vintage_year:
+        state_pop(year, vintage_year=vintage_year).collect().pipe(
+            StatePopulation.validate, lazy=True
+        )
+    else:
+        with pytest.raises(ValueError, match="^Must choose a year between"):
+            state_pop(year, vintage_year=vintage_year)
+
+
+def test_state_pop_invalid_vintage_year_exception() -> None:
+    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+        state_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+def test_state_pop_as_pandas(year: int) -> None:
+    df = state_pop(year, as_pandas=True)
+
+    assert isinstance(df, DataFrame)
+
+
+class StateAgePopulation(BasePolarsModel):
+    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    state_fips: pl.String = pa.Field(in_range=("01", "56"))  # pyright: ignore [reportAny]
+    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+    age: pl.Int64 = pa.Field(in_range=(0, 85))  # pyright: ignore [reportAny]
+    tot_pop: pl.Int64 = pa.Field(gt=0)  # pyright: ignore [reportAny]
+
+    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+        unique: list[str] = ["state_name", "state_fips", "year", "age"]
+
+    @pa.check("year")
+    def all_identical(cls, data: PolarsData) -> pl.LazyFrame:
+        return data.lazyframe.select((pl.col(data.key).n_unique() == 1).all())
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+@pytest.mark.parametrize(
+    ("vintage_year"),
+    range(2016, 2025),
+)
+def test_state_age_pop(year: int, vintage_year: VintageYear) -> None:
+    if vintage_year <= 2020:
+        year_lb = 2010
+    else:
+        year_lb = 2020
+
+    if year_lb <= year <= vintage_year:
+        state_age_pop(year, vintage_year=vintage_year).collect().pipe(
+            StateAgePopulation.validate, lazy=True
+        )
+    else:
+        with pytest.raises(ValueError, match="^Must choose a year between"):
+            state_age_pop(year, vintage_year=vintage_year)
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+def test_state_age_pop_as_pandas(year: int) -> None:
+    df = state_age_pop(year, as_pandas=True)
+
+    assert isinstance(df, DataFrame)
+
+
+def test_state_age_pop_invalid_vintage_year_exception() -> None:
+    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+        state_age_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
+
+
+class StateSexPopulation(BasePolarsModel):
+    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    state_fips: pl.String = pa.Field(in_range=("01", "56"))  # pyright: ignore [reportAny]
+    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+    sex: pl.Enum = pa.Field(dtype_kwargs={"categories": sex_enum.categories})  # pyright: ignore [reportAny]
+    tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+
+    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+        unique: list[str] = ["state_name", "state_fips", "year", "sex"]
+
+    @pa.check("year")
+    def all_identical(cls, data: PolarsData) -> pl.LazyFrame:
+        return data.lazyframe.select((pl.col(data.key).n_unique() == 1).all())
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+@pytest.mark.parametrize(
+    ("vintage_year"),
+    range(2016, 2025),
+)
+def test_state_sex_pop(year: int, vintage_year: VintageYear) -> None:
+    if vintage_year <= 2020:
+        year_lb = 2010
+    else:
+        year_lb = 2020
+
+    if year_lb <= year <= vintage_year:
+        state_sex_pop(year, vintage_year=vintage_year).collect().pipe(
+            StateSexPopulation.validate, lazy=True
+        )
+    else:
+        with pytest.raises(ValueError, match="^Must choose a year between"):
+            state_sex_pop(year, vintage_year=vintage_year)
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+def test_state_sex_pop_as_pandas(year: int) -> None:
+    df = state_sex_pop(year, as_pandas=True)
+
+    assert isinstance(df, DataFrame)
+
+
+def test_state_sex_pop_invalid_vintage_year_exception() -> None:
+    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+        state_sex_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
+
+
+class StateRacePopulation(BasePolarsModel):
+    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    state_fips: pl.String = pa.Field(in_range=("01", "56"))  # pyright: ignore [reportAny]
+    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+    race: pl.Enum = pa.Field(  # pyright: ignore [reportAny]
+        dtype_kwargs={"categories": race_enum_no_hispanic.categories}
+    )
+    tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+
+    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+        unique: list[str] = ["state_name", "state_fips", "year", "race"]
+
+    @pa.check("year")
+    def all_identical(cls, data: PolarsData) -> pl.LazyFrame:
+        return data.lazyframe.select((pl.col(data.key).n_unique() == 1).all())
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+@pytest.mark.parametrize(
+    ("vintage_year"),
+    range(2016, 2025),
+)
+def test_state_race_pop(year: int, vintage_year: VintageYear) -> None:
+    if vintage_year <= 2020:
+        year_lb = 2010
+    else:
+        year_lb = 2020
+
+    if year_lb <= year <= vintage_year:
+        state_race_pop(year, vintage_year=vintage_year).collect().pipe(
+            StateRacePopulation.validate, lazy=True
+        )
+    else:
+        with pytest.raises(ValueError, match="^Must choose a year between"):
+            state_race_pop(year, vintage_year=vintage_year)
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+def test_state_race_pop_as_pandas(year: int) -> None:
+    df = state_race_pop(year, as_pandas=True)
+
+    assert isinstance(df, DataFrame)
+
+
+def test_state_race_pop_invalid_vintage_year_exception() -> None:
+    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+        state_race_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
+
+
+class StateRaceHispanicPopulation(BasePolarsModel):
+    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    state_fips: pl.String = pa.Field(in_range=("01", "56"))  # pyright: ignore [reportAny]
+    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+    race: pl.Enum = pa.Field(  # pyright: ignore [reportAny]
+        dtype_kwargs={"categories": race_enum_no_hispanic.categories}
+    )
+    hispanic_origin: pl.Enum = pa.Field(  # pyright: ignore [reportAny]
+        dtype_kwargs={"categories": hispanic_enum.categories}
+    )
+    tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+
+    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+        unique: list[str] = [
+            "state_name",
+            "state_fips",
+            "year",
+            "race",
+            "hispanic_origin",
+        ]
+
+    @pa.check("year")
+    def all_identical(cls, data: PolarsData) -> pl.LazyFrame:
+        return data.lazyframe.select((pl.col(data.key).n_unique() == 1).all())
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+@pytest.mark.parametrize(
+    ("vintage_year"),
+    range(2016, 2025),
+)
+def test_state_race_hispanic_pop(year: int, vintage_year: VintageYear) -> None:
+    if vintage_year <= 2020:
+        year_lb = 2010
+    else:
+        year_lb = 2020
+
+    if year_lb <= year <= vintage_year:
+        state_race_pop(
+            year, vintage_year=vintage_year, incl_hispanic_orig=True
+        ).collect().pipe(StateRaceHispanicPopulation.validate, lazy=True)
+    else:
+        with pytest.raises(ValueError, match="^Must choose a year between"):
+            state_race_pop(year, vintage_year=vintage_year, incl_hispanic_orig=True)
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+def test_state_race_hispanic_pop_as_pandas(year: int) -> None:
+    df = state_race_pop(year, as_pandas=True, incl_hispanic_orig=True)
+
+    assert isinstance(df, DataFrame)
+
+
+def test_state_race_hispanic_pop_invalid_vintage_year_exception() -> None:
+    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+        state_race_pop(2023, vintage_year=2000, incl_hispanic_orig=True)  # pyright: ignore [reportArgumentType]
+
+
+class StateAgeSexPopulation(BasePolarsModel):
+    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    state_fips: pl.String = pa.Field(in_range=("01", "56"))  # pyright: ignore [reportAny]
+    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+    age: pl.Int64 = pa.Field(in_range=(0, 85))  # pyright: ignore [reportAny]
+    sex: pl.Enum = pa.Field(dtype_kwargs={"categories": sex_enum.categories})  # pyright: ignore [reportAny]
+    tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+
+    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+        unique: list[str] = ["state_name", "state_fips", "year", "age", "sex"]
+
+    @pa.check("year")
+    def all_identical(cls, data: PolarsData) -> pl.LazyFrame:
+        return data.lazyframe.select((pl.col(data.key).n_unique() == 1).all())
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+@pytest.mark.parametrize(
+    ("vintage_year"),
+    range(2016, 2025),
+)
+def test_state_age_sex_pop(year: int, vintage_year: VintageYear) -> None:
+    if vintage_year <= 2020:
+        year_lb = 2010
+    else:
+        year_lb = 2020
+
+    if year_lb <= year <= vintage_year:
+        state_age_sex_pop(year, vintage_year=vintage_year).collect().pipe(
+            StateAgeSexPopulation.validate, lazy=True
+        )
+    else:
+        with pytest.raises(ValueError, match="^Must choose a year between"):
+            state_age_sex_pop(year, vintage_year=vintage_year)
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+def test_state_age_sex_pop_as_pandas(year: int) -> None:
+    df = state_age_sex_pop(year, as_pandas=True)
+
+    assert isinstance(df, DataFrame)
+
+
+def test_state_age_sex_pop_invalid_vintage_year_exception() -> None:
+    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+        state_age_sex_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
+
+
+class CountyPopulation(BasePolarsModel):
+    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_fips: pl.String = pa.Field(unique=True)  # pyright: ignore [reportAny]
+    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+    tot_pop: pl.Int64 = pa.Field(gt=0)  # pyright: ignore [reportAny]
+
+    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+        unique: list[str] = ["state_name", "county_name", "county_fips", "year"]
+
+    @pa.check("year")
+    def all_identical(cls, data: PolarsData) -> pl.LazyFrame:
+        return data.lazyframe.select((pl.col(data.key).n_unique() == 1).all())
+
+    @pa.dataframe_check
+    def has_correct_states(cls, data: PolarsData) -> bool:
+        return (
+            data.lazyframe.select(
+                pl.col("county_fips")
+                .str.slice(0, 2)
+                .is_between(pl.lit("01"), pl.lit("56"))
+                .all()
+            )
+            .collect()
+            .item()
+            is True
+        )
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+@pytest.mark.parametrize(
+    ("vintage_year"),
+    range(2016, 2025),
+)
+def test_county_pop(year: int, vintage_year: VintageYear) -> None:
+    if vintage_year <= 2020:
+        year_lb = 2010
+    else:
+        year_lb = 2020
+
+    if year_lb <= year <= vintage_year:
+        county_pop(year, vintage_year=vintage_year).collect().pipe(
+            CountyPopulation.validate, lazy=True
+        )
+    else:
+        with pytest.raises(ValueError, match="^Must choose a year between"):
+            county_pop(year, vintage_year=vintage_year)
+
+
+def test_county_pop_invalid_vintage_year_exception() -> None:
+    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+        county_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+def test_county_pop_as_pandas(year: int) -> None:
+    df = county_pop(year, as_pandas=True)
+
+    assert isinstance(df, DataFrame)
+
+
+class CountyAgePopulation(BasePolarsModel):
+    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_fips: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+    age_grp: pl.Enum = pa.Field(dtype_kwargs={"categories": age_grp_enum.categories})  # pyright: ignore [reportAny]
+    tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+
+    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+        unique: list[str] = [
+            "state_name",
+            "county_name",
+            "county_fips",
+            "year",
+            "age_grp",
+        ]
+
+    @pa.check("year")
+    def all_identical(cls, data: PolarsData) -> pl.LazyFrame:
+        return data.lazyframe.select((pl.col(data.key).n_unique() == 1).all())
+
+    @pa.dataframe_check
+    def has_correct_states(cls, data: PolarsData) -> bool:
+        return (
+            data.lazyframe.select(
+                pl.col("county_fips")
+                .str.slice(0, 2)
+                .is_between(pl.lit("01"), pl.lit("56"))
+                .all()
+            )
+            .collect()
+            .item()
+            is True
+        )
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+@pytest.mark.parametrize(
+    ("vintage_year"),
+    range(2016, 2025),
+)
+def test_county_age_pop(year: int, vintage_year: VintageYear) -> None:
+    if vintage_year <= 2020:
+        year_lb = 2010
+    else:
+        year_lb = 2020
+
+    if year_lb <= year <= vintage_year:
+        county_age_pop(year, vintage_year=vintage_year).collect().pipe(
+            CountyAgePopulation.validate, lazy=True
+        )
+    else:
+        with pytest.raises(ValueError, match="^Must choose a year between"):
+            county_age_pop(year, vintage_year=vintage_year)
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+def test_county_age_pop_as_pandas(year: int) -> None:
+    df = county_age_pop(year, as_pandas=True)
+
+    assert isinstance(df, DataFrame)
+
+
+def test_county_age_pop_invalid_vintage_year_exception() -> None:
+    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+        county_age_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
+
+
+class CountySexPopulation(BasePolarsModel):
+    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_fips: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+    sex: pl.Enum = pa.Field(dtype_kwargs={"categories": sex_enum.categories})  # pyright: ignore [reportAny]
+    tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+
+    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+        unique: list[str] = ["state_name", "county_name", "county_fips", "year", "sex"]
+
+    @pa.check("year")
+    def all_identical(cls, data: PolarsData) -> pl.LazyFrame:
+        return data.lazyframe.select((pl.col(data.key).n_unique() == 1).all())
+
+    @pa.dataframe_check
+    def has_correct_states(cls, data: PolarsData) -> bool:
+        return (
+            data.lazyframe.select(
+                pl.col("county_fips")
+                .str.slice(0, 2)
+                .is_between(pl.lit("01"), pl.lit("56"))
+                .all()
+            )
+            .collect()
+            .item()
+            is True
+        )
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+@pytest.mark.parametrize(
+    ("vintage_year"),
+    range(2016, 2025),
+)
+def test_county_sex_pop(year: int, vintage_year: VintageYear) -> None:
+    if vintage_year <= 2020:
+        year_lb = 2010
+    else:
+        year_lb = 2020
+
+    if year_lb <= year <= vintage_year:
+        county_sex_pop(year, vintage_year=vintage_year).collect().pipe(
+            CountySexPopulation.validate, lazy=True
+        )
+    else:
+        with pytest.raises(ValueError, match="^Must choose a year between"):
+            county_sex_pop(year, vintage_year=vintage_year)
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+def test_county_sex_pop_as_pandas(year: int) -> None:
+    df = county_sex_pop(year, as_pandas=True)
+
+    assert isinstance(df, DataFrame)
+
+
+def test_county_sex_pop_invalid_vintage_year_exception() -> None:
+    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+        county_sex_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
+
+
+class CountyRacePopulation(BasePolarsModel):
+    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_fips: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+    race: pl.Enum = pa.Field(  # pyright: ignore [reportAny]
+        dtype_kwargs={"categories": race_enum_incl_hispanic.categories}
+    )
+    tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+
+    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+        unique: list[str] = ["state_name", "county_name", "county_fips", "year", "race"]
+
+    @pa.check("year")
+    def all_identical(cls, data: PolarsData) -> pl.LazyFrame:
+        return data.lazyframe.select((pl.col(data.key).n_unique() == 1).all())
+
+    @pa.dataframe_check
+    def has_correct_states(cls, data: PolarsData) -> bool:
+        return (
+            data.lazyframe.select(
+                pl.col("county_fips")
+                .str.slice(0, 2)
+                .is_between(pl.lit("01"), pl.lit("56"))
+                .all()
+            )
+            .collect()
+            .item()
+            is True
+        )
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+@pytest.mark.parametrize(
+    ("vintage_year"),
+    range(2016, 2025),
+)
+def test_county_race_pop(year: int, vintage_year: VintageYear) -> None:
+    if vintage_year <= 2020:
+        year_lb = 2010
+    else:
+        year_lb = 2020
+
+    if year_lb <= year <= vintage_year:
+        county_race_pop(year, vintage_year=vintage_year).collect().pipe(
+            CountyRacePopulation.validate, lazy=True
+        )
+    else:
+        with pytest.raises(ValueError, match="^Must choose a year between"):
+            county_race_pop(year, vintage_year=vintage_year)
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+def test_county_race_pop_as_pandas(year: int) -> None:
+    df = county_race_pop(year, as_pandas=True)
+
+    assert isinstance(df, DataFrame)
+
+
+def test_county_race_pop_invalid_vintage_year_exception() -> None:
+    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+        county_race_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]
+
+
+class CountyRaceHispanicPopulation(BasePolarsModel):
+    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_fips: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+    race: pl.Enum = pa.Field(  # pyright: ignore [reportAny]
+        dtype_kwargs={"categories": race_enum_incl_hispanic.categories}
+    )
+    tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+
+    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+        unique: list[str] = ["state_name", "county_name", "county_fips", "year", "race"]
+
+    @pa.check("year")
+    def all_identical(cls, data: PolarsData) -> pl.LazyFrame:
+        return data.lazyframe.select((pl.col(data.key).n_unique() == 1).all())
+
+    @pa.dataframe_check
+    def has_correct_states(cls, data: PolarsData) -> bool:
+        return (
+            data.lazyframe.select(
+                pl.col("county_fips")
+                .str.slice(0, 2)
+                .is_between(pl.lit("01"), pl.lit("56"))
+                .all()
+            )
+            .collect()
+            .item()
+            is True
+        )
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+@pytest.mark.parametrize(
+    ("vintage_year"),
+    range(2016, 2025),
+)
+def test_county_race_hispanic_pop(year: int, vintage_year: VintageYear) -> None:
+    if vintage_year <= 2020:
+        year_lb = 2010
+    else:
+        year_lb = 2020
+
+    if year_lb <= year <= vintage_year:
+        county_race_pop(
+            year, vintage_year=vintage_year, incl_hispanic_orig=True
+        ).collect().pipe(CountyRaceHispanicPopulation.validate, lazy=True)
+    else:
+        with pytest.raises(ValueError, match="^Must choose a year between"):
+            county_race_pop(year, vintage_year=vintage_year, incl_hispanic_orig=True)
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+def test_county_race_hispanic_pop_as_pandas(year: int) -> None:
+    df = county_race_pop(year, as_pandas=True, incl_hispanic_orig=True)
+
+    assert isinstance(df, DataFrame)
+
+
+def test_county_race_hispanic_pop_invalid_vintage_year_exception() -> None:
+    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+        county_race_pop(2023, vintage_year=2000, incl_hispanic_orig=True)  # pyright: ignore [reportArgumentType]
+
+
+class CountyAgeSexPopulation(BasePolarsModel):
+    state_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_name: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    county_fips: pl.String  # pyright: ignore [reportUninitializedInstanceVariable]
+    year: pl.Int64  # pyright: ignore [reportUninitializedInstanceVariable]
+    age_grp: pl.Enum = pa.Field(dtype_kwargs={"categories": age_grp_enum.categories})  # pyright: ignore [reportAny]
+    sex: pl.Enum = pa.Field(dtype_kwargs={"categories": sex_enum.categories})  # pyright: ignore [reportAny]
+    tot_pop: pl.Int64 = pa.Field(ge=0)  # pyright: ignore [reportAny]
+
+    class Config:  # pyright: ignore [reportIncompatibleVariableOverride]
+        unique: list[str] = [
+            "state_name",
+            "county_name",
+            "county_fips",
+            "year",
+            "age_grp",
+            "sex",
+        ]
+
+    @pa.check("year")
+    def all_identical(cls, data: PolarsData) -> pl.LazyFrame:
+        return data.lazyframe.select((pl.col(data.key).n_unique() == 1).all())
+
+    @pa.dataframe_check
+    def has_correct_states(cls, data: PolarsData) -> bool:
+        return (
+            data.lazyframe.select(
+                pl.col("county_fips")
+                .str.slice(0, 2)
+                .is_between(pl.lit("01"), pl.lit("56"))
+                .all()
+            )
+            .collect()
+            .item()
+            is True
+        )
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+@pytest.mark.parametrize(
+    ("vintage_year"),
+    range(2016, 2025),
+)
+def test_county_age_sex_pop(year: int, vintage_year: VintageYear) -> None:
+    if vintage_year <= 2020:
+        year_lb = 2010
+    else:
+        year_lb = 2020
+
+    if year_lb <= year <= vintage_year:
+        county_age_sex_pop(year, vintage_year=vintage_year).collect().pipe(
+            CountyAgeSexPopulation.validate, lazy=True
+        )
+    else:
+        with pytest.raises(ValueError, match="^Must choose a year between"):
+            county_age_sex_pop(year, vintage_year=vintage_year)
+
+
+@pytest.mark.parametrize(
+    ("year"),
+    range(2010, 2025),
+)
+def test_county_age_sex_pop_as_pandas(year: int) -> None:
+    df = county_age_sex_pop(year, as_pandas=True)
+
+    assert isinstance(df, DataFrame)
+
+
+def test_county_age_sex_pop_invalid_vintage_year_exception() -> None:
+    with pytest.raises(ValueError, match="^Must choose a vintage year between"):
+        county_age_sex_pop(2023, vintage_year=2000)  # pyright: ignore [reportArgumentType]