Skip to content

Commit e2f6f26

Browse files
authored
Add sort and limit parameters in hf cache ls (#3510)
* Add sort and limit parameters in hf cache ls * fix mypy * remove include_revisions + add test
1 parent aa6a232 commit e2f6f26

File tree

5 files changed

+132
-3
lines changed

5 files changed

+132
-3
lines changed

docs/source/en/guides/cli.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -614,7 +614,7 @@ model/microsoft/UserLM-8b be8f2069189bdf443e554c24e488ff3ff6952691 32.1G 4 da
614614
Found 1 repo(s) for a total of 1 revision(s) and 32.1G on disk.
615615
```
616616

617-
The command supports several output formats for scripting: `--format json` prints structured objects, `--format csv` writes comma-separated rows, and `--quiet` prints only IDs. Combine these with `--cache-dir` to target alternative cache locations. See the [Manage your cache](./manage-cache) guide for advanced workflows.
617+
The command supports several output formats for scripting: `--format json` prints structured objects, `--format csv` writes comma-separated rows, and `--quiet` prints only IDs. Use `--sort` to order entries by `accessed`, `modified`, `name`, or `size` (append `:asc` or `:desc` to control order), and `--limit` to restrict results to the top N entries. Combine these with `--cache-dir` to target alternative cache locations. See the [Manage your cache](./manage-cache) guide for advanced workflows.
618618

619619
Delete cache entries selected with `hf cache ls --quiet` by piping the IDs into `hf cache rm`:
620620

docs/source/en/guides/manage-cache.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,7 @@ Found 2 repo(s) for a total of 2 revision(s) and 3.0G on disk.
401401

402402
Need machine-friendly output? Use `--format json` to get structured objects or
403403
`--format csv` for spreadsheets. Alternatively `--quiet` prints only identifiers (one
404-
per line) so you can pipe them into other tooling. Combine these options with
404+
per line) so you can pipe them into other tooling. Use `--sort` to order entries by `accessed`, `modified`, `name`, or `size` (append `:asc` or `:desc` to control order), and `--limit` to restrict results to the top N entries. Combine these options with
405405
`--cache-dir` when you need to inspect a cache stored outside of `HF_HOME`.
406406

407407
**Filter with common shell tools**

docs/source/en/package_reference/cli.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,8 @@ $ hf cache ls [OPTIONS]
171171
* `-f, --filter TEXT`: Filter entries (e.g. 'size>1GB', 'type=model', 'accessed>7d'). Can be used multiple times.
172172
* `--format [table|json|csv]`: Output format. [default: table]
173173
* `-q, --quiet`: Print only IDs (repo IDs or revision hashes).
174+
* `--sort [accessed|accessed:asc|accessed:desc|modified|modified:asc|modified:desc|name|name:asc|name:desc|size|size:asc|size:desc]`: Sort entries by key. Supported keys: 'accessed', 'modified', 'name', 'size'. Append ':asc' or ':desc' to explicitly set the order (e.g., 'modified:asc'). Defaults: 'accessed', 'modified', 'size' default to 'desc' (newest/biggest first); 'name' defaults to 'asc' (alphabetical).
175+
* `--limit INTEGER`: Limit the number of results returned. Returns only the top N entries after sorting.
174176
* `--help`: Show this message and exit.
175177

176178
### `hf cache prune`

src/huggingface_hub/cli/cache.py

Lines changed: 103 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,25 @@ class _DeletionResolution:
6262
_FILTER_PATTERN = re.compile(r"^(?P<key>[a-zA-Z_]+)\s*(?P<op>==|!=|>=|<=|>|<|=)\s*(?P<value>.+)$")
6363
_ALLOWED_OPERATORS = {"=", "!=", ">", "<", ">=", "<="}
6464
_FILTER_KEYS = {"accessed", "modified", "refs", "size", "type"}
65+
# Keys accepted by `hf cache ls --sort`.
_SORT_KEYS = {"accessed", "modified", "name", "size"}
# A sort expression is `<key>`, optionally followed by `:asc` or `:desc`.
_SORT_PATTERN = re.compile(r"^(?P<key>[a-zA-Z_]+)(?::(?P<order>asc|desc))?$")
# Order used when the expression carries no explicit suffix:
# accessed/modified/size are descending (newest/biggest first), name is ascending.
_SORT_DEFAULT_ORDER = dict(accessed="desc", modified="desc", size="desc", name="asc")


# Build the SortOptions enum from _SORT_KEYS. Each key contributes three members:
# `<key>`, `<key>_asc` and `<key>_desc`, whose values are the CLI spellings
# `<key>`, `<key>:asc` and `<key>:desc`.
_sort_options_dict = {}
for key in sorted(_SORT_KEYS):
    _sort_options_dict.update(
        {
            key: key,
            f"{key}_asc": f"{key}:asc",
            f"{key}_desc": f"{key}:desc",
        }
    )

SortOptions = Enum("SortOptions", _sort_options_dict, type=str, module=__name__)  # type: ignore
6584

6685

6786
@dataclass(frozen=True)
@@ -378,6 +397,60 @@ def _compare_numeric(left: Optional[float], op: str, right: float) -> bool:
378397
return comparisons[op]
379398

380399

400+
def compile_cache_sort(sort_expr: str) -> tuple[Callable[[CacheEntry], tuple[Any, ...]], bool]:
    """Convert a `hf cache ls` sort expression into a key function for sorting entries.

    Args:
        sort_expr: Expression of the form `key`, `key:asc` or `key:desc`. Leading/trailing
            whitespace is stripped and the expression is lower-cased before parsing.

    Returns:
        A tuple of (key_function, reverse_flag) where reverse_flag indicates whether
        to sort in descending order (True) or ascending order (False). When no order is
        given, the per-key default from `_SORT_DEFAULT_ORDER` is used.

    Raises:
        ValueError: If `sort_expr` does not match `_SORT_PATTERN` or its key is not in `_SORT_KEYS`.
    """
    match = _SORT_PATTERN.match(sort_expr.strip().lower())
    if not match:
        raise ValueError(f"Invalid sort expression: '{sort_expr}'. Expected format: 'key' or 'key:asc' or 'key:desc'.")

    # The expression was lower-cased before matching, so the captured key is already lower-case.
    key = match.group("key")
    explicit_order = match.group("order")

    if key not in _SORT_KEYS:
        # `sorted(...)` keeps the message stable: iterating a set of strings directly
        # can yield a different order from one interpreter run to the next.
        raise ValueError(f"Unsupported sort key '{key}' in '{sort_expr}'. Must be one of {sorted(_SORT_KEYS)}.")

    # Use explicit order if provided, otherwise use default for the key
    order = explicit_order if explicit_order else _SORT_DEFAULT_ORDER[key]
    reverse = order == "desc"

    def _sort_key(entry: CacheEntry) -> tuple[Any, ...]:
        """Return the one-element sort tuple for a (repo, revision) entry."""
        repo, revision = entry

        if key == "name":
            # Sort by cache_id (repo type/id), case-insensitively
            value: Any = repo.cache_id.lower()
            return (value,)

        if key == "size":
            # Use revision size if available, otherwise repo size
            value = revision.size_on_disk if revision is not None else repo.size_on_disk
            return (value,)

        if key == "accessed":
            # The repo's last_accessed is used for both repo and revision entries.
            # A missing timestamp is treated as 0.0 so the values stay comparable.
            value = repo.last_accessed if repo.last_accessed is not None else 0.0
            return (value,)

        if key == "modified":
            # Use revision's last_modified if available, otherwise repo's last_modified;
            # a missing timestamp is treated as 0.0.
            if revision is not None:
                value = revision.last_modified if revision.last_modified is not None else 0.0
            else:
                value = repo.last_modified if repo.last_modified is not None else 0.0
            return (value,)

        # Should never reach here due to validation above
        raise ValueError(f"Unsupported sort key: {key}")

    return _sort_key, reverse
452+
453+
381454
def _resolve_deletion_targets(hf_cache_info: HFCacheInfo, targets: list[str]) -> _DeletionResolution:
382455
"""Resolve the deletion targets into a deletion resolution."""
383456
repo_lookup, revision_lookup = build_cache_index(hf_cache_info)
@@ -458,13 +531,28 @@ def ls(
458531
help="Print only IDs (repo IDs or revision hashes).",
459532
),
460533
] = False,
534+
sort: Annotated[
535+
Optional[SortOptions],
536+
typer.Option(
537+
help="Sort entries by key. Supported keys: 'accessed', 'modified', 'name', 'size'. "
538+
"Append ':asc' or ':desc' to explicitly set the order (e.g., 'modified:asc'). "
539+
"Defaults: 'accessed', 'modified', 'size' default to 'desc' (newest/biggest first); "
540+
"'name' defaults to 'asc' (alphabetical).",
541+
),
542+
] = None,
543+
limit: Annotated[
544+
Optional[int],
545+
typer.Option(
546+
help="Limit the number of results returned. Returns only the top N entries after sorting.",
547+
),
548+
] = None,
461549
) -> None:
462550
"""List cached repositories or revisions."""
463551
try:
464552
hf_cache_info = scan_cache_dir(cache_dir)
465553
except CacheNotFound as exc:
466554
print(f"Cache directory not found: {str(exc.cache_dir)}")
467-
raise typer.Exit(code=1)
555+
raise typer.Exit(code=1) from exc
468556

469557
filters = filter or []
470558

@@ -478,6 +566,20 @@ def ls(
478566
for fn in filter_fns:
479567
entries = [entry for entry in entries if fn(entry[0], entry[1], now)]
480568

569+
# Apply sorting if requested
570+
if sort:
571+
try:
572+
sort_key_fn, reverse = compile_cache_sort(sort.value)
573+
entries.sort(key=sort_key_fn, reverse=reverse)
574+
except ValueError as exc:
575+
raise typer.BadParameter(str(exc)) from exc
576+
577+
# Apply limit if requested
578+
if limit is not None:
579+
if limit < 0:
580+
raise typer.BadParameter(f"Limit must be a positive integer, got {limit}.")
581+
entries = entries[:limit]
582+
481583
if quiet:
482584
for repo, revision in entries:
483585
print(revision.commit_hash if revision is not None else repo.cache_id)

tests/test_cli.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,31 @@ def test_ls_quiet_revisions(self, runner: CliRunner) -> None:
126126
assert result.exit_code == 0
127127
assert result.stdout.strip() == revision.commit_hash
128128

129+
def test_ls_with_sort(self, runner: CliRunner) -> None:
    """`--sort name:desc --limit 2` keeps the two last repos by name, listed in descending order."""
    # Three repos named user/model1..3, each with a single revision whose hash is d/e/f repeated.
    repos = [
        _make_repo(f"user/model{number}", revisions=[_make_revision(letter * 40)])
        for number, letter in enumerate("def", start=1)
    ]
    entries = [(repo, None) for repo in repos]
    repo_refs_map = {repo: frozenset() for repo in repos}

    # Bypass the real cache scan and feed the command the prepared entries.
    with (
        patch("huggingface_hub.cli.cache.scan_cache_dir"),
        patch(
            "huggingface_hub.cli.cache.collect_cache_entries",
            return_value=(entries, repo_refs_map),
        ),
    ):
        result = runner.invoke(app, ["cache", "ls", "--sort", "name:desc", "--limit", "2"])

    assert result.exit_code == 0
    stdout = result.stdout

    # Check alphabetical order
    assert stdout.index("model3") < stdout.index("model2")  # descending order

    # Check limit of 2 entries
    assert "model1" not in stdout
153+
129154
def test_rm_revision_executes_strategy(self, runner: CliRunner) -> None:
130155
revision = _make_revision("c" * 40)
131156
repo = _make_repo("user/model", revisions=[revision])

0 commit comments

Comments
 (0)