| 
105 | 105 | from .utils._auth import _get_token_from_environment, _get_token_from_file, _get_token_from_google_colab  | 
106 | 106 | from .utils._deprecation import _deprecate_arguments  | 
107 | 107 | from .utils._typing import CallableT  | 
 | 108 | +from .utils._verification import collect_local_files, resolve_local_root, verify_maps  | 
108 | 109 | from .utils.endpoint_helpers import _is_emission_within_threshold  | 
109 | 110 | 
 
  | 
110 | 111 | 
 
  | 
111 | 112 | if TYPE_CHECKING:  | 
112 | 113 |     from .inference._providers import PROVIDER_T  | 
 | 114 | +    from .utils._verification import Verification  | 
113 | 115 | 
 
  | 
114 | 116 | R = TypeVar("R")  # Return type  | 
115 | 117 | CollectionItemType_T = Literal["model", "dataset", "space", "paper", "collection"]  | 
@@ -3080,6 +3082,84 @@ def list_repo_tree(  | 
3080 | 3082 |         for path_info in paginate(path=tree_url, headers=headers, params={"recursive": recursive, "expand": expand}):  | 
3081 | 3083 |             yield (RepoFile(**path_info) if path_info["type"] == "file" else RepoFolder(**path_info))  | 
3082 | 3084 | 
 
  | 
 | 3085 | +    @validate_hf_hub_args  | 
 | 3086 | +    def verify_repo_checksums(  | 
 | 3087 | +        self,  | 
 | 3088 | +        repo_id: str,  | 
 | 3089 | +        *,  | 
 | 3090 | +        repo_type: Optional[str] = None,  | 
 | 3091 | +        revision: Optional[str] = None,  | 
 | 3092 | +        local_dir: Optional[Union[str, Path]] = None,  | 
 | 3093 | +        cache_dir: Optional[Union[str, Path]] = None,  | 
 | 3094 | +        token: Union[str, bool, None] = None,  | 
 | 3095 | +    ) -> "Verification":  | 
 | 3096 | +        """  | 
 | 3097 | +        Verify local files for a repo against Hub checksums.  | 
 | 3098 | +
  | 
 | 3099 | +        Args:  | 
 | 3100 | +            repo_id (`str`):  | 
 | 3101 | +                A namespace (user or an organization) and a repo name separated by a `/`.  | 
 | 3102 | +            repo_type (`str`, *optional*):  | 
 | 3103 | +                The type of the repository from which to get the tree (`"model"`, `"dataset"` or `"space"`.  | 
 | 3104 | +                Defaults to `"model"`.  | 
 | 3105 | +            revision (`str`, *optional*):  | 
 | 3106 | +                The revision of the repository from which to get the tree. Defaults to `"main"` branch.  | 
 | 3107 | +            local_dir (`str` or `Path`, *optional*):  | 
 | 3108 | +                The local directory to verify.  | 
 | 3109 | +            cache_dir (`str` or `Path`, *optional*):  | 
 | 3110 | +                The cache directory to verify.  | 
 | 3111 | +            token (Union[bool, str, None], optional):  | 
 | 3112 | +                A valid user access token (string). Defaults to the locally saved  | 
 | 3113 | +                token, which is the recommended method for authentication (see  | 
 | 3114 | +                https://huggingface.co/docs/huggingface_hub/quick-start#authentication).  | 
 | 3115 | +                To disable authentication, pass `False`.  | 
 | 3116 | +
  | 
 | 3117 | +        Returns:  | 
 | 3118 | +            [`Verification`]: a structured result containing the verification details.  | 
 | 3119 | +
  | 
 | 3120 | +        Raises:  | 
 | 3121 | +            [`~utils.RepositoryNotFoundError`]:  | 
 | 3122 | +                If repository is not found (error 404): wrong repo_id/repo_type, private but not authenticated or repo  | 
 | 3123 | +                does not exist.  | 
 | 3124 | +            [`~utils.RevisionNotFoundError`]:  | 
 | 3125 | +                If revision is not found (error 404) on the repo.  | 
 | 3126 | +            [`~utils.RemoteEntryNotFoundError`]:  | 
 | 3127 | +                If the tree (folder) does not exist (error 404) on the repo.  | 
 | 3128 | +
  | 
 | 3129 | +        """  | 
 | 3130 | + | 
 | 3131 | +        if repo_type is None:  | 
 | 3132 | +            repo_type = constants.REPO_TYPE_MODEL  | 
 | 3133 | + | 
 | 3134 | +        if local_dir is not None and cache_dir is not None:  | 
 | 3135 | +            raise ValueError("Pass either `local_dir` or `cache_dir`, not both.")  | 
 | 3136 | + | 
 | 3137 | +        root, remote_revision = resolve_local_root(  | 
 | 3138 | +            repo_id=repo_id,  | 
 | 3139 | +            repo_type=repo_type,  | 
 | 3140 | +            revision=revision,  | 
 | 3141 | +            cache_dir=Path(cache_dir) if cache_dir is not None else None,  | 
 | 3142 | +            local_dir=Path(local_dir) if local_dir is not None else None,  | 
 | 3143 | +        )  | 
 | 3144 | +        local_by_path = collect_local_files(root)  | 
 | 3145 | + | 
 | 3146 | +        # get remote entries  | 
 | 3147 | +        remote_by_path: dict[str, object] = {}  | 
 | 3148 | +        for entry in self.list_repo_tree(  | 
 | 3149 | +            repo_id=repo_id, recursive=True, revision=remote_revision, repo_type=repo_type, token=token  | 
 | 3150 | +        ):  | 
 | 3151 | +            path = getattr(entry, "path", None)  | 
 | 3152 | +            if not path:  | 
 | 3153 | +                continue  | 
 | 3154 | +            lfs = getattr(entry, "lfs", None)  | 
 | 3155 | +            has_lfs_sha = (getattr(lfs, "sha256", None) is not None) or (  | 
 | 3156 | +                isinstance(lfs, dict) and lfs.get("sha256") is not None  | 
 | 3157 | +            )  | 
 | 3158 | +            if hasattr(entry, "blob_id") or has_lfs_sha:  | 
 | 3159 | +                remote_by_path[path] = entry  | 
 | 3160 | + | 
 | 3161 | +        return verify_maps(remote_by_path=remote_by_path, local_by_path=local_by_path, revision=remote_revision)  | 
 | 3162 | + | 
3083 | 3163 |     @validate_hf_hub_args  | 
3084 | 3164 |     def list_repo_refs(  | 
3085 | 3165 |         self,  | 
@@ -10733,6 +10813,7 @@ def _parse_revision_from_pr_url(pr_url: str) -> str:  | 
10733 | 10813 | list_repo_commits = api.list_repo_commits  | 
10734 | 10814 | list_repo_tree = api.list_repo_tree  | 
10735 | 10815 | get_paths_info = api.get_paths_info  | 
 | 10816 | +verify_repo_checksums = api.verify_repo_checksums  | 
10736 | 10817 | 
 
  | 
10737 | 10818 | get_model_tags = api.get_model_tags  | 
10738 | 10819 | get_dataset_tags = api.get_dataset_tags  | 
 | 
0 commit comments