|
#!/bin/bash
# Software Name: floss-toolbox
# SPDX-FileCopyrightText: Copyright (c) 2021-2022 Orange
# SPDX-License-Identifier: Apache-2.0
#
# This software is distributed under the Apache 2.0 license.
#
# Author: Pierre-Yves LAPERSONNE <pierreyves(dot)lapersonne(at)orange(dot)com> et al.

# Since...............: 09/03/2022
# Description.........: Check if there are leaks thanks to gitleaks in GitLab projects
#set -euxo pipefail
VERSION="1.0.0"

# Config
# ------

# Exit codes returned by this script (also documented by UsageAndExit).
EXIT_OK=0
EXIT_BAD_ARGUMENTS=1
EXIT_BAD_SETUP=2

# Helper Python scripts shared with the GitHub tooling; paths are relative to
# this script's directory, so the script must be run from its own folder.
URL_EXTRACTER_FILE="./../github/utils/extract-repos-field-from-json.py" # TODO: Extract this Python script to common files
LEAKS_PARSER="./../github/utils/count-leaks-nodes.py" # TODO: Extract this Python script to common files
# Final CSV report; "$$" (current shell PID) makes the file name unique per run.
GITLEAKS_FINAL_REPORT="$$_gitleaks-final_report-count.csv"
| 26 | + |
# Functions
# ---------

# Print version, usage and exit codes on stdout, then exit with EXIT_OK.
# The script expects exactly 4 arguments: ORGANISATION_ID, KEY, PAGINATION, TOKEN
# (the previous usage line wrongly listed FOLDER_NAME and TOKEN twice).
UsageAndExit(){
    echo "check-leaks-from-gitlab.sh - Version $VERSION"
    echo "USAGE:"
    echo "bash check-leaks-from-gitlab.sh ORGANISATION_ID KEY PAGINATION TOKEN"
    echo "with ORGANISATION_ID: GitLab organisation ID"
    echo "with KEY: JSON key to use for cloning URL"
    echo "with PAGINATION: number of items per page"
    echo "with TOKEN: GitLab access token"
    echo "About exit codes:"
    echo -e "\t 0................: Normal exit"
    echo -e "\t 1................: Bad arguments given to the script"
    echo -e "\t 2................: Bad setup for the script or undefined LEAKS_PARSER file"
    exit $EXIT_OK
}
| 44 | + |
# Check setup
# -----------

# No argument at all: just display usage and leave.
if [ "$#" -eq 0 ]; then
    UsageAndExit
    exit $EXIT_OK
fi

# Exactly 4 arguments are expected (see UsageAndExit).
if [ "$#" -ne 4 ]; then
    echo "ERROR: Bad arguments number. Exits now"
    UsageAndExit
    exit $EXIT_BAD_ARGUMENTS
fi

# Both helper Python scripts must exist, otherwise nothing can be parsed.
if [ ! -f "$URL_EXTRACTER_FILE" ]; then
    echo "ERROR: Bad set up for URL extracter. Exits now"
    UsageAndExit
    exit $EXIT_BAD_SETUP
fi

if [ ! -f "$LEAKS_PARSER" ]; then
    echo "ERROR: Bad set up for leaks parser. Exits now"
    UsageAndExit
    exit $EXIT_BAD_SETUP
fi

# $1 - GitLab organisation ID.
# NB: "-z" already covers the empty string, so the previous deprecated
# '-o "$var" == ""' clause was redundant (ShellCheck SC2166).
organisation_id=$1
if [ -z "$organisation_id" ]; then
    echo "ERROR: No organisation ID defined. Exits now."
    UsageAndExit
    exit $EXIT_BAD_ARGUMENTS
fi

# $2 - JSON key holding the cloning URL in the GitLab API payload.
cloning_url_key=$2
if [ -z "$cloning_url_key" ]; then
    echo "ERROR: No JSON key for URL. Exits now."
    UsageAndExit
    exit $EXIT_BAD_ARGUMENTS
fi

# $3 - Number of items per page for the paginated GitLab API.
pagination=$3
if [ -z "$pagination" ]; then
    echo "ERROR: No pagination defined. Exits now."
    UsageAndExit
    exit $EXIT_BAD_ARGUMENTS
fi

# $4 - GitLab personal access token.
access_token=$4
if [ -z "$access_token" ]; then
    echo "ERROR: No access token is defined. Exits now."
    UsageAndExit
    exit $EXIT_BAD_ARGUMENTS
fi
| 98 | + |
# Run
# ---

echo "---------------------------------------------"
echo "check-leaks-from-gitlab.sh - Version $VERSION"
echo "---------------------------------------------"

# Step 1 - Get all groups and subgroups projects

max_number_of_pages=10 # TODO: Remove magic number for max number of pages
echo "Get all projects of groups and subgroups with $pagination items per page and arbitrary $max_number_of_pages pages max..."

gitlab_projects_dump_file_raw="./data/.gitlab-projects-dump.raw.json"
gitlab_projects_dump_file_clean="./data/.gitlab-projects-dump.clean.json"
# Start from a clean raw dump: the pages below are appended to it.
if [ -f "$gitlab_projects_dump_file_raw" ]; then
    rm -f "$gitlab_projects_dump_file_raw"
fi

# Fetch each page of the paginated group-projects API and concatenate the raw
# JSON payloads (merged into one array in Step 2).
for (( page = 1; page <= max_number_of_pages; page++ ))
do
    curl --silent --header "Authorization: Bearer $access_token" --location --request GET "https://gitlab.com/api/v4/groups/$organisation_id/projects?include_subgroups=true&per_page=$pagination&page=$page" >> "$gitlab_projects_dump_file_raw"
done
| 121 | + |
# Step 2 - Extract repositories URL

# Because of pagination (max 100 items per page, arbitrary 10 pages here), raw pages are concatenated in one file.
# So we have pasted JSON arrays in one file.
# We see arrays with pattern ][. Merge all arrays by replacing cumulated JSON arrays, i.e. replacing ][ by ,
# But for empty pages the empty arrays ][ become cumulated commas, so tr squeezes them to one.
# Then it remains the final array with a useless , with pattern },] replaced by }]
sed -e "s/\]\[/,/g" "$gitlab_projects_dump_file_raw" | tr -s ',' | sed -e "s/\}\,\]/\}\]/g" > "$gitlab_projects_dump_file_clean"

url_for_cloning="./data/.url-for-cloning.txt"
echo "Extract cloning from results (using '$cloning_url_key' as JSON key)..."
# One cloning URL per line in $url_for_cloning.
python3 "$URL_EXTRACTER_FILE" --field "$cloning_url_key" --source "$gitlab_projects_dump_file_clean" > "$url_for_cloning"
repo_count=$(wc -l < "$url_for_cloning" | tr -d ' ')
echo "Extraction done. Found '$repo_count' items."
| 136 | + |
# Step 3 - Clone repositories

# Remember where we started so relative paths keep working after the cd below.
dir_before_dump=$(pwd)
echo "Creating dump directory..."
directory_name=$(date '+%Y-%m-%d')
# NOTE(review): $repositories_location is never assigned in this script (it looks
# copied from the GitHub variant). Default to the current directory so the 'cd'
# is a deliberate no-op instead of relying on an unset variable.
cd "${repositories_location:-.}" || exit $EXIT_BAD_SETUP
if [ -d "$directory_name" ]; then
    echo "Removing old directory with the same name"
    rm -rf "$directory_name"
fi
mkdir "$directory_name"
cd "$directory_name" || exit $EXIT_BAD_SETUP
echo "Dump directory created with name '$directory_name' at location $(pwd)."
| 150 | + |
# Step 4 - For each repository, clone it and make a scan

# Count the URL to process: one URL per line in the extracted file.
number_of_url=`cat "$dir_before_dump/$url_for_cloning" | wc | awk {'print $1 '}`
cpt=1
echo "Dumping of $number_of_url repositories..."
while read url_line; do

    # Step 4.1 - Clone
    # WARNING: gitleaks looks inside files and git histories, so for old and big projects it will take too many time!

    echo "Cloning ($cpt / $number_of_url) '$url_line'..."
    git clone "$url_line"

    # Step 4.2 - Extract new folder name

    # The cloned folder name is the basename of the URL without its ".git" suffix.
    target_folder_name=`basename -s .git $(echo "$url_line")`
    echo "Cloned in folder '$target_folder_name'"

    # Step 5.3 - Look for leaks

    # One JSON report per repository; gitleaks exits with 1 when leaks are found,
    # hence the "|| true" so the loop keeps going.
    gitleaks_file_name="$target_folder_name".gitleaks.json
    gitleaks detect --report-format json --report-path "$gitleaks_file_name" --source "$target_folder_name" || true # gitleaks returns 1 if leaks found

    # In JSON report, a project has no leak if the result file contains an empty JSON array, i.e. only the line
    # []
    if [ -f "$gitleaks_file_name" ]; then
        pwd
        # LEAKS_PARSER prints the number of leak nodes found in the JSON report.
        # NOTE(review): "../$LEAKS_PARSER" assumes the dump directory sits one level
        # below the script directory — confirm if $repositories_location ever gets set.
        count=`python3 "../$LEAKS_PARSER" --file "$gitleaks_file_name"`

        if [ "$count" -eq "0" ]; then
            # NOTE(review): cpt_clean_repo / cpt_dirty_repo are never initialised;
            # bash arithmetic treats the unset variable as 0 on first increment.
            echo "✅ ;$target_folder_name;$count" >> $GITLEAKS_FINAL_REPORT
            echo "✅ Gitleaks did not find leaks for '$target_folder_name'"
            cpt_clean_repo=$((cpt_clean_repo+1))
        else
            echo "🚨;$target_folder_name;$count" >> $GITLEAKS_FINAL_REPORT
            echo "🚨 WARNING! gitleaks may have found '$count' leaks for '$target_folder_name'"
            cpt_dirty_repo=$((cpt_dirty_repo+1))
        fi
    else
        echo "💥 ERROR: The file '$gitleaks_file_name' does not exist, something has failed with gitleaks!"
    fi

    # Delete the clone once scanned to save disk space.
    rm -rf "$target_folder_name"

    cpt=$((cpt+1))

done < "$dir_before_dump/$url_for_cloning"

echo "Scanning done!"
| 200 | + |
# Step 6 - Clean up

# NOTE(review): $previous_git_diff_rename_limit is never saved anywhere in this
# script (copied from the GitHub variant). Only restore the setting when it is
# actually defined: "git config --global diff.renameLimit" with no value would
# *read* the key instead of restoring it.
if [ -n "${previous_git_diff_rename_limit:-}" ]; then
    git config --global diff.renameLimit "$previous_git_diff_rename_limit" # (default seems to be 0)
fi

# Move the CSV report back to the start directory and display a summary.
mv "$GITLEAKS_FINAL_REPORT" "$dir_before_dump"
echo "GitLab organisation ID...............: '$organisation_id'"
echo "Total number of projects.............: '$number_of_url'"
echo "Number of projects with alerts.......: '$cpt_dirty_repo'"
echo "Number of projects without alerts....: '$cpt_clean_repo'"
echo "Final report is......................: '$GITLEAKS_FINAL_REPORT'"

# Remove working files (the last cloned folder was already deleted in the loop;
# this rm is a safety net in case the loop exited early).
rm -rf "$target_folder_name"
rm -rf "$dir_before_dump/$url_for_cloning"
cd "$dir_before_dump" || exit $EXIT_BAD_SETUP
rm -f "$url_for_cloning"

echo "Check done!"
0 commit comments