Merge branch 'master' of github.com:danielmiessler/SecLists

ItsIgnacioPortal · ItsIgnacioPortal · commit 106500b542e7 · 2025-02-22T05:09:14.000-03:00
diff --git a/Discovery/Web-Content/trickest-robots-disallowed-wordlists/top-10000.txt b/Discovery/Web-Content/trickest-robots-disallowed-wordlists/top-10000.txt
@@ -94125,7 +94125,7 @@ index.php/search
 index.php/search-results
 index.php/search-seeds
 index.php/search/
-index.php/search/
+index.php/search/
 index.php/search?
 index.php/search_content_unece
 index.php/site-search
@@ -130292,7 +130292,7 @@ search.py
 search.ref
 search.rl
 search/
-search/
+search/
 search/"
 search/$
 search/*
diff --git a/utf8fixer.py b/utf8fixer.py
@@ -0,0 +1,73 @@
+#!/usr/bin/python
+from __future__ import print_function
+from codecs import open as copen
+from os import listdir, path
+from sys import argv
+
+import unicodedata
+
+# usage: utf8-fix.py PATH [codec] [normalize]
+PATH = argv[1] if len(argv) > 1 else ""
+NORMALIZE = False
+ENCODING = None
+DEFAULT_ENCODING = "iso8859_2"  # iso8859_2 a.k.a latin2
+
+for arg in argv[2:]:
+    if arg.lower() == "normalize":
+        NORMALIZE = True
+    else:
+        ENCODING = arg
+
+
+
+def convert_file(file_path):
+    print("[*]", file_path, "fixed!")
+    foriginal = copen(file_path, "r", "utf8", errors='ignore')
+    content = foriginal.read()
+    foriginal.close()
+
+    ccontent = fix_encoding(content, ENCODING, NORMALIZE, True)
+    fconverted = copen(file_path, "w", "utf8")
+    fconverted.write(ccontent)
+    fconverted.close()
+
+def normalize_str(text):
+    return ''.join(
+        c for c in unicodedata.normalize('NFKD', text)
+        if unicodedata.category(c) != 'Mn'
+    )
+
+def fix_encoding(content, encoding=None, norm=False, verbose=False):
+    encoding = encoding or DEFAULT_ENCODING
+
+    try:
+        fixed = content.encode(encoding).decode("utf8")
+    except:
+        fixed = content
+        if verbose:
+            print("[*] error: can't fix the encoding. mixed encoding?")
+
+    if norm:
+        return normalize_str(fixed)
+    else:
+        return fixed
+
+
+if __name__ == "__main__":
+    if path.isfile(PATH):
+        convert_file(PATH)
+
+    elif path.isdir(PATH):
+
+        for ffile in listdir(PATH):
+            file_path = path.join(PATH, ffile)
+
+            if path.isfile(file_path):
+                convert_file(file_path)
+    else:
+        print(
+            "[*] error: "
+            "usage: %s FILE_OR_DIR_PATH [codec] [normalize]"
+            %
+            argv[0]
+        )