Skip to content

Commit 106500b

Browse files
Merge branch 'master' of github.com:danielmiessler/SecLists
2 parents 66fd037 + e17f446 commit 106500b

File tree

2 files changed

+75
-2
lines changed

2 files changed

+75
-2
lines changed

Discovery/Web-Content/trickest-robots-disallowed-wordlists/top-10000.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94125,7 +94125,7 @@ index.php/search
9412594125
index.php/search-results
9412694126
index.php/search-seeds
9412794127
index.php/search/
94128-
index.php/search/
94128+
index.php/search/
9412994129
index.php/search?
9413094130
index.php/search_content_unece
9413194131
index.php/site-search
@@ -130292,7 +130292,7 @@ search.py
130292130292
search.ref
130293130293
search.rl
130294130294
search/
130295-
search/
130295+
search/
130296130296
search/"
130297130297
search/$
130298130298
search/*

utf8fixer.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
#!/usr/bin/python
2+
from __future__ import print_function
3+
from codecs import open as copen
4+
from os import listdir, path
5+
from sys import argv
6+
7+
import unicodedata
8+
9+
# usage: utf8fixer.py PATH [codec] [normalize]
10+
# Target file or directory; empty string when no argument was supplied.
PATH = argv[1] if argv[1:] else ""

# Codec used by fix_encoding when none is given on the command line.
DEFAULT_ENCODING = "iso8859_2"  # iso8859_2 a.k.a latin2

# Optional settings, overridden by the trailing command-line arguments.
NORMALIZE = False
ENCODING = None

# Every argument after PATH is either the literal word "normalize"
# (case-insensitive) or a codec name; the last codec name given wins.
for arg in argv[2:]:
    if arg.lower() != "normalize":
        ENCODING = arg
        continue
    NORMALIZE = True
20+
21+
22+
23+
def convert_file(file_path):
    """Rewrite the file at *file_path* in place with its encoding fixed.

    The file is read as UTF-8 (bytes that cannot be decoded are ignored),
    passed through ``fix_encoding`` with the module-level ``ENCODING`` and
    ``NORMALIZE`` settings and verbose reporting enabled, and the result is
    written back to the same path as UTF-8.

    Args:
        file_path: Path of the file to convert.
    """
    print("[*]", file_path, "fixed!")

    # Context managers guarantee each handle is closed even if reading,
    # fixing or writing raises part-way through.
    with copen(file_path, "r", "utf8", errors='ignore') as foriginal:
        content = foriginal.read()

    ccontent = fix_encoding(content, ENCODING, NORMALIZE, True)

    with copen(file_path, "w", "utf8") as fconverted:
        fconverted.write(ccontent)
33+
34+
def normalize_str(text):
    """Return *text* with its nonspacing marks removed.

    The text is first decomposed with Unicode NFKD normalization, then
    every character whose Unicode category is ``Mn`` is dropped and the
    remaining characters are joined back together.
    """
    decomposed = unicodedata.normalize('NFKD', text)
    base_chars = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(base_chars)
39+
40+
def fix_encoding(content, encoding=None, norm=False, verbose=False):
    """Try to repair text that was decoded with the wrong codec.

    *content* is encoded with *encoding* and the resulting bytes are
    decoded as UTF-8. If that round trip fails, the original text is
    returned unchanged.

    Args:
        content: Text to repair.
        encoding: Codec name used to encode the text; falls back to
            ``DEFAULT_ENCODING`` when empty or ``None``.
        norm: When true, the result is passed through ``normalize_str``.
        verbose: When true, print a message if the text cannot be fixed.

    Returns:
        The repaired text, or *content* itself when it cannot be repaired,
        optionally normalized.
    """
    encoding = encoding or DEFAULT_ENCODING

    try:
        fixed = content.encode(encoding).decode("utf8")
    except (UnicodeError, LookupError):
        # UnicodeError: the text cannot be encoded with this codec or the
        # bytes are not valid UTF-8. LookupError: unknown codec name.
        # Anything else (e.g. KeyboardInterrupt) is allowed to propagate.
        fixed = content
        if verbose:
            print("[*] error: can't fix the encoding. mixed encoding?")

    if norm:
        return normalize_str(fixed)
    return fixed
54+
55+
56+
if __name__ == "__main__":
    # Script entry point: convert every regular file directly inside a
    # directory (no recursion), or a single file; otherwise print usage.
    if path.isdir(PATH):
        for entry in listdir(PATH):
            candidate = path.join(PATH, entry)
            if not path.isfile(candidate):
                continue
            convert_file(candidate)
    elif path.isfile(PATH):
        convert_file(PATH)
    else:
        usage = (
            "[*] error: "
            "usage: %s FILE_OR_DIR_PATH [codec] [normalize]"
        )
        print(usage % argv[0])

0 commit comments

Comments
 (0)