Title: Emails rules extractor
Author: chatgpt
An AI-written, interactive Python 3 script that does the following:
Features:
Reads email addresses from a file.
Extracts:
Numbers at the end of the username (e.g., john123@gmail.com → 123)
The domain (e.g., gmail.com)
Filters for specific domains (gmail.com, yahoo.com, etc. — user input).
Sorts results by frequency (how often each number + domain pair appears).
Creates Hashcat rules like $1$2$3$@$g$m$a$i$l$.$c$o$m.
Writes the output to a file of your choice.
#!/usr/bin/env python3
import re
from collections import Counter, defaultdict
def extract_data_from_email(email):
    """Split an email address into its trailing username digits and domain.

    Args:
        email: Candidate address as a string; surrounding whitespace is
            ignored.

    Returns:
        A ``(digits, domain)`` tuple, where ``digits`` is the run of digits
        that ends the username and ``domain`` is lower-cased. Returns
        ``None`` when the text is not a well-formed address or when the
        username does not end in a digit.
    """
    match = re.fullmatch(
        r'([a-zA-Z0-9._%+-]+)@([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})',
        email.strip(),
    )
    if match is None:
        return None

    user, domain = match.groups()
    # Only digits at the very end of the username are of interest; digits
    # elsewhere in the username (e.g. "12a") do not count.
    digits_match = re.search(r'(\d+)$', user)
    if digits_match is None:
        return None
    return digits_match.group(1), domain.lower()
def string_to_hashcat_rule(s):
    """Build a Hashcat rule that appends ``s`` one character at a time.

    Every character ``c`` of ``s`` is emitted as ``$c`` (Hashcat's
    append-character function), so ``"12"`` becomes ``"$1$2"``.

    Args:
        s: Text the rule should append.

    Returns:
        The rule string; an empty string when ``s`` is empty.
    """
    return ''.join(f"${c}" for c in s)
def main():
    """Interactively turn a file of email addresses into Hashcat rules.

    Prompts for the input file, a comma-separated list of domains to keep,
    and the output file. Each address whose domain is in that list and whose
    username ends in digits contributes one ``(digits, domain)`` pair. The
    pairs are written as one rule per line, most frequent first, and the
    five most frequent rules are printed with up to three example addresses
    each.

    Raises:
        SystemExit: If the input file cannot be read or the output file
            cannot be written.
    """
    input_path = input("Enter path to input file containing emails: ").strip()
    domains_input = input("Enter comma-separated domains to filter (e.g., gmail.com,yahoo.com): ").strip()
    output_path = input("Enter path to save generated hashcat rules: ").strip()

    # Normalise the filter the same way extracted domains are normalised
    # (trimmed, lower-case) and drop empty entries such as a trailing comma.
    domains_to_include = {
        domain.strip().lower()
        for domain in domains_input.split(',')
        if domain.strip()
    }

    counter = Counter()
    examples = defaultdict(list)

    try:
        # 'utf-8-sig' strips a leading BOM that would otherwise make the
        # first address fail to parse; errors='replace' keeps one
        # undecodable byte from aborting the whole run (the affected line
        # simply fails validation and is skipped).
        with open(input_path, 'r', encoding='utf-8-sig', errors='replace') as f:
            for line in f:
                email = line.strip()
                extracted = extract_data_from_email(email)
                if not extracted:
                    continue
                digits, domain = extracted
                if domain not in domains_to_include:
                    continue
                key = (digits, domain)
                counter[key] += 1
                # Keep only a few sample addresses per rule for the summary.
                if len(examples[key]) < 3:
                    examples[key].append(email)
    except OSError as err:
        raise SystemExit(f"Error: cannot read {input_path!r}: {err}") from err

    # Build each rule once; most_common() orders by descending count and
    # keeps first-seen order among ties.
    rules = [
        (string_to_hashcat_rule(digits + '@' + domain), count, examples[(digits, domain)])
        for (digits, domain), count in counter.most_common()
    ]

    try:
        with open(output_path, 'w', encoding='utf-8') as out:
            out.writelines(f"{rule}\n" for rule, _count, _samples in rules)
    except OSError as err:
        raise SystemExit(f"Error: cannot write {output_path!r}: {err}") from err

    print(f"\nDone! {len(rules)} rules written to {output_path}")
    print("Top 5 extracted rules with examples:")
    for rule, count, samples in rules[:5]:
        print(f" Rule: {rule} | Count: {count} | Examples: {samples}")
# Start the interactive prompts only when run as a script, not on import.
if __name__ == '__main__':
    main()
Example
Input:
File contains:
john123@gmail.com
jane123@gmail.com
bob456@gmail.com
alice123@yahoo.com
carol@gmail.com
bad.email.com
Output:
$1$2$3$@$g$m$a$i$l$.$c$o$m
$4$5$6$@$g$m$a$i$l$.$c$o$m
$1$2$3$@$y$a$h$o$o$.$c$o$m