Emails rules extractor
Scripts
1
Posts
1
Posters
148
Views
1
Watching
-
Title:
Emails rules extractor
Author:
chatgpt
An AI-written, interactive Python 3 script that does the following:
Features:
- Reads email addresses from a file.
- Extracts:
  - Numbers at the end of the username (e.g., user123@gmail.com → 123)
  - The domain (e.g., gmail.com)
- Filters for specific domains (gmail.com, yahoo.com, etc. — user input).
- Sorts results by frequency (how often each number + domain pair appears).
- Creates Hashcat rules like $1$2$3$@$g$m$a$i$l$.$c$o$m.
- Writes the output to a file of your choice.
#!/usr/bin/env python3
"""Build hashcat append rules from numeric username suffixes in an e-mail list.

For every address such as ``name123@gmail.com`` whose domain is in a
user-supplied filter, the pair ``("123", "gmail.com")`` is counted.  One rule
per distinct pair is written, most frequent first, in the form
``$1$2$3$@$g$m$a$i$l$.$c$o$m``.
"""
import re
from collections import Counter, defaultdict
from typing import Optional, Tuple

# Compiled once at import time; both patterns are applied to every input line.
_EMAIL_RE = re.compile(r'^([a-zA-Z0-9._%+-]+)@([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})$')
_TRAILING_DIGITS_RE = re.compile(r'(\d+)$')

# Maximum number of sample addresses remembered per (digits, domain) pair.
_MAX_EXAMPLES = 3


def extract_data_from_email(email: str) -> Optional[Tuple[str, str]]:
    """Return ``(trailing_digits, domain)`` for *email*, or ``None``.

    Surrounding whitespace is ignored.  ``None`` is returned when the text is
    not a syntactically plausible address or when the username does not end
    in at least one digit.  The domain is lower-cased; the digits are returned
    as a string, so leading zeros are kept.
    """
    match = _EMAIL_RE.match(email.strip())
    if not match:
        return None
    user, domain = match.groups()
    digits_match = _TRAILING_DIGITS_RE.search(user)
    if not digits_match:
        return None
    return digits_match.group(1), domain.lower()


def string_to_hashcat_rule(s: str) -> str:
    """Prefix every character of *s* with ``$`` to form a hashcat rule string.

    An empty string yields an empty rule.
    """
    return ''.join(f"${c}" for c in s)


def main() -> None:
    """Prompt for paths and a domain filter, then write the rule file.

    Reads one address per line from the input file, counts the
    ``(digits, domain)`` pairs whose domain is in the filter, writes one rule
    per pair to the output file in descending order of frequency, and prints
    a summary of the five most common rules with sample addresses.
    """
    input_path = input("Enter path to input file containing emails: ").strip()
    domains_input = input(
        "Enter comma-separated domains to filter (e.g., gmail.com,yahoo.com): "
    ).strip()
    output_path = input("Enter path to save generated hashcat rules: ").strip()

    domains_to_include = {
        domain.strip().lower()
        for domain in domains_input.split(',')
        if domain.strip()
    }

    counter = Counter()
    examples = defaultdict(list)

    # errors="replace": a stray undecodable byte must not abort the whole run.
    # The affected line gains a U+FFFD character, fails the address pattern
    # and is skipped like any other malformed line.
    with open(input_path, 'r', encoding='utf-8', errors='replace') as f:
        for line in f:
            email = line.strip()
            extracted = extract_data_from_email(email)
            if not extracted:
                continue
            digits, domain = extracted
            if domain not in domains_to_include:
                continue
            key = (digits, domain)
            counter[key] += 1
            if len(examples[key]) < _MAX_EXAMPLES:
                examples[key].append(email)

    # most_common() sorts by count, descending; ties keep first-seen order.
    sorted_items = counter.most_common()

    with open(output_path, 'w', encoding='utf-8') as out:
        for (digits, domain), count in sorted_items:
            rule = string_to_hashcat_rule(digits + '@' + domain)
            out.write(f"{rule}\n")

    print(f"\nDone! {len(sorted_items)} rules written to {output_path}")
    print("Top 5 extracted rules with examples:")
    for (digits, domain), count in sorted_items[:5]:
        rule = string_to_hashcat_rule(digits + '@' + domain)
        print(f"  Rule: {rule} | Count: {count} | Examples: {examples[(digits, domain)]}")


if __name__ == '__main__':
    main()
Example
Input:
File contains:
[email protected] [email protected] [email protected] [email protected] [email protected] bad.email.com
Output:
$1$2$3$@$g$m$a$i$l$.$c$o$m
$4$5$6$@$g$m$a$i$l$.$c$o$m
$1$2$3$@$y$a$h$o$o$.$c$o$m