Emails rules extractor

A1131

Title: Emails rules extractor
Author: chatgpt

An AI-written, interactive Python 3 script that does the following:

Features:

Reads email addresses from a file.

Extracts:

Numbers at the end of the username (e.g., john123@gmail.com → 123)
The domain (e.g., gmail.com)
Filters for specific domains (gmail.com, yahoo.com, etc. — user input).
Sorts results by frequency (how often each number + domain pair appears).
Creates Hashcat rules like $1$2$3$@$g$m$a$i$l$.$c$o$m.
Writes the output to a file of your choice.

#!/usr/bin/env python3

import re
from collections import Counter, defaultdict

def extract_data_from_email(email):
    """Pull the trailing username digits and the domain out of an address.

    Surrounding whitespace is stripped before the address is validated.

    Returns a ``(digits, domain)`` tuple, with the domain lower-cased, or
    ``None`` when the text is not a well-formed address or its username
    does not end in at least one digit.
    """
    candidate = email.strip()
    parsed = re.match(r'^([a-zA-Z0-9._%+-]+)@([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})$', candidate)
    if parsed is None:
        return None

    local_part = parsed.group(1)
    host = parsed.group(2)

    # Only the run of digits anchored at the very end of the username counts.
    trailing = re.search(r'(\d+)$', local_part)
    if trailing is None:
        return None
    return trailing.group(1), host.lower()

def string_to_hashcat_rule(s):
    """Return *s* as a Hashcat rule by prefixing every character with ``$``.

    For example ``"12@a"`` becomes ``"$1$2$@$a"``; an empty input yields an
    empty rule.
    """
    pieces = []
    for ch in s:
        pieces.append(f"${ch}")
    return ''.join(pieces)

def main():
    """Interactively turn a list of email addresses into Hashcat rules.

    Prompts for three values: the input file (one address per line), a
    comma-separated list of domains to keep, and the output file path.

    Every address whose username ends in digits and whose domain is in the
    requested set is counted per ``(digits, domain)`` pair. One rule per
    pair is written to the output file, most frequent pair first, and a
    summary of the top five pairs (with up to three example addresses
    each) is printed.

    Raises:
        OSError: if the input file cannot be opened or the output file
            cannot be written.
    """
    input_path = input("Enter path to input file containing emails: ").strip()
    domains_input = input("Enter comma-separated domains to filter (e.g., gmail.com,yahoo.com): ").strip()
    output_path = input("Enter path to save generated hashcat rules: ").strip()

    # Blank entries (e.g. from a trailing comma) are dropped; matching is
    # case-insensitive because extracted domains are lower-cased as well.
    domains_to_include = {
        domain.strip().lower() for domain in domains_input.split(',') if domain.strip()
    }
    counter = Counter()
    examples = defaultdict(list)

    # 'utf-8-sig' drops a leading BOM that would otherwise make the first
    # address fail validation; errors='replace' keeps a single undecodable
    # byte from aborting the whole run (such lines simply fail validation
    # and are skipped).
    with open(input_path, 'r', encoding='utf-8-sig', errors='replace') as f:
        for line in f:
            email = line.strip()
            extracted = extract_data_from_email(email)
            if not extracted:
                continue
            digits, domain = extracted
            if domain not in domains_to_include:
                continue
            key = (digits, domain)
            counter[key] += 1
            # Keep only a few sample addresses per pair for the summary.
            if len(examples[key]) < 3:
                examples[key].append(email)

    # Most frequent pairs first; ties keep first-seen order.
    sorted_items = counter.most_common()
    # Build each rule once and reuse it for both the file and the summary.
    rules = [
        string_to_hashcat_rule(digits + '@' + domain)
        for (digits, domain), _ in sorted_items
    ]

    with open(output_path, 'w', encoding='utf-8') as out:
        out.writelines(f"{rule}\n" for rule in rules)

    print(f"\nDone! {len(sorted_items)} rules written to {output_path}")
    print("Top 5 extracted rules with examples:")
    for rule, (key, count) in zip(rules[:5], sorted_items[:5]):
        print(f"  Rule: {rule} | Count: {count} | Examples: {examples[key]}")

# Start the interactive prompt only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

Example
Input:

File contains:

john123@gmail.com
jane123@gmail.com
bob456@gmail.com
alice123@yahoo.com
carol@gmail.com
bad.email.com

Output:

$1$2$3$@$g$m$a$i$l$.$c$o$m
$4$5$6$@$g$m$a$i$l$.$c$o$m
$1$2$3$@$y$a$h$o$o$.$c$o$m

hashpwn

Emails rules extractor

Who's Online [Full List]

Board Statistics