Last modified: Nov 09, 2023 By Alexander Williams
Python: Extracting Emails from a Text File (with Different Methods)
Method 1: Using Regular Expressions
import re
# Pattern for a typical address: local part, "@", dotted domain, alphabetic TLD.
# The TLD class is [A-Za-z]: inside [...] a "|" is a literal pipe, not "or",
# so it must not appear there. No upper bound is placed on the TLD length so
# that long TLDs such as ".photography" are still matched.
email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'

# Read the whole file; the encoding is stated explicitly so the result does
# not depend on the platform's default locale encoding.
with open('sample.txt', 'r', encoding='utf-8') as file:
    text = file.read()

# Find all email addresses in the text using the pattern
email_addresses = re.findall(email_pattern, text)

# Print the extracted email addresses, one per line.
# The loop variable is not called "email" to avoid shadowing the stdlib module.
for address in email_addresses:
    print(address)
Output:
john.doe@example.com
jane.smith@gmail.com
...
Method 2: Using Python's Email Parser Library
import email
import email.iterators  # imported explicitly instead of relying on a side effect
import re
from email import policy
from email.parser import BytesParser
from email.utils import getaddresses

# Headers that can carry mailbox addresses.
address_headers = ('from', 'to', 'cc', 'bcc', 'reply-to', 'sender')

# Pattern used to pick addresses out of free text in the message body.
email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'

# Open the file in binary mode because BytesParser works on bytes
with open('another_sample.txt', 'rb') as file:
    text = file.read()

# Parse the email content
message = BytesParser(policy=policy.default).parsebytes(text)

# Extract addresses from the address headers; getaddresses() returns
# (display name, address) pairs, and only the address part is kept.
header_values = []
for header_name in address_headers:
    header_values.extend(message.get_all(header_name, []))
email_addresses = [
    address for _name, address in getaddresses(header_values) if address
]

# Also scan the body line by line. Keeping only the regex matches (rather than
# the raw lines) drops non-address text and the trailing newlines.
for line in email.iterators.body_line_iterator(message):
    email_addresses.extend(re.findall(email_pattern, line))

# Print the extracted email addresses
for email_address in email_addresses:
    print(email_address)
Output:
alice.smith@example.com
bob.johnson@gmail.com
...
Method 3: Using a Custom Email Address Detection Function
import re
# Custom function to extract email addresses from text
# Compiled once at import time and reused on every call.
# The TLD class is [A-Za-z]: a "|" inside [...] would be a literal pipe, not
# alternation. The TLD length has no upper bound so long TLDs still match.
_EMAIL_RE = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')


def extract_emails(text):
    """Return all email-like substrings found in *text*.

    Args:
        text: The string to scan.

    Returns:
        A list of matched address strings in order of appearance. Duplicates
        are kept. The list is empty when nothing matches.
    """
    return _EMAIL_RE.findall(text)
# Open yet another text file for reading; the encoding is stated explicitly
# so the result does not depend on the platform's default locale encoding.
with open('yet_another_sample.txt', 'r', encoding='utf-8') as file:
    text = file.read()

# Extract email addresses using the custom function
email_addresses = extract_emails(text)

# Print the extracted email addresses, one per line.
# The loop variable is not called "email" to avoid shadowing the stdlib module.
for address in email_addresses:
    print(address)
Output:
charlie.wilson@example.com
david.brown@gmail.com
...