Last modified: Oct 04, 2023 By Alexander Williams
Python: How to Use tldextract With Examples
Example 1: Extract TLD, Subdomain, and Domain
import tldextract

# Sample URL; its expected suffix ("co.uk") is made of two labels.
target_url = "https://blog.example.co.uk"

# Split the host name into subdomain, domain, and suffix.
parts = tldextract.extract(target_url)

# Report each component on its own line, paired with its label.
for label, value in (
    ("Subdomain:", parts.subdomain),
    ("Domain:", parts.domain),
    ("TLD:", parts.suffix),
):
    print(label, value)
Output:
Subdomain: blog
Domain: example
TLD: co.uk
Example 2: Extract TLD from a List of URLs
import tldextract

# URLs to process, one per line for readability.
urls = [
    "https://www.example.com",
    "https://blog.example.co.uk",
    "https://sub.example.org",
]

# Extract and print the suffix of every URL. The loop body must be indented;
# without indentation Python raises IndentationError.
for url in urls:
    extracted_info = tldextract.extract(url)
    print(f"URL: {url}")
    print("TLD:", extracted_info.suffix)
    print()  # blank line separating one URL's report from the next
Output:
URL: https://www.example.com
TLD: com
URL: https://blog.example.co.uk
TLD: co.uk
URL: https://sub.example.org
TLD: org
Example 3: Handling URLs with Missing Subdomains
import tldextract

# A URL with nothing in front of the domain label.
bare_url = "https://example.com"

# Split the host name into its components.
result = tldextract.extract(bare_url)

# Print all three components.
print(f"Subdomain: {result.subdomain}")
print(f"Domain: {result.domain}")
print(f"TLD: {result.suffix}")
Output:
Subdomain:
Domain: example
TLD: com
Example 4: Extract TLD from Email Addresses
import tldextract

# Email address to inspect.
email = "user@example.com"

# Pass the whole address to extract() and keep only the suffix.
tld = tldextract.extract(email).suffix

# Print the TLD.
print("TLD:", tld)
Output:
TLD: com
Example 5: Extract Subdomain and Domain from Custom URLs
import tldextract

# URLs in several formats: different schemes, a path, and nested subdomains.
urls = [
    "http://www.example.com",
    "ftp://sub.domain.org/file.html",
    "http://sub1.sub2.example.co.uk",
]

# Extract and print the subdomain and domain of every URL. The loop body must
# be indented; without indentation Python raises IndentationError.
for url in urls:
    extracted_info = tldextract.extract(url)
    print(f"URL: {url}")
    print("Subdomain:", extracted_info.subdomain)
    print("Domain:", extracted_info.domain)
    print()  # blank line separating one URL's report from the next
Output:
URL: http://www.example.com
Subdomain: www
Domain: example
URL: ftp://sub.domain.org/file.html
Subdomain: sub
Domain: domain
URL: http://sub1.sub2.example.co.uk
Subdomain: sub1.sub2
Domain: example
Example 6: Extracting TLD with Additional Suffix
import tldextract

# "example" is not listed in the Public Suffix List, so the default
# tldextract.extract() would not treat it as a suffix: it would report
# "example" as the domain and leave the suffix empty. Build an extractor that
# recognises it as an additional suffix.
custom_extract = tldextract.TLDExtract(extra_suffixes=["example"])

# Define a URL with a custom suffix
url = "https://custom-suffix.example"

# Extract domain information using the customised extractor
extracted_info = custom_extract(url)

# Print the subdomain, domain, and TLD (with custom suffix)
print("Subdomain:", extracted_info.subdomain)
print("Domain:", extracted_info.domain)
print("TLD:", extracted_info.suffix)
Output:
Subdomain:
Domain: custom-suffix
TLD: example
Example 7: Extracting TLD from IDN (Internationalized Domain Names)
import tldextract

# Internationalized domain name, given in its "xn--" encoded form.
idn_url = "https://www.xn--80ak6aa92e.com"

# Split the host name into its components.
pieces = tldextract.extract(idn_url)

# Print each component next to its label.
labelled_fields = {
    "Subdomain:": pieces.subdomain,
    "Domain:": pieces.domain,
    "TLD:": pieces.suffix,
}
for label, value in labelled_fields.items():
    print(label, value)
Output:
Subdomain: www
Domain: xn--80ak6aa92e
TLD: com
Example 8: Handling URLs with Ports
import tldextract

# URL carrying an explicit port number and a path.
url_with_port = "https://example.com:8080/path/to/resource"

# Split the host name into its components.
info = tldextract.extract(url_with_port)

# Emit the three components in one call, one per line.
print(
    f"Subdomain: {info.subdomain}",
    f"Domain: {info.domain}",
    f"TLD: {info.suffix}",
    sep="\n",
)
Output:
Subdomain:
Domain: example
TLD: com