Last modified: Oct 04, 2023 By Alexander Williams

Python: How to Use tldextract With Examples

Example 1: Extract TLD, Subdomain, and Domain


import tldextract

# Sample address whose public suffix has two labels ("co.uk")
url = "https://blog.example.co.uk"

# Split the address into subdomain, registered domain, and public suffix
extracted_info = tldextract.extract(url)

# Report each component on its own line
print(f"Subdomain: {extracted_info.subdomain}")
print(f"Domain: {extracted_info.domain}")
print(f"TLD: {extracted_info.suffix}")

Output:

Subdomain: blog
Domain: example
TLD: co.uk

Example 2: Extract TLD from a List of URLs


import tldextract

# URLs to inspect, one per line for readability
urls = [
    "https://www.example.com",
    "https://blog.example.co.uk",
    "https://sub.example.org",
]

# Show each URL with its public suffix, followed by a blank separator line
for url in urls:
    suffix = tldextract.extract(url).suffix
    print(f"URL: {url}")
    print(f"TLD: {suffix}", end="\n\n")

Output:

URL: https://www.example.com
TLD: com

URL: https://blog.example.co.uk
TLD: co.uk

URL: https://sub.example.org
TLD: org

Example 3: Handling URLs with Missing Subdomains

import tldextract

# Address that consists only of a registered domain and a suffix
url = "https://example.com"

# Split the address into its components
extracted_info = tldextract.extract(url)

# Emit all three components in a single call, one per line;
# the subdomain is printed even though this URL has none
print(
    f"Subdomain: {extracted_info.subdomain}",
    f"Domain: {extracted_info.domain}",
    f"TLD: {extracted_info.suffix}",
    sep="\n",
)

Output:

Subdomain:
Domain: example
TLD: com

Example 4: Extract TLD from Email Addresses


import tldextract

# Address of the form local-part@host
email = "user@example.com"

# tldextract parses the string like a URL, so the "user@" part is treated
# as userinfo and ignored; only the host contributes to the result
suffix = tldextract.extract(email).suffix

# Report the public suffix of the mail host
print(f"TLD: {suffix}")

Output:

TLD: com

Example 5: Extract Subdomain and Domain from Custom URLs


import tldextract

# URLs with different schemes, a trailing path, and nested subdomains
urls = [
    "http://www.example.com",
    "ftp://sub.domain.org/file.html",
    "http://sub1.sub2.example.co.uk",
]

# Report the subdomain and registered domain of every URL;
# the trailing empty item yields the blank line between entries
for url in urls:
    extracted_info = tldextract.extract(url)
    print(
        f"URL: {url}",
        f"Subdomain: {extracted_info.subdomain}",
        f"Domain: {extracted_info.domain}",
        "",
        sep="\n",
    )

Output:

URL: http://www.example.com
Subdomain: www
Domain: example

URL: ftp://sub.domain.org/file.html
Subdomain: sub
Domain: domain

URL: http://sub1.sub2.example.co.uk
Subdomain: sub1.sub2
Domain: example

Example 6: Extracting TLD with Additional Suffix


import tldextract

# Define a URL whose last label ("example") is not on the Public Suffix List
url = "https://custom-suffix.example"

# Build an extractor that accepts "example" as a suffix in addition to the
# public ones. The default tldextract.extract() does not know this suffix and
# would report an empty suffix, with "example" as the domain instead.
custom_extract = tldextract.TLDExtract(extra_suffixes=["example"])

# Extract domain information using the customised extractor
extracted_info = custom_extract(url)

# Print the subdomain, domain, and TLD (the custom suffix)
print("Subdomain:", extracted_info.subdomain)
print("Domain:", extracted_info.domain)
print("TLD:", extracted_info.suffix)

Output:

Subdomain:
Domain: custom-suffix
TLD: example

Example 7: Extracting TLD from IDN (Internationalized Domain Names)


import tldextract

# Internationalized domain name given in its punycode ("xn--") form
url = "https://www.xn--80ak6aa92e.com"

# Split the address into its components
extracted_info = tldextract.extract(url)

# Pair every output label with the matching component, then print them in order
components = {
    "Subdomain:": extracted_info.subdomain,
    "Domain:": extracted_info.domain,
    "TLD:": extracted_info.suffix,
}
for label, value in components.items():
    print(label, value)

Output:

Subdomain: www
Domain: xn--80ak6aa92e
TLD: com

Example 8: Handling URLs with Ports


import tldextract

# Address that carries an explicit port and a path after the host
url = "https://example.com:8080/path/to/resource"

# Split the address into its components
extracted_info = tldextract.extract(url)

# Look up each component by attribute name and print it with its label
for label, attribute in (
    ("Subdomain:", "subdomain"),
    ("Domain:", "domain"),
    ("TLD:", "suffix"),
):
    print(label, getattr(extracted_info, attribute))

Output:

Subdomain:
Domain: example
TLD: com