Last modified: Apr 06, 2023 By Alexander Williams

All Methods to Remove HTML Tags From a String in Python

Method 1: Using a Regular Expression

import re

# Sample input: a short HTML fragment with nested inline tags.
text_with_tags = "<p>This is a <strong>sample</strong> text with <a href='https://example.com'>HTML</a> tags.</p>"

# Pattern for a single tag: "<", any run of characters that are not ">",
# then ">". The negated class [^>] also matches newlines, so a tag whose
# attributes wrap onto another line is still removed -- unlike '<.*?>',
# where "." stops at a newline unless re.DOTALL is set.
# NOTE: this is a purely textual strip, not an HTML parser: entities such
# as &amp; are left as-is and the text inside <script>/<style> is kept.
clean = re.compile(r'<[^>]*>')

# Delete every match of the tag pattern from the input.
text_without_tags = clean.sub('', text_with_tags)

# Show the stripped text.
print(text_without_tags)

Output:

This is a sample text with HTML tags.

Method 2: Using BeautifulSoup

from bs4 import BeautifulSoup # pip install beautifulsoup4

# Sample markup: a paragraph containing nested inline elements.
text_with_tags = "<p>This is a <strong>sample</strong> text with <a href='https://example.com'>HTML</a> tags.</p>"

# Parse the fragment, selecting the 'html.parser' backend explicitly.
soup = BeautifulSoup(text_with_tags, features='html.parser')

# Gather the text content of the parsed document, leaving the markup behind.
text_without_tags = soup.get_text()

# Display the plain-text result.
print(text_without_tags)

Output:

This is a sample text with HTML tags.

Method 3: Using the lxml Library

from lxml import etree # pip install lxml

# Sample markup: a paragraph containing nested inline elements.
text_with_tags = "<p>This is a <strong>sample</strong> text with <a href='https://example.com'>HTML</a> tags.</p>"

# Parse the string as HTML and keep the resulting element.
root = etree.HTML(text_with_tags)

# Serialize the element in text mode so that only the text content is
# emitted; encoding='unicode' requests the result as a string.
text_without_tags = etree.tostring(
    root,
    encoding='unicode',
    method='text',
)

# Display the plain-text result.
print(text_without_tags)

Output:

This is a sample text with HTML tags.