Last modified: Apr 01, 2023 By Alexander Williams

BeautifulSoup Remove Header and Footer Examples

Remove header and footer by tag name

Example:

from bs4 import BeautifulSoup


html_doc = '''
<html>
<head>
	<title>My Website</title>
</head>
<body>
	<header>
		<h1>Welcome to my website</h1>
		<nav>
			<ul>
				<li><a href="#">Home</a></li>
				<li><a href="#">About</a></li>
				<li><a href="#">Contact</a></li>
			</ul>
		</nav>
	</header>
	
	<main>
		<p>This is the main content of my website.</p>
	</main>
	
	<footer>
		<p>&copy; 2023 My Website. All rights reserved.</p>
	</footer>
</body>
</html>
'''

# Load HTML document into BeautifulSoup object
soup = BeautifulSoup(html_doc, 'html.parser')

# Find the first <header> element. find() returns None when the document
# has no such tag, so check before removing it to avoid an AttributeError.
header = soup.find('header')
if header is not None:
    # decompose() removes the element and its children from the tree
    header.decompose()

# Find the first <footer> element and remove it the same way
footer = soup.find('footer')
if footer is not None:
    footer.decompose()

# Get cleaned HTML string
cleaned_html = str(soup)

# Print
print(cleaned_html)

Output:

<html>
<head>
<title>My Website</title>
</head>
<body>

<main>
<p>This is the main content of my website.</p>
</main>

</body>
</html>

Remove header and footer by CSS class name

Example:

from bs4 import BeautifulSoup

html = """
<!DOCTYPE html>
<html>
<head>
	<title>My Website</title>
</head>
<body>
	<header class="page-header">
		<h1>Welcome to my website</h1>
		<nav>
			<ul>
				<li><a href="#">Home</a></li>
				<li><a href="#">About</a></li>
				<li><a href="#">Contact</a></li>
			</ul>
		</nav>
	</header>
	
	<main>
		<p>This is the main content of my website.</p>
	</main>
	
	<footer class="page-footer">
		<p>&copy; 2023 My Website. All rights reserved.</p>
	</footer>
</body>
</html>
"""

# Parse the HTML document into a BeautifulSoup object
soup = BeautifulSoup(html, 'html.parser')

# Remove header and footer elements by CSS class name.
# class_ is used because "class" is a reserved word in Python.
# find() returns None when no element carries the class, so check
# before calling decompose() to avoid an AttributeError.
header = soup.find(class_='page-header')
if header is not None:
    header.decompose()

footer = soup.find(class_='page-footer')
if footer is not None:
    footer.decompose()

# Get cleaned HTML string
cleaned_html = str(soup)

# Print
print(cleaned_html)

Remove header and footer by HTML tag hierarchy

Example:

from bs4 import BeautifulSoup

html = """
<!DOCTYPE html>
<html>
<head>
	<title>My Website</title>
</head>
<body>
	<div id="page">
		<header>
			<h1>Welcome to my website</h1>
			<nav>
				<ul>
					<li><a href="#">Home</a></li>
					<li><a href="#">About</a></li>
					<li><a href="#">Contact</a></li>
				</ul>
			</nav>
		</header>
		
		<main>
			<p>This is the main content of my website.</p>
		</main>
		
		<footer>
			<p>&copy; 2023 My Website. All rights reserved.</p>
		</footer>
	</div>
</body>
</html>
"""

# Parse the HTML document into a BeautifulSoup object
soup = BeautifulSoup(html, 'html.parser')

# Remove header and footer elements by HTML tag hierarchy.
# Look up the wrapping <div id="page"> once and reuse it; find() returns
# None if the container is missing, so guard before navigating into it.
page = soup.find('div', id='page')

# page.header is the first <header> inside the container (None if absent)
header = page.header if page is not None else None
if header is not None:
    header.decompose()

# page.footer is the first <footer> inside the container (None if absent)
footer = page.footer if page is not None else None
if footer is not None:
    footer.decompose()

# Get cleaned HTML string
cleaned_html = str(soup)

# Print
print(cleaned_html)

Remove header and footer by ID

Example:

from bs4 import BeautifulSoup

html = """
<!DOCTYPE html>
<html>
<head>
	<title>My Website</title>
</head>
<body>
	<header id="page-header">
		<h1>Welcome to my website</h1>
		<nav>
			<ul>
				<li><a href="#">Home</a></li>
				<li><a href="#">About</a></li>
				<li><a href="#">Contact</a></li>
			</ul>
		</nav>
	</header>
	
	<main>
		<p>This is the main content of my website.</p>
	</main>
	
	<footer id="page-footer">
		<p>&copy; 2023 My Website. All rights reserved.</p>
	</footer>
</body>
</html>
"""

# Parse the HTML document into a BeautifulSoup object
soup = BeautifulSoup(html, 'html.parser')

# Remove header and footer elements by ID.
# find() returns None when no element has the given id, so check
# before calling decompose() to avoid an AttributeError.
header = soup.find(id='page-header')
if header is not None:
    header.decompose()

footer = soup.find(id='page-footer')
if footer is not None:
    footer.decompose()

# Get cleaned HTML string
cleaned_html = str(soup)

# Print
print(cleaned_html)