Last modified: Apr 01, 2023 By Alexander Williams
BeautifulSoup Remove Header and Footer Examples
Remove Header and Footer by tag name
Example:
from bs4 import BeautifulSoup
# Sample page: a <header> and a <footer> wrapped around the <main> content.
html_doc = '''
<html>
<head>
<title>My Website</title>
</head>
<body>
<header>
<h1>Welcome to my website</h1>
<nav>
<ul>
<li><a href="#">Home</a></li>
<li><a href="#">About</a></li>
<li><a href="#">Contact</a></li>
</ul>
</nav>
</header>
<main>
<p>This is the main content of my website.</p>
</main>
<footer>
<p>© 2023 My Website. All rights reserved.</p>
</footer>
</body>
</html>
'''
# Load HTML document into BeautifulSoup object
soup = BeautifulSoup(html_doc, 'html.parser')
# Remove the first <header> and the first <footer> element, if present.
for tag_name in ('header', 'footer'):
    element = soup.find(tag_name)
    # find() returns None when the page has no such tag; skip it in that
    # case instead of failing with AttributeError on None.decompose().
    if element is not None:
        # decompose() removes the tag and everything inside it from the tree
        element.decompose()
# Get cleaned HTML string
cleaned_html = str(soup)
# Print
print(cleaned_html)
Output (the blank lines left where the header and footer were removed are omitted here):
<html>
<head>
<title>My Website</title>
</head>
<body>
<main>
<p>This is the main content of my website.</p>
</main>
</body>
</html>
Remove header and footer by CSS class name
Example:
from bs4 import BeautifulSoup
# Sample page whose header and footer carry identifying CSS classes.
html = """
<!DOCTYPE html>
<html>
<head>
<title>My Website</title>
</head>
<body>
<header class="page-header">
<h1>Welcome to my website</h1>
<nav>
<ul>
<li><a href="#">Home</a></li>
<li><a href="#">About</a></li>
<li><a href="#">Contact</a></li>
</ul>
</nav>
</header>
<main>
<p>This is the main content of my website.</p>
</main>
<footer class="page-footer">
<p>© 2023 My Website. All rights reserved.</p>
</footer>
</body>
</html>
"""
soup = BeautifulSoup(html, 'html.parser')
# Remove header and footer elements by CSS class name.
# class_ is used because "class" is a reserved word in Python.
header = soup.find(class_='page-header')
# find() returns None when nothing matches, so guard before removing.
if header is not None:
    header.decompose()
footer = soup.find(class_='page-footer')
if footer is not None:
    footer.decompose()
# Get cleaned HTML string
cleaned_html = str(soup)
# Print
print(cleaned_html)
Remove header and footer by HTML tag hierarchy
Example:
from bs4 import BeautifulSoup
# Sample page where the header and footer live inside a <div id="page"> wrapper.
html = """
<!DOCTYPE html>
<html>
<head>
<title>My Website</title>
</head>
<body>
<div id="page">
<header>
<h1>Welcome to my website</h1>
<nav>
<ul>
<li><a href="#">Home</a></li>
<li><a href="#">About</a></li>
<li><a href="#">Contact</a></li>
</ul>
</nav>
</header>
<main>
<p>This is the main content of my website.</p>
</main>
<footer>
<p>© 2023 My Website. All rights reserved.</p>
</footer>
</div>
</body>
</html>
"""
soup = BeautifulSoup(html, 'html.parser')
# Remove header and footer elements by HTML tag hierarchy.
# Look the container up once and reuse it for both children.
page = soup.find('div', id='page')
# find() returns None when the container is missing; nothing to remove then.
if page is not None:
    # Attribute access by tag name (page.header) returns the first matching
    # descendant of the container, or None when there is none.
    header = page.header
    if header is not None:
        header.decompose()
    footer = page.footer
    if footer is not None:
        footer.decompose()
# Get cleaned HTML string
cleaned_html = str(soup)
# Print
print(cleaned_html)
Remove header and footer by ID
Example:
from bs4 import BeautifulSoup
# Sample page whose header and footer are identified by their id attributes.
html = """
<!DOCTYPE html>
<html>
<head>
<title>My Website</title>
</head>
<body>
<header id="page-header">
<h1>Welcome to my website</h1>
<nav>
<ul>
<li><a href="#">Home</a></li>
<li><a href="#">About</a></li>
<li><a href="#">Contact</a></li>
</ul>
</nav>
</header>
<main>
<p>This is the main content of my website.</p>
</main>
<footer id="page-footer">
<p>© 2023 My Website. All rights reserved.</p>
</footer>
</body>
</html>
"""
soup = BeautifulSoup(html, 'html.parser')
# Remove header and footer elements by ID
for element_id in ('page-header', 'page-footer'):
    element = soup.find(id=element_id)
    # find() returns None when no element has this id; skip it rather than
    # failing with AttributeError on None.decompose().
    if element is not None:
        element.decompose()
# Get cleaned HTML string
cleaned_html = str(soup)
# Print
print(cleaned_html)