Last modified: Nov 07, 2023 By Alexander Williams
BeautifulSoup: How to Use find_all_next() With Examples
Example 1: Find All Paragraphs After a Header
from bs4 import BeautifulSoup
# Sample HTML content
html_content = """
<h1>Welcome to My Blog</h1>
<p>This is the first paragraph.</p>
<p>This is the second paragraph.</p>
<h2>Another Section</h2>
<p>This is a paragraph in another section.</p>
"""

# Parse the HTML content with Python's built-in parser
soup = BeautifulSoup(html_content, 'html.parser')

# Find the first h1 tag in the document
first_h1 = soup.find('h1')

# find_all_next() returns every matching tag that appears after the h1 in
# document order, so the paragraph under the later h2 is included as well
following_paragraphs = first_h1.find_all_next('p')

# Print the text of each paragraph found
for paragraph in following_paragraphs:
    # print() already puts one space between its arguments, so the label
    # must not carry a trailing space of its own
    print("Paragraph:", paragraph.text)
Output:
Paragraph: This is the first paragraph.
Paragraph: This is the second paragraph.
Paragraph: This is a paragraph in another section.
Example 2: Finding All Following Tags of a Certain Class
# Sample HTML content with classes
html_content = """
<div class="content">
<p class="intro">Introduction paragraph.</p>
<p class="follow-up">Follow-up paragraph.</p>
<p class="conclusion">Conclusion paragraph.</p>
</div>
"""

# Parse the HTML content
soup = BeautifulSoup(html_content, 'html.parser')

# Find the paragraph with class 'intro'
intro_paragraph = soup.find('p', class_='intro')

# Find every later <p> with class 'follow-up'. find_all_next() is not limited
# to siblings: it matches any tag that comes after the starting point in
# document order, at any nesting level. (Use find_next_siblings() when only
# siblings are wanted.) In this document the single match happens to be a
# sibling of the intro paragraph.
follow_ups = intro_paragraph.find_all_next('p', class_='follow-up')

# Print the text of each follow-up paragraph found
for follow_up in follow_ups:
    # print() already puts one space between its arguments, so the label
    # must not carry a trailing space of its own
    print("Follow-up Paragraph:", follow_up.text)
Output:
Follow-up Paragraph: Follow-up paragraph.
Example 3: Extracting All Next Elements Until a Certain Tag
# Sample HTML content with different sections
html_content = """
<div>
<h1>Blog Title</h1>
<p>Some introduction.</p>
<div class="ad">Advertisement</div>
<p>More content.</p>
<h2>Subsection</h2>
<p>Content in subsection.</p>
</div>
"""

# Parse the HTML content
soup = BeautifulSoup(html_content, 'html.parser')

# Find the first div tag in the document
first_div = soup.find('div')

# Collect every text node that follows the div's opening tag. The search walks
# the document in order, so it descends into the div's own children.
# string=True is the current spelling of the older text=True argument.
elements_until_h2 = first_div.find_all_next(string=True)

# Stop at the h2 tag and print everything before it
for el in elements_until_h2:
    # The first text node that belongs to the <h2> marks the end of the section
    if el.parent.name == "h2":
        break

    # The newlines between tags are text nodes too; skip the blank ones so
    # only real content is printed
    content = el.strip()
    if not content:
        continue

    print("Content:", content)
Output:
Content: Blog Title
Content: Some introduction.
Content: Advertisement
Content: More content.