Last modified: Oct 31, 2023 By Alexander Williams
BeautifulSoup recursive Argument Examples
Example 1: Default Behavior (Recursively Parsing)
from bs4 import BeautifulSoup

# Sample document: one <p> directly inside the outer <div> and a second <p>
# inside a nested <div>.
html = """
<div>
<p>Paragraph 1</p>
<div>
<p>Paragraph 2</p>
</div>
</div>
"""
soup = BeautifulSoup(html, 'html.parser')

# With no `recursive` argument, find_all() searches every descendant,
# so the paragraph in the nested <div> is matched as well.
paragraphs = soup.find_all('p')

# Emit the text of each match, one per line.
for paragraph in paragraphs:
    print(paragraph.get_text())
Output:
Paragraph 1
Paragraph 2
Example 2: Disable Recursive Parsing
from bs4 import BeautifulSoup

# Sample document: one <p> directly inside the outer <div> and a second <p>
# inside a nested <div>.
html = """
<div>
<p>Paragraph 1</p>
<div>
<p>Paragraph 2</p>
</div>
</div>
"""
soup = BeautifulSoup(html, 'html.parser')

# recursive=False restricts the search to the *direct children* of the object
# find_all() is called on. The only tag that is a direct child of `soup`
# itself is the outer <div>, so calling soup.find_all('p', recursive=False)
# would return an empty list. Start from the outer <div> instead: its direct
# children include "Paragraph 1" but not "Paragraph 2", which is one level
# deeper.
paragraphs = soup.div.find_all('p', recursive=False)

# Print the paragraphs
for p in paragraphs:
    print(p.text)
Output:
Paragraph 1
Example 3: Recursive Parsing for Specific Tags
from bs4 import BeautifulSoup

# Sample document: one <p> directly inside the outer <div> and a second <p>
# inside a nested <div>.
html = """
<div>
<p>Paragraph 1</p>
<div>
<p>Paragraph 2</p>
</div>
</div>
"""
soup = BeautifulSoup(html, 'html.parser')

# recursive=True is find_all()'s default; it is spelled out here only to make
# the behaviour explicit. The whole document is searched at every depth.
paragraphs = soup.find_all('p', recursive=True)

# Emit the text of each match, one per line.
for paragraph in paragraphs:
    print(paragraph.get_text())
Output:
Paragraph 1
Paragraph 2
Example 4: Recursive Parsing for Specific Tags (Alternative Method)
from bs4 import BeautifulSoup

# Sample document: one <p> directly inside the outer <div> and a second <p>
# inside a nested <div>.
html = """
<div>
<p>Paragraph 1</p>
<div>
<p>Paragraph 2</p>
</div>
</div>
"""
soup = BeautifulSoup(html, 'html.parser')

# Recursive search combined with limit=2: the search stops once two matches
# have been collected. This document contains exactly two <p> tags, so both
# are still returned.
paragraphs = soup.find_all('p', recursive=True, limit=2)

# Emit the text of each match, one per line.
for paragraph in paragraphs:
    print(paragraph.get_text())
Output:
Paragraph 1
Paragraph 2
Example 5: Recursive Parsing in Nested Structures
from bs4 import BeautifulSoup

# Sample document: a single <p> wrapped in two levels of <div>.
html = """
<div>
<div>
<p>Paragraph 1</p>
</div>
</div>
"""
soup = BeautifulSoup(html, 'html.parser')

# A recursive search descends through both <div> levels and reaches the <p>.
paragraphs = soup.find_all('p', recursive=True)

# Emit the text of each match, one per line.
for paragraph in paragraphs:
    print(paragraph.get_text())
Output:
Paragraph 1
Example 6: Non-Recursive Parsing in Nested Structures
from bs4 import BeautifulSoup

# Sample document: a single <p> wrapped in two levels of <div>.
html = """
<div>
<div>
<p>Paragraph 1</p>
</div>
</div>
"""
soup = BeautifulSoup(html, 'html.parser')

# recursive=False examines only the direct children of `soup`. The <p> sits
# two levels further down, inside the inner <div>, so it is not a direct child.
paragraphs = soup.find_all('p', recursive=False)

# Emit the text of each match, one per line.
for paragraph in paragraphs:
    print(paragraph.get_text())
Output:
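(no output: the only <p> is nested inside an inner <div>, so a non-recursive search from the top of the document finds nothing)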