Last modified: Oct 31, 2023 By Alexander Williams
BeautifulSoup `limit` Argument Examples
Example 1: Limit the Number of Results
from bs4 import BeautifulSoup
html = """
<ul>
<li>Item 1</li>
<li>Item 2</li>
<li>Item 3</li>
<li>Item 4</li>
<li>Item 5</li>
</ul>
"""
soup = BeautifulSoup(html, 'html.parser')

# limit=3 tells find_all() to stop gathering results once three <li> tags
# have matched, so "Item 4" and "Item 5" are never returned.
items = soup.find_all('li', limit=3)

# Print the text of each list item that was found
for item in items:
    print(item.text)
Output:
Item 1
Item 2
Item 3
Example 2: Limit Parsing to the First Match
from bs4 import BeautifulSoup
html = """
<div>
<p>First Paragraph</p>
<p>Second Paragraph</p>
<p>Third Paragraph</p>
</div>
"""
soup = BeautifulSoup(html, 'html.parser')

# find() does not take a `limit` argument: internally it already calls
# find_all() with limit=1, so soup.find('p', limit=1) raises TypeError
# because `limit` ends up being supplied twice. To apply the limit
# explicitly, call find_all() with limit=1 instead.
matches = soup.find_all('p', limit=1)

# find_all() always returns a list (empty when nothing matches), so take
# the first element only if there is one.
paragraph = matches[0] if matches else None

# Print the paragraph
if paragraph is not None:
    print(paragraph.text)
Output:
First Paragraph
Example 3: Limit Search in Nested Structures
from bs4 import BeautifulSoup
html = """
<div>
<div>
<p>Paragraph 1</p>
</div>
<div>
<p>Paragraph 2</p>
</div>
</div>
"""
soup = BeautifulSoup(html, 'html.parser')

# find_all() searches all descendants by default, so the <p> tags nested
# inside the inner <div> elements are found. limit=1 stops the search after
# the first match. (find() cannot be given `limit`: it already passes
# limit=1 to find_all() itself, and a second value raises TypeError.)
matches = soup.find_all('p', limit=1)

# find_all() returns a list; pick the single match if the list is non-empty
paragraph = matches[0] if matches else None

# Print the paragraph
if paragraph is not None:
    print(paragraph.text)
Output:
Paragraph 1
Example 4: Limit Search in Nested Structures with `find_all`
from bs4 import BeautifulSoup
html = """
<div>
<div>
<p>Paragraph 1</p>
</div>
<div>
<p>Paragraph 2</p>
</div>
</div>
"""
soup = BeautifulSoup(html, 'html.parser')

# Collect at most two <p> elements; the search descends into the nested
# <div> elements and stops as soon as the second match is found.
paragraphs = soup.find_all('p', limit=2)

# Print the text of each paragraph that was found
for paragraph in paragraphs:
    print(paragraph.text)
Output:
Paragraph 1
Paragraph 2
Example 5: Limit Search in Tables
from bs4 import BeautifulSoup
html = """
<table>
<tr>
<td>Row 1, Cell 1</td>
<td>Row 1, Cell 2</td>
</tr>
<tr>
<td>Row 2, Cell 1</td>
<td>Row 2, Cell 2</td>
</tr>
<tr>
<td>Row 3, Cell 1</td>
<td>Row 3, Cell 2</td>
</tr>
</table>
"""
soup = BeautifulSoup(html, 'html.parser')

# Only the first two <tr> elements are returned; the third row is skipped
# because the search stops once the limit is reached.
rows = soup.find_all('tr', limit=2)

# For each returned row, print the text of all of its cells as a list.
# The inner find_all('td') has no limit, so every cell in the row is included.
for row in rows:
    print([cell.text for cell in row.find_all('td')])
Output:
['Row 1, Cell 1', 'Row 1, Cell 2']
['Row 2, Cell 1', 'Row 2, Cell 2']
Example 6: Limit Search to Specific Sections
from bs4 import BeautifulSoup
html = """
<div class="section">
<p>Section 1</p>
<p>Section 1</p>
</div>
<div class="section">
<p>Section 2</p>
<p>Section 2</p>
</div>
"""
soup = BeautifulSoup(html, 'html.parser')

# Combine a tag name, a CSS class filter (class_ avoids the reserved word
# `class`) and a limit: return at most two <div class="section"> elements.
sections = soup.find_all('div', class_='section', limit=2)

# For each matched section, print the text of every <p> it contains
for section in sections:
    print([p.text for p in section.find_all('p')])
Output:
['Section 1', 'Section 1']
['Section 2', 'Section 2']