Last modified: Jan 10, 2023 By Alexander Williams

BeautifulSoup: Find Elements by Role Attribute and Get the Role Value

Find All By Role Attribute

from bs4 import BeautifulSoup # Import BeautifulSoup module

# Sample markup: three <div> elements, of which only the first two
# carry a role attribute.
html_source = '''
<div class="page container" role="main">      
    <p>Hello Python</p>
</div>

<div class="page container" role="body">      
    <p>Hello BeautifulSoup</p>
</div>

<div class="page container">      
    <p>Hello Django</p>
</div>
'''

# Parse the markup with the 'html.parser' backend.
soup = BeautifulSoup(html_source, 'html.parser')

# Passing True as the attribute value matches every <div> that has a
# role attribute, regardless of what the value is.
roles = soup.find_all("div", attrs={"role": True})

# Show the list of matching tags.
print(roles)

Output:

[<div class="page container" role="main">
<p>Hello Python</p>
</div>, <div class="page container" role="body">
<p>Hello BeautifulSoup</p>
</div>]

 

# Walk the matched tags and print the value of each one's role attribute.
for div_tag in roles:
    role_value = div_tag['role']
    print(role_value)

Output:

main
body

Find All By Role Attribute Value

from bs4 import BeautifulSoup # Import BeautifulSoup module

# Sample markup: two <div> elements with different role values and one
# without a role attribute.
html_source = '''
<div class="page container" role="main">      
    <p>Hello Python</p>
</div>

<div class="page container" role="body">      
    <p>Hello BeautifulSoup</p>
</div>

<div class="page container">      
    <p>Hello Django</p>
</div>
'''

# Parse the markup with the 'html.parser' backend.
soup = BeautifulSoup(html_source, 'html.parser')

# Keep only the <div> elements whose role attribute equals "main".
roles = soup.find_all("div", attrs={"role": "main"})

# Show the list of matching tags.
print(roles)

Output:

[<div class="page container" role="main">
<p>Hello Python</p>
</div>]

Select All By Role Attribute

from bs4 import BeautifulSoup # Import BeautifulSoup module

# Sample markup: three <div> elements, of which only the first two
# carry a role attribute.
html_source = '''
<div class="page container" role="main">      
    <p>Hello Python</p>
</div>

<div class="page container" role="body">      
    <p>Hello BeautifulSoup</p>
</div>

<div class="page container">      
    <p>Hello Django</p>
</div>
'''

# Parse the markup with the 'html.parser' backend.
soup = BeautifulSoup(html_source, 'html.parser')

# CSS attribute selector: every <div> that has a role attribute,
# whatever its value.
roles = soup.select("div[role]")

# Print the matches so the snippet actually produces the output shown
# below it (the original snippet computed `roles` but never printed it).
print(roles)

Output:

[<div class="page container" role="main">
<p>Hello Python</p>
</div>, <div class="page container" role="body">
<p>Hello BeautifulSoup</p>
</div>]

Select All By Role Attribute Value

from bs4 import BeautifulSoup # Import BeautifulSoup module

# Sample markup: two <div> elements with different role values and one
# without a role attribute.
html_source = '''
<div class="page container" role="main">      
    <p>Hello Python</p>
</div>

<div class="page container" role="body">      
    <p>Hello BeautifulSoup</p>
</div>

<div class="page container">      
    <p>Hello Django</p>
</div>
'''

# Parse the markup with the 'html.parser' backend.
soup = BeautifulSoup(html_source, 'html.parser')

# CSS attribute selector: only <div> elements whose role attribute
# equals "main".
roles = soup.select("div[role=main]")

# Show the list of matching tags.
print(roles)

Output:

[<div class="page container" role="main">
<p>Hello Python</p>
</div>]