Last modified: Nov 19, 2025 By Alexander Williams
Validate Excel Input Files in Python with xlrd
Excel files often contain critical business data. Validating them before processing prevents errors.
This guide shows how to validate Excel input files using Python's xlrd library.
Why Validate Excel Files?
Data validation ensures your analysis uses clean, reliable data. It catches problems early.
Invalid data can cause crashes or incorrect results. Validation prevents these issues.
It verifies file structure, data types, and business rules before processing.
Installing xlrd Library
First, install xlrd using pip. It reads Excel files in Python.
# Install xlrd using pip
pip install xlrd
Xlrd supports .xls files. For .xlsx files, use openpyxl instead.
Basic File Validation
Start by checking if the file exists and is accessible.
import xlrd
import os
def validate_file_exists(file_path):
"""Check if Excel file exists and is accessible"""
if not os.path.exists(file_path):
raise FileNotFoundError(f"File {file_path} not found")
if not os.path.isfile(file_path):
raise ValueError(f"{file_path} is not a file")
# Try to open the file
try:
workbook = xlrd.open_workbook(file_path)
print("File is valid and accessible")
return workbook
except xlrd.XLRDError as e:
raise ValueError(f"Invalid Excel file: {e}")
# Example usage
file_path = "data.xls"
workbook = validate_file_exists(file_path)
File is valid and accessible
Validate Sheet Structure
Check if required sheets exist in the workbook.
def validate_sheets(workbook, required_sheets):
"""Validate that required sheets exist"""
sheet_names = workbook.sheet_names()
for sheet in required_sheets:
if sheet not in sheet_names:
raise ValueError(f"Required sheet '{sheet}' not found")
print(f"All required sheets found: {required_sheets}")
return True
# Example usage
required_sheets = ["Sales", "Customers", "Products"]
validate_sheets(workbook, required_sheets)
All required sheets found: ['Sales', 'Customers', 'Products']
Validate Data Types and Format
Check column data types and ensure they match expectations.
def validate_data_types(sheet, column_rules):
"""Validate data types in specified columns"""
for row_idx in range(1, sheet.nrows): # Skip header
for col_idx, expected_type in column_rules.items():
cell_value = sheet.cell_value(row_idx, col_idx)
cell_type = sheet.cell_type(row_idx, col_idx)
if expected_type == "number" and cell_type != xlrd.XL_CELL_NUMBER:
raise ValueError(f"Row {row_idx+1}, Col {col_idx+1}: Expected number, got {type(cell_value)}")
if expected_type == "text" and cell_type != xlrd.XL_CELL_TEXT:
raise ValueError(f"Row {row_idx+1}, Col {col_idx+1}: Expected text, got {type(cell_value)}")
print("All data types validated successfully")
# Example usage
sheet = workbook.sheet_by_name("Sales")
column_rules = {0: "text", 1: "number", 2: "number"} # Column indices and expected types
validate_data_types(sheet, column_rules)
All data types validated successfully
Check for Empty Cells
Identify missing or empty cells in required columns.
def validate_no_empty_cells(sheet, required_columns):
"""Check for empty cells in required columns"""
empty_cells = []
for row_idx in range(1, sheet.nrows):
for col_idx in required_columns:
cell_value = sheet.cell_value(row_idx, col_idx)
if cell_value == "" or cell_value is None:
empty_cells.append((row_idx+1, col_idx+1))
if empty_cells:
raise ValueError(f"Empty cells found at: {empty_cells}")
else:
print("No empty cells in required columns")
# Example usage
required_columns = [0, 1] # Column indices that cannot be empty
validate_no_empty_cells(sheet, required_columns)
No empty cells in required columns
Validate Data Ranges
Ensure numerical values fall within acceptable ranges.
def validate_numeric_ranges(sheet, column_ranges):
"""Validate numerical values are within specified ranges"""
out_of_range = []
for row_idx in range(1, sheet.nrows):
for col_idx, (min_val, max_val) in column_ranges.items():
cell_value = sheet.cell_value(row_idx, col_idx)
if not (min_val <= cell_value <= max_val):
out_of_range.append((row_idx+1, col_idx+1, cell_value))
if out_of_range:
raise ValueError(f"Values out of range: {out_of_range}")
else:
print("All numeric values within specified ranges")
# Example usage
column_ranges = {
1: (0, 1000), # Column 1: values between 0-1000
2: (1, 100) # Column 2: values between 1-100
}
validate_numeric_ranges(sheet, column_ranges)
All numeric values within specified ranges
Complete Validation Function
Combine all validation checks into one comprehensive function.
def validate_excel_file(file_path, validation_rules):
"""Complete Excel file validation"""
try:
# File existence and accessibility
workbook = validate_file_exists(file_path)
# Sheet validation
validate_sheets(workbook, validation_rules['required_sheets'])
# Data validation for each sheet
for sheet_name, sheet_rules in validation_rules['sheet_rules'].items():
sheet = workbook.sheet_by_name(sheet_name)
if 'data_types' in sheet_rules:
validate_data_types(sheet, sheet_rules['data_types'])
if 'required_columns' in sheet_rules:
validate_no_empty_cells(sheet, sheet_rules['required_columns'])
if 'numeric_ranges' in sheet_rules:
validate_numeric_ranges(sheet, sheet_rules['numeric_ranges'])
print("Excel file validation completed successfully!")
return True
except Exception as e:
print(f"Validation failed: {e}")
return False
# Define validation rules
validation_rules = {
'required_sheets': ['Sales', 'Customers'],
'sheet_rules': {
'Sales': {
'data_types': {0: 'text', 1: 'number'},
'required_columns': [0, 1],
'numeric_ranges': {1: (0, 1000)}
}
}
}
# Run complete validation
result = validate_excel_file("data.xls", validation_rules)
Excel file validation completed successfully!
Handling Validation Errors
Create informative error messages for different validation failures.
class ExcelValidationError(Exception):
"""Custom exception for Excel validation errors"""
pass
def detailed_validation(file_path, rules):
"""Validation with detailed error reporting"""
errors = []
try:
workbook = xlrd.open_workbook(file_path)
except xlrd.XLRDError:
errors.append("File is not a valid Excel file or is corrupted")
return errors
# Check sheets
existing_sheets = workbook.sheet_names()
for required_sheet in rules['required_sheets']:
if required_sheet not in existing_sheets:
errors.append(f"Missing required sheet: {required_sheet}")
# Return all errors at once
return errors
# Example with errors
file_path = "invalid_data.xls"
rules = {'required_sheets': ['Sales', 'Inventory']}
errors = detailed_validation(file_path, rules)
if errors:
print("Validation errors found:")
for error in errors:
print(f"- {error}")
else:
print("No validation errors found")
Validation errors found:
- Missing required sheet: Inventory
Advanced Validation Techniques
For more complex data processing, consider integrating xlrd with pandas.
This approach combines xlrd's validation with pandas' powerful data manipulation.
You can integrate Python xlrd with pandas for data analysis to enhance your workflow.
Working with Large Files
Large Excel files require efficient processing techniques.
Learn how to read large Excel files efficiently with Python xlrd to handle big datasets.
Date and Time Validation
Excel dates need special handling in validation.
Discover how to handle Excel dates and times with Python xlrd for accurate temporal data.
Conclusion
Validating Excel input files is crucial for reliable data processing.
Xlrd provides robust tools for file, sheet, and data validation.
Implement these techniques to ensure data quality in your Python applications.
Always validate early and provide clear error messages for users.