During a recent data processing project, I developed an ArcPy script to automate geodatabase schema documentation for technical reports, eliminating manual field inventory tasks. This solution leverages arcpy for data extraction and xlwt for Excel output generation.
Output Preview
The script produces structured Excel sheets detailing field names, aliases, data types, lengths, and spatial references:
Implementation Code
# -*- coding: utf-8 -*-
import os
import sys
import time
from datetime import datetime
import arcpy
import xlwt
# Reload system encoding for UTF-8 compatibility
reload(sys)
sys.setdefaultencoding("utf-8")
def ensure_directory(path):
"""Create directory structure if missing"""
if os.path.isdir(path):
if not os.path.exists(path):
os.makedirs(path)
else:
parent_dir = os.path.dirname(path)
if not os.path.exists(parent_dir):
os.makedirs(parent_dir)
if __name__ == '__main__':
# Configuration
gdb_path = r'F:\gisData\慈溪变更数据\基础数据\地类图斑.gdb'
timestamp = datetime.now().strftime("%Y%m%d%H%M")
output_excel = f'./results/field-info-{timestamp}.xls' # Avoid Chinese characters in path
ensure_directory(output_excel)
if not arcpy.Exists(gdb_path):
raise IOError("Error: Geodatabase not found!")
arcpy.env.workspace = gdb_path
start_time = time.time()
# Initialize Excel workbook
workbook = xlwt.Workbook(encoding='utf-8')
sheet = workbook.add_sheet(u'Field Information')
# Column headers
headers = [
u'序号', u'字段序号', u'要素类名称', u'字段名称',
u'别称', u'字段类型', u'字段长度', u'要素类型',
u'要素路径', u'坐标系'
]
for col, header in enumerate(headers):
sheet.write(0, col, header)
# Process feature classes
feature_classes = arcpy.ListFeatureClasses()
if not feature_classes:
raise IOError("Error: No feature classes found!")
row_index = 0
for fc in feature_classes:
print(f"Processing: {fc}")
fields = arcpy.ListFields(fc)
for field_idx, field in enumerate(fields):
# Skip system fields
if field.name in ['Shape', 'SHAPE', 'OBJECTID', 'OBJECTID_1', 'OBJECTID_2', 'Shape_Length', 'Shape_Area']:
continue
row_index += 1
fc_path = os.path.join(gdb_path, fc)
desc = arcpy.Describe(fc_path)
# Write field metadata
sheet.write(row_index, 0, row_index) # Sequence number
sheet.write(row_index, 1, f"Field{field_idx+1}") # Field index
sheet.write(row_index, 2, fc.encode('utf-8')) # Feature class name
sheet.write(row_index, 3, field.name) # Field name
sheet.write(row_index, 4, field.aliasName) # Field alias
sheet.write(row_index, 5, field.type) # Data type
sheet.write(row_index, 6, field.length) # Field length
sheet.write(row_index, 7, desc.shapeType) # Geometry type
sheet.write(row_index, 8, fc_path.encode('utf-8')) # Full path
sheet.write(row_index, 9, desc.spatialReference.name) # Coordinate system
# Save output
try:
print("Saving Excel document...")
workbook.save(output_excel)
print(f"Success: Output saved to {output_excel}")
except Exception as e:
raise ValueError(f"Export failed: {str(e)}")
# Performance metrics
elapsed = time.time() - start_time
print(f"Execution time: {elapsed:.2f} seconds")
Key Features
Automated Schema Extraction:
- Iterates through all feature classes in a geodatabase
- Captures essential field metadata (name, alias, type, length)
Spatial Context Documentation:
- Records geometry types and coordinate systems
- Includes full feature class paths
Output Optimization:
- Filters system fields (OBJECTID, Shape_*)
- Generates timestamped Excel files
Error Handling:
- Validates geodatabase existence
- Manages directory creation
Usage Note: Paths containing Chinese characters may require additional encoding handling in some environments.