Add documentation to the spreadsheet processing methods

This commit is contained in:
Dana Lambert 2021-10-19 10:39:07 +13:00
parent 1cb7403f6f
commit 530089c19c
2 changed files with 28 additions and 2 deletions

View file

@ -1,6 +1,9 @@
from openpyxl import load_workbook
def get_pk_mapping(object, mapping_key="name"):
""" Returns a dictionary mapping the value of a given field to the primary key of each django model instance.
"""
pk_mapping = {}
for instance in object.objects.all():
pk_mapping[getattr(instance, mapping_key)] = instance.pk
@ -9,6 +12,8 @@ def get_pk_mapping(object, mapping_key="name"):
def get_col_mappings(sheet, start_col, row_index):
""" Returns a dictionary that maps a spreadsheet cell value to a corresponding column index.
"""
col_mappings = {}
for row in sheet.iter_rows(min_col=start_col, min_row=row_index, max_row=row_index, values_only=True):
for i, col_name in enumerate(row):
@ -18,6 +23,9 @@ def get_col_mappings(sheet, start_col, row_index):
def get_pk_list_from_str(values_str, pk_mapping, fixes={}):
""" Given a string of comma separated values from the spreadsheet, returns a list of primary keys that
correspond to the relevant values with any given mapping fixes applied.
"""
pk_list = []
for value in values_str.split(','):
processed_value = value.lstrip().rstrip().replace(
@ -35,7 +43,9 @@ def get_pk_list_from_str(values_str, pk_mapping, fixes={}):
def get_spreadsheet(data_path, spreadsheet_filename):
    """ Loads the workbook stored at <data_path>/resources/<spreadsheet_filename>
        and returns that workbook's active sheet.
    """
    resources_dir = data_path / 'resources'
    return load_workbook(filename=resources_dir / spreadsheet_filename).active

View file

@ -7,7 +7,7 @@ import right_tree.api.data
from ._spreadsheet_helpers import *
from right_tree.api.models import EcologicalRegion, SoilOrder, SoilVariant, ToleranceLevel
# Mapping adjustments between the shapefile ecological regions and those in the spreadsheet
ECO_REGION_ADJUSTMENTS = {
"Whakatane": "Whatkatane",
"North West Nelson": "North-west Nelson",
@ -17,6 +17,7 @@ ECO_REGION_ADJUSTMENTS = {
"Sounds Wellington": "Sounds-Wellington"
}
# Relevant columns and information used to retrieve information from the spreadsheet
PLANT_COLS = {
'name': {"expected_type": str, "max_length": 50},
'maxheight': {"expected_type": float},
@ -37,22 +38,27 @@ PLANT_COLS = {
'growthform': {"expected_type": str, "model_name": "growth_form", "null_allowed": True, "max_length": 50}
}
# Spreadsheet constants
# Name of the workbook file that get_spreadsheet loads from the 'resources' directory
SPREADSHEET_FILENAME = 'plant_data.xlsx'
# First column read when building the column name to column index mapping below
DATA_START_COL = 3
# NOTE(review): presumably the first row holding plant data; its use is outside this view - confirm
DATA_START_ROW = 7
# Row whose cell values (the column names) are mapped to column indexes below
INFO_HEADER_ROW = 6
# Data directory path (the directory that contains the right_tree.api.data module)
DATA_DIR_PATH = Path(right_tree.api.data.__file__).resolve().parent
# Mappings between values in the spreadsheet and primary key values in the database.
# Each mapping is keyed by the model's "name" field, except ToleranceLevel which is keyed by "level".
# NOTE: these are built when this module is imported, and get_pk_mapping iterates over
# <model>.objects.all() to build each one.
ECO_REGION_PK_MAPPING = get_pk_mapping(EcologicalRegion)
SOIL_ORDER_PK_MAPPING = get_pk_mapping(SoilOrder)
SOIL_VARIANT_PK_MAPPING = get_pk_mapping(SoilVariant)
TOLERANCE_PK_MAPPING = get_pk_mapping(ToleranceLevel, "level")
# Spreadsheet and corresponding value to column index mappings.
# SPREADSHEET is the active sheet of the workbook, loaded when this module is imported.
SPREADSHEET = get_spreadsheet(DATA_DIR_PATH, SPREADSHEET_FILENAME)
# Built from the single header row (INFO_HEADER_ROW), starting at column DATA_START_COL
INFO_COL_INDEXES = get_col_mappings(
SPREADSHEET, DATA_START_COL, INFO_HEADER_ROW)
# Template for the plant json to add as an entry for the fixtures
PLANT_JSON_TEMPLATE = {
"model": "api.plant",
"pk": None,
@ -61,6 +67,8 @@ PLANT_JSON_TEMPLATE = {
def check_field_type(field, field_value):
""" Checks the validity of a field value collected from the spreadsheet
"""
expected_field_type = PLANT_COLS[field]['expected_type']
model_field_name = PLANT_COLS[field].get('model_name', field)
null_allowed = PLANT_COLS[field].get('null_allowed', False)
@ -82,6 +90,8 @@ def check_field_type(field, field_value):
def get_plant_json_from_row(row_data):
""" Returns a json object representing a plant row from the spreadsheet.
"""
plant_json_fields = {}
for field, field_index in INFO_COL_INDEXES.items():
if field not in PLANT_COLS:
@ -129,6 +139,8 @@ def get_plant_json_from_row(row_data):
def get_plant_json_fixture(sheet):
""" Returns a django fixture json that represents the plant information extracted from the spreadsheet.
"""
plant_json_fixture = []
skipped_count = 0
created_count = 0
@ -143,13 +155,17 @@ def get_plant_json_fixture(sheet):
else:
skipped_count += 1
# Print summary of data extraction from the spreadsheet
print("Created plants fixture.")
print(f"Rows Created: {created_count}")
print(f"Rows Skipped: {skipped_count}")
return plant_json_fixture
def save_plant_fixture(fixture):
    """ Serialises the given fixture to json and writes it to fixtures/plants.json
        inside the data directory.
    """
    destination = DATA_DIR_PATH.joinpath('fixtures', 'plants.json')
    serialised_fixture = json.dumps(fixture)
    destination.write_text(serialised_fixture)