right-tree/backend/right_tree/api/management/commands/createplantfixtures.py

181 lines
7.1 KiB
Python

from django.core.management.base import BaseCommand
import json
from pathlib import Path
import right_tree.api.data
from ._spreadsheet_helpers import *
from right_tree.api.models import EcologicalRegion, SoilOrder, SoilVariant, ToleranceLevel
# Name adjustments used to reconcile ecological region names between the
# shapefile and the spreadsheet. Passed to get_pk_list_from_str when the
# 'region' column of a row is converted.
ECO_REGION_ADJUSTMENTS = {
    "Whakatane": "Whatkatane",
    "North West Nelson": "North-west Nelson",
    "Aorangi": "Aorrangi",
    "Mackenzie": "MacKenzie",
    "Southland Hills": "Southland Foothills",
    "Sounds Wellington": "Sounds-Wellington",
}
# Spreadsheet columns that are extracted, keyed by the spreadsheet column key.
# Each entry describes how the value is validated and stored:
#   expected_type - type the converted value must have
#   null_allowed  - whether None is accepted (treated as False when absent)
#   max_length    - longest string accepted (no limit when absent)
#   model_name    - key used in the fixture's "fields" (column key when absent)
PLANT_COLS = {
    "name": {"expected_type": str, "max_length": 50},
    "maxheight": {"expected_type": float},
    "spacing": {"expected_type": float},
    "commonname": {
        "expected_type": str,
        "null_allowed": True,
        "max_length": 50,
    },
    "synonym": {
        "expected_type": str,
        "null_allowed": True,
        "max_length": 200,
    },
    "region": {"expected_type": list, "model_name": "ecological_regions"},
    "soilorder": {"expected_type": list, "model_name": "soil_order"},
    # The three soil moisture columns all feed the same fixture field.
    "wet": {"expected_type": list, "model_name": "soil_variants"},
    "mesic": {"expected_type": list, "model_name": "soil_variants"},
    "dry": {"expected_type": list, "model_name": "soil_variants"},
    "water": {"expected_type": int, "model_name": "water_tolerance"},
    "drought": {"expected_type": int, "model_name": "drought_tolerance"},
    "frost": {"expected_type": int, "model_name": "frost_tolerance"},
    "salinity": {"expected_type": int, "model_name": "salinity_tolerance"},
    "purpose": {"expected_type": str, "null_allowed": True},
    "stage": {"expected_type": int},
    "growthform": {
        "expected_type": str,
        "model_name": "growth_form",
        "null_allowed": True,
        "max_length": 50,
    },
}
# Spreadsheet constants
# File name of the workbook; handed to get_spreadsheet together with
# DATA_DIR_PATH.
SPREADSHEET_FILENAME = 'plant_data.xlsx'
# First column and first row that are read as plant data: passed as min_col /
# min_row when the sheet's rows are iterated in get_plant_json_fixture.
DATA_START_COL = 3
DATA_START_ROW = 7
# Passed to get_col_mappings together with DATA_START_COL; presumably the row
# that holds the column headers - TODO confirm against _spreadsheet_helpers.
INFO_HEADER_ROW = 6
# Data directory path: the directory containing the right_tree.api.data
# module file. The spreadsheet is loaded from here and the fixture is written
# beneath it.
DATA_DIR_PATH = Path(right_tree.api.data.__file__).resolve().parent
# Mappings between values in the spreadsheet and primary key values in the database
# NOTE(review): these statements run when the module is imported.
# get_pk_mapping is not defined in this file (presumably it comes from
# _spreadsheet_helpers) and receives model classes, so it presumably queries
# the database - confirm before importing this module without one available.
ECO_REGION_PK_MAPPING = get_pk_mapping(EcologicalRegion)
SOIL_ORDER_PK_MAPPING = get_pk_mapping(SoilOrder)
# Looked up with the capitalised column key ('Wet', 'Mesic', 'Dry').
SOIL_VARIANT_PK_MAPPING = get_pk_mapping(SoilVariant)
# Looked up with the raw cell value of the tolerance columns; the "level"
# argument presumably selects the attribute used as the key - TODO confirm.
TOLERANCE_PK_MAPPING = get_pk_mapping(ToleranceLevel, "level")
# Spreadsheet and corresponding value to column index mappings
# SPREADSHEET is the sheet object whose rows are iterated by the command.
# INFO_COL_INDEXES is iterated as (column key, index) pairs; the index is used
# to pick that column's cell out of each row of values.
SPREADSHEET = get_spreadsheet(DATA_DIR_PATH, SPREADSHEET_FILENAME)
INFO_COL_INDEXES = get_col_mappings(
SPREADSHEET, DATA_START_COL, INFO_HEADER_ROW)
# Skeleton of a single plant entry in the fixture. A copy is made for every
# row and its "fields" value is replaced with that row's data; "pk" is left
# as None.
PLANT_JSON_TEMPLATE = {
    "model": "api.plant",
    "pk": None,
    "fields": {},
}
def check_field_type(field, field_value):
    """Check the validity of a field value collected from the spreadsheet.

    The expectations for the field (type, nullability and maximum string
    length) are read from ``PLANT_COLS[field]``.

    Args:
        field: Spreadsheet column key; must be a key of ``PLANT_COLS``.
        field_value: The converted value about to be stored for that field.

    Raises:
        KeyError: If ``field`` is not a key of ``PLANT_COLS``.
        TypeError: If the value does not have the expected type, or if it is
            a string longer than the column's ``max_length``. An ``int`` is
            accepted where a ``float`` is expected and ``None`` is accepted
            when the column allows nulls. ``TypeError`` is used for the
            length violation as well so existing callers keep working.
    """
    column_spec = PLANT_COLS[field]
    expected_field_type = column_spec['expected_type']
    model_field_name = column_spec.get('model_name', field)
    null_allowed = column_spec.get('null_allowed', False)
    max_length = column_spec.get('max_length', False)

    # A permitted null needs no further checks (it has no type or length).
    if field_value is None and null_allowed:
        return

    # bool is a subclass of int, so without this guard a TRUE/FALSE cell
    # would be accepted for a numeric column.
    is_bool_as_number = isinstance(field_value, bool) and \
        expected_field_type in (int, float)
    is_valid_type = isinstance(field_value, expected_field_type)
    is_int_when_float = expected_field_type == float and isinstance(
        field_value, int)

    if is_bool_as_number or not (is_valid_type or is_int_when_float):
        raise TypeError(
            f"Invalid json type for field {model_field_name} with value {field_value}. Expected '{expected_field_type}' but got '{type(field_value)}'.")

    # A length equal to max_length is allowed, hence "at most" in the message.
    if max_length and isinstance(field_value, str) and len(field_value) > max_length:
        raise TypeError(
            f"Invalid string length for {model_field_name} with value {field_value}. Expected length to be at most {max_length} but was {len(field_value)}.")
def get_plant_json_from_row(row_data):
    """Return a json object representing a plant row from the spreadsheet.

    Every column listed in both ``INFO_COL_INDEXES`` and ``PLANT_COLS`` is
    converted to its fixture value, stored under the column's model field
    name and validated with ``check_field_type``.

    Args:
        row_data: Sequence of cell values for one row, indexed by the
            positions stored in ``INFO_COL_INDEXES``.

    Returns:
        A copy of ``PLANT_JSON_TEMPLATE`` whose ``"fields"`` entry holds the
        converted values, or an empty dict when any column fails conversion
        or validation (the problem is printed and the row is skipped).
    """
    plant_json_fields = {}

    for field, field_index in INFO_COL_INDEXES.items():
        # Columns that are not described in PLANT_COLS are not extracted.
        if field not in PLANT_COLS:
            continue

        model_field_name = PLANT_COLS[field].get('model_name', field)

        try:
            if field == "region":
                field_value = get_pk_list_from_str(
                    row_data[field_index], ECO_REGION_PK_MAPPING, ECO_REGION_ADJUSTMENTS)
            elif field == "soilorder":
                field_value = get_pk_list_from_str(
                    row_data[field_index], SOIL_ORDER_PK_MAPPING)
            elif field in {'wet', 'mesic', 'dry'}:
                # The three columns share one model field, so each one
                # extends the list collected so far.
                # NOTE(review): the cell value is not read here, so every row
                # gets the variant of each of these columns - confirm that
                # this is intended.
                soil_variant_pk = SOIL_VARIANT_PK_MAPPING[field.capitalize()]
                field_value = plant_json_fields.get(
                    model_field_name, []) + [soil_variant_pk]
            elif field in {'water', 'drought', 'frost', 'salinity'}:
                field_value = TOLERANCE_PK_MAPPING[row_data[field_index]]
            else:
                # Plain value columns are stored as they appear in the sheet.
                field_value = row_data[field_index]

            plant_json_fields[model_field_name] = field_value
            check_field_type(field, field_value)
        except Exception as e:
            # Deliberately broad: any problem with a row means the row is
            # reported and skipped rather than aborting the whole run.
            # The plant name is looked up defensively so that a missing
            # 'name' column or a short row cannot raise from inside this
            # handler and hide the original error.
            try:
                plant_name = row_data[INFO_COL_INDEXES['name']]
            except (KeyError, IndexError, TypeError):
                plant_name = "<unknown>"
            print(
                f"Error occured while adding the row for {plant_name}.")
            print(F"{type(e)}: {e}")
            print("SKIPPING ROW...")
            print("----------------------------------------------")
            return {}

    # Shallow copy is sufficient: the only mutable member, "fields", is
    # replaced rather than modified.
    plant_json = PLANT_JSON_TEMPLATE.copy()
    plant_json["fields"] = plant_json_fields
    return plant_json
def get_plant_json_fixture(sheet):
    """Build the plant fixture (a list of plant entries) from the spreadsheet.

    Rows are read from ``sheet`` starting at ``DATA_START_ROW`` and
    ``DATA_START_COL`` and converted with ``get_plant_json_from_row``. Rows
    for which that returns an empty dict are left out of the result. A summary
    of how many rows were created and skipped is printed at the end.
    """
    entries = []
    skipped = 0

    rows = sheet.iter_rows(
        min_col=DATA_START_COL, min_row=DATA_START_ROW, values_only=True)
    for row_values in rows:
        entry = get_plant_json_from_row(row_values)
        if entry == {}:
            # An empty result marks a row with invalid data.
            skipped += 1
            continue
        entries.append(entry)

    # Summary of the extraction; every kept row is one created entry.
    print("Created plants fixture.")
    print(f"Rows Created: {len(entries)}")
    print(f"Rows Skipped: {skipped}")
    return entries
def save_plant_fixture(fixture):
    """Serialise ``fixture`` to JSON and write it to ``fixtures/plants.json``
    beneath ``DATA_DIR_PATH``.
    """
    destination = DATA_DIR_PATH.joinpath('fixtures', 'plants.json')
    serialized_fixture = json.dumps(fixture)
    destination.write_text(serialized_fixture)
class Command(BaseCommand):
    """Management command that builds the plant fixture file from the spreadsheet."""

    # Shown in the command's help output. The command builds the fixture and
    # writes it to a file; nothing in it loads the data into the database.
    help = 'Creates the plant fixtures file from the plant spreadsheet data'

    def handle(self, *args, **options):
        """Extract the plants from ``SPREADSHEET`` and save them as a fixture."""
        self.stdout.write('Creating plant fixtures...')
        plant_fixture = get_plant_json_fixture(SPREADSHEET)
        save_plant_fixture(plant_fixture)
        self.stdout.write(self.style.SUCCESS(
            'Plant fixtures created and saved successfully.'))