From 299e609c25e21f2956f6f2d17a43f63af05549fb Mon Sep 17 00:00:00 2001
From: Dana Lambert
Date: Fri, 15 Oct 2021 14:40:14 +1300
Subject: [PATCH] Add django command to populate the database with spreadsheet plant data

---
Review note: the three new Python files were revised after this patch was
generated, so the blob ids on their `index` lines predate the revision.

 backend/.gitignore                            |   5 +-
 backend/requirements.txt                      |   1 +
 .../commands/_spreadsheet_helpers.py          |  61 ++++++
 .../commands/createplantfixtures.py           | 203 ++++++++++++++++++
 .../api/management/commands/resetplants.py    |  14 ++
 5 files changed, 283 insertions(+), 1 deletion(-)
 create mode 100644 backend/right_tree/api/management/commands/_spreadsheet_helpers.py
 create mode 100644 backend/right_tree/api/management/commands/createplantfixtures.py
 create mode 100644 backend/right_tree/api/management/commands/resetplants.py

diff --git a/backend/.gitignore b/backend/.gitignore
index e30397b..28d3762 100644
--- a/backend/.gitignore
+++ b/backend/.gitignore
@@ -1,3 +1,6 @@
 *.pyc
 *.sqlite3
-__pycache__
\ No newline at end of file
+__pycache__
+
+resources
+right_tree/api/data/fixtures/plants.json
\ No newline at end of file
diff --git a/backend/requirements.txt b/backend/requirements.txt
index f3dc6cb..35f9bd1 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -2,3 +2,4 @@ Django==3.2.8
 psycopg2-binary>=2.8
 djangorestframework==3.12.4
 django-cors-headers==3.10.0
+openpyxl==3.0.9
\ No newline at end of file
diff --git a/backend/right_tree/api/management/commands/_spreadsheet_helpers.py b/backend/right_tree/api/management/commands/_spreadsheet_helpers.py
new file mode 100644
index 0000000..974f81f
--- /dev/null
+++ b/backend/right_tree/api/management/commands/_spreadsheet_helpers.py
@@ -0,0 +1,61 @@
+"""Helpers for reading the plant spreadsheet and mapping its values to pks."""
+from openpyxl import load_workbook
+
+
+def get_pk_mapping(object, mapping_key="name"):
+    """Return a dict mapping each instance's `mapping_key` value to its pk.
+
+    `object` must expose `.objects.all()`. The parameter name shadows the
+    builtin but is kept so existing keyword callers keep working.
+    """
+    return {
+        getattr(instance, mapping_key): instance.pk
+        for instance in object.objects.all()
+    }
+
+
+def get_col_mappings(sheet, start_col, row_index):
+    """Return a dict mapping each header value in row `row_index` to its index.
+
+    Indexes are 0-based and counted from `start_col`, so they line up with row
+    tuples that are read from the sheet using the same `min_col`.
+    """
+    col_mappings = {}
+    for row in sheet.iter_rows(min_col=start_col, min_row=row_index,
+                               max_row=row_index, values_only=True):
+        for i, col_name in enumerate(row):
+            col_mappings[col_name] = i
+
+    return col_mappings
+
+
+def get_pk_list_from_str(values_str, pk_mapping, fixes=None):
+    """Return the pks for the comma separated names in `values_str`.
+
+    Each name is stripped, has '_' and '-' replaced with spaces and '’'
+    replaced with an apostrophe, and is then renamed through `fixes` when it
+    has an entry there. Names that are not in `pk_mapping` are left out of
+    the result without raising an error.
+    """
+    # A None default avoids sharing one mutable dict between calls
+    fixes = fixes or {}
+    pk_list = []
+    for value in values_str.split(','):
+        processed_value = value.strip().replace(
+            '_', ' ').replace('-', ' ').replace('’', '\'')
+
+        # Applies any mapping adjustments between spreadsheet data and the database values
+        processed_value = fixes.get(processed_value, processed_value)
+
+        # Adds the pk value for the value in the database
+        if processed_value in pk_mapping:
+            pk_list.append(pk_mapping[processed_value])
+
+    return pk_list
+
+
+def get_spreadsheet(data_path, spreadsheet_filename):
+    """Return the active sheet of `data_path/resources/spreadsheet_filename`."""
+    spreadsheet_path = data_path / 'resources' / spreadsheet_filename
+    workbook = load_workbook(filename=spreadsheet_path)
+    return workbook.active
diff --git a/backend/right_tree/api/management/commands/createplantfixtures.py b/backend/right_tree/api/management/commands/createplantfixtures.py
new file mode 100644
index 0000000..33dc4bb
--- /dev/null
+++ b/backend/right_tree/api/management/commands/createplantfixtures.py
@@ -0,0 +1,203 @@
+"""Management command that builds the plants fixture from the plant spreadsheet."""
+import json
+from pathlib import Path
+
+from django.core.management.base import BaseCommand
+
+import right_tree.api.data
+from right_tree.api.models import EcologicalRegion, SoilOrder, SoilVariant, ToleranceLevel
+from ._spreadsheet_helpers import (
+    get_col_mappings,
+    get_pk_list_from_str,
+    get_pk_mapping,
+    get_spreadsheet,
+)
+
+
+# Renames region names as written in the spreadsheet to the names stored in the database
+ECO_REGION_ADJUSTMENTS = {
+    "Whakatane": "Whatkatane",
+    "North West Nelson": "North-west Nelson",
+    "Aorangi": "Aorrangi",
+    "Mackenzie": "MacKenzie",
+    "Southland Hills": "Southland Foothills",
+    "Sounds Wellington": "Sounds-Wellington"
+}
+
+# Spreadsheet columns that are exported, keyed by their header value. Each entry
+# holds the validation rules used by check_field_type and, when it differs from
+# the header, the model field name used in the fixture.
+PLANT_COLS = {
+    'name': {"expected_type": str, "max_length": 50},
+    'maxheight': {"expected_type": float},
+    'spacing': {"expected_type": float},
+    'commonname': {"expected_type": str, "null_allowed": True, "max_length": 50},
+    'synonym': {"expected_type": str, "null_allowed": True, "max_length": 200},
+    'region': {"expected_type": list, "model_name": "ecological_regions"},
+    'soilorder': {"expected_type": list, "model_name": "soil_order"},
+    'wet': {"expected_type": list, "model_name": "soil_variants"},
+    'mesic': {"expected_type": list, "model_name": "soil_variants"},
+    'dry': {"expected_type": list, "model_name": "soil_variants"},
+    'water': {"expected_type": int, "model_name": "water_tolerance"},
+    'drought': {"expected_type": int, "model_name": "drought_tolerance"},
+    'frost': {"expected_type": int, "model_name": "frost_tolerance"},
+    'salinity': {"expected_type": int, "model_name": "salinity_tolerance"},
+    'purpose': {"expected_type": str, "null_allowed": True},
+    'stage': {"expected_type": int},
+    'growthform': {"expected_type": str, "model_name": "growth_form", "null_allowed": True, "max_length": 50}
+}
+
+SPREADSHEET_FILENAME = 'plant_data.xlsx'
+DATA_START_COL = 3
+DATA_START_ROW = 7
+INFO_HEADER_ROW = 6
+
+DATA_DIR_PATH = Path(right_tree.api.data.__file__).resolve().parent
+
+# NOTE(review): the lookups below query the database and load the spreadsheet
+# when this module is imported - consider moving them into Command.handle.
+ECO_REGION_PK_MAPPING = get_pk_mapping(EcologicalRegion)
+SOIL_ORDER_PK_MAPPING = get_pk_mapping(SoilOrder)
+SOIL_VARIANT_PK_MAPPING = get_pk_mapping(SoilVariant)
+TOLERANCE_PK_MAPPING = get_pk_mapping(ToleranceLevel, "level")
+
+SPREADSHEET = get_spreadsheet(DATA_DIR_PATH, SPREADSHEET_FILENAME)
+INFO_COL_INDEXES = get_col_mappings(
+    SPREADSHEET, DATA_START_COL, INFO_HEADER_ROW)
+
+PLANT_JSON_TEMPLATE = {
+    "model": "api.plant",
+    "pk": None,
+    "fields": {}
+}
+
+
+def check_field_type(field, field_value):
+    """Validate `field_value` against the PLANT_COLS rules for `field`.
+
+    Raises:
+        TypeError: if the value is not of the expected type (None is accepted
+            only for nullable fields), or if it is a string that is longer
+            than the field's `max_length`.
+    """
+    field_rules = PLANT_COLS[field]
+    expected_field_type = field_rules['expected_type']
+    model_field_name = field_rules.get('model_name', field)
+    null_allowed = field_rules.get('null_allowed', False)
+    max_length = field_rules.get('max_length', False)
+
+    is_valid_type = isinstance(field_value, expected_field_type)
+    # An int is accepted wherever a float is expected
+    is_int_when_float = expected_field_type == float and isinstance(
+        field_value, int)
+    is_valid_null = field_value is None and null_allowed
+    is_over_max_length = max_length and isinstance(
+        field_value, str) and len(field_value) > max_length
+
+    if not (is_valid_type or is_int_when_float or is_valid_null):
+        raise TypeError(
+            f"Invalid json type for field {model_field_name} with value {field_value}. Expected '{expected_field_type}' but got '{type(field_value)}'.")
+    if is_over_max_length:
+        raise TypeError(
+            f"Invalid string length for {model_field_name} with value {field_value}. Expected length to be under {max_length} but was {len(field_value)}.")
+
+
+def get_plant_json_from_row(row_data):
+    """Return the fixture entry for one spreadsheet row.
+
+    `row_data` is a tuple of cell values indexed by INFO_COL_INDEXES. If any
+    exported field fails to convert or validate, the error is printed and an
+    empty dict is returned so that the caller can skip the row.
+    """
+    plant_json_fields = {}
+    for field, field_index in INFO_COL_INDEXES.items():
+        if field not in PLANT_COLS:
+            continue
+
+        model_field_name = PLANT_COLS[field].get('model_name', field)
+        try:
+            if field == "region":
+                regions_list = get_pk_list_from_str(
+                    row_data[field_index], ECO_REGION_PK_MAPPING, ECO_REGION_ADJUSTMENTS)
+                plant_json_fields[model_field_name] = regions_list
+
+            elif field == "soilorder":
+                soil_orders_list = get_pk_list_from_str(
+                    row_data[field_index], SOIL_ORDER_PK_MAPPING)
+                plant_json_fields[model_field_name] = soil_orders_list
+
+            elif field in {'wet', 'mesic', 'dry'}:
+                # NOTE(review): the cell value is never read here, so every row
+                # gets each of these variants - confirm that this is intended.
+                soil_variant_pk = SOIL_VARIANT_PK_MAPPING[field.capitalize()]
+                plant_json_fields[model_field_name] = plant_json_fields.get(
+                    model_field_name, []) + [soil_variant_pk]
+
+            elif field in {'water', 'drought', 'frost', 'salinity'}:
+                plant_json_fields[model_field_name] = TOLERANCE_PK_MAPPING[row_data[field_index]]
+
+            else:
+                plant_json_fields[model_field_name] = row_data[field_index]
+
+            check_field_type(field, plant_json_fields[model_field_name])
+
+        except Exception as e:
+            # Best effort: report the row and skip it rather than abort the run
+            name_index = INFO_COL_INDEXES['name']
+            print(
+                f"Error occured while adding the row for {row_data[name_index]}.")
+            print(F"{type(e)}: {e}")
+            print("SKIPPING ROW...")
+            print("----------------------------------------------")
+
+            return {}
+
+    plant_json = PLANT_JSON_TEMPLATE.copy()
+    plant_json["fields"] = plant_json_fields
+
+    return plant_json
+
+
+def get_plant_json_fixture(sheet):
+    """Return the list of fixture entries for the data rows of `sheet`.
+
+    Rows that cannot be converted are left out; the created and skipped
+    counts are printed once all rows have been processed.
+    """
+    plant_json_fixture = []
+    skipped_count = 0
+
+    for row in sheet.iter_rows(min_col=DATA_START_COL, min_row=DATA_START_ROW, values_only=True):
+        plant_json = get_plant_json_from_row(row)
+
+        # If there is invalid data in a row, it will be skipped
+        if plant_json:
+            plant_json_fixture.append(plant_json)
+        else:
+            skipped_count += 1
+
+    print("Created plants fixture.")
+    print(f"Rows Created: {len(plant_json_fixture)}")
+    print(f"Rows Skipped: {skipped_count}")
+    return plant_json_fixture
+
+
+def save_plant_fixture(fixture):
+    """Write `fixture` as JSON to fixtures/plants.json in the data directory."""
+    fixture_filepath = DATA_DIR_PATH / 'fixtures' / 'plants.json'
+    # Create the fixtures directory if it is missing so the write cannot fail on it
+    fixture_filepath.parent.mkdir(parents=True, exist_ok=True)
+    fixture_filepath.write_text(json.dumps(fixture))
+
+
+class Command(BaseCommand):
+    """Create the plants fixture file from the plant spreadsheet."""
+
+    help = 'Creates the plant fixtures from the plant spreadsheet data'
+
+    def handle(self, *args, **options):
+        self.stdout.write('Creating plant fixtures...')
+        plant_fixture = get_plant_json_fixture(SPREADSHEET)
+        save_plant_fixture(plant_fixture)
+        self.stdout.write(self.style.SUCCESS(
+            'Plant fixtures created and saved successfully.'))
diff --git a/backend/right_tree/api/management/commands/resetplants.py b/backend/right_tree/api/management/commands/resetplants.py
new file mode 100644
index 0000000..4ae2214
--- /dev/null
+++ b/backend/right_tree/api/management/commands/resetplants.py
@@ -0,0 +1,14 @@
+"""Management command that removes every plant from the database."""
+from django.core.management.base import BaseCommand
+from right_tree.api.models import Plant
+
+
+class Command(BaseCommand):
+    """Delete all Plant objects."""
+
+    help = 'Removes all plant objects from the database'
+
+    def handle(self, *args, **options):
+        self.stdout.write(self.style.WARNING(
+            'Removing all plant objects from the database.'))
+        Plant.objects.all().delete()