Add Django command to populate the database with spreadsheet plant data
This commit is contained in:
parent
aa57c6e101
commit
299e609c25
5 changed files with 222 additions and 1 deletions
3
backend/.gitignore
vendored
3
backend/.gitignore
vendored
|
@ -1,3 +1,6 @@
|
||||||
*.pyc
|
*.pyc
|
||||||
*.sqlite3
|
*.sqlite3
|
||||||
__pycache__
|
__pycache__
|
||||||
|
|
||||||
|
resources
|
||||||
|
right_tree/api/data/fixtures/plants.json
|
|
@ -2,3 +2,4 @@ Django==3.2.8
|
||||||
psycopg2-binary>=2.8
|
psycopg2-binary>=2.8
|
||||||
djangorestframework==3.12.4
|
djangorestframework==3.12.4
|
||||||
django-cors-headers==3.10.0
|
django-cors-headers==3.10.0
|
||||||
|
openpyxl==3.0.9
|
|
@ -0,0 +1,41 @@
|
||||||
|
from openpyxl import load_workbook
|
||||||
|
|
||||||
|
def get_pk_mapping(object, mapping_key="name"):
    """Build a lookup from a model attribute value to the instance's primary key.

    Args:
        object: A model class exposing ``objects.all()``. The parameter name
            shadows the builtin but is kept so existing callers still work.
        mapping_key: Name of the instance attribute whose value becomes the
            dictionary key. Defaults to ``"name"``.

    Returns:
        A dict mapping ``getattr(instance, mapping_key)`` to ``instance.pk``.
        If several instances share a key value, the last one iterated wins.
    """
    return {
        getattr(record, mapping_key): record.pk
        for record in object.objects.all()
    }
|
||||||
|
|
||||||
|
|
||||||
|
def get_col_mappings(sheet, start_col, row_index):
    """Map every header value in one worksheet row to its zero-based position.

    Positions are counted from ``start_col``: the cell in ``start_col`` gets
    index 0, the next cell index 1, and so on. These indexes line up with the
    tuples produced by iterating data rows with the same ``min_col``.

    Args:
        sheet: Worksheet-like object providing ``iter_rows``.
        start_col: Column number passed to ``iter_rows`` as ``min_col``.
        row_index: Row number of the header row (used as both ``min_row`` and
            ``max_row``).

    Returns:
        A dict of ``{header_value: position}``. Empty header cells appear under
        the key ``None``; for duplicated headers the right-most position wins.
    """
    col_mappings = {}
    header_rows = sheet.iter_rows(
        min_col=start_col, min_row=row_index, max_row=row_index,
        values_only=True)
    for header_cells in header_rows:
        col_mappings.update(
            {col_name: position
             for position, col_name in enumerate(header_cells)})
    return col_mappings
|
||||||
|
|
||||||
|
|
||||||
|
def get_pk_list_from_str(values_str, pk_mapping, fixes=None):
    """Convert a comma-separated string of names into a list of primary keys.

    Each item is normalised before lookup: surrounding whitespace is removed,
    underscores and hyphens become spaces, and right single quotation marks
    (’) become ASCII apostrophes. The normalised name is then optionally
    renamed through ``fixes`` and looked up in ``pk_mapping``.

    Args:
        values_str: Comma-separated names, e.g. the text of one spreadsheet
            cell.
        pk_mapping: Dict of ``{database_name: pk}``.
        fixes: Optional dict of ``{normalised_name: database_name}`` applied
            after normalisation. Defaults to no adjustments.

    Returns:
        The primary keys of all recognised names, in input order. Names with
        no entry in ``pk_mapping`` are silently dropped.

    Raises:
        AttributeError: If ``values_str`` has no ``split`` method (for example
            ``None``).
    """
    # A None default avoids sharing one mutable dict between calls.
    if fixes is None:
        fixes = {}

    pk_list = []
    for raw_value in values_str.split(','):
        processed_value = raw_value.strip().replace(
            '_', ' ').replace('-', ' ').replace('’', '\'')

        # Applies any mapping adjustments between spreadsheet data and the
        # database values
        if processed_value in fixes:
            processed_value = fixes[processed_value]

        # Adds the pk value for the value in the database
        if processed_value in pk_mapping:
            pk_list.append(pk_mapping[processed_value])

    return pk_list
|
||||||
|
|
||||||
|
|
||||||
|
def get_spreadsheet(data_path, spreadsheet_filename):
    """Load a workbook from the ``resources`` folder and return its active sheet.

    Args:
        data_path: Base directory; must support the ``/`` path operator
            (for example a ``pathlib.Path``).
        spreadsheet_filename: File name of the workbook inside
            ``data_path / 'resources'``.

    Returns:
        The ``active`` worksheet of the loaded workbook.
    """
    return load_workbook(
        filename=data_path / 'resources' / spreadsheet_filename).active
|
||||||
|
|
|
@ -0,0 +1,165 @@
|
||||||
|
from django.core.management.base import BaseCommand
|
||||||
|
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import right_tree.api.data
|
||||||
|
from ._spreadsheet_helpers import *
|
||||||
|
from right_tree.api.models import EcologicalRegion, SoilOrder, SoilVariant, ToleranceLevel
|
||||||
|
|
||||||
|
|
||||||
|
# Renames applied to ecological region names read from the spreadsheet so that
# they match the names used as keys of the region pk mapping. Keys are the
# spreadsheet names *after* normalisation (hyphens and underscores already
# replaced by spaces); values are the names to look up instead.
ECO_REGION_ADJUSTMENTS = {
    "Whakatane": "Whatkatane",
    "North West Nelson": "North-west Nelson",
    "Aorangi": "Aorrangi",
    "Mackenzie": "MacKenzie",
    "Southland Hills": "Southland Foothills",
    "Sounds Wellington": "Sounds-Wellington",
}
|
||||||
|
|
||||||
|
# Validation rules for every spreadsheet column that is copied into a plant
# fixture entry, keyed by the spreadsheet header name.
#   expected_type: type the converted value must have
#   model_name:    field name used in the fixture (defaults to the header name)
#   null_allowed:  whether None is accepted (defaults to False)
#   max_length:    maximum accepted string length (no limit when absent)
PLANT_COLS = {
    # Plain descriptive columns
    'name': {"expected_type": str, "max_length": 50},
    'maxheight': {"expected_type": float},
    'spacing': {"expected_type": float},
    'commonname': {
        "expected_type": str, "null_allowed": True, "max_length": 50},
    'synonym': {
        "expected_type": str, "null_allowed": True, "max_length": 200},

    # Columns converted to lists of primary keys
    'region': {"expected_type": list, "model_name": "ecological_regions"},
    'soilorder': {"expected_type": list, "model_name": "soil_order"},
    'wet': {"expected_type": list, "model_name": "soil_variants"},
    'mesic': {"expected_type": list, "model_name": "soil_variants"},
    'dry': {"expected_type": list, "model_name": "soil_variants"},

    # Columns converted to a single tolerance-level primary key
    'water': {"expected_type": int, "model_name": "water_tolerance"},
    'drought': {"expected_type": int, "model_name": "drought_tolerance"},
    'frost': {"expected_type": int, "model_name": "frost_tolerance"},
    'salinity': {"expected_type": int, "model_name": "salinity_tolerance"},

    # Remaining plain columns
    'purpose': {"expected_type": str, "null_allowed": True},
    'stage': {"expected_type": int},
    'growthform': {
        "expected_type": str, "model_name": "growth_form",
        "null_allowed": True, "max_length": 50},
}
|
||||||
|
|
||||||
|
# Workbook file name, looked up inside the data directory's resources folder.
SPREADSHEET_FILENAME = 'plant_data.xlsx'

# Layout of the worksheet, expressed as the row/column numbers handed to
# iter_rows (min_col / min_row).
DATA_START_COL = 3   # first column that is read
DATA_START_ROW = 7   # first row holding plant data
INFO_HEADER_ROW = 6  # row holding the column header names
|
||||||
|
|
||||||
|
# Directory of the right_tree.api.data package; spreadsheet resources and
# fixtures are resolved relative to it.
DATA_DIR_PATH = Path(right_tree.api.data.__file__).resolve().parent

# NOTE(review): everything below runs when this module is imported, not when
# the command's handle() is called. The pk mappings presumably query the
# database and the workbook is read from disk at that point - confirm that
# import-time work is acceptable here.

# Lookups from a model's display value to its primary key.
ECO_REGION_PK_MAPPING = get_pk_mapping(EcologicalRegion)
SOIL_ORDER_PK_MAPPING = get_pk_mapping(SoilOrder)
SOIL_VARIANT_PK_MAPPING = get_pk_mapping(SoilVariant)
TOLERANCE_PK_MAPPING = get_pk_mapping(ToleranceLevel, "level")

# Active worksheet of the plant workbook and the position of each header.
SPREADSHEET = get_spreadsheet(DATA_DIR_PATH, SPREADSHEET_FILENAME)
INFO_COL_INDEXES = get_col_mappings(
    SPREADSHEET, DATA_START_COL, INFO_HEADER_ROW)
|
||||||
|
|
||||||
|
# Skeleton of one fixture entry. It is shallow-copied per row and its "fields"
# value is replaced with that row's converted data; "pk" is left as None.
PLANT_JSON_TEMPLATE = {
    "model": "api.plant",
    "pk": None,
    "fields": {},
}
|
||||||
|
|
||||||
|
|
||||||
|
def check_field_type(field, field_value):
    """Validate a converted value against the rules declared in PLANT_COLS.

    A value passes the type check when it is an instance of the column's
    ``expected_type``, when it is an int and a float is expected, or when it
    is None and the column sets ``null_allowed``. String values are also
    checked against the column's ``max_length``, if one is set.

    Args:
        field: Spreadsheet column name; must be a key of PLANT_COLS.
        field_value: The value about to be stored for that column.

    Raises:
        KeyError: If ``field`` has no entry in PLANT_COLS.
        TypeError: If the value has the wrong type, or is a string longer
            than ``max_length``.
    """
    rules = PLANT_COLS[field]
    expected_field_type = rules['expected_type']
    model_field_name = rules.get('model_name', field)
    max_length = rules.get('max_length', False)

    type_is_acceptable = (
        isinstance(field_value, expected_field_type)
        # Whole numbers are accepted where a float is expected
        or (expected_field_type == float and isinstance(field_value, int))
        or (field_value is None and rules.get('null_allowed', False))
    )
    if not type_is_acceptable:
        raise TypeError(
            f"Invalid json type for field {model_field_name} with value {field_value}. Expected '{expected_field_type}' but got '{type(field_value)}'.")

    # The length limit only applies to string values
    if max_length and isinstance(field_value, str) and len(field_value) > max_length:
        raise TypeError(
            f"Invalid string length for {model_field_name} with value {field_value}. Expected length to be under {max_length} but was {len(field_value)}.")
|
||||||
|
|
||||||
|
|
||||||
|
def get_plant_json_from_row(row_data):
    """Convert one spreadsheet data row into a plant fixture entry.

    Only columns whose header appears in both INFO_COL_INDEXES and PLANT_COLS
    are used. Region and soil-order cells become lists of primary keys,
    tolerance cells are translated to a tolerance-level primary key, and all
    other cells are copied unchanged. Every converted value is validated with
    check_field_type.

    Args:
        row_data: Sequence of cell values, indexed by the positions stored in
            INFO_COL_INDEXES.

    Returns:
        A copy of PLANT_JSON_TEMPLATE with ``"fields"`` filled in, or an empty
        dict when any column of the row fails to convert or validate (the
        error is printed and the row is skipped).
    """
    plant_json_fields = {}

    for field, field_index in INFO_COL_INDEXES.items():
        # Spreadsheet columns without validation rules are not exported
        if field not in PLANT_COLS:
            continue

        model_field_name = PLANT_COLS[field].get('model_name', field)

        try:
            if field == "region":
                converted_value = get_pk_list_from_str(
                    row_data[field_index],
                    ECO_REGION_PK_MAPPING,
                    ECO_REGION_ADJUSTMENTS)

            elif field == "soilorder":
                converted_value = get_pk_list_from_str(
                    row_data[field_index], SOIL_ORDER_PK_MAPPING)

            elif field in {'wet', 'mesic', 'dry'}:
                # The three columns share one model field, so each one
                # extends the list collected so far.
                # NOTE(review): the cell value itself is never read here, so
                # the variant is added for every row whenever the column
                # exists - confirm this is intended.
                soil_variant_pk = SOIL_VARIANT_PK_MAPPING[field.capitalize()]
                converted_value = plant_json_fields.get(
                    model_field_name, []) + [soil_variant_pk]

            elif field in {'water', 'drought', 'frost', 'salinity'}:
                converted_value = TOLERANCE_PK_MAPPING[row_data[field_index]]

            else:
                converted_value = row_data[field_index]

            plant_json_fields[model_field_name] = converted_value
            check_field_type(field, converted_value)

        except Exception as e:
            # Any failure invalidates the whole row: report it and bail out
            name_index = INFO_COL_INDEXES['name']
            print(
                f"Error occured while adding the row for {row_data[name_index]}.")
            print(F"{type(e)}: {e}")
            print("SKIPPING ROW...")
            print("----------------------------------------------")

            return {}

    return {**PLANT_JSON_TEMPLATE, "fields": plant_json_fields}
|
||||||
|
|
||||||
|
|
||||||
|
def get_plant_json_fixture(sheet):
    """Convert every data row of the worksheet into a list of fixture entries.

    Rows are read from DATA_START_ROW onwards, starting at DATA_START_COL.
    Rows that get_plant_json_from_row rejects (it returns an empty dict) are
    left out. A summary of created and skipped rows is printed at the end.

    Args:
        sheet: Worksheet-like object providing ``iter_rows``.

    Returns:
        List of fixture entries, one per successfully converted row.
    """
    data_rows = sheet.iter_rows(
        min_col=DATA_START_COL, min_row=DATA_START_ROW, values_only=True)
    converted_rows = [get_plant_json_from_row(row) for row in data_rows]

    # If there is invalid data in a row, its entry is empty and is dropped
    plant_json_fixture = [entry for entry in converted_rows if entry]
    created_count = len(plant_json_fixture)
    skipped_count = len(converted_rows) - created_count

    print("Created plants fixture.")
    print(f"Rows Created: {created_count}")
    print(f"Rows Skipped: {skipped_count}")
    return plant_json_fixture
|
||||||
|
|
||||||
|
|
||||||
|
def save_plant_fixture(fixture):
    """Serialise the fixture to ``fixtures/plants.json`` in the data directory.

    Args:
        fixture: JSON-serialisable object (the list of plant fixture entries).

    Raises:
        TypeError: If ``fixture`` contains values ``json.dumps`` cannot encode.
        OSError: If the directory or file cannot be created or written.
    """
    fixture_filepath = DATA_DIR_PATH / 'fixtures' / 'plants.json'
    # The fixtures directory may not exist yet (for example when the generated
    # file is its only content), so create it instead of failing in write_text.
    fixture_filepath.parent.mkdir(parents=True, exist_ok=True)
    fixture_filepath.write_text(json.dumps(fixture))
|
||||||
|
|
||||||
|
|
||||||
|
class Command(BaseCommand):
    """Management command that turns the plant spreadsheet into a fixture file.

    The command itself only builds the fixture entries and writes them to
    disk; it does not load them into the database.
    """

    help = 'Ingests the plant spreadsheet data into the database'

    def handle(self, *args, **options):
        """Build the plant fixture from SPREADSHEET and save it."""
        self.stdout.write('Creating plant fixtures...')

        save_plant_fixture(get_plant_json_fixture(SPREADSHEET))

        success_message = self.style.SUCCESS(
            'Plant fixtures created and saved successfully.')
        self.stdout.write(success_message)
|
11
backend/right_tree/api/management/commands/resetplants.py
Normal file
11
backend/right_tree/api/management/commands/resetplants.py
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
from django.core.management.base import BaseCommand
|
||||||
|
from right_tree.api.models import Plant
|
||||||
|
|
||||||
|
|
||||||
|
class Command(BaseCommand):
    """Management command that deletes every Plant row."""

    help = 'Removes all plant objects from the database'

    def handle(self, *args, **options):
        """Print a warning, then delete all Plant objects."""
        warning_message = self.style.WARNING(
            'Removing all plant objects from the database.')
        self.stdout.write(warning_message)

        Plant.objects.all().delete()
|
Loading…
Reference in a new issue