From 039d8d14ec287a35adc2695411a3d9d299b38d22 Mon Sep 17 00:00:00 2001 From: Dana Lambert Date: Tue, 2 Nov 2021 09:20:51 +1300 Subject: [PATCH] Update plant ingestion script for new spreadsheet format --- .../commands/createplantfixtures.py | 53 ++++++++++--------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/backend/right_tree/api/management/commands/createplantfixtures.py b/backend/right_tree/api/management/commands/createplantfixtures.py index 23c2c7d..181d1ff 100644 --- a/backend/right_tree/api/management/commands/createplantfixtures.py +++ b/backend/right_tree/api/management/commands/createplantfixtures.py @@ -19,30 +19,30 @@ ECO_REGION_ADJUSTMENTS = { # Relevant columns and information used to retrieve information from the spreadsheet PLANT_COLS = { - 'name': {"expected_type": str, "max_length": 50}, - 'maxheight': {"expected_type": float}, - 'spacing': {"expected_type": float}, - 'commonname': {"expected_type": str, "null_allowed": True, "max_length": 50}, - 'synonym': {"expected_type": str, "null_allowed": True, "max_length": 200}, - 'region': {"expected_type": list, "model_name": "ecological_regions"}, - 'soilorder': {"expected_type": list, "model_name": "soil_order"}, - 'wet': {"expected_type": list, "model_name": "soil_variants"}, - 'mesic': {"expected_type": list, "model_name": "soil_variants"}, - 'dry': {"expected_type": list, "model_name": "soil_variants"}, - 'water': {"expected_type": int, "model_name": "water_tolerance"}, - 'drought': {"expected_type": int, "model_name": "drought_tolerance"}, - 'frost': {"expected_type": int, "model_name": "frost_tolerance"}, - 'salinity': {"expected_type": int, "model_name": "salinity_tolerance"}, - 'purpose': {"expected_type": str, "null_allowed": True}, - 'stage': {"expected_type": int}, - 'growthform': {"expected_type": str, "model_name": "growth_form", "null_allowed": True, "max_length": 50} + 'SCIENTIFIC NAME': {"str": "name", "expected_type": str, "max_length": 50}, + 'MAX HT': {"str": "maxheight", "expected_type": float}, + 'SPACING': {"str": "spacing", "expected_type": float}, + 'COMMON NAME': {"str": "commonname", "expected_type": str, "null_allowed": True, "max_length": 200}, + 'SYNONYM': {"str": "synonym", "expected_type": str, "null_allowed": True, "max_length": 200}, + 'ECOLOGICAL REGION': {"str": "region", "expected_type": list, "model_name": "ecological_regions"}, + 'SOIL ORDER': {"str": "soilorder", "expected_type": list, "model_name": "soil_order"}, + 'Wet': {"str": "wet", "expected_type": list, "model_name": "soil_variants"}, + 'Mesic': {"str": "mesic", "expected_type": list, "model_name": "soil_variants"}, + 'Dry': {"str": "dry", "expected_type": list, "model_name": "soil_variants"}, + 'Water': {"str": "water", "expected_type": int, "model_name": "water_tolerance"}, + 'Drought': {"str": "drought", "expected_type": int, "model_name": "drought_tolerance"}, + 'Frost': {"str": "frost", "expected_type": int, "model_name": "frost_tolerance"}, + 'Salinity': {"str": "salinity", "expected_type": int, "model_name": "salinity_tolerance"}, + 'ES': {"str": "purpose", "expected_type": str, "null_allowed": True}, + 'STAGE': {"str": "stage", "expected_type": int}, + 'GrowthForm': {"str": "growthform", "expected_type": str, "model_name": "growth_form", "null_allowed": True, "max_length": 50} } # Spreadsheet constants SPREADSHEET_FILENAME = 'plant_data.xlsx' DATA_START_COL = 3 -DATA_START_ROW = 7 -INFO_HEADER_ROW = 6 +DATA_START_ROW = 6 +INFO_HEADER_ROW = 4 # Data directory path DATA_DIR_PATH = Path(right_tree.api.data.__file__).resolve().parent @@ -96,20 +96,21 @@ def get_plant_json_from_row(row_data): for field, field_index in INFO_COL_INDEXES.items(): if field not in PLANT_COLS: continue - - model_field_name = PLANT_COLS[field].get('model_name', field) + + field_str = PLANT_COLS[field].get('str', field) + model_field_name = PLANT_COLS[field].get('model_name', field_str) try: - if field == "region": + if field_str == "region": regions_list = get_pk_list_from_str( row_data[field_index], ECO_REGION_PK_MAPPING, ECO_REGION_ADJUSTMENTS) plant_json_fields[model_field_name] = regions_list - elif field == "soilorder": + elif field_str == "soilorder": soil_orders_list = get_pk_list_from_str( row_data[field_index], SOIL_ORDER_PK_MAPPING) plant_json_fields[model_field_name] = soil_orders_list - elif field in {'wet', 'mesic', 'dry'}: + elif field_str in {'wet', 'mesic', 'dry'}: if row_data[field_index] != None: soil_variant_pk = SOIL_VARIANT_PK_MAPPING[field.capitalize()] plant_json_fields[model_field_name] = plant_json_fields.get( @@ -117,7 +118,7 @@ def get_plant_json_from_row(row_data): else: continue - elif field in {'water', 'drought', 'frost', 'salinity'}: + elif field_str in {'water', 'drought', 'frost', 'salinity'}: plant_json_fields[model_field_name] = TOLERANCE_PK_MAPPING[row_data[field_index]] elif field in PLANT_COLS: @@ -126,7 +127,7 @@ def get_plant_json_from_row(row_data): check_field_value(field, plant_json_fields[model_field_name]) except Exception as e: - name_index = INFO_COL_INDEXES['name'] + name_index = INFO_COL_INDEXES['SCIENTIFIC NAME'] print( f"Error occured while adding the row for {row_data[name_index]}.") print(F"{type(e)}: {e}")