Update plant ingestion script for new spreadsheet format

This commit is contained in:
Dana Lambert 2021-11-02 09:20:51 +13:00
parent 8855bcb2ba
commit 039d8d14ec

View file

@ -19,30 +19,30 @@ ECO_REGION_ADJUSTMENTS = {
# Relevant columns and information used to retrieve information from the spreadsheet # Relevant columns and information used to retrieve information from the spreadsheet
PLANT_COLS = { PLANT_COLS = {
'name': {"expected_type": str, "max_length": 50}, 'SCIENTIFIC NAME': {"str": "name", "expected_type": str, "max_length": 50},
'maxheight': {"expected_type": float}, 'MAX HT': {"str": "maxheight", "expected_type": float},
'spacing': {"expected_type": float}, 'SPACING': {"str": "spacing", "expected_type": float},
'commonname': {"expected_type": str, "null_allowed": True, "max_length": 50}, 'COMMON NAME': {"str": "commonname", "expected_type": str, "null_allowed": True, "max_length": 200},
'synonym': {"expected_type": str, "null_allowed": True, "max_length": 200}, 'SYNONYM': {"str": "synonym", "expected_type": str, "null_allowed": True, "max_length": 200},
'region': {"expected_type": list, "model_name": "ecological_regions"}, 'ECOLOGICAL REGION': {"str": "region", "expected_type": list, "model_name": "ecological_regions"},
'soilorder': {"expected_type": list, "model_name": "soil_order"}, 'SOIL ORDER': {"str": "soilorder", "expected_type": list, "model_name": "soil_order"},
'wet': {"expected_type": list, "model_name": "soil_variants"}, 'Wet': {"str": "wet", "expected_type": list, "model_name": "soil_variants"},
'mesic': {"expected_type": list, "model_name": "soil_variants"}, 'Mesic': {"str": "mesic", "expected_type": list, "model_name": "soil_variants"},
'dry': {"expected_type": list, "model_name": "soil_variants"}, 'Dry': {"str": "dry", "expected_type": list, "model_name": "soil_variants"},
'water': {"expected_type": int, "model_name": "water_tolerance"}, 'Water': {"str": "water", "expected_type": int, "model_name": "water_tolerance"},
'drought': {"expected_type": int, "model_name": "drought_tolerance"}, 'Drought': {"str": "drought", "expected_type": int, "model_name": "drought_tolerance"},
'frost': {"expected_type": int, "model_name": "frost_tolerance"}, 'Frost': {"str": "frost", "expected_type": int, "model_name": "frost_tolerance"},
'salinity': {"expected_type": int, "model_name": "salinity_tolerance"}, 'Salinity': {"str": "salinity", "expected_type": int, "model_name": "salinity_tolerance"},
'purpose': {"expected_type": str, "null_allowed": True}, 'ES': {"str": "purpose", "expected_type": str, "null_allowed": True},
'stage': {"expected_type": int}, 'STAGE': {"str": "stage", "expected_type": int},
'growthform': {"expected_type": str, "model_name": "growth_form", "null_allowed": True, "max_length": 50} 'GrowthForm': {"str": "growthform", "expected_type": str, "model_name": "growth_form", "null_allowed": True, "max_length": 50}
} }
# Spreadsheet constants # Spreadsheet constants
SPREADSHEET_FILENAME = 'plant_data.xlsx' SPREADSHEET_FILENAME = 'plant_data.xlsx'
DATA_START_COL = 3 DATA_START_COL = 3
DATA_START_ROW = 7 DATA_START_ROW = 6
INFO_HEADER_ROW = 6 INFO_HEADER_ROW = 4
# Data directory path # Data directory path
DATA_DIR_PATH = Path(right_tree.api.data.__file__).resolve().parent DATA_DIR_PATH = Path(right_tree.api.data.__file__).resolve().parent
@ -96,20 +96,21 @@ def get_plant_json_from_row(row_data):
for field, field_index in INFO_COL_INDEXES.items(): for field, field_index in INFO_COL_INDEXES.items():
if field not in PLANT_COLS: if field not in PLANT_COLS:
continue continue
model_field_name = PLANT_COLS[field].get('model_name', field) field_str = PLANT_COLS[field].get('str', field)
model_field_name = PLANT_COLS[field].get('model_name', field_str)
try: try:
if field == "region": if field_str == "region":
regions_list = get_pk_list_from_str( regions_list = get_pk_list_from_str(
row_data[field_index], ECO_REGION_PK_MAPPING, ECO_REGION_ADJUSTMENTS) row_data[field_index], ECO_REGION_PK_MAPPING, ECO_REGION_ADJUSTMENTS)
plant_json_fields[model_field_name] = regions_list plant_json_fields[model_field_name] = regions_list
elif field == "soilorder": elif field_str == "soilorder":
soil_orders_list = get_pk_list_from_str( soil_orders_list = get_pk_list_from_str(
row_data[field_index], SOIL_ORDER_PK_MAPPING) row_data[field_index], SOIL_ORDER_PK_MAPPING)
plant_json_fields[model_field_name] = soil_orders_list plant_json_fields[model_field_name] = soil_orders_list
elif field in {'wet', 'mesic', 'dry'}: elif field_str in {'wet', 'mesic', 'dry'}:
if row_data[field_index] != None: if row_data[field_index] != None:
soil_variant_pk = SOIL_VARIANT_PK_MAPPING[field.capitalize()] soil_variant_pk = SOIL_VARIANT_PK_MAPPING[field.capitalize()]
plant_json_fields[model_field_name] = plant_json_fields.get( plant_json_fields[model_field_name] = plant_json_fields.get(
@ -117,7 +118,7 @@ def get_plant_json_from_row(row_data):
else: else:
continue continue
elif field in {'water', 'drought', 'frost', 'salinity'}: elif field_str in {'water', 'drought', 'frost', 'salinity'}:
plant_json_fields[model_field_name] = TOLERANCE_PK_MAPPING[row_data[field_index]] plant_json_fields[model_field_name] = TOLERANCE_PK_MAPPING[row_data[field_index]]
elif field in PLANT_COLS: elif field in PLANT_COLS:
@ -126,7 +127,7 @@ def get_plant_json_from_row(row_data):
check_field_value(field, plant_json_fields[model_field_name]) check_field_value(field, plant_json_fields[model_field_name])
except Exception as e: except Exception as e:
name_index = INFO_COL_INDEXES['name'] name_index = INFO_COL_INDEXES['SCIENTIFIC NAME']
print( print(
f"Error occured while adding the row for {row_data[name_index]}.") f"Error occured while adding the row for {row_data[name_index]}.")
print(F"{type(e)}: {e}") print(F"{type(e)}: {e}")