Add documentation to the spreadsheet processing methods
This commit is contained in:
parent
1cb7403f6f
commit
530089c19c
2 changed files with 28 additions and 2 deletions
|
@ -1,6 +1,9 @@
|
|||
from openpyxl import load_workbook
|
||||
|
||||
|
||||
def get_pk_mapping(object, mapping_key="name"):
|
||||
""" Returns a dictionary mapping a django model primary key to another given field.
|
||||
"""
|
||||
pk_mapping = {}
|
||||
for instance in object.objects.all():
|
||||
pk_mapping[getattr(instance, mapping_key)] = instance.pk
|
||||
|
@ -9,6 +12,8 @@ def get_pk_mapping(object, mapping_key="name"):
|
|||
|
||||
|
||||
def get_col_mappings(sheet, start_col, row_index):
|
||||
""" Returns a dictionary that maps a spreadsheet cell value to a corresponding column index.
|
||||
"""
|
||||
col_mappings = {}
|
||||
for row in sheet.iter_rows(min_col=start_col, min_row=row_index, max_row=row_index, values_only=True):
|
||||
for i, col_name in enumerate(row):
|
||||
|
@ -18,6 +23,9 @@ def get_col_mappings(sheet, start_col, row_index):
|
|||
|
||||
|
||||
def get_pk_list_from_str(values_str, pk_mapping, fixes={}):
|
||||
""" Given a string of comma separated values from the spreadsheet, returns a list of primary keys that
|
||||
correspond to the relevant values with any given mapping fixes applied.
|
||||
"""
|
||||
pk_list = []
|
||||
for value in values_str.split(','):
|
||||
processed_value = value.lstrip().rstrip().replace(
|
||||
|
@ -35,7 +43,9 @@ def get_pk_list_from_str(values_str, pk_mapping, fixes={}):
|
|||
|
||||
|
||||
def get_spreadsheet(data_path, spreadsheet_filename):
    """ Loads the workbook stored in the 'resources' directory beneath the given
        data path and returns its active sheet.

        data_path is joined with '/' so it is expected to be a path object;
        spreadsheet_filename is the name of the workbook file inside 'resources'.
    """
    resources_dir = data_path / 'resources'
    # load_workbook gives back the whole workbook; only the active sheet is handed back
    return load_workbook(filename=resources_dir / spreadsheet_filename).active
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@ import right_tree.api.data
|
|||
from ._spreadsheet_helpers import *
|
||||
from right_tree.api.models import EcologicalRegion, SoilOrder, SoilVariant, ToleranceLevel
|
||||
|
||||
|
||||
# Mapping adjustments between the shapefile ecological regions and those in the spreadsheet
|
||||
ECO_REGION_ADJUSTMENTS = {
|
||||
"Whakatane": "Whatkatane",
|
||||
"North West Nelson": "North-west Nelson",
|
||||
|
@ -17,6 +17,7 @@ ECO_REGION_ADJUSTMENTS = {
|
|||
"Sounds Wellington": "Sounds-Wellington"
|
||||
}
|
||||
|
||||
# Relevant columns and information used to retrieve information from the spreadsheet
|
||||
PLANT_COLS = {
|
||||
'name': {"expected_type": str, "max_length": 50},
|
||||
'maxheight': {"expected_type": float},
|
||||
|
@ -37,22 +38,27 @@ PLANT_COLS = {
|
|||
'growthform': {"expected_type": str, "model_name": "growth_form", "null_allowed": True, "max_length": 50}
|
||||
}
|
||||
|
||||
# Spreadsheet constants
|
||||
SPREADSHEET_FILENAME = 'plant_data.xlsx'
|
||||
DATA_START_COL = 3
|
||||
DATA_START_ROW = 7
|
||||
INFO_HEADER_ROW = 6
|
||||
|
||||
# Data directory path
|
||||
DATA_DIR_PATH = Path(right_tree.api.data.__file__).resolve().parent
|
||||
|
||||
# Mappings between values in the spreadsheet and primary key values in the database
|
||||
ECO_REGION_PK_MAPPING = get_pk_mapping(EcologicalRegion)
|
||||
SOIL_ORDER_PK_MAPPING = get_pk_mapping(SoilOrder)
|
||||
SOIL_VARIANT_PK_MAPPING = get_pk_mapping(SoilVariant)
|
||||
TOLERANCE_PK_MAPPING = get_pk_mapping(ToleranceLevel, "level")
|
||||
|
||||
# Spreadsheet and corresponding value to column index mappings
|
||||
SPREADSHEET = get_spreadsheet(DATA_DIR_PATH, SPREADSHEET_FILENAME)
|
||||
INFO_COL_INDEXES = get_col_mappings(
|
||||
SPREADSHEET, DATA_START_COL, INFO_HEADER_ROW)
|
||||
|
||||
# Template for the plant json to add as an entry for the fixtures
|
||||
PLANT_JSON_TEMPLATE = {
|
||||
"model": "api.plant",
|
||||
"pk": None,
|
||||
|
@ -61,6 +67,8 @@ PLANT_JSON_TEMPLATE = {
|
|||
|
||||
|
||||
def check_field_type(field, field_value):
|
||||
""" Checks the validity of a field value collected from the spreadsheet
|
||||
"""
|
||||
expected_field_type = PLANT_COLS[field]['expected_type']
|
||||
model_field_name = PLANT_COLS[field].get('model_name', field)
|
||||
null_allowed = PLANT_COLS[field].get('null_allowed', False)
|
||||
|
@ -82,6 +90,8 @@ def check_field_type(field, field_value):
|
|||
|
||||
|
||||
def get_plant_json_from_row(row_data):
|
||||
""" Returns a json object representing a plant row from the spreadsheet.
|
||||
"""
|
||||
plant_json_fields = {}
|
||||
for field, field_index in INFO_COL_INDEXES.items():
|
||||
if field not in PLANT_COLS:
|
||||
|
@ -129,6 +139,8 @@ def get_plant_json_from_row(row_data):
|
|||
|
||||
|
||||
def get_plant_json_fixture(sheet):
|
||||
""" Returns a django fixture json that represents the plant information extracted from the spreadsheet.
|
||||
"""
|
||||
plant_json_fixture = []
|
||||
skipped_count = 0
|
||||
created_count = 0
|
||||
|
@ -143,13 +155,17 @@ def get_plant_json_fixture(sheet):
|
|||
else:
|
||||
skipped_count += 1
|
||||
|
||||
# Print summary of data extraction from the spreadsheet
|
||||
print("Created plants fixture.")
|
||||
print(f"Rows Created: {created_count}")
|
||||
print(f"Rows Skipped: {skipped_count}")
|
||||
|
||||
return plant_json_fixture
|
||||
|
||||
|
||||
def save_plant_fixture(fixture):
    """ Serialises the given fixture to json and writes it to 'plants.json' in the
        'fixtures' directory beneath DATA_DIR_PATH.
    """
    fixtures_dir = DATA_DIR_PATH / 'fixtures'
    target_file = fixtures_dir / 'plants.json'
    serialised_fixture = json.dumps(fixture)
    target_file.write_text(serialised_fixture)
|
||||
|
||||
|
|
Loading…
Reference in a new issue