Add documentation to the spreadsheet processing methods
This commit is contained in:
parent
1cb7403f6f
commit
530089c19c
2 changed files with 28 additions and 2 deletions
|
@ -1,6 +1,9 @@
|
||||||
from openpyxl import load_workbook
|
from openpyxl import load_workbook
|
||||||
|
|
||||||
|
|
||||||
def get_pk_mapping(object, mapping_key="name"):
|
def get_pk_mapping(object, mapping_key="name"):
|
||||||
|
""" Returns a dictionary mapping the values of a given field on a django model to the corresponding primary keys.
|
||||||
|
"""
|
||||||
pk_mapping = {}
|
pk_mapping = {}
|
||||||
for instance in object.objects.all():
|
for instance in object.objects.all():
|
||||||
pk_mapping[getattr(instance, mapping_key)] = instance.pk
|
pk_mapping[getattr(instance, mapping_key)] = instance.pk
|
||||||
|
@ -9,6 +12,8 @@ def get_pk_mapping(object, mapping_key="name"):
|
||||||
|
|
||||||
|
|
||||||
def get_col_mappings(sheet, start_col, row_index):
|
def get_col_mappings(sheet, start_col, row_index):
|
||||||
|
""" Returns a dictionary that maps a spreadsheet cell value to a corresponding column index.
|
||||||
|
"""
|
||||||
col_mappings = {}
|
col_mappings = {}
|
||||||
for row in sheet.iter_rows(min_col=start_col, min_row=row_index, max_row=row_index, values_only=True):
|
for row in sheet.iter_rows(min_col=start_col, min_row=row_index, max_row=row_index, values_only=True):
|
||||||
for i, col_name in enumerate(row):
|
for i, col_name in enumerate(row):
|
||||||
|
@ -18,6 +23,9 @@ def get_col_mappings(sheet, start_col, row_index):
|
||||||
|
|
||||||
|
|
||||||
def get_pk_list_from_str(values_str, pk_mapping, fixes={}):
|
def get_pk_list_from_str(values_str, pk_mapping, fixes={}):
|
||||||
|
""" Given a string of comma separated values from the spreadsheet, returns a list of primary keys that
|
||||||
|
correspond to the relevant values with any given mapping fixes applied.
|
||||||
|
"""
|
||||||
pk_list = []
|
pk_list = []
|
||||||
for value in values_str.split(','):
|
for value in values_str.split(','):
|
||||||
processed_value = value.lstrip().rstrip().replace(
|
processed_value = value.lstrip().rstrip().replace(
|
||||||
|
@ -35,7 +43,9 @@ def get_pk_list_from_str(values_str, pk_mapping, fixes={}):
|
||||||
|
|
||||||
|
|
||||||
def get_spreadsheet(data_path, spreadsheet_filename):
|
def get_spreadsheet(data_path, spreadsheet_filename):
|
||||||
|
""" Returns a spreadsheet from a resources directory given the data path and
|
||||||
|
spreadsheet filename.
|
||||||
|
"""
|
||||||
spreadsheet_path = data_path / 'resources' / spreadsheet_filename
|
spreadsheet_path = data_path / 'resources' / spreadsheet_filename
|
||||||
workbook = load_workbook(filename=spreadsheet_path)
|
workbook = load_workbook(filename=spreadsheet_path)
|
||||||
return workbook.active
|
return workbook.active
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,7 @@ import right_tree.api.data
|
||||||
from ._spreadsheet_helpers import *
|
from ._spreadsheet_helpers import *
|
||||||
from right_tree.api.models import EcologicalRegion, SoilOrder, SoilVariant, ToleranceLevel
|
from right_tree.api.models import EcologicalRegion, SoilOrder, SoilVariant, ToleranceLevel
|
||||||
|
|
||||||
|
# Mapping adjustments between the shapefile ecological regions and those in the spreadsheet
|
||||||
ECO_REGION_ADJUSTMENTS = {
|
ECO_REGION_ADJUSTMENTS = {
|
||||||
"Whakatane": "Whatkatane",
|
"Whakatane": "Whatkatane",
|
||||||
"North West Nelson": "North-west Nelson",
|
"North West Nelson": "North-west Nelson",
|
||||||
|
@ -17,6 +17,7 @@ ECO_REGION_ADJUSTMENTS = {
|
||||||
"Sounds Wellington": "Sounds-Wellington"
|
"Sounds Wellington": "Sounds-Wellington"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Relevant columns and information used to retrieve information from the spreadsheet
|
||||||
PLANT_COLS = {
|
PLANT_COLS = {
|
||||||
'name': {"expected_type": str, "max_length": 50},
|
'name': {"expected_type": str, "max_length": 50},
|
||||||
'maxheight': {"expected_type": float},
|
'maxheight': {"expected_type": float},
|
||||||
|
@ -37,22 +38,27 @@ PLANT_COLS = {
|
||||||
'growthform': {"expected_type": str, "model_name": "growth_form", "null_allowed": True, "max_length": 50}
|
'growthform': {"expected_type": str, "model_name": "growth_form", "null_allowed": True, "max_length": 50}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Spreadsheet constants
|
||||||
SPREADSHEET_FILENAME = 'plant_data.xlsx'
|
SPREADSHEET_FILENAME = 'plant_data.xlsx'
|
||||||
DATA_START_COL = 3
|
DATA_START_COL = 3
|
||||||
DATA_START_ROW = 7
|
DATA_START_ROW = 7
|
||||||
INFO_HEADER_ROW = 6
|
INFO_HEADER_ROW = 6
|
||||||
|
|
||||||
|
# Data directory path
|
||||||
DATA_DIR_PATH = Path(right_tree.api.data.__file__).resolve().parent
|
DATA_DIR_PATH = Path(right_tree.api.data.__file__).resolve().parent
|
||||||
|
|
||||||
|
# Mappings between values in the spreadsheet and primary key values in the database
|
||||||
ECO_REGION_PK_MAPPING = get_pk_mapping(EcologicalRegion)
|
ECO_REGION_PK_MAPPING = get_pk_mapping(EcologicalRegion)
|
||||||
SOIL_ORDER_PK_MAPPING = get_pk_mapping(SoilOrder)
|
SOIL_ORDER_PK_MAPPING = get_pk_mapping(SoilOrder)
|
||||||
SOIL_VARIANT_PK_MAPPING = get_pk_mapping(SoilVariant)
|
SOIL_VARIANT_PK_MAPPING = get_pk_mapping(SoilVariant)
|
||||||
TOLERANCE_PK_MAPPING = get_pk_mapping(ToleranceLevel, "level")
|
TOLERANCE_PK_MAPPING = get_pk_mapping(ToleranceLevel, "level")
|
||||||
|
|
||||||
|
# Spreadsheet and corresponding value to column index mappings
|
||||||
SPREADSHEET = get_spreadsheet(DATA_DIR_PATH, SPREADSHEET_FILENAME)
|
SPREADSHEET = get_spreadsheet(DATA_DIR_PATH, SPREADSHEET_FILENAME)
|
||||||
INFO_COL_INDEXES = get_col_mappings(
|
INFO_COL_INDEXES = get_col_mappings(
|
||||||
SPREADSHEET, DATA_START_COL, INFO_HEADER_ROW)
|
SPREADSHEET, DATA_START_COL, INFO_HEADER_ROW)
|
||||||
|
|
||||||
|
# Template for the plant json to add as an entry for the fixtures
|
||||||
PLANT_JSON_TEMPLATE = {
|
PLANT_JSON_TEMPLATE = {
|
||||||
"model": "api.plant",
|
"model": "api.plant",
|
||||||
"pk": None,
|
"pk": None,
|
||||||
|
@ -61,6 +67,8 @@ PLANT_JSON_TEMPLATE = {
|
||||||
|
|
||||||
|
|
||||||
def check_field_type(field, field_value):
|
def check_field_type(field, field_value):
|
||||||
|
""" Checks the validity of a field value collected from the spreadsheet
|
||||||
|
"""
|
||||||
expected_field_type = PLANT_COLS[field]['expected_type']
|
expected_field_type = PLANT_COLS[field]['expected_type']
|
||||||
model_field_name = PLANT_COLS[field].get('model_name', field)
|
model_field_name = PLANT_COLS[field].get('model_name', field)
|
||||||
null_allowed = PLANT_COLS[field].get('null_allowed', False)
|
null_allowed = PLANT_COLS[field].get('null_allowed', False)
|
||||||
|
@ -82,6 +90,8 @@ def check_field_type(field, field_value):
|
||||||
|
|
||||||
|
|
||||||
def get_plant_json_from_row(row_data):
|
def get_plant_json_from_row(row_data):
|
||||||
|
""" Returns a json object representing a plant row from the spreadsheet.
|
||||||
|
"""
|
||||||
plant_json_fields = {}
|
plant_json_fields = {}
|
||||||
for field, field_index in INFO_COL_INDEXES.items():
|
for field, field_index in INFO_COL_INDEXES.items():
|
||||||
if field not in PLANT_COLS:
|
if field not in PLANT_COLS:
|
||||||
|
@ -129,6 +139,8 @@ def get_plant_json_from_row(row_data):
|
||||||
|
|
||||||
|
|
||||||
def get_plant_json_fixture(sheet):
|
def get_plant_json_fixture(sheet):
|
||||||
|
""" Returns a django fixture json that represents the plant information extracted from the spreadsheet.
|
||||||
|
"""
|
||||||
plant_json_fixture = []
|
plant_json_fixture = []
|
||||||
skipped_count = 0
|
skipped_count = 0
|
||||||
created_count = 0
|
created_count = 0
|
||||||
|
@ -143,13 +155,17 @@ def get_plant_json_fixture(sheet):
|
||||||
else:
|
else:
|
||||||
skipped_count += 1
|
skipped_count += 1
|
||||||
|
|
||||||
|
# Print summary of data extraction from the spreadsheet
|
||||||
print("Created plants fixture.")
|
print("Created plants fixture.")
|
||||||
print(f"Rows Created: {created_count}")
|
print(f"Rows Created: {created_count}")
|
||||||
print(f"Rows Skipped: {skipped_count}")
|
print(f"Rows Skipped: {skipped_count}")
|
||||||
|
|
||||||
return plant_json_fixture
|
return plant_json_fixture
|
||||||
|
|
||||||
|
|
||||||
def save_plant_fixture(fixture):
|
def save_plant_fixture(fixture):
|
||||||
|
""" Saves the plant fixture to the django api fixtures directory.
|
||||||
|
"""
|
||||||
fixture_filepath = DATA_DIR_PATH / 'fixtures' / 'plants.json'
|
fixture_filepath = DATA_DIR_PATH / 'fixtures' / 'plants.json'
|
||||||
fixture_filepath.write_text(json.dumps(fixture))
|
fixture_filepath.write_text(json.dumps(fixture))
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue