Harmony Suite CUSTOM SoT Build

This section describes how to generate the Harmony Suite CUSTOM SoT (Source of Truth) Build.

Steps to Build Harmony Suite CUSTOM SoT (Source of Truth) Build

Follow the steps below to build a CUSTOM SoT.

  1. Preprocess the raw input data

    • Ensure raw input data is ready for processing. At this point you can do some level of data quality assurance and field reformatting.

Sample Step 1 Preprocessing Script

import csv
from datetime import datetime
import re


# Function to transform date from ccyymmdd000000 to DD/MM/CCYY
def transform_date(date_str):
    """Convert a ``ccyymmdd000000`` timestamp string to ``DD/MM/CCYY``.

    Only the first eight characters (the date part) of a 14-character value
    are parsed; the trailing six characters are ignored.

    Args:
        date_str: Raw field value. May be empty or ``None``.

    Returns:
        The reformatted date, or ``date_str`` unchanged when it is empty, is
        not exactly 14 characters long, or does not start with a valid
        ``%Y%m%d`` date.
    """
    if not date_str or len(date_str) != 14:
        return date_str
    try:
        parsed = datetime.strptime(date_str[:8], '%Y%m%d')
    except ValueError:
        # Values such as '00000000000000' have the right length but are not
        # real dates. Pass them through like every other non-conforming
        # value instead of aborting the whole run on one bad cell.
        return date_str
    return parsed.strftime('%d/%m/%Y')

def split_rd_location(input_string):
    """Split a rural delivery string into its ``RD <digits>`` part and the rest.

    The string must start with ``RD``, a space, one or more digits, another
    space and at least one further character.

    Args:
        input_string: Text such as ``"RD 3 Hamilton"``.

    Returns:
        A ``(rd, location)`` tuple, e.g. ``("RD 3", "Hamilton")``, or
        ``(None, None)`` when the string does not start with that pattern.
    """
    parts = re.match(r'(RD \d+) (.+)', input_string)
    if parts is None:
        return None, None
    rd_part, location_part = parts.groups()
    return rd_part, location_part
    
def _assemble_full_address(row):
    """Build the single-line FULL_ADDRESS string from a row's address parts."""
    # NOTE(review): the '/' separator is only appended for flat *ranges*; a
    # single flat is joined to the street number by a space. Kept as-is --
    # confirm this is the intended format.
    if row['FLAT_HIGH']:
        subdwelling = f"{row['UNIT_TYPE']} {row['FLAT_LOW']}-{row['FLAT_HIGH']}/"
    else:
        subdwelling = f"{row['UNIT_TYPE']} {row['FLAT_LOW']}"

    if row['HOUSE_HIGH']:
        street = f"{row['HOUSE_LOW']}{row['HOUSE_LOW_SUFFIX']}-{row['HOUSE_HIGH']}{row['HOUSE_HIGH_SUFFIX']}"
    else:
        street = f"{row['HOUSE_LOW']}{row['HOUSE_LOW_SUFFIX']}"

    full_address = f"{subdwelling} {street} {row['FULL_PRIMARY_ROAD_NAME']}, {row['LOCALITY_NAME']} {row['TIL_TOWN_NAME']} {row['POSTCODE']}"
    return full_address.strip()


def run_step_1(input_dir: str, processed_dir: str) -> None:
    """Preprocess ``./TIL_ADDRESS.csv`` into ``./TIL_ADDRESS_step1.csv``.

    For every row this:

    * reformats the CREATED_DATE / ALPHA_MODIFIED_DATE / GEOM_MODIFIED_DATE
      columns (when present) with ``transform_date``;
    * blanks the "high" value of the house, flat and house-suffix pairs when
      it equals the "low" value;
    * splits RURAL_DELIVERY_NUMBER into its ``RD <n>`` part and a location
      with ``split_rd_location``;
    * (re)builds FULL_ADDRESS from the individual components.

    Args:
        input_dir: Currently unused; the input path is fixed relative to the
            working directory.
        processed_dir: Currently unused; the output path is fixed relative to
            the working directory.
    """
    input_file_path = './TIL_ADDRESS.csv'
    temp_file_path = './TIL_ADDRESS_step1.csv'

    date_fields = ('CREATED_DATE', 'ALPHA_MODIFIED_DATE', 'GEOM_MODIFIED_DATE')
    # (low, high) column pairs: the high value is blanked when it merely
    # repeats the low value.
    range_pairs = (
        ('HOUSE_LOW', 'HOUSE_HIGH'),
        ('FLAT_LOW', 'FLAT_HIGH'),
        ('HOUSE_LOW_SUFFIX', 'HOUSE_HIGH_SUFFIX'),
    )

    # newline='' on both files lets the csv module handle line endings itself,
    # as the csv documentation recommends.
    with open(input_file_path, 'r', encoding='utf-8-sig', newline='') as infile, \
            open(temp_file_path, 'w', encoding='utf-8', newline='') as outfile:
        reader = csv.DictReader(infile)
        # reader.fieldnames is None for an empty input file.
        fieldnames = list(reader.fieldnames or [])
        # FULL_ADDRESS is always assigned below; DictWriter raises ValueError
        # for keys missing from the header, so add the column if the input
        # does not already have it.
        if fieldnames and 'FULL_ADDRESS' not in fieldnames:
            fieldnames.append('FULL_ADDRESS')

        writer = csv.DictWriter(outfile, fieldnames=fieldnames)
        writer.writeheader()

        for row in reader:
            for date_field in date_fields:
                if date_field in row:
                    row[date_field] = transform_date(row[date_field])

            for low_field, high_field in range_pairs:
                if row[low_field] == row[high_field]:
                    row[high_field] = ''

            if row['RURAL_DELIVERY_NUMBER']:
                rd, location = split_rd_location(row['RURAL_DELIVERY_NUMBER'])
                # split_rd_location returns (None, None) when the value has no
                # trailing location (e.g. just "RD 5"). Keep the original
                # values in that case rather than overwriting them with None.
                if rd is not None:
                    row['RURAL_DELIVERY_NUMBER'] = rd
                    if row['TIL_TOWN_NAME'] != '':
                        row['TIL_TOWN_NAME'] = location

            row['FULL_ADDRESS'] = _assemble_full_address(row)
            writer.writerow(row)

    # To overwrite the original input with the processed file instead of
    # keeping both, uncomment:
    # import os
    # os.replace(temp_file_path, input_file_path)

    print(f"Updated input file saved as {temp_file_path}.")

if __name__ == "__main__":
    # Script entry point: run the preprocessing step with the sample
    # directory arguments.
    run_step_1(
        input_dir="path_to_input_directory",
        processed_dir="path_to_processed_directory",
    )
  1. Field Mapping

  • This step maps address components to their respective fields in the index.

Sample Step 2 Field Mapping

import csv

#from app.app_settings import app_settings
# from utils.files import copy_file_to_dir

# Define the field mapping
# Source CSV column name -> output field name. Columns that are not listed
# here keep their original name when looked up with field_mapping.get().
field_mapping = {
    # Record identifier
    "ID": "id",
    # Sub-dwelling (flat / unit / level / building)
    "UNIT_TYPE": "flat_type",
    "FLAT_LOW": "flat_number_1",
    "FLAT_HIGH": "flat_number_2",
    "LEVEL_NO": "level_number",
    "HABITATION_NAME": "building_name",
    # Street number range
    "HOUSE_LOW": "street_number_1",
    "HOUSE_HIGH": "street_number_2",
    "HOUSE_LOW_SUFFIX": "street_number_1_suffix",
    "HOUSE_HIGH_SUFFIX": "street_number_2_suffix",
    # Street name
    "PRIMARY_NAME": "street_name",
    "PRIMARY_TYPE": "street_type",
    "PRIMARY_SUFFIX": "street_suffix1",
    # Assembled address and locality
    "FULL_ADDRESS": "full_address",
    "LOCALITY_NAME": "locality",
    "TIL_TA_NAME": "city",
    "POSTCODE": "postcode",
    "RURAL_DELIVERY_NUMBER": "delivery_number",
    # Coordinates
    "WGS84_LONG": "longitude",
    "WGS84_LAT": "latitude",
}

# Process the transformed CSV file
def run_step_2(processing_dir: str, output_dir: str) -> None:
    """Convert ``./TIL_ADDRESS_step1.csv`` to pipe-delimited ``./tui-custom.txt``.

    Column names are renamed through ``field_mapping`` (unmapped columns keep
    their name), rows whose ADDRESS_TYPE is ``'Alias'`` are skipped, double
    quotes are stripped from ``full_address`` and a trailing ``delivery_type``
    column is added.

    Args:
        processing_dir: Currently unused; the input path is fixed relative to
            the working directory.
        output_dir: Currently unused; the output path is fixed relative to
            the working directory.
    """
    input_file_path = './TIL_ADDRESS_step1.csv'
    output_file_path = './tui-custom.txt'

    # newline='' on the input lets the csv module handle line endings itself,
    # as the csv documentation recommends.
    with open(input_file_path, 'r', encoding='utf-8', newline='') as infile, \
            open(output_file_path, 'w', encoding='utf-8', newline='') as outfile:
        reader = csv.DictReader(infile)
        # reader.fieldnames is None for an empty input file.
        fieldnames = [field_mapping.get(field, field) for field in (reader.fieldnames or [])]
        fieldnames.append("delivery_type")  # extra column added by this step

        outfile.write('|'.join(fieldnames) + '\n')

        for row in reader:
            # Exclude records with ADDRESS_TYPE = 'Alias'
            if row.get('ADDRESS_TYPE') == 'Alias':
                continue

            # Rows shorter than the header yield None for the missing cells;
            # normalise those to '' so the string operations below cannot fail.
            updated_row = {
                field_mapping.get(key, key): ('' if value is None else value)
                for key, value in row.items()
            }

            # The output is written without CSV quoting, so drop any literal
            # double quotes from the assembled address.
            if 'full_address' in updated_row:
                updated_row['full_address'] = updated_row['full_address'].replace('"', '')

            # Derive delivery_type from the rural delivery number. The
            # preprocessing step already stores the value as "RD <n>", so only
            # add the prefix when it is missing; prefixing unconditionally
            # produced "RD RD <n>".
            delivery_number = updated_row.get('delivery_number', '')
            if not delivery_number:
                delivery_type = ""
            elif delivery_number.startswith("RD"):
                delivery_type = delivery_number
            else:
                delivery_type = f"RD {delivery_number}"
            updated_row['delivery_type'] = delivery_type

            outfile.write('|'.join(updated_row.get(field, '') for field in fieldnames) + '\n')

    print(f"Processed file saved as {output_file_path}.")

if __name__ == "__main__":
    # Script entry point: run the field-mapping step with the sample
    # directory arguments.
    run_step_2(
        processing_dir="path_to_processed_directory",
        output_dir="path_to_output_directory",
    )

  1. Copy the result file from step 2 above to the harmony-tools/customfile/delivery folder.

  1. Run the Batch Tool:

Use HelloBuilder.cmd to generate Harmony CUSTOM SoT.

Configuration

If you need bespoke customisations, you can adjust them in custom.properties. See below.

  1. Run HelloBuilder.cmd - This is the job which creates the index.

  1. Once done, copy the generated index to your Harmony_home/ROAD folder like so:

Restart your server to enable the changes.

Support and Assistance

If you encounter any issues or have questions about the Custom SoT Build, feel free to reach out to our support team at [email protected].

Last updated