# Harmony Suite CUSTOM SoT Build

## **Steps to Build Harmony Suite CUSTOM SoT (Source of Truth) Build**

**Follow the steps below to build a CUSTOM SoT.**

1. **Preprocess the raw input data**
   * Ensure raw input data is ready for processing. At this point you can do some level of data quality assurance and field reformatting.&#x20;

**Sample Step 1 Preprocessing Script**

```
import csv
from datetime import datetime
import re


def transform_date(date_str):
    """Convert a ``CCYYMMDD000000`` timestamp string to ``DD/MM/CCYY``.

    Only the first eight characters (the date part) are parsed; the
    trailing six characters are discarded.

    Args:
        date_str: Raw cell value read from the CSV.

    Returns:
        The reformatted date. ``date_str`` is returned unchanged when it is
        empty, is not exactly 14 characters long, or does not start with a
        valid ``CCYYMMDD`` date.
    """
    if not date_str or len(date_str) != 14:
        return date_str
    try:
        parsed = datetime.strptime(date_str[:8], '%Y%m%d')
    except ValueError:
        # A malformed value (e.g. all zeros) is passed through untouched so
        # one bad cell does not abort processing of the whole file.
        return date_str
    return parsed.strftime('%d/%m/%Y')

def split_rd_location(input_string):
    """Split an ``"RD <digits> <location>"`` string into its two parts.

    Returns:
        A ``(rd, location)`` tuple where ``rd`` is the ``"RD <digits>"``
        prefix and ``location`` is the text after the following space, or
        ``(None, None)`` when the string does not start with that pattern.
    """
    found = re.match(r'(RD \d+) (.+)', input_string)
    if found is None:
        return None, None
    rd_part, location_part = found.groups()
    return rd_part, location_part
    
def _build_full_address(row):
    """Assemble the FULL_ADDRESS string from a row's component fields."""

    def text(key):
        # csv.DictReader yields None for columns missing from a short row;
        # treat that as empty so "None" never ends up in the address.
        return row[key] or ''

    # NOTE(review): the trailing "/" is only emitted for flat ranges, not for
    # a single flat number - confirm this is the intended format.
    if text('FLAT_HIGH'):
        subdwelling = f"{text('UNIT_TYPE')} {text('FLAT_LOW')}-{text('FLAT_HIGH')}/"
    else:
        subdwelling = f"{text('UNIT_TYPE')} {text('FLAT_LOW')}"
    subdwelling = subdwelling.strip()

    if text('HOUSE_HIGH'):
        street = (
            f"{text('HOUSE_LOW')}{text('HOUSE_LOW_SUFFIX')}"
            f"-{text('HOUSE_HIGH')}{text('HOUSE_HIGH_SUFFIX')}"
        )
    else:
        street = f"{text('HOUSE_LOW')}{text('HOUSE_LOW_SUFFIX')}"

    # Join only the non-empty components so that empty fields do not leave
    # doubled spaces inside the address.
    street_line = ' '.join(
        part
        for part in (subdwelling, street, text('FULL_PRIMARY_ROAD_NAME'))
        if part
    )
    locality_line = ' '.join(
        part
        for part in (text('LOCALITY_NAME'), text('TIL_TOWN_NAME'), text('POSTCODE'))
        if part
    )
    return f"{street_line}, {locality_line}".strip()


def run_step_1(input_dir: str, processed_dir: str):
    """Preprocess ``./TIL_ADDRESS.csv`` into ``./TIL_ADDRESS_step1.csv``.

    For every row this:

    * reformats the date columns with ``transform_date``;
    * blanks the "high" value of a house/flat/suffix pair when it equals the
      "low" value;
    * splits ``RURAL_DELIVERY_NUMBER`` into the ``RD <n>`` part and a
      location, storing the location in ``TIL_TOWN_NAME`` when that column
      is not empty;
    * (re)builds ``FULL_ADDRESS`` from the component columns.

    Args:
        input_dir: Currently unused; the input path is hard-coded relative
            to the working directory.
        processed_dir: Currently unused; the output path is hard-coded
            relative to the working directory.

    Raises:
        KeyError: If a required component column is missing from the input
            header.
    """
    input_file_path = './TIL_ADDRESS.csv'
    temp_file_path = './TIL_ADDRESS_step1.csv'

    date_fields = ('CREATED_DATE', 'ALPHA_MODIFIED_DATE', 'GEOM_MODIFIED_DATE')
    # (low, high) column pairs: the high value is redundant when it merely
    # repeats the low one.
    # NOTE(review): the suffix pair is compared independently of the house
    # numbers, so "10A-12A" becomes "10A-12" - confirm this is intended.
    range_pairs = (
        ('HOUSE_LOW', 'HOUSE_HIGH'),
        ('FLAT_LOW', 'FLAT_HIGH'),
        ('HOUSE_LOW_SUFFIX', 'HOUSE_HIGH_SUFFIX'),
    )

    # utf-8-sig strips a leading BOM so the first header name is read cleanly.
    with open(input_file_path, 'r', encoding='utf-8-sig') as infile, \
            open(temp_file_path, 'w', encoding='utf-8', newline='') as outfile:
        reader = csv.DictReader(infile)
        fieldnames = list(reader.fieldnames or [])
        # FULL_ADDRESS is written for every row, so it must be part of the
        # header; csv.DictWriter raises ValueError for unknown keys.
        if 'FULL_ADDRESS' not in fieldnames:
            fieldnames.append('FULL_ADDRESS')

        writer = csv.DictWriter(outfile, fieldnames=fieldnames)
        writer.writeheader()

        for row in reader:
            for date_field in date_fields:
                if date_field in row:
                    row[date_field] = transform_date(row[date_field])

            for low_field, high_field in range_pairs:
                if row[low_field] == row[high_field]:
                    row[high_field] = ''

            rd_value = row['RURAL_DELIVERY_NUMBER']
            if rd_value:
                rd, location = split_rd_location(rd_value)
                # Only overwrite when the value matched "RD <n> <location>";
                # otherwise keep the original text instead of replacing it
                # (and the town name) with None.
                if rd is not None:
                    row['RURAL_DELIVERY_NUMBER'] = rd
                    if row['TIL_TOWN_NAME'] != '':
                        row['TIL_TOWN_NAME'] = location

            row['FULL_ADDRESS'] = _build_full_address(row)

            writer.writerow(row)

    # The original input is left in place. To overwrite it with the updated
    # file instead, call os.replace(temp_file_path, input_file_path) here.

    print(f"Updated input file saved as {temp_file_path}.")

if __name__ == "__main__":
    # Placeholder directory names; substitute real paths before running.
    input_dir, processed_dir = (
        "path_to_input_directory",
        "path_to_processed_directory",
    )
    run_step_1(input_dir=input_dir, processed_dir=processed_dir)
```

2. **Field Mapping**&#x20;

* This step maps address components to their respective fields in the index.&#x20;

**Sample Step 2 Field Mapping**

```
import csv

#from app.app_settings import app_settings
# from utils.files import copy_file_to_dir

# Maps source CSV column names to the field names used in the output file.
# Columns that are not listed here keep their original name.
field_mapping = {
    "ID": "id",
    "UNIT_TYPE": "flat_type",
    "FLAT_LOW": "flat_number_1",
    "FLAT_HIGH": "flat_number_2",
    "HOUSE_LOW": "street_number_1",
    "HOUSE_HIGH": "street_number_2",
    "HOUSE_LOW_SUFFIX": "street_number_1_suffix",
    "HOUSE_HIGH_SUFFIX": "street_number_2_suffix",
    "LEVEL_NO": "level_number",
    "HABITATION_NAME": "building_name",
    "PRIMARY_NAME": "street_name",
    "PRIMARY_TYPE": "street_type",
    "PRIMARY_SUFFIX": "street_suffix1",
    "FULL_ADDRESS": "full_address",
    "LOCALITY_NAME": "locality",
    "TIL_TA_NAME": "city",
    "POSTCODE": "postcode",
    "RURAL_DELIVERY_NUMBER": "delivery_number",
    "WGS84_LONG": "longitude",
    "WGS84_LAT": "latitude"
}


def run_step_2(processing_dir: str, output_dir: str):
    """Convert ``./TIL_ADDRESS_step1.csv`` to pipe-delimited ``./tui-custom.txt``.

    Column names are renamed through ``field_mapping``, rows whose
    ``ADDRESS_TYPE`` is ``'Alias'`` are dropped, double quotes are removed
    from ``full_address`` and a trailing ``delivery_type`` column is added.

    Args:
        processing_dir: Currently unused; the input path is hard-coded
            relative to the working directory.
        output_dir: Currently unused; the output path is hard-coded relative
            to the working directory.
    """
    input_file_path = './TIL_ADDRESS_step1.csv'

    output_file_path = './tui-custom.txt'

    with open(input_file_path, 'r', encoding='utf-8') as infile, \
            open(output_file_path, 'w', encoding='utf-8', newline='') as outfile:

        reader = csv.DictReader(infile)
        # reader.fieldnames is None for an empty input file.
        fieldnames = [field_mapping.get(field, field) for field in reader.fieldnames or []]
        fieldnames.append("delivery_type")  # Add the new field to the header

        # NOTE(review): values are written verbatim, so a "|" inside a value
        # would shift the columns of that line - confirm the data cannot
        # contain one.
        outfile.write('|'.join(fieldnames) + '\n')

        for row in reader:
            # Exclude records with ADDRESS_TYPE = 'Alias'
            if row.get('ADDRESS_TYPE') == 'Alias':
                continue

            # csv.DictReader fills columns missing from a short row with
            # None; normalise to '' so the join below cannot fail.
            updated_row = {
                field_mapping.get(key, key): '' if value is None else value
                for key, value in row.items()
            }

            # Ensure the FULL_ADDRESS field is not enclosed in double quotes
            if 'full_address' in updated_row:
                updated_row['full_address'] = updated_row['full_address'].replace('"', '')

            # Add the delivery_type field based on RURAL_DELIVERY_NUMBER.
            # NOTE(review): if the number already carries an "RD " prefix
            # this yields "RD RD <n>" - confirm the format the index expects.
            delivery_number = updated_row.get('delivery_number', '')
            delivery_type = f"RD {delivery_number}" if delivery_number else ""
            updated_row['delivery_type'] = delivery_type

            # Write the updated row to the output file
            outfile.write('|'.join(updated_row.get(field, '') for field in fieldnames) + '\n')

    print(f"Processed file saved as {output_file_path}.")

if __name__ == "__main__":
    # Placeholder directory names; substitute real paths before running.
    input_dir, processed_dir, output_dir = (
        "path_to_input_directory",
        "path_to_processed_directory",
        "path_to_output_directory",
    )

    # Run the field-mapping step on the output of step 1.
    run_step_2(processing_dir=processed_dir, output_dir=output_dir)
```

3. **Copy the result file from step 2 above to the harmony-tools/customfile/delivery folder**

<figure><img src="https://2735524619-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2Fhp3nSGDGzW6K0AYaMW45%2Fuploads%2Fg1stzr2mX6h1GIIubCxh%2Fimage.png?alt=media&#x26;token=45cb68d2-f8d8-4d5e-ab54-c49c2237f13b" alt=""><figcaption></figcaption></figure>

4. **Run the Batch Tool:**&#x20;

Use HelloBuilder.cmd to generate Harmony CUSTOM SoT.&#x20;

<figure><img src="https://2735524619-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2Fhp3nSGDGzW6K0AYaMW45%2Fuploads%2FdS9ZVjHYMunoO5kd6a0m%2Fimage.png?alt=media&#x26;token=cdb2d6f7-7565-4823-a8c1-9246d3793c2a" alt=""><figcaption></figcaption></figure>

## Configuration&#x20;

If you need bespoke customisations, you can configure them in custom.properties. See below.&#x20;

<figure><img src="https://2735524619-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2Fhp3nSGDGzW6K0AYaMW45%2Fuploads%2FSHNi0Qet7uVeKqJasHyT%2Fimage.png?alt=media&#x26;token=9bd9d492-d793-43d0-a9c3-1ac2b9594da2" alt=""><figcaption></figcaption></figure>

5. Run HelloBuilder.cmd - This is the job which creates the index.&#x20;

<figure><img src="https://2735524619-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2Fhp3nSGDGzW6K0AYaMW45%2Fuploads%2FGZm76g9HIoedBz2k0QGj%2Fimage.png?alt=media&#x26;token=89811379-6958-4a8b-80f9-74b898753281" alt=""><figcaption></figcaption></figure>

6. Once done, copy the generated index to your Harmony\_home/ROAD folder like so:&#x20;

<figure><img src="https://2735524619-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2Fhp3nSGDGzW6K0AYaMW45%2Fuploads%2Fh2wegkA8XZzQYdI3kcmb%2Fimage.png?alt=media&#x26;token=b568d955-7697-4ed5-9151-e281fc639a3f" alt=""><figcaption></figcaption></figure>

Restart your server to enable the changes.&#x20;

## Support and Assistance

If you encounter any issues or have questions about the Custom SoT Build, feel free to reach out to our support team at [**Customer.Support@gbgplc.com**](mailto:Customer.Support@gbgplc.com).
