RealTime

Interact with Tamr RealTime match service

import pandas as pd
import tamr_toolbox as tbox


# The RealTime match service must be reached on port 9170, so it gets its own client
match_client = tbox.utils.client.create(
    username="user",
    password="pw",
    host="localhost",
    port=9170,
)

# A second client on the default Tamr port is used to access projects;
# look up the relevant mastering project by name
tamr_client = tbox.utils.client.create(
    username="user",
    password="pw",
    host="localhost",
    port=9100,
)
project = tamr_client.projects.by_name("my_mastering_proj")


# Get source data and rename columns to match project unified dataset as needed
filename = "/Data/source_data/incoming_employees.csv"
employee_data = pd.read_csv(filename)
# DataFrame.rename returns a new frame (it does not modify in place by default),
# so the result must be assigned back for the renamed column to be used downstream
employee_data = employee_data.rename(columns={"last_name": "family_name"})

# Query the RealTime match service with one record per row of the source data,
# asking for at most one match per record
employee_records = employee_data.to_dict("records")
lookup_results = tbox.realtime.matching.match_query(
    match_client=match_client,
    project=project,
    records=employee_records,
    type="clusters",
    max_num_matches=1,
)


def _first_match_id(row_index):
    """Return the "matchedRecordId" of the first match for a row, or None if there are none."""
    matches = lookup_results[row_index]
    if not matches:
        return None
    return matches[0]["matchedRecordId"]


# Add the match results into the data, looking each row up by its index label
employee_data["tamr_id"] = employee_data.index.map(_first_match_id)