from fastapi import APIRouter, Query
from pydantic import BaseModel, ConfigDict, Field
from typing import Dict, Any, List, cast
import pandas as pd
from code_matcher import find_top_k_neighbors, MatchResult
# The caller is responsible for building the query dictionary in the shape the matcher expects. The query must be flat (no nested structures).
# Router that collects this module's endpoints; handlers attach to it through the @router decorators.
router = APIRouter()
# Response item: a generic record paired with the similarity score reported for it.
class MatchResultModel(BaseModel):
    # Allow instances to be built from attribute-bearing objects as well as dicts.
    model_config = ConfigDict(from_attributes=True)

    # Free-form payload: the schema does not constrain which keys are present.
    record: Dict[str, Any] = Field(
        description="The matched record containing company information",
        examples=[
            {
                "company_name": "HEPPNER - SOCIETE DE TRANSPORTS",
                "SIREN": "763200202",
                "reasoning___VAT_number": "Found VAT number FR42169800202 in header of the document.",
                "VAT_number": "FR42169800202",
                "city": "ST PIERRE DES CORPS",
                "postal_code": "37700",
                "country": "FR",
                "email": "jean-michel@heppner-group.com",
            }
        ],
    )

    # Validated to lie in the closed interval [0, 1].
    similarity: float = Field(
        description="Similarity score between 0 and 1, where 1 indicates a perfect match",
        ge=0,
        le=1,
        examples=[0.95],
    )
# Read the client-code table a single time, when this module is imported,
# so request handlers can reuse the in-memory rows.
# NOTE(review): the path is relative, so it resolves against the process's
# working directory. pandas also infers column dtypes by default, so
# digit-only columns may not load as strings and empty cells may load as
# NaN -- confirm this is what the matcher and the JSON response expect.
df = pd.read_csv("data/client_codes.csv")

# One dict per CSV row; the cast only informs the type checker.
database: List[Dict[str, Any]] = cast(
    List[Dict[str, Any]],
    df.to_dict(orient="records"),
)
# Request body for the matching endpoint: the record to look up and how many results to return.
class MatchRequest(BaseModel):
    # Free-form query payload: the schema does not constrain which keys are present.
    record: Dict[str, Any] = Field(
        description="Record to match against the database",
        examples=[
            {
                "company_name": "HEPPNER - SOCIETE DE TRANSPORTS",
                "SIREN": "763200202",
                "reasoning___VAT_number": "Found VAT number FR42169800202 in header of the document.",
                "VAT_number": "FR42169800202",
                "city": "ST PIERRE DES CORPS",
                "postal_code": "37700",
                "country": "FR",
                "email": "jean-michel@heppner-group.com",
            }
        ],
    )

    # Optional; validated to lie between 1 and 100 inclusive.
    k: int = Field(
        default=20,
        description="Number of matching results to return",
        ge=1,
        le=100,
        examples=[20],
    )
@router.post(
    "/match",
    response_model=List[MatchResultModel],
    description="Find the k closest matching records for a given query record",
    responses={
        200: {
            "description": "Successful matching results",
            "content": {
                "application/json": {
                    "example": [
                        {
                            "record": {
                                "code": "HEPP37ST",
                                "company_name": "HEPPNER - SOCIETE DE TRANSPORTS",
                                "SIREN": "763200202",
                                "VAT_number": "FR42169800202",
                                "city": "ST PIERRE DES CORPS",
                                "postal_code": "37700",
                                "country": "FR",
                                "email": "jean-michel@heppner-group.com"
                            },
                            "similarity": 0.95
                        },
                        {
                            "record": {
                                "code": "HEPP75PA",
                                "company_name": "HEPPNER TRANSPORT SAS",
                                "SIREN": "763200203",
                                "VAT_number": "FR42169800203",
                                "city": "PARIS",
                                "postal_code": "75001",
                                "country": "FR",
                                "email": "contact@heppner-group.com"
                            },
                            "similarity": 0.85
                        }
                    ]
                }
            }
        }
    }
)
async def find_matches(
    request: MatchRequest
) -> List[MatchResultModel]:
    """
    Return the closest database matches for the record sent in the request body.

    Example of a record received from the frontend:

        Record = {
            "company_name": "HEPPNER - SOCIETE DE TRANSPORTS",
            "SIREN": "763200202",
            "reasoning___VAT_number": "Found VAT number FR42169800202 in header of the document.",
            "VAT_number": "FR42169800202",
            "city": "ST PIERRE DES CORPS",
            "postal_code": "37700",
            "country": "FR",
            "email": "jean-michel@heppner-group.com"
        }

    The record can be adapted to the query before matching if necessary:

        adapted_record = {
            "name": record["company_name"],
            "SIREN": record["SIREN"],
            "VAT": record["VAT_number"],
            "city": record["city"],
            "zip_code": record["postal_code"],
            "country": record["country"],
            "email": record["email"]
        }

    Args:
        request: Request body holding ``record`` (the record to match) and
            ``k`` (number of results to return, between 1 and 100,
            default 20).

    Returns:
        One MatchResultModel per result produced by find_top_k_neighbors,
        each carrying the matched record and its similarity score.
    """
    # The record is passed to the matcher as received; no adaptation is applied here.
    # NOTE(review): find_top_k_neighbors is called synchronously inside an
    # async handler; if it is CPU-heavy it will block the event loop for
    # the duration of the search -- confirm this is acceptable.
    neighbors = find_top_k_neighbors(request.record, database, request.k)

    # Wrap each raw result so it is validated against the response schema.
    return [
        MatchResultModel(record=hit["record"], similarity=hit["similarity"])
        for hit in neighbors
    ]
@router.get(
    "/agencies",
    response_model=List[str],
    description="Fetch a list of available agencies",
    responses={
        200: {
            "description": "List of agency names",
            "content": {
                "application/json": {
                    # Kept identical to the payload the handler returns, so
                    # the generated API docs show the real identifiers.
                    "example": [
                        "AGENCE_1",
                        "AGENCE_2",
                        "AGENCE_3",
                        "AGENCE_4",
                        "AGENCE_5"
                    ]
                }
            }
        }
    }
)
async def fetch_agencies() -> List[str]:
    """
    Return the hardcoded list of agency names.

    Returns:
        The five agency identifiers "AGENCE_1" through "AGENCE_5".
    """
    return [
        "AGENCE_1",
        "AGENCE_2",
        "AGENCE_3",
        "AGENCE_4",
        "AGENCE_5",
    ]