Automatically detect the delimiter of generic BOM imports

The detectFields does this anyway, so use that guessed value further on
This commit is contained in:
Jan Böhmer 2026-03-15 21:35:38 +01:00
parent de371877b9
commit 74e5102943
2 changed files with 27 additions and 16 deletions

View file

@ -240,7 +240,8 @@ class ProjectController extends AbstractController
}
// Detect fields and get suggestions
$detected_fields = $BOMImporter->detectFields($file_content);
$detected_delimiter = $BOMImporter->detectDelimiter($file_content);
$detected_fields = $BOMImporter->detectFields($file_content, $detected_delimiter);
$suggested_mapping = $BOMImporter->getSuggestedFieldMapping($detected_fields);
// Create mapping of original field names to sanitized field names for template
@ -257,7 +258,7 @@ class ProjectController extends AbstractController
$builder->add('delimiter', ChoiceType::class, [
'label' => 'project.bom_import.delimiter',
'required' => true,
'data' => ',',
'data' => $detected_delimiter,
'choices' => [
'project.bom_import.delimiter.comma' => ',',
'project.bom_import.delimiter.semicolon' => ';',

View file

@ -721,26 +721,36 @@ class BOMImporter
return $mapped;
}
/**
* Try to detect the separator used in the CSV data by analyzing the first line and counting occurrences of common delimiters.
* @param string $data
* @return string
*/
public function detectDelimiter(string $data): string
{
$delimiters = [',', ';', "\t"];
$lines = explode("\n", $data, 2);
$header_line = $lines[0] ?? '';
$delimiter_counts = [];
foreach ($delimiters as $delim) {
$delimiter_counts[$delim] = substr_count($header_line, $delim);
}
// Choose the delimiter with the highest count, default to comma if all are zero
$max_count = max($delimiter_counts);
$delimiter = array_search($max_count, $delimiter_counts, true);
if ($max_count === 0 || $delimiter === false) {
$delimiter = ',';
}
return $delimiter;
}
/**
* Detect available fields in CSV data for field mapping UI
*/
public function detectFields(string $data, ?string $delimiter = null): array
{
if ($delimiter === null) {
// Detect delimiter by counting occurrences in the first row (header)
$delimiters = [',', ';', "\t"];
$lines = explode("\n", $data, 2);
$header_line = $lines[0] ?? '';
$delimiter_counts = [];
foreach ($delimiters as $delim) {
$delimiter_counts[$delim] = substr_count($header_line, $delim);
}
// Choose the delimiter with the highest count, default to comma if all are zero
$max_count = max($delimiter_counts);
$delimiter = array_search($max_count, $delimiter_counts, true);
if ($max_count === 0 || $delimiter === false) {
$delimiter = ',';
}
$delimiter = $this->detectDelimiter($data);
}
// Handle potential BOM (Byte Order Mark) at the beginning
$data = preg_replace('/^\xEF\xBB\xBF/', '', $data);