validation errors; empty list means valid */
    public function validate(string $text, string $qualityTier = 'rich'): array
    {
        $errors = [];
        $trimmed = trim($text);

        // Nothing else can be checked meaningfully on an empty text, so this
        // is the only check that short-circuits.
        if ($trimmed === '') {
            $errors[] = 'Biography is empty.';

            return $errors;
        }

        // Length bounds (in words, as counted by str_word_count()).
        $wordCount = str_word_count($trimmed);
        if ($wordCount < self::MIN_WORDS) {
            $errors[] = "Biography is too short ({$wordCount} words, minimum " . self::MIN_WORDS . ').';
        }
        if ($wordCount > self::MAX_WORDS) {
            $errors[] = "Biography is too long ({$wordCount} words, maximum " . self::MAX_WORDS . ').';
        }

        // Formatting: the text must be one plain-prose paragraph.
        if ($this->containsMarkdown($trimmed)) {
            $errors[] = 'Biography contains markdown or structural formatting.';
        }
        if ($this->hasMultipleParagraphs($trimmed)) {
            $errors[] = 'Biography contains multiple paragraphs; must be a single paragraph.';
        }

        // Forbidden phrases are compared against the lowercased text. The
        // lowercase copy is built once rather than once per phrase, and only
        // the first offending phrase is reported.
        $lowered = mb_strtolower($trimmed);
        foreach (self::FORBIDDEN_PHRASES as $phrase) {
            if (str_contains($lowered, $phrase)) {
                $errors[] = "Biography contains forbidden phrase: \"{$phrase}\".";
                break;
            }
        }

        $repetitionError = $this->checkRepetition($trimmed);
        if ($repetitionError !== null) {
            $errors[] = $repetitionError;
        }

        // The claim-heaviness check only applies to the 'sparse' quality tier.
        if ($qualityTier === 'sparse' && $this->soundsTooRichForSparseProfile($trimmed)) {
            $errors[] = 'Biography sounds too claim-heavy for a sparse creator profile.';
        }

        return $errors;
    }

    /**
     * Convenience wrapper around validate(): true when it reports no errors.
     */
    public function isValid(string $text, string $qualityTier = 'rich'): bool
    {
        return $this->validate($text, $qualityTier) === [];
    }

    // -------------------------------------------------------------------------

    /**
     * Detect markdown-style structure in the text: headings, bullet or
     * numbered lists, bold/italic markers, and backtick code spans.
     *
     * @return bool True as soon as any of the patterns matches.
     */
    private function containsMarkdown(string $text): bool
    {
        // Headings: one to six '#' at the start of a line, followed by whitespace.
        if (preg_match('/^\s*#{1,6}\s/m', $text)) {
            return true;
        }

        // Bullets: lines starting with - or *.
        if (preg_match('/^\s*[-*]\s/m', $text)) {
            return true;
        }

        // Numbered lists: lines starting with "1. ", "2. ", ...
        if (preg_match('/^\s*\d+\.\s/m', $text)) {
            return true;
        }

        // Bold / italic markers. A single underscore only counts when it is not
        // preceded by an ASCII letter or digit, so intraword underscores such as
        // "night_owl" or "snake_case" are not mistaken for emphasis.
        if (preg_match('/\*\*|__|\*[^*]|(?<![A-Za-z0-9])_[^_]/', $text)) {
            return true;
        }

        // Inline code or fenced code blocks: any backtick is enough (a fence is
        // just three of them, so it needs no separate check).
        return str_contains($text, '`');
    }

    private
function hasMultipleParagraphs(string $text): bool
    {
        // A paragraph break is two newlines with nothing but optional
        // whitespace between them (so a "blank" line made of spaces or "\r"
        // also counts).
        return preg_match('/\n\s*\n/', $text) === 1;
    }

    /**
     * Check whether any formulaic phrase appears more than once,
     * which usually indicates a recycled or low-quality output.
     *
     * The text is lowercased before counting; the entries of
     * self::REPETITION_PHRASES are used as-is. Only the first phrase found
     * to repeat is reported.
     *
     * @return string|null Error message for the first repeated phrase, or null when none repeats.
     */
    private function checkRepetition(string $text): ?string
    {
        $lower = mb_strtolower($text);

        foreach (self::REPETITION_PHRASES as $phrase) {
            // substr_count() counts non-overlapping occurrences.
            if (substr_count($lower, $phrase) >= 2) {
                return "Biography repeats the phrase \"{$phrase}\" too many times.";
            }
        }

        return null;
    }

    /**
     * For sparse-profile biographies, reject text that sounds too achievement-heavy.
     * These signals typically appear only in rich profiles and would be hallucinated
     * or misleading when the creator has very little public history.
     *
     * Each indicator is counted at most once, however often it occurs.
     *
     * @return bool True when three or more distinct indicators are present.
     */
    private function soundsTooRichForSparseProfile(string $text): bool
    {
        $lower = mb_strtolower($text);

        // Indicators distinctive enough to be matched as plain substrings; this
        // also catches inflected forms such as "milestones" or "remastered".
        $substringIndicators = [
            'featured',
            'best-performing',
            'standout',
            'milestone',
            'comeback',
            'evolution',
            'remaster',
            'streak',
            'downloads',
            'most productive',
        ];

        $hitCount = 0;
        foreach ($substringIndicators as $indicator) {
            if (str_contains($lower, $indicator)) {
                $hitCount++;
            }
        }

        // "era" is too short for substring matching: it occurs inside everyday
        // words like "several", "camera" or "general". Count it only as a whole
        // word (singular or plural).
        if (preg_match('/\beras?\b/', $lower) === 1) {
            $hitCount++;
        }

        // If a sparse profile biography references 3+ rich signals, it likely hallucinated them.
        return $hitCount >= 3;
    }
}