# Автоматический резюме-аналитик

import re

import spacy

from docx import Document

import PyPDF2

# Load the small English spaCy pipeline once, at import time.
# NOTE(review): `nlp` is not referenced anywhere in the visible part of this
# file -- confirm it is still needed before paying the load cost.
nlp = spacy.load("en_core_web_sm")

# Patterns for contact details

# E-mail: local part, "@", one domain label, ".", then the rest of the domain.
# The last character class also admits "." and "-", so a match can run on
# through trailing dots/hyphens that follow the address in running text.
EMAIL_REGEX = r'[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+'

# Phone: optional 1-3 digit code with an optional leading "+", optional
# 3-4 digit group (optionally parenthesised), then runs of 3, 2-4 and 2-4
# digits, each optionally separated by whitespace, "." or "-".
# The pattern has two capturing groups (the code and the 3-4 digit group), so
# re.findall() returns tuples of those groups rather than the full match.
PHONE_REGEX = r'(\+?\d{1,3})?\s?(\(?\d{3,4}\)?[\s.-]?)?\d{3}[\s.-]?\d{2,4}[\s.-]?\d{2,4}'

# Skill keywords, all lower-case; they are compared against lower-cased text.
SKILLS = ["python", "sql", "docker", "tensorflow", "pandas", "linux", "git", "rest", "flask"]

def extract_text_from_pdf(file_path):
    """Return the text of every page of a PDF file.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The extracted text of all pages, in page order, separated by
        newlines. Pages that yield no text contribute an empty string.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(file_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # Join with a newline so the last word of one page is not fused with
        # the first word of the next (which would corrupt e-mail, phone and
        # skill matching downstream). `or ""` keeps the join safe when a page
        # produces a falsy result instead of a string.
        return "\n".join(page.extract_text() or "" for page in reader.pages)

def extract_text_from_docx(file_path):
    """Return the text of a .docx file, one paragraph per line.

    Args:
        file_path: Path of the .docx file to read.

    Returns:
        The text of each entry in the document's ``paragraphs``, joined
        with newline characters.
    """
    paragraphs = Document(file_path).paragraphs
    lines = [paragraph.text for paragraph in paragraphs]
    return '\n'.join(lines)

def extract_contacts(text):
    """Extract unique e-mail addresses and phone numbers from text.

    Args:
        text: The text to scan.

    Returns:
        A tuple ``(emails, phones)`` of two lists of strings. Each list is
        de-duplicated and keeps the order of first appearance in ``text``.
    """
    emails = re.findall(EMAIL_REGEX, text)

    # PHONE_REGEX contains capturing groups, so re.findall() would return
    # only the captured prefix pieces and drop the rest of the number.
    # group(0) is the complete matched phone number.
    phones = [match.group(0).strip() for match in re.finditer(PHONE_REGEX, text)]

    # dict.fromkeys() removes duplicates while keeping a deterministic,
    # first-seen order (a plain set() gives an arbitrary order).
    return list(dict.fromkeys(emails)), list(dict.fromkeys(phones))

def extract_skills(text):
    """Return the known skills that are mentioned in the text.

    Matching is case-insensitive and whole-word: a skill counts only when it
    is not embedded in a longer word, so "git" is not reported for "digital"
    and "rest" is not reported for "interest". As a consequence a compound
    such as "PostgreSQL" does not count as "sql".

    Args:
        text: The text to scan.

    Returns:
        A list of the entries of ``SKILLS`` found in ``text``, in the order
        they are listed in ``SKILLS``.
    """
    lowered = text.lower()
    return [
        skill
        for skill in SKILLS
        # Lookarounds instead of \b so that skills starting or ending with a
        # non-word character would still be delimited correctly.
        if re.search(rf'(?<!\w){re.escape(skill)}(?!\w)', lowered)
    ]

def estimate_experience_years(text):
    """Estimate years of experience from the year numbers found in the text.

    Looks for standalone four-digit years of the form 20xx (for example
    "2019", "2021") and returns the span between the earliest and the
    latest one.

    Args:
        text: The text to scan.

    Returns:
        Latest year minus earliest year as an int, or 0 when no such year
        is present.
    """
    found_years = {int(token) for token in re.findall(r'\b(20\d{2})\b', text)}
    if not found_years:
        return 0
    return max(found_years) - min(found_years)

def analyze_resume(text):
    """Run every extractor over resume text and collect the results.

    Args:
        text: The plain text of the resume.

    Returns:
        A dict with the keys ``"emails"``, ``"phones"``, ``"skills_found"``
        and ``"estimated_experience_years"``, holding the outputs of
        extract_contacts(), extract_skills() and
        estimate_experience_years() respectively.
    """
    contact_emails, contact_phones = extract_contacts(text)
    return {
        "emails": contact_emails,
        "phones": contact_phones,
        "skills_found": extract_skills(text),
        "estimated_experience_years": estimate_experience_years(text),
    }

# Script entry point: ask for a resume path, extract its text according to the
# file extension, analyze it and print each result field on its own line.
if __name__ == "__main__":
    path = input("Введите путь к резюме (.pdf или .docx): ").strip()

    # Compare the extension case-insensitively so that "CV.PDF" or
    # "cv.Docx" are accepted as well.
    lowered_path = path.lower()

    if lowered_path.endswith(".pdf"):
        resume_text = extract_text_from_pdf(path)
    elif lowered_path.endswith(".docx"):
        resume_text = extract_text_from_docx(path)
    else:
        print("❌ Неподдерживаемый формат.")
        # exit() is a helper injected by the `site` module and is not
        # guaranteed to exist; raising SystemExit always works. A non-zero
        # status tells the calling shell that nothing was analyzed.
        raise SystemExit(1)

    result = analyze_resume(resume_text)

    print("\n📋 Результат анализа:")
    for key, value in result.items():
        print(f"{key}: {value}")

# Автоматический резюме-аналитик

# Report Page