# Автоматический резюме-аналитик


import re

import spacy

from docx import Document

import PyPDF2


# Load the spaCy "en_core_web_sm" pipeline once, at import time.
# NOTE(review): `nlp` is not referenced by any code visible in this file —
# confirm it is still needed before keeping this import-time load.
nlp = spacy.load("en_core_web_sm")


# Patterns and vocabulary used to pull structured facts out of resume text.

# E-mail: local part, "@", then dot-separated domain labels. Every dot must
# be followed by another label, so sentence punctuation ("... a@b.com.") is
# not swallowed into the match.
EMAIL_REGEX = r'[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)+'

# Phone: optional country code, optional area code (parentheses allowed),
# then three digit groups separated by an optional space, dot or hyphen.
# The optional parts are NON-capturing on purpose: with capturing groups
# re.findall() returns tuples of those groups and drops the rest of the
# number.
PHONE_REGEX = r'(?:\+?\d{1,3})?\s?(?:\(?\d{3,4}\)?[\s.-]?)?\d{3}[\s.-]?\d{2,4}[\s.-]?\d{2,4}'

# Lower-case skill keywords searched for in the resume text.
SKILLS = ["python", "sql", "docker", "tensorflow", "pandas", "linux", "git", "rest", "flask"]


def extract_text_from_pdf(file_path):
  """Return the text of all pages of a PDF file as a single string.

  Pages are joined with a newline so that the last token of one page and
  the first token of the next are not fused together (which would corrupt
  the regex-based contact and year detection done on the result). This
  also matches the DOCX extractor, which joins paragraphs with a newline.

  Args:
    file_path: Path to the PDF file.

  Returns:
    The extracted text; an empty string if the document has no pages or
    no extractable text.
  """
  with open(file_path, 'rb') as file:
    reader = PyPDF2.PdfReader(file)
    # Defensive `or ""`: avoids a TypeError should a page yield None
    # instead of a string (e.g. a page without a text layer).
    page_texts = [page.extract_text() or "" for page in reader.pages]
  return "\n".join(page_texts)


def extract_text_from_docx(file_path):
  """Return the text of a .docx file, one paragraph per line.

  Args:
    file_path: Path to the .docx file.

  Returns:
    The text of every paragraph in `doc.paragraphs`, joined with newlines.
  """
  paragraph_texts = []
  for paragraph in Document(file_path).paragraphs:
    paragraph_texts.append(paragraph.text)
  return '\n'.join(paragraph_texts)


def extract_contacts(text):
  """Extract the unique e-mail addresses and phone numbers found in text.

  Args:
    text: Resume text to scan.

  Returns:
    A tuple `(emails, phones)` of two lists of strings. Each list holds
    every distinct match once, in order of first appearance.
  """
  # Use the whole match (group 0) rather than re.findall(): when a pattern
  # contains capturing groups, findall() returns only those groups, which
  # for PHONE_REGEX meant just the optional country/area-code prefixes.
  emails = [match.group(0) for match in re.finditer(EMAIL_REGEX, text)]
  # The phone pattern may start with an optional whitespace character.
  phones = [match.group(0).strip() for match in re.finditer(PHONE_REGEX, text)]

  # dict.fromkeys() removes duplicates while keeping a stable order;
  # list(set(...)) returned the items in an arbitrary order.
  return list(dict.fromkeys(emails)), list(dict.fromkeys(phones))


def extract_skills(text, skills=None):
  """Return the skills from the vocabulary that are mentioned in text.

  Matching is case-insensitive and done on whole tokens: a skill only
  counts when it is not directly preceded or followed by a word character.
  A plain substring test reported "rest" for "interested" and "git" for
  "digital". The flip side is that derived words such as "RESTful" or
  "GitHub" no longer count as "rest" / "git".

  Args:
    text: Resume text to scan.
    skills: Optional iterable of skill names to look for. Defaults to the
      module-level SKILLS list.

  Returns:
    A list of the matched skills, spelled as in the vocabulary and in
    vocabulary order.
  """
  if skills is None:
    skills = SKILLS

  lowered = text.lower()
  found = []
  for skill in skills:
    # Look-arounds instead of \b so that skills ending in punctuation
    # (e.g. "c++") can still match.
    pattern = r'(?<!\w)' + re.escape(skill.lower()) + r'(?!\w)'
    if re.search(pattern, lowered):
      found.append(skill)
  return found


def estimate_experience_years(text):
  """Roughly estimate years of experience from the years mentioned in text.

  Finds every standalone four-digit year of the form 20xx (e.g. "2019",
  "2021") and returns the span between the earliest and the latest one.

  Args:
    text: Resume text to scan.

  Returns:
    Latest year minus earliest year, or 0 when no such year is present.
  """
  year_tokens = re.findall(r'\b(20\d{2})\b', text)
  if not year_tokens:
    return 0

  year_values = [int(token) for token in year_tokens]
  return max(year_values) - min(year_values)


def analyze_resume(text):
  """Run every extractor on the resume text and collect the results.

  Args:
    text: Plain text of the resume.

  Returns:
    A dict with the keys "emails", "phones", "skills_found" and
    "estimated_experience_years", in that order.
  """
  contact_emails, contact_phones = extract_contacts(text)

  report = {"emails": contact_emails, "phones": contact_phones}
  report["skills_found"] = extract_skills(text)
  report["estimated_experience_years"] = estimate_experience_years(text)
  return report


if __name__ == "__main__":
  # Interactive entry point: ask for a resume path, extract its text with
  # the extractor matching the file extension, then print the analysis.
  path = input("Введите путь к резюме (.pdf или .docx): ").strip()

  # Compare on a lowercased copy so that "CV.PDF" or "cv.Docx" are accepted;
  # the original path is still what gets opened.
  lowered_path = path.lower()
  if lowered_path.endswith(".pdf"):
    resume_text = extract_text_from_pdf(path)
  elif lowered_path.endswith(".docx"):
    resume_text = extract_text_from_docx(path)
  else:
    print("❌ Неподдерживаемый формат.")
    # SystemExit instead of exit(): exit() is a helper installed by the
    # site module and is not guaranteed to exist. The non-zero status lets
    # calling scripts detect the failure.
    raise SystemExit(1)

  result = analyze_resume(resume_text)

  print("\n📋 Результат анализа:")
  for key, value in result.items():
    print(f"{key}: {value}")

Report Page