"""Automatic resume analyzer."""
import re
import spacy
from docx import Document
import PyPDF2
# Load the small English spaCy pipeline once, at import time.
# NOTE(review): `nlp` is not referenced by any function visible in this
# part of the file -- confirm it is used elsewhere before removing it.
nlp = spacy.load("en_core_web_sm")
# Patterns for contact details
# E-mail: local part, "@", domain label, ".", then the rest of the domain.
# The final character class also admits "." and "-", so a sentence-ending
# period directly after an address can be captured as part of the match.
EMAIL_REGEX = r'[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+'
# Phone: optional country code (group 1) and optional area code (group 2),
# followed by three digit runs (3, 2-4 and 2-4 digits) that may be separated
# by whitespace, "." or "-".
# NOTE: because the pattern contains capture groups, re.findall() returns
# tuples of those two groups, not the full matched number.
PHONE_REGEX = r'(\+?\d{1,3})?\s?(\(?\d{3,4}\)?[\s.-]?)?\d{3}[\s.-]?\d{2,4}[\s.-]?\d{2,4}'
# Skill keywords to look for; all lower-case because they are compared
# against lower-cased resume text.
SKILLS = ["python", "sql", "docker", "tensorflow", "pandas", "linux", "git", "rest", "flask"]
def extract_text_from_pdf(file_path):
    """Return the text of every page of a PDF, one page per line block.

    Args:
        file_path: Path to the PDF file to read.

    Returns:
        The extracted page texts joined with a newline. Pages that yield
        no text contribute an empty string.
    """
    with open(file_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # Extract while the file is still open. A falsy result (no text on
        # the page) is treated as an empty string so the join cannot fail.
        page_texts = [page.extract_text() or "" for page in reader.pages]
    # Separate pages with a newline so the last token of one page does not
    # fuse with the first token of the next (which would break the
    # word-boundary based matching done later), and to mirror how the
    # .docx extractor joins paragraphs.
    return "\n".join(page_texts)
def extract_text_from_docx(file_path):
    """Return the text of all paragraphs of a .docx file, newline-separated.

    Args:
        file_path: Path to the .docx file to read.

    Returns:
        The paragraph texts joined with a newline character.
    """
    paragraphs = Document(file_path).paragraphs
    return '\n'.join(paragraph.text for paragraph in paragraphs)
def extract_contacts(text):
    """Extract unique e-mail addresses and phone numbers from *text*.

    Args:
        text: Plain text to scan.

    Returns:
        A ``(emails, phones)`` tuple of lists. Each list is de-duplicated
        and keeps the order in which the values first appear in *text*.
    """
    # EMAIL_REGEX allows "." at the end of the domain, so a sentence-ending
    # period may be captured; strip it from the address.
    emails = [email.rstrip(".") for email in re.findall(EMAIL_REGEX, text)]
    # PHONE_REGEX contains capture groups, so re.findall() would return only
    # the (country code, area code) tuples and drop the remaining digits.
    # Use the whole match instead.
    phones = [
        match.group(0).strip() for match in re.finditer(PHONE_REGEX, text)
    ]
    # dict.fromkeys() removes duplicates while keeping a deterministic,
    # first-seen order (a plain set() gives an arbitrary order).
    return list(dict.fromkeys(emails)), list(dict.fromkeys(phones))
def extract_skills(text, skills=None):
    """Return the skills that occur in *text* as whole tokens.

    Matching is case-insensitive and requires that the skill is not
    embedded in a longer word, so "rest" is not reported for "interest"
    and "git" is not reported for "digital".

    Args:
        text: Plain text to scan.
        skills: Optional iterable of skill names to look for. Defaults to
            the module-level ``SKILLS`` list.

    Returns:
        The matching skills, in the order they appear in *skills*.
    """
    if skills is None:
        skills = SKILLS
    lowered = text.lower()
    found = []
    for skill in skills:
        # Look-arounds instead of \b so skills that start or end with a
        # non-word character are still delimited correctly.
        pattern = r'(?<!\w)' + re.escape(skill.lower()) + r'(?!\w)'
        if re.search(pattern, lowered):
            found.append(skill)
    return found
def estimate_experience_years(text):
    """Estimate years of experience from four-digit years found in *text*.

    Looks for standalone years of the form 20xx (e.g. "2019", "2021") and
    returns the span between the latest and the earliest one.

    Args:
        text: Plain text to scan.

    Returns:
        The difference between the largest and smallest year found, or 0
        when no such year is present.
    """
    distinct_years = {int(year) for year in re.findall(r'\b(20\d{2})\b', text)}
    if not distinct_years:
        return 0
    return max(distinct_years) - min(distinct_years)
def analyze_resume(text):
    """Run all extractors on *text* and collect the results in a dict.

    Args:
        text: Plain resume text.

    Returns:
        A dict with the keys "emails", "phones", "skills_found" and
        "estimated_experience_years".
    """
    contact_emails, contact_phones = extract_contacts(text)
    report = {"emails": contact_emails, "phones": contact_phones}
    report["skills_found"] = extract_skills(text)
    report["estimated_experience_years"] = estimate_experience_years(text)
    return report
if __name__ == "__main__":
    # Interactive entry point: ask for a resume path, extract its text
    # according to the file extension, and print the analysis.
    path = input("Введите путь к резюме (.pdf или .docx): ").strip()
    # Compare the extension case-insensitively so "CV.PDF" is accepted too.
    lowered_path = path.lower()
    if lowered_path.endswith(".pdf"):
        resume_text = extract_text_from_pdf(path)
    elif lowered_path.endswith(".docx"):
        resume_text = extract_text_from_docx(path)
    else:
        print("❌ Неподдерживаемый формат.")
        # exit() is injected by the `site` module and is not guaranteed to
        # exist; raise SystemExit directly and report failure with a
        # non-zero status.
        raise SystemExit(1)
    result = analyze_resume(resume_text)
    print("\n📋 Результат анализа:")
    for key, value in result.items():
        print(f"{key}: {value}")