Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add string parsing #59

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions pyresparser/resume_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,15 @@ def __init__(
'total_experience': None,
}
self.__resume = resume
if not isinstance(self.__resume, io.BytesIO):
ext = os.path.splitext(self.__resume)[1].split('.')[1]

if isinstance(self.__resume, str):
ext = ''
else:
ext = self.__resume.name.split('.')[1]
if not isinstance(self.__resume, io.BytesIO):
ext = os.path.splitext(self.__resume)[1].split('.')[1]
else:
ext = self.__resume.name.split('.')[1]

self.__text_raw = utils.extract_text(self.__resume, '.' + ext)
self.__text = ' '.join(self.__text_raw.split())
self.__nlp = nlp(self.__text)
Expand Down
8 changes: 8 additions & 0 deletions pyresparser/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,10 @@ def extract_text_from_doc(doc_path):
return ' '


def extract_text_from_string(string):
    '''
    Helper function to return the input string unchanged, so that raw
    text can be handled the same way as text extracted from a file

    :param string: text to be returned as-is
    :return: the same object that was passed in as `string`
    '''
    return string

def extract_text(file_path, extension):
    '''
    Wrapper function to detect the file extension and call text
    extraction function accordingly

    :param file_path: path of file of which text is to be extracted;
                      for any extension other than `.pdf`, `.docx`
                      or `.doc` this value is handed to
                      `extract_text_from_string` instead
    :param extension: extension of the file, including the leading
                      dot (compared against `.pdf`, `.docx`, `.doc`)
    :return: extracted text
    '''
    if extension == '.pdf':
        # Every page is prefixed with a single space, exactly as the
        # previous `text += ' ' + page` loop did; join avoids building
        # the result through repeated string concatenation.
        return ''.join(
            ' ' + page for page in extract_text_from_pdf(file_path)
        )
    if extension == '.docx':
        return extract_text_from_docx(file_path)
    if extension == '.doc':
        return extract_text_from_doc(file_path)
    # Fallback for every other extension (including the empty one).
    return extract_text_from_string(file_path)


Expand Down
File renamed without changes.
24 changes: 20 additions & 4 deletions test_name.py → test/test_pyresparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import urllib
from urllib.request import Request, urlopen
from pyresparser import ResumeParser
from pathlib import Path

def get_remote_data():
try:
Expand All @@ -21,9 +22,9 @@ def get_remote_data():
return 'File not found. Please provide correct URL for resume file.'

def get_local_data():
    '''
    Parse the sample resume stored next to this test module

    :return: value of `ResumeParser(...).get_extracted_data()` for
             `fixtures/OmkarResume.pdf`
    '''
    # Build the path from this file's directory so the result does not
    # depend on the current working directory. The earlier bare
    # 'OmkarResume.pdf' parse was overwritten immediately and is gone.
    resume_path = Path(__file__).parent.resolve() / 'fixtures/OmkarResume.pdf'
    return ResumeParser(str(resume_path)).get_extracted_data()

def test_remote_name():
    '''Check the name extracted from the remotely fetched resume'''
    data = get_remote_data()
    # get_remote_data() has a code path that returns an error message
    # string; fail with that message rather than with an opaque
    # TypeError raised by indexing into the string below.
    assert not isinstance(data, str), data
    assert 'Omkar Pathak' == data[0]['name']
Expand All @@ -32,10 +33,25 @@ def test_remote_phone_number():
data = get_remote_data()
assert '8087996634' == data[0]['mobile_number']

def test_local_name():
    '''Check the name extracted from the local sample resume'''
    data = get_local_data()
    assert 'Omkar Pathak' == data['name']


def test_local_skills():
    '''Check that a known skill is extracted from the local sample resume'''
    data = get_local_data()
    assert 'C++' in data['skills']

def test_local_phone_number():
    '''Check the mobile number extracted from the local sample resume'''
    data = get_local_data()
    assert '8087996634' == data['mobile_number']

def test_extract_string():
    '''Check that skills are extracted when raw resume text is passed in'''
    # Plain literals: none of these contain placeholders, so the `f`
    # prefix was dropped. The text itself is unchanged.
    # NOTE(review): '[email protected]' looks like a redaction
    # placeholder rather than a real address - confirm the intended
    # fixture text.
    resume_text = (
        "Joe Bloggs email: [email protected] \n"
        "Professional Experience \n"
        "Microsoft \n Jan 2017 - Mar 2020 \n"
        "Analyst \n"
        "Created monthly Excel and Powerpoint reports highlighting KPIs in a clear and simple format. \n"
        "Used predictive modelling to detect patterns in customer behaviour using Python. \n"
        "Education \n"
        "University of Oxford \n"
        "BSc in Computer Science \n"
    )

    data = ResumeParser(resume_text).get_extracted_data()
    assert 'Excel' in data['skills']