Extracting multiple fields from resume with Python

Question

I am trying to process a lot of resumes in Python. An example resume may look like the one below. Unfortunately, the resumes do not all use the same format. Assuming I have converted all of them into plain text, is there a good way, other than regular expressions, to extract certain fields from a resume with Python?

Name: Someone
Tel: xxx-xxxxxxx
Add: 123 Some Street
Email: [email protected]

Objective/Goal
To obtain a position in...

Education
2004 - 2006: University of XYZ


Work Experience
2006 - 2008: Programmer

Skills
Programming skills: Python, ..

Let's say I am only interested in extracting a few of these fields. How can I get all the text between a field name and the next field? For example, if I just want the name and work experience fields, it should return the following.

NameField = 'Someone'
WorkExpField = '2006 - 2008: Programmer...'

sihrc · Accepted Answer

My "I'm going to try this, but too lazy to make pretty" approach for different format resumes. I'm willing to test it out on different resume formats. Additional advice/opinions welcome!

import string

class Resume(object):
    """Heuristic field extractor for a plain-text resume.

    The file is read into a list of lines. ``parse`` then scores every line
    three ways: how strongly it looks like the "name" line, how strongly it
    looks like the "work experience" line, and how strongly it looks like a
    section heading. For each category only the top-scoring line numbers are
    kept. ``get_name`` and ``get_work`` turn those line numbers into text.
    """

    def __init__(self, filename):
        """Load ``filename`` and parse it immediately."""
        self.filepath = filename
        self.load()
        self.parse()

    def load(self):
        """Read the resume into ``self.content`` as a list of lines."""
        # Text mode keeps the lines as ``str`` on both Python 2 and 3; in
        # binary mode Python 3 yields bytes, which breaks the ``str`` keyword
        # checks in ``checkLine``.
        with open(self.filepath, 'r') as f:
            self.content = f.read().splitlines()

    def checkLine(self, word, value, content, line):
        """Bump ``value[line]`` if ``word`` occurs in ``content`` (case-insensitive).

        ``word`` is expected to be lower-case already. Returns ``value``,
        which is also mutated in place.
        """
        if word in content.lower():
            value = self.addValue(value, line)
        return value

    def addValue(self, value, line):
        """Increment the counter stored under ``line`` in ``value`` and return it."""
        value[line] = value.get(line, 0) + 1
        return value

    def dict_List(self, dict_, content):
        """Return ``(line_num, content[line_num])`` for the top-scoring lines.

        ``dict_`` maps line numbers to scores. Only the entries whose score
        equals the maximum are kept, ordered by line number. An empty
        ``dict_`` gives an empty list.
        """
        if not dict_:
            return []
        best = max(dict_.values())  # computed once, not once per item
        return [(key, content[key]) for key in sorted(dict_)
                if dict_[key] == best]

    def _section_body(self, line_num):
        """Join the lines after ``line_num`` up to the next detected heading.

        If no heading follows ``line_num`` the section runs to the end of
        the file, so the last section no longer raises IndexError.
        """
        end = next((h for h in self.headings if h > line_num),
                   len(self.content))
        return "\n".join(self.content[line_num + 1:end])

    def _single_or_list(self, values):
        """Return the only element of ``values``, or the whole list otherwise."""
        if len(values) == 1:
            return values[0]
        return values

    def get_name(self):
        """Return the detected name.

        A single string is returned when exactly one candidate line was
        found, otherwise a (possibly empty) list of strings.
        """
        names = []
        for line_num, text in self.name:
            if line_num in self.headings:
                # The label stands on a heading line of its own, so the
                # value is the body of that section.
                names.append(self._section_body(line_num))
                continue
            # "Name: Someone" -> ": Someone" -> " Someone" -> "Someone"
            text = text.replace('Name', "")
            # Guard against an empty remainder (a line that is just "Name").
            if text and text[0] not in string.ascii_letters:
                text = text[1:]
            names.append(text.strip())
        return self._single_or_list(names)

    def get_work(self):
        """Return the body of the detected work-experience section(s).

        A single string is returned when exactly one candidate line was
        found, otherwise a (possibly empty) list of strings.
        """
        experience = [self._section_body(line_num)
                      for line_num, _ in self.work]
        return self._single_or_list(experience)

    def parse(self):
        """Score every line and store the best name/work/heading candidates.

        Sets ``self.name`` and ``self.work`` to lists of
        ``(line_num, text)`` tuples and ``self.headings`` to a sorted list
        of line numbers.
        """
        name = {}
        work_experience = {}
        isHeading = {}
        last = len(self.content) - 1
        for line_num, line in enumerate(self.content):
            for keyword in ("name", ":"):
                self.checkLine(keyword, name, line, line_num)
            for keyword in ("work", "experience"):
                self.checkLine(keyword, work_experience, line, line_num)

            # Heading heuristics: each one that holds adds a point.
            # 1. The following line is longer than this one.
            if line_num != last and len(self.content[line_num + 1]) > len(line):
                self.addValue(isHeading, line_num)
            # 2. The preceding line is blank.
            if line_num > 0 and self.content[line_num - 1] == "":
                self.addValue(isHeading, line_num)
            # 3. The line has no leading whitespace.
            if len(line) == len(line.lstrip()):
                self.addValue(isHeading, line_num)
            # A blank line loses a point so it does not win on rules 1-3.
            if line == "":
                isHeading[line_num] = isHeading.get(line_num, 0) - 1

        self.name = self.dict_List(name, self.content)
        self.work = self.dict_List(work_experience, self.content)
        self.headings = [num for num, _ in
                         self.dict_List(isHeading, self.content)]



if __name__ == "__main__":
    # Demo: parse the sample resume and show the two extracted fields.
    resume = Resume(filename='sampleresume.txt')
    # Single-argument print(...) gives the same output on Python 2.7 and 3.
    print(resume.get_name())
    print(resume.get_work())

Yields:

Someone
2006 - 2008: Programmer

Extracting multiple fields from resume with Python

Tags:

python

python-2.7

Cryssie

1 Answers

sihrc

Recent Activity

Donate For Us

Extracting multiple fields from resume with Python

Tags:

python

python-2.7

Cryssie

1 Answers

sihrc

Related questions

Recent Activity

Donate For Us