whu-textual-analysis/exam/original-files/Problem_1_template.py

# -*- coding: utf-8 -*-
"""
Created on Sat Jul 30 15:20:32 2022

@author: Alexander Hillert, Goethe University
"""

# import packages
import re

# define working directory
# adjust it to your computer
directory = "YOUR DIRECTORY"


# =============================================================================
# Part A: Creating an Overview File on the Call Participants
# =============================================================================

# Open the overview CSV that Part A fills with one row per earnings call.
# The handle is used inside the call loop and closed at the end of the script.
output_csv_file=open(directory+'Problem_1_Overview_Calls.csv','w',encoding="utf-8")

# Build the header row (variable names) in one string. The columns are:
# 1) Call-ID
# 2) Filename
# 3) Fiscal Quarter
# 4) Fiscal Year
# 5) Date of the call in the format YYYYMMDD
# 6) Time of the call, e.g., 05:00 PM GMT
# 7) number of non-corporate call participants
# 8) the names of all corporate participants and their positions -> each item
#    is written in a separate column
header_line='ID;Filename;Fiscal_Quarter;Fiscal_Year;Date;Time;#Analysts'
# There can be up to 4 corporate participants -> Name_1/Position_1 ... Name_4/Position_4
header_line+=''.join(';Name_'+str(slot)+';Position_'+str(slot) for slot in range(1,5))
output_csv_file.write(header_line+'\n')

# Open the overview file "Overview_File_Problem_1.csv" that lists the earnings calls.
# The with-statement releases the handle as soon as the content has been read,
# so it is neither held open while the calls are processed nor leaked if an
# error occurs later (a later overview_file.close() is then a harmless no-op).
with open(directory+'Overview_File_Problem_1.csv','r',encoding="utf-8") as overview_file:
    overview_text=overview_file.read()
# One list element per line of the overview file.
# Empty lines (e.g., the trailing one) are dropped in a single pass instead of
# repeatedly scanning the list with count()/remove().
list_earnings_calls=[line for line in overview_text.split("\n") if line!=""]


# iterate all earnings conference calls
# the loop starts at index 1, i.e., the first line of the overview file
# (presumably the line with the variable names) is skipped
for i in range(1, len(list_earnings_calls)):
    
    # reset the variables
    fiscal_quarter=""
    fiscal_year=""
    date=""
    # date_formatted is only assigned further below when a date is found in
    # the header. Resetting it here avoids a NameError for the first call
    # without a date match and prevents the date of the previous call from
    # being written to the output file.
    date_formatted=""
    time=""
    
    # we split the entire transcripts into three parts
    # its header
    header_text=""
    # the list of non-corporate participants
    participants_text=""
    # the list of corporate participants
    corporates_text=""
    
    # the number of analysts joining the call
    number_analysts=0
    
    # variables for manager name and position
    manager_name=""
    manager_position=""
    manager_position_edited=""
    
    # a list of manager names for part b)
    manager_name_list=[]
    
    # get the ID and the filename of each earnings call
    # each line of the overview file is split at ";": first field -> call ID,
    # second field -> filename of the transcript
    call_information=list_earnings_calls[i].split(";")
    call_id=call_information[0]
    filename=call_information[1]
    
    # read the call transcript
    # the with-statement closes the file right after reading, so the handle is
    # not kept open (or leaked on an error) during the processing below
    with open(directory+'Problem_1_Sample/'+filename,'r',encoding="utf-8") as call_file:
        call_text=call_file.read()
    
    # Get information on the call from the header of the transcript.
    # For example:
    # Q1 2013 Bank of America Corporation Earnings Conference Call
    # 04/17/2013 08:30 AM GMT
    
    # the header ends where the list of corporate participants starts
    match_corporates=re.search(TO BE COMPLETED,call_text)
    if match_corporates:
        # header_text stays "" if the boundary is not found
        header_text=call_text[TO BE COMPLETED]

    
    # get the fiscal quarter and year from the header text
    # .group(0) returns the complete text matched by the regex
    match_fiscal_quarter=re.search(TO BE COMPLETED,header_text)
    if match_fiscal_quarter:
        fiscal_quarter=match_fiscal_quarter.group(0)
    match_fiscal_year=re.search(TO BE COMPLETED,header_text)
    if match_fiscal_year:
        fiscal_year=match_fiscal_year.group(0)

    # get date and time of the call
    # date
    match_date=re.search(TO BE COMPLETED,header_text)
    if match_date:
        date=match_date.group(0)
        # the date in the output file should be formatted as YYYYMMDD
        # so, you need to rearrange the date text
        # (the example header above shows the date as MM/DD/YYYY)
        year=date[TO BE COMPLETED]
        month=date[TO BE COMPLETED]
        day=date[TO BE COMPLETED]
        # date_formatted is assigned only inside this branch, i.e., only when
        # a date has been found
        date_formatted=year+month+day
    # time
    match_time=re.search(TO BE COMPLETED,header_text)
    if match_time:
        time=match_time.group(0)
    
    
    # count the number of analysts
    # the relevant text part starts with, for example,
    # ================================================================================
    # Conference Call Participiants
    # ================================================================================
    # 
    # * Chris Mutascio
    #   Keefe, Bruyette & Woods - Analyst
    # * Thomas Laturneau
    #   FBR - Analyst
    # NOTE(review): the heading spelling "Participiants" is reproduced from the
    # example as given -- verify against the transcript files before building
    # the search pattern.
    
    # and ends with the beginning of the presentation
    # ================================================================================
    # Presentation
    # --------------------------------------------------------------------------------
    
    match_participants=re.search(TO BE COMPLETED,call_text)
    match_presentation=re.search(TO BE COMPLETED,call_text)
    # if you find both boundaries
    if match_participants and match_presentation:
        # get the text in between
        # (participants_text stays "" if one of the boundaries is missing)
        participants_text=call_text[TO BE COMPLETED]
    
    # split the text of the participants that you have just identified
    # in a way that each element refers to one analyst.
    analyst_list=participants_text.split(TO BE COMPLETED)
    # depending on how you split, you might need re.split()
    
    # check whether you get empty elements and/or elements that do not refer
    # to analysts -> remove them
    while TO BE COMPLETED>0:
        TO BE COMPLETED
        
    # after these steps and checks, the number of analysts is the length of your analyst list
    number_analysts=TO BE COMPLETED
    
    
    # get the names of the corporate participants and their position
    # remember that you already have the beginning of corporate participants
    # (match_corporates, see above at around line 90)
    # the corporate participants come before the list of non-corporate participants
    corporates_text=call_text[TO BE COMPLETED]
    # like before, split this text such that one element refers to one corporate participant
    corporates_list=corporates_text.split(TO BE COMPLETED)
    # check whether you get empty elements and/or elements that do not refer
    # to corporate participants -> remove them
    while TO BE COMPLETED>0:
        TO BE COMPLETED

        
    # write the call information to the output file
    # (the first seven columns of the row; the row is not terminated yet because
    # the corporate participants are appended below)
    output_csv_file.write(str(call_id)+";"+filename+";"+fiscal_quarter+";"+fiscal_year+";"\
                          +date_formatted+";"+time+";"+str(number_analysts))    
    
    # now, we need to add the information on the corporate participants
    # go over all corporate participants
    for j in range(len(corporates_list)):
        # depending on how you split the text of corporate participants,
        # one element of your list could contain the name of the manager in the first
        # line and their position in the second line.
        # ADJUST THE FOLLOWING COMMANDS IF YOU USED A DIFFERENT SPLIT.
        
        # split each element of the list of corporate participants further 
        # into name and position
        manager_entry=corporates_list[j]
        manager_entry_parts=manager_entry.split(TO BE COMPLETED)
        manager_name=manager_entry_parts[TO BE COMPLETED]
        
        # for part b) of the problem it is helpful to have a list of all
        # manager names. With this list, we can identify whether a statement
        # comes from a manager (-> answer) or from an analyst (-> question)
        manager_name_list.append(manager_name)
        
        
        manager_position=manager_entry_parts[TO BE COMPLETED]
        # Like before, the template assumes a very specific type of split here
        # So depending on your approach, you might need to change the commands below.
        # the position is just the text part after " - "
        # For example
        # Bank of America Corporation - CEO
        # the position is "CEO"
        manager_position_edited=re.TO BE COMPLETED

        # write the manager names and positions to the output file
        # (two additional columns per corporate participant)
        output_csv_file.write(";"+manager_name+";"+manager_position_edited)
        
    # terminate the row of this call
    output_csv_file.write("\n")   
    
    
    print("For earnings call "+str(i)+" part a) has been completed.")
    
    # =========================================================================
    # Part B: Extracting the Call Segments
    # =========================================================================
    
    # set variables (reset for every call)
    presentation_text=""
    qanda_text=""
    qanda_list=[]
    question_text=""
    answer_text=""

    
    # identify the presentation
    # the beginning of the presentation has already been identified above
    # (match_presentation, see at around line 140)
    #
    # the presentation ends where the Q and A part begins
    # ================================================================================
    # Questions and Answers
    # --------------------------------------------------------------------------------
    # NOTE(review): match_qanda is used further below (match_qanda.end()) without
    # a check for None -- a transcript without a Q and A heading would raise an
    # AttributeError there.
    match_qanda=re.search(TO BE COMPLETED,call_text)
    presentation_text=call_text[TO BE COMPLETED]
    
    # drop operator statements
    # search for the beginning of an operator statement
    match_operator=re.search(TO BE COMPLETED,presentation_text)
    while match_operator:
        match_operator_start=match_operator.start()
        # search for the end of the operator statement
        # Hint: search only after the beginning of the operator statement
        # Hint 2: remember to keep track of your coordinates (.start() and .end())
        match_operator_end=re.search(TO BE COMPLETED,TO BE COMPLETED)
        
        # keep the text before the operator statement and the text after
        # the approach is similar to removing tables (see Problem 4 and 5 from class)
        presentation_text=presentation_text[TO BE COMPLETED]
        
        # check whether there is another match
        # (the loop ends once no operator statement is left in the text)
        match_operator=re.search(TO BE COMPLETED,presentation_text)

    # sometimes there are technical remarks like "(inaudible)", "(corrected by company after the call)",
    # or "(technical difficulty)" -> drop those
    TO BE COMPLETED
    # there are several ways to approach this editing step (e.g., re.sub())
        
    
    # drop information on the speakers, e.g.,
    # -------------------------------------------------------------------------
    # Deborah Crawford,  Facebook, Inc. - Director of IR    [2]
    # -------------------------------------------------------------------------
    match_speaker=re.search(TO BE COMPLETED,presentation_text)
    while match_speaker:
        # the task is similar to the Operator statement but be careful
        # to only remove the speaker name but NOT the text of the speaker.
        presentation_text=presentation_text TO BE COMPLETED
        # check whether there is another speaker name
        match_speaker=re.search(TO BE COMPLETED,presentation_text)
        
    
    # write the text of the presentation to an output file
    # make sure that the folder "Problem_1_Conference_Call_Segments" exists.
    # The with-statement closes the file automatically, also if writing fails,
    # so no handle is leaked.
    with open(directory+'Problem_1_Conference_Call_Segments/call_'+str(call_id)+'_presentation.txt',"w",encoding='utf-8') as output_file_presentation:
        output_file_presentation.write(presentation_text)


    # -------------------------------------------------------------------------
    # identify questions and answers
    # -------------------------------------------------------------------------
    # you already have the start of the Q&A section (match_qanda, see at around line 235)
    # the Q&A text is everything after the end of the Q&A heading
    qanda_text=call_text[match_qanda.end():]
    
    # the earnings call transcript ends with definitions
    # remove these/keep the text before the definitions
    # the pattern matches a line "Definitions" enclosed by two lines of dashes
    match_definitions=re.search("\n-{1,}\nDefinitions\n-{1,}\n",qanda_text)
    if match_definitions:
        # keep the text before
        qanda_text=qanda_text[TO BE COMPLETED]
        
    # split the Q and A part by speaker
    qanda_list=re.split(TO BE COMPLETED,qanda_text)
    
    # variables to count the number of answers
    answer_counter=1
    # and questions
    question_counter=1
    
    # go over all speakers/statements that you obtained from the previous split
    # you now have to decide whether the speaker is an analyst (-> question)
    # or a corporate participant (-> answer)
    for k in range(TO BE COMPLETED):
        
        # identify the speaker name to check whether it is a corporate participant.
        # For example
        # --------------------------------------------------------------------------------
        # Bruce Thompson,  Bank of America Corporation - CFO    [3]
        # --------------------------------------------------------------------------------
        #
        # NOTE: speaker_text_part is not used below; the split works on qanda_list[k] directly
        speaker_text_part=qanda_list[k]
        # split the text part of the kth speaker
        # into their name and the rest
        # NOTE: re.search() and re.sub() are also nice ways to accomplish the goal
        speaker_text_sub_parts=re.split(TO BE COMPLETED,qanda_list[k])
        # get the name of the speaker from the previous split
        # in the example above, we need to get "Bruce Thompson"
        speaker_name=speaker_text_sub_parts[TO BE COMPLETED]
        # depending on your split, you might need some further editing to
        # get only the name ("Bruce Thompson") without any additional information.
        
        
        # the second part of speaker_text_sub_parts is (probably) the statement
        # of the speaker (again, it depends on your split)
        text=speaker_text_sub_parts[TO BE COMPLETED]
        
        # sometimes there are technical remarks like "(inaudible)", "(corrected by company after the call)",
        # or "(technical difficulty)" -> drop those
        text=TO BE COMPLETED
        # there are several ways to approach this editing step (e.g., re.sub())
        
        # check whether the speaker name is in the manager list from part a) (see at around line 195)
        # the check is an exact string comparison, so speaker_name must look
        # exactly like the names stored in manager_name_list
        if speaker_name in manager_name_list:
            # the name of the speaker is in the list of corporate participants
            # -> it is a management answer
            
            # append the statement with a numbered "Answer_<n>:" label
            answer_text=answer_text+"Answer_"+str(answer_counter)+":\n"+text+"\n"
            answer_counter=answer_counter+1
            
        else:
            # it is either an analyst question or an operator statement
            # be careful to check the condition below. depending on what your
            # speaker names look like, you may need .count() and/or re.search() instead of .startswith()
            if speaker_name.startswith("Operator") or TO BE COMPLETED:
                # operator statements are neither questions nor answers -> skip them
                pass

            else:
                # it is an analyst question
                # append the statement with a numbered "Question_<n>:" label
                question_text=question_text+"Question_"+str(question_counter)+":\n"+text+"\n"
                question_counter=question_counter+1
                         
    # write the texts to output files
    # make sure that the subfolder exists.
    # The with-statements close both files automatically, also if one of the
    # writes fails, so no handle is leaked.
    with open(directory+'Problem_1_Conference_Call_Segments/call_'+str(call_id)+'_answers.txt',"w",encoding='utf-8') as output_file_answers:
        output_file_answers.write(answer_text)
    with open(directory+'Problem_1_Conference_Call_Segments/call_'+str(call_id)+'_questions.txt',"w",encoding='utf-8') as output_file_questions:
        output_file_questions.write(question_text)
    
    # Close the transcript of the current call
    call_file.close() 


# All calls have been processed: release the overview input and the CSV output.
for finished_file in (overview_file,output_csv_file):
    finished_file.close()

print("Problem 1 completed.")
Add the files for the take-home exam 2022-08-05 00:08:32 +02:00
			`# -*- coding: utf-8 -*-`
			`"""`
			`Created on Sat Jul 30 15:20:32 2022`

			`@author: Alexander Hillert, Goethe University`
			`"""`

			`# import packages`
			`import re`

			`# define working directory`
			`# adjust it to your computer`
			`directory = "YOUR DIRECTORY"`


			`# =============================================================================`
			`# Part A: Creating an Overview File on the Call Participants`
			`# =============================================================================`

			`# Create output file`
			`output_csv_file=open(directory+'Problem_1_Overview_Calls.csv','w',encoding="utf-8")`
			`# Write variable names to the first line of the output file`
			`# 1) Call-ID`
			`# 2) Filename`
			`# 3) Fiscal Quarter`
			`# 4) Fiscal Year`
			`# 5) Date of the call in the format YYYYMMDD`
			`# 6) Time of the call, e.g., 05:00 PM GMT`
			`# 7) number of non-corporate call participants`
			`# 8) the names of all corporate participants and their positions -> each item`
			`# should be written in a separate column`
			`output_csv_file.write('ID;Filename;Fiscal_Quarter;Fiscal_Year;Date;Time;\`
			`#Analysts')`
			`# There can be up to 4 corporate participants`
			`for i in range(1,5):`
			`output_csv_file.write(';Name_'+str(i)+';Position_'+str(i))`
			`output_csv_file.write('\n')`

			`# Open the overview file "Overview_File_Problem_1.csv" to call the earnings calls`
			`overview_file=open(directory+'Overview_File_Problem_1.csv','r',encoding="utf-8")`
			`overview_text=overview_file.read()`
			`list_earnings_calls=overview_text.split("\n")`
			`# The last line is empty -> drop it`
			`while list_earnings_calls.count("")>0:`
			`list_earnings_calls.remove("")`



			`# iterate all earnings conference calls`
			`for i in range(1, len(list_earnings_calls)):`

			`# reset the variables`
			`fiscal_quarter=""`
			`fiscal_year=""`
			`date=""`
			`time=""`

			`# we split the entire transcripts into three parts`
			`# its header`
			`header_text=""`
			`# the list of non-corporate participants`
			`participants_text=""`
			`# the list of corporate participants`
			`corporates_text=""`

			`# the number of analysts joining the call`
			`number_analysts=0`

			`# variables for manager name and position`
			`manager_name=""`
			`manager_position=""`
			`manager_position_edited=""`

			`# a list of manager names for part b)`
			`manager_name_list=[]`

			`# get the filename of each earnings call`
			`call_information=list_earnings_calls[i].split(";")`
			`call_id=call_information[0]`
			`filename=call_information[1]`

			`# open the call transcript`
			`call_file=open(directory+'Problem_1_Sample/'+filename,'r',encoding="utf-8")`
			`call_text=call_file.read()`

			`# Get information on the call`
			`# For example:`
			`# Q1 2013 Bank of America Corporation Earnings Conference Call`
			`# 04/17/2013 08:30 AM GMT`

			`# the header ends where the list of corporate participants starts`
			`match_corporates=re.search(TO BE COMPLETED,call_text)`
			`if match_corporates:`
			`header_text=call_text[TO BE COMPLETED]`


			`# get the fiscal quarter and year from the header text`
			`match_fiscal_quarter=re.search(TO BE COMPLETED,header_text)`
			`if match_fiscal_quarter:`
			`fiscal_quarter=match_fiscal_quarter.group(0)`
			`match_fiscal_year=re.search(TO BE COMPLETED,header_text)`
			`if match_fiscal_year:`
			`fiscal_year=match_fiscal_year.group(0)`

			`# get date and time of the call`
			`# date`
			`match_date=re.search(TO BE COMPLETED,header_text)`
			`if match_date:`
			`date=match_date.group(0)`
			`# the date in the output file should be formatted as YYYYMMDD`
			`# so, you need to rearrange the date text`
			`year=date[TO BE COMPLETED]`
			`month=date[TO BE COMPLETED]`
			`day=date[TO BE COMPLETED]`
			`date_formatted=year+month+day`
			`# time`
			`match_time=re.search(TO BE COMPLETED,header_text)`
			`if match_time:`
			`time=match_time.group(0)`


			`# count the number of analysts`
			`# the relevant text part starts with, for example,`
			`# ================================================================================`
			`# Conference Call Participiants`
			`# ================================================================================`
			`#`
			`# * Chris Mutascio`
			`# Keefe, Bruyette & Woods - Analyst`
			`# * Thomas Laturneau`
			`# FBR - Analyst`

			`# and ends with the beginning of the presentation`
			`# ================================================================================`
			`# Presentation`
			`# --------------------------------------------------------------------------------`

			`match_participants=re.search(TO BE COMPLETED,call_text)`
			`match_presentation=re.search(TO BE COMPLETED,call_text)`
			`# if you find both boundaries`
			`if match_participants and match_presentation:`
			`# get the text in between`
			`participants_text=call_text[TO BE COMPLETED]`

			`# split the text of the participants that you have just identified`
			`# in a way that each element refers to one analyst.`
			`analyst_list=participants_text.split(TO BE COMPLETED)`
			`# depending on how you split, you might need re.split()`

			`# check whether you get empty elements and/or elements that do not refer`
			`# to analysts -> remove them`
			`while TO BE COMPLETED>0:`
			`TO BE COMPLETED`

			`# after these steps and checks, the number of analysts is the length of your analyst list`
			`number_analysts=TO BE COMPLETED`


			`# get the names of the corporate participants and their position`
			`# remember that you already have the beginning of corporate participants`
			`# see above at around line 90`
			`# the corporate participants come before the list of non-corporate participants`
			`corporates_text=call_text[TO BE COMPLETED]`
			`# like before, split this text such that one element refers to one corporate participant`
			`corporates_list=corporates_text.split(TO BE COMPLETED)`
			`# check whether you get empty elements and/or elements that do not refer`
			`# to corporate participants -> remove them`
			`while TO BE COMPLETED>0:`
			`TO BE COMPLETED`


			`# write the call information to the output file`
			`output_csv_file.write(str(call_id)+";"+filename+";"+fiscal_quarter+";"+fiscal_year+";"\`
			`+date_formatted+";"+time+";"+str(number_analysts))`

			`# now, we need to add the information on the corporate participants`
			`# go over all corporate participants`
			`for j in range(len(corporates_list)):`
			`# depending on how you split the text of corporate participants,`
			`# one element of your list could contain the name of the manager in the first`
			`# line and their position in the second line.`
			`# ADJUST THE FOLLOWING COMMANDS IF YOU USED A DIFFERENT SPLIT.`

			`# split each element of the list of corporate participants further`
			`# into name and position`
			`manager_entry=corporates_list[j]`
			`manager_entry_parts=manager_entry.split(TO BE COMPLETED)`
			`manager_name=manager_entry_parts[TO BE COMPLETED]`

			`# for part b) of the problem it is helpful to have a list of all`
			`# manager names. With this list, we can identify whether a statement`
			`# comes from a manager (-> answer) or from an analyst (-> question)`
			`manager_name_list.append(manager_name)`


			`manager_position=manager_entry_parts[TO BE COMPLETED]`
			`# Like before, the template assumes a very specific type of split here`
			`# So depending on your approach, you might need to change the commands below.`
			`# the position is just the text part after " - "`
			`# For example`
			`# Bank of America Corporation - CEO`
			`# the position is "CEO"`
			`manager_position_edited=re.TO BE COMPLETED`

			`# write the manager names and positions to the output file`
			`output_csv_file.write(";"+manager_name+";"+manager_position_edited)`

			`output_csv_file.write("\n")`


			`print("For earnings call "+str(i)+" part a) has been completed.")`

			`# =========================================================================`
			`# Part B: Extracting the Call Segments`
			`# =========================================================================`

			`# set variables`
			`presentation_text=""`
			`qanda_text=""`
			`qanda_list=[]`
			`question_text=""`
			`answer_text=""`


			`# identify the presentation`
			`# the begin of the presentation has already been identified above`
			`# see at around line 140`
			`#`
			`# the presentation ends where the Q and A part begins`
			`# ================================================================================`
			`# Questions and Answers`
			`# --------------------------------------------------------------------------------`
			`match_qanda=re.search(TO BE COMPLETED,call_text)`
			`presentation_text=call_text[TO BE COMPLETED]`

			`# drop operator statements`
			`# search for the beginning of an operator statement`
			`match_operator=re.search(TO BE COMPLETED,presentation_text)`
			`while match_operator:`
			`match_operator_start=match_operator.start()`
			`# search for the end of the operator statement`
			`# Hint: search only after the beginning of the operator statement`
			`# Hint 2: remember to keep track of your coordinates (.start() and .end())`
			`match_operator_end=re.search(TO BE COMPLETED,TO BE COMPLETED)`

			`# keep the text before the operator statement and the text after`
			`# the approach is similar to removing tables (see Problem 4 and 5 from class)`
			`presentation_text=presentation_text[TO BE COMPLETED]`

			`# check whether there is another match`
			`match_operator=re.search(TO BE COMPLETED,presentation_text)`

			`# sometimes there are technical remarks like "(inaudible)", "(corrected by company after the call)",`
			`# or "(technical difficulty)" -> drop those`
			`TO BE COMPLETED`
			`# there are several ways to approach this editing step (e.g., re.sub())`


			`# drop information on the speakers, e.g.,`
			`# -------------------------------------------------------------------------`
			`# Deborah Crawford, Facebook, Inc. - Director of IR [2]`
			`# -------------------------------------------------------------------------`
			`match_speaker=re.search(TO BE COMPLETED,presentation_text)`
			`while match_speaker:`
			`# the task is similar to the Operator statement but be careful`
			`# to only remove the speaker name but NOT the text of the speaker.`
			`presentation_text=presentation_text TO BE COMPLETED`
			`# check whether there is another speaker name`
			`match_speaker=re.search(TO BE COMPLETED,presentation_text)`


			`# write the text of the presentation to an output file`
			`# make sure that the folder "Problem_1_Conference_Call_Segments" exists.`
			`output_file_presentation=open(directory+'Problem_1_Conference_Call_Segments/call_'+str(call_id)+'_presentation.txt',"w",encoding='utf-8')`
			`output_file_presentation.write(presentation_text)`

			`# Close file`
			`output_file_presentation.close()`


			`# -------------------------------------------------------------------------`
			`# identify questions and answers`
			`# -------------------------------------------------------------------------`
			`# you already have the start of the Q&A section (see at around lines 235)`
			`qanda_text=call_text[match_qanda.end():]`

			`# the earnings call transcript ends with definitions`
			`# remove these/keep the text before the definitions`
			`match_definitions=re.search("\n-{1,}\nDefinitions\n-{1,}\n",qanda_text)`
			`if match_definitions:`
			`# keep the text before`
			`qanda_text=qanda_text[TO BE COMPLETED]`

			`# split the Q and A part by speaker`
			`qanda_list=re.split(TO BE COMPLETED,qanda_text)`

			`# variables to count the number of answers`
			`answer_counter=1`
			`# and questions`
			`question_counter=1`

			`# go over all speakers/statements that you obtained from the previous split`
			`# you now have to decide whether the speaker is an analyst (-> question)`
			`# or a corporate participant (-> answer)`
			`for k in range(TO BE COMPLETED):`

			`# identify the speaker name to check whether it is a corporate participant.`
			`# For example`
			`# --------------------------------------------------------------------------------`
			`# Bruce Thompson, Bank of America Corporation - CFO [3]`
			`# --------------------------------------------------------------------------------`
			`#`
			`speaker_text_part=qanda_list[k]`
			`# split the text part of the kth speaker`
			`# into his*her name and the rest`
			`# NOTE: re.search() and re.sub() are also nice ways to accomplish the goal`
			`speaker_text_sub_parts=re.split(TO BE COMPLETED,qanda_list[k])`
			`# get the name of the speaker from the previous split`
			`# in the example above, we need to get "Bruce Thompson"`
			`speaker_name=speaker_text_sub_parts[TO BE COMPLETED]`
			`# depending on your split, you might need some further editing to`
			`# get only the name ("Bruce Thompson") without any additional information.`


			`# the second part of speaker_text_sub_parts is (probably) the statement`
			`# of the speaker (again, it depends on your split)`
			`text=speaker_text_sub_parts[TO BE COMPLETED]`

			`# sometimes there are technical remarks like "(inaudible)", "(corrected by company after the call)",`
			`# or "(technical difficulty)" -> drop those`
			`text=TO BE COMPLETED`
			`# there are several ways to approach this editing step (e.g., re.sub())`

			`# check whether the speaker name is in the manager list from part a) (see at around line 195)`
			`if speaker_name in manager_name_list:`
			`# the name of the speaker is in the list of corporate participants`
			`# -> it is a management answer`

			`answer_text=answer_text+"Answer_"+str(answer_counter)+":\n"+text+"\n"`
			`answer_counter=answer_counter+1`

			`else:`
			`# it is either an analyst question or an operator statement`
			`# be careful to check the condition below. depending on how your`
			`# speaker names look like, you may need .count() and/or re.search() instead of .startswith()`
			`if speaker_name.startswith("Operator") or TO BE COMPLETED:`
			`pass`

			`else:`
			`# it is an analyst question`
			`question_text=question_text+"Question_"+str(question_counter)+":\n"+text+"\n"`
			`question_counter=question_counter+1`

			`# write the texts to output files`
			`# make sure that the subfolder exists.`
			`output_file_answers=open(directory+'Problem_1_Conference_Call_Segments/call_'+str(call_id)+'_answers.txt',"w",encoding='utf-8')`
			`output_file_questions=open(directory+'Problem_1_Conference_Call_Segments/call_'+str(call_id)+'_questions.txt',"w",encoding='utf-8')`
			`output_file_answers.write(answer_text)`
			`output_file_questions.write(question_text)`

			`# Close files`
			`output_file_answers.close()`
			`output_file_questions.close()`
			`call_file.close()`


			`# Close files`
			`overview_file.close()`
			`output_csv_file.close()`

			`print("Problem 1 completed.")`