Add the files for the take-home exam

2022-08-05 00:08:32 +02:00 · 2022-08-05 00:08:32 +02:00 · 0d654bda9d
commit 0d654bda9d
parent a37c87d9c8
248 changed files with 102406 additions and 0 deletions
--- a/exam/original-files/Problem_2_template.py
+++ b/exam/original-files/Problem_2_template.py
@ -0,0 +1,149 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Jul 29 10:42:03 2022
+
+@author: Alexander Hillert, Goethe University
+"""
+
+# import packages
+import re
+
+# define working directory
+# adjust it to your computer
+directory = "YOUR DIRECTORY"
+
+
+# Open the dictionary
+# It is the 2018 version of the LM (2011) dictionary.
+file_word_list = open(directory+'LMD_pos_master_dictionary_2018.txt', 'r', encoding="utf-8")
+word_list = file_word_list.read()
+# use a consistent case format
+word_list = TO BE COMPLETED
+# create the list of positive words
+positive_words = TO BE COMPLETED
+
+
+# Create output file according to the exam instructions
+output_csv_file = open(directory+'Problem_2a_Percentage_Positive_Words.csv', 'w', encoding="utf-8")
+# Write variable names to the first line of the output file
+# 1) Call-ID
+# 2) Answer-ID
+# 3) Total number of words in the answer
+# 4) The number of positive words in the answer
+# 5) The percentage of positive words in the answer
+# 6) the text of the answer
+output_csv_file.write('TO BE COMPLETED')
+
+
+# Iterate over the 60 answer files
+for i in range(TO BE COMPLETED):
+    # If you want, you can print the progress of your script
+    print(str(i))
+    
+
+    # Open the ith answer file
+    input_file_answer = open(directory+'/Problem_2_3_Sample_QandA/'+TO BE COMPLETED, 'r',
+    encoding='utf-8', errors='ignore')
+
+    # read the text from the answer file
+    input_text_answer = input_file_answer.read()
+    
+    # use a consistent case format
+    input_text_answer = 
+
+    # Split the text into individual answers
+    answer_list = re.split(TO BE COMPLETED, input_text_answer)
+
+    # Check whether there are empty elements in the answer list
+    # If so, remove them
+    while answer_list.TO BE COMPLETED:
+        TO BE COMPLETED
+    
+    # Iterate over all answers of the ith call
+    for TO BE COMPLETED:
+
+        # Preprocessing steps according to the exam instructions and hints
+        TO BE COMPLETED
+        # re.sub() commands are useful here.
+
+        
+        ######### Beginning of the placeholder #########
+        # Here is the placeholder for the further editing steps that you
+        # should identify by looking at the file from Part b) of this problem.
+        # Having created a first file in Part b), you will see that the measurement
+        # of positive tone can be improved.
+        # Please add these commands here and then return to part 2b)
+        # See also the exam instructions.
+        
+        
+        
+        
+        ######### End of the placeholder ########
+        
+        
+        # Split the text in words
+        list_of_words = TO BE COMPLETED
+        # Check for empty elements
+        TO BE COMPLETED
+        
+        
+        # Determine total number of words
+        word_count = TO BE COMPLETED
+
+        # Reset the number of positive words to zero
+        positive_count = 0
+        
+        # For each positive word, count the number of occurrences
+        for TO BE COMPLETED
+            # Check whether the positive word of interest shows up
+            positive_words_found = TO BE COMPLETED
+
+            # Loughran and McDonald (2011, JF, p.44): "We account for simple negation
+            # only for Fin-Pos words. Simple negation is taken to be observations
+            # of one of six words (no, not, none, neither, never, nobody) occurring
+            # within three words preceding a positive word."
+
+            # While the positive word is found, implement the LM (2011) negation check.
+            while TO BE COMPLETED:
+                # identify the position of the matched positive word in the list of all words
+                position_of_word = TO BE COMPLETED
+                # identify the three words before the positive word 
+                list_negation = TO BE COMPLETED
+                
+                # check whether one of the three words in list_negation is a negation
+                negation_found = TO BE COMPLETED
+
+                if negation_found TO BE COMPLETED:
+                    positive_count = TO BE COMPLETED
+
+                # delete the matched positive word in the original document
+                list_of_words[position_of_word] = TO BE COMPLETED
+                # check whether there are further matches of the positive word of interest
+                positive_words_found = TO BE COMPLETED
+
+        # compute the percentage of positive words adjusted for negations
+        # it could be that the total number of words of an answer is zero.
+        if word_count > 0:
+            percentage_positive = TO BE COMPLETED
+        else:
+            percentage_positive = "NA"
+
+
+        # Remove line breaks of the text that you write to the csv.
+        # Line breaks would mess up your output file.
+        # In addition to line breaks, you may also want to remove extra
+        # whitespaces and tabs at the beginning and end.
+        answer_text_print = re.sub(TO BE COMPLETED)
+        # replace the symbol that you use as delimiter, e.g., semicolon
+        answer_text_print = re.sub(TO BE COMPLETED, answer_text_print)
+
+        # Write the call-ID, answer-ID, total number of words, number of positive words
+        # adjusted for negations, percentage of positive words adjusted for negations,
+        # and the edited answer text to the output file
+        output_csv_file.write(TO BE COMPLETED+'\n')
+
+    # Close files
+    input_file_answer.close()
+
+print("Finished")
+output_csv_file.close()