#!/usr/bin/python3

import pickle

# Load the data.
#
# The path to the wooldridge pickle file will most likely need to be adapted
# to the local machine.
#
# NOTE(review): unpickling can execute arbitrary code; only load this file if
# it comes from a trusted source.

# The context manager guarantees the file handle is closed even when
# pickle.load raises (the previous open/close pair leaked it in that case).
with open('C:/Users/claire.loupias/Desktop/wooldridge23.pkl', 'rb') as fid:
    alldatasets = pickle.load(fid)

import numpy as np
import statsmodels.formula.api as smf

# Dataset required for exercise 3.2.
dataset = alldatasets['ceosal2']

#
# Question 1.
#

# Add the natural log of each level variable as a new column; the mapping
# goes from the new column name to the column it is computed from.
log_columns = {
    'logged_salary': 'salary',
    'logged_sales': 'sales',
    'logged_mktval': 'mktval',
}
for logged_name, raw_name in log_columns.items():
    dataset[logged_name] = np.log(dataset[raw_name])

# Regress log(salary) on log(sales) and log(mktval), then print the summary.
model = smf.ols('logged_salary ~ logged_sales + logged_mktval', data=dataset)
results = model.fit()
print(results.summary())

#
# Question 2.
#

# Same regression as question 1 with `profits` added as a regressor.
formula_q2 = 'logged_salary ~ logged_sales + logged_mktval + profits'
model = smf.ols(formula_q2, data=dataset)
results = model.fit()
print(results.summary())

#
# Question 3.
#

# Same regression as question 2 with `ceoten` added as a regressor.
formula_q3 = 'logged_salary ~ logged_sales + logged_mktval + profits + ceoten'
model = smf.ols(formula_q3, data=dataset)
results = model.fit()
print(results.summary())

#
# Question 4.
#

from scipy.stats import pearsonr

# Pearson correlation between the logged market value and profits.
log_market_value = dataset['logged_mktval']
profits = dataset['profits']

# pearsonr returns a pair; only the first element (the correlation
# coefficient) is reported, the second is discarded.
corr, _ = pearsonr(log_market_value, profits)

print("La corrélation entre log(mktval) et profits est : ", corr)

