НУЖНА ПОМОЩЬ: УВЕЛИЧИТЬ ПРОЦЕНТ ТОЧНОСТИ С 82–83 ДО 85–87. За помощь скину деньги на карту.

Oleg83838

Новичок
Пользователь
Мар 17, 2023
1
0
1
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
import datetime
import pickle
# Load the labelled training table and the unlabelled test table.
df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

# Columns that are not used as model features; the same list is reused
# later for the test table.
columns_to_del = ['last_seen','occupation_name','life_main','people_main','id']
df = df.drop(columns=columns_to_del)
#print(df['city'].value_counts())

def make_city(city):
    """Encode a city name as an integer category.

    Returns 1 for 'Moscow', 2 for 'Saint Petersburg' and 0 for any other
    value (including missing values).
    """
    known_cities = ('Moscow', 'Saint Petersburg')
    for code, name in enumerate(known_cities, start=1):
        if city == name:
            return code
    return 0
def make_agee(age):
    """Binarize an age: 1 when ``age`` is 25 or more, otherwise 0."""
    return 1 if age >= 25 else 0
def make_age(bdate, current_year=2023):
    """Derive an approximate age in years from a dot-separated birth date.

    Args:
        bdate: Birth date string whose third dot-separated field is the
            year, e.g. ``'15.4.1990'``.
        current_year: Year the age is measured against. Defaults to 2023,
            the value that used to be hard-coded, so existing single-argument
            calls (e.g. ``Series.apply(make_age)``) behave as before.

    Returns:
        ``current_year`` minus the birth year, or -1 when the date has no
        year component (fewer than three dot-separated fields).
    """
    parts = bdate.split('.')
    if len(parts) > 2:
        return current_year - int(parts[2])
    # No year in the date (e.g. '15.4'): signal "unknown" with -1.
    return -1
def make_occupation(occupation_type):
    """Encode the occupation type as an integer category.

    Returns 1 for 'university', 2 for 'work' and 0 for anything else
    (including the literal string 'NaN' and missing values).
    """
    encoded_types = (('university', 1), ('work', 2))
    if occupation_type != 'NaN':
        for label, code in encoded_types:
            if occupation_type == label:
                return code
    return 0
def make_relation(relation):
    """Encode the ``relation`` field (not implemented).

    The body of this function is missing from the file; a ``def`` line with
    no body is a syntax error that prevents the whole script from running.
    The function is stubbed out so the module parses. Nothing in the visible
    script calls it.

    Raises:
        NotImplementedError: always, until a real encoding is written.
    """
    raise NotImplementedError('make_relation has no implementation')
def make_education_status(education_status):
    """Encode an education status label as an integer code.

    The code is the 1-based position of the label in the table below;
    any value not listed there (including missing values) maps to 0.
    """
    ordered_statuses = (
        'Undergraduate applicant',
        "Student (Bachelor's)",
        "Alumnus (Bachelor's)",
        "Student (Master's)",
        "Alumnus (Master's)",
        'Student (Specialist)',
        'Alumnus (Specialist)',
        'PhD',
        'Candidate of Sciences',
    )
    for code, label in enumerate(ordered_statuses, start=1):
        if education_status == label:
            return code
    return 0
# Replace missing education_form values with 0. The result is assigned back
# to the column: fillna(..., inplace=True) on df[col] operates on an
# intermediate object and is not guaranteed to update df itself.
df['education_form'] = df['education_form'].fillna(0)
def edu_form(education_form):
    """Encode the form of education as an integer code.

    Returns 1 for 'Full-time', 2 for 'Distance Learning', 3 for 'Part-time'
    and 0 for any other value.
    """
    ordered_forms = ('Full-time', 'Distance Learning', 'Part-time')
    for code, label in enumerate(ordered_forms, start=1):
        if education_form == label:
            return code
    return 0
# Convert the education_form labels to their integer codes.
# NOTE: edu_form returns 0 for anything that is not one of its known labels,
# so it must only be run on the raw labels, not on already-encoded values.
df['education_form'] = df['education_form'].map(edu_form)
def make_eng(langs):
    """Return 1 if 'English' is one of the ';'-separated languages, else 0.

    Args:
        langs: Languages as a single ';'-separated string, e.g.
            ``'Русский;English'``. Non-string values (NaN for a missing cell,
            or a numeric fill value such as 0) are accepted and treated as
            "no English" instead of raising ``AttributeError`` on ``.split``.

    Returns:
        1 when an entry equal to 'English' is present, otherwise 0.
    """
    if not isinstance(langs, str):
        return 0
    return 1 if 'English' in langs.split(';') else 0



def make_expirience(row):
    """Compute years of work experience from a row's career fields.

    ``row`` must support lookup by the keys 'career_start' and 'career_end'.
    The string 'False' marks an absent value:

    * no career start -> 0;
    * a start but no end -> current calendar year minus the start year;
    * both present -> absolute difference between end and start years.
    """
    started = row['career_start']
    if started == 'False':
        return 0

    ended = row['career_end']
    if ended == 'False':
        # Career still ongoing: measure up to the current year.
        this_year = datetime.date.today().year
        return this_year - int(started)

    return abs(int(ended) - int(started))
# --- Feature engineering for the training table ---

# Fill gaps by assigning the result back to the column: calling
# fillna(..., inplace=True) on df[col] operates on an intermediate object
# and is not guaranteed to update df itself.
# Missing birth dates get the most frequent birth date.
df['bdate'] = df['bdate'].fillna(df['bdate'].value_counts().index[0])
df['has_photo'] = df['has_photo'].fillna(0)
df['has_mobile'] = df['has_mobile'].fillna(0)
# Missing language lists become an empty string so make_eng can split them.
df['langs'] = df['langs'].fillna('')

# Birth date -> age in years -> 0/1 flag for "25 or older".
df['age'] = df['bdate'].apply(make_age)
df.drop('bdate', axis=1, inplace=True)
df['age'] = df['age'].apply(make_agee)

df['occupation_type'] = df['occupation_type'].apply(make_occupation)
df['langs'] = df['langs'].apply(make_eng)
df['city'] = df['city'].apply(make_city)

# Experience needs both career columns, so it is computed row-wise before
# those columns are dropped.
df['expirience'] = df.apply(make_expirience, axis=1)
df.drop(['career_start', 'career_end'], axis=1, inplace=True)

df['education_status'] = df['education_status'].apply(make_education_status)
# education_form is NOT encoded here: it is already converted to integer
# codes right after edu_form is defined. Running edu_form again on those
# codes would map every value to 0 and erase the feature from the training
# data (while the test data would still carry it).


# Separate the feature matrix from the target column.
x = df.drop('result', axis=1)
y = df['result']

# Hold out 30% of the rows for validation. The seed is fixed so that the
# printed accuracy is comparable between runs; without it every run uses a
# different split and small changes in the score cannot be attributed to
# changes in the features or the model.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42)

# KNN is distance based, so features are standardized. The scaler is fitted
# on the training part only and then applied to the validation part.
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

classifier = KNeighborsClassifier(n_neighbors=199)
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)
# --- Feature engineering for the test table (mirrors the training steps) ---

# Fill gaps by assigning the result back to the column: calling
# fillna(..., inplace=True) on test_df[col] operates on an intermediate
# object and is not guaranteed to update test_df itself.
# Missing birth dates get the most frequent birth date of the test table.
test_df['bdate'] = test_df['bdate'].fillna(
    test_df['bdate'].value_counts().index[0])
test_df['has_photo'] = test_df['has_photo'].fillna(0)
test_df['has_mobile'] = test_df['has_mobile'].fillna(0)
# Fill missing languages with an empty string, not 0: make_eng calls
# .split(';') on the value, which fails on an integer.
test_df['langs'] = test_df['langs'].fillna('')
test_df['education_form'] = test_df['education_form'].fillna(0)

test_df.drop(columns_to_del, axis=1, inplace=True)

# Birth date -> age in years -> 0/1 flag for "25 or older".
test_df['age'] = test_df['bdate'].apply(make_age)
test_df.drop('bdate', axis=1, inplace=True)
test_df['age'] = test_df['age'].apply(make_agee)

test_df['city'] = test_df['city'].apply(make_city)
test_df['education_form'] = test_df['education_form'].apply(edu_form)
test_df['occupation_type'] = test_df['occupation_type'].apply(make_occupation)
test_df['langs'] = test_df['langs'].apply(make_eng)
test_df['expirience'] = test_df.apply(make_expirience, axis=1)
test_df.drop(['career_start', 'career_end'], axis=1, inplace=True)
test_df['education_status'] = test_df['education_status'].apply(
    make_education_status)

# The classifier was trained on standardized features, so the test features
# must go through the same fitted scaler before prediction. Columns are
# selected in the training order (x.columns) so they line up with what the
# scaler and the classifier were fitted on.
test_df['result'] = classifier.predict(sc.transform(test_df[x.columns]))

# DataFrame.info() prints its report itself and returns None, so it is
# called directly instead of being wrapped in print().
test_df.info()
print(accuracy_score(y_test, y_pred) * 100)
print(df['age'].value_counts())
 

regnor

Модератор
Команда форума
Модератор
Июл 7, 2020
2 581
457
83
Ничего не понятно...
И код вставьте как код, соблюдая отступы: https://itfy.org/threads/kak-ne-nado-zadavat-voprosy.3450/#post-13566
 

Форум IT Специалистов