Добрый день, уважаемые форумчане! Прошу сильно не судить, программированием начал заниматься недавно) Решил написать небольшой парсер для групп в соц сети Вконтакте, но он у меня почему-то висит и не даёт никаких результатов((( Пожалуйста, подскажите, где я мог ошибиться, и как эту ошибку можно исправить, буду очень признателен!!!
Python:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import openpyxl
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#работает с цифровыми и буквенными id
import sys
import vk
import auth_vk
from openpyxl import load_workbook
import pandas as pd
from os.path import join, abspath
import pandas as pd
import requests
import time
from os.path import join, abspath
from openpyxl.utils.dataframe import dataframe_to_rows
print('Здравствуйте!')
print('Операция займёт продолжительное время, пожалуйста, подождите.')
print('Начинаю парсинг групп....')
V = '9.95' # Версия VK API
session = auth_vk.auth_vk_token()
api = vk.API(session, v=V)
def get_members(group_id, fil=""):
return api.groups.getMembers(group_id=group_id, filter=fil)['items']
def get_all_members(group_id):
members = api.groups.getMembers(group_id=group_id)
count = members['count']
offset = 1000
members = members['items']
while offset < count:
members.extend(api.groups.getMembers(group_id=group_id, count=1000, offset=offset)['items'])
offset += 1000
return members
def get_all_members_3(group_id):
# Возвращает членов группы с городами
members = api.groups.getMembers(group_id=group_id, fields='city')
count = members['count']
offset = 1000
members = members['items']
while offset < count:
members.extend(api.groups.getMembers(group_id=group_id, fields='city', count=1000, offset=offset)['items'])
offset += 1000
return members
#чтение таблицы
excel_data_df = pd.read_excel('.\group\groups.xlsx', sheet_name='Sheet')
lst = excel_data_df['ID'].tolist()
data = []
bd_dt_city = list(lst)
dt_city = pd.DataFrame.from_dict(bd_dt_city)
counter_tm_3 = dt_city.values.size
# сбор информации по группе
counter_tm_2 = 0
bd_cityy = []
while counter_tm_2 <= counter_tm_3 - 1:
counter_tm_1_id_gr = dt_city.values[counter_tm_2][0]
try:
def info_group():
if len(sys.argv)>1:
group_id = sys.argv[1]
else:
group_id = counter_tm_1_id_gr # ввод id групп
members = get_all_members_3(group_id) #ввод айди группы
for member in members:
keys = {'title'} #переменная для вывода города
for citys in member: #цикл для вывода группы
for attribute in keys:
try:
print(member['city']['title'])
except KeyError:
pass
data_group = info_gr.json()['response']
return data_group
info_group = info_grup()
name_group = info_group[0]['name']
id_group = info_group[0]['id']
screen_name_group = info_group[0]['screen_name']
quantity_group = info_group[0]['members_count']
except:
pass # прописать алгоритм при битом id
def take_bdd():
offset = 0
count = 1000
value = 'city'
all_date = []
while offset < quantity_group:
response = members.extend(api.groups.getMembers(group_id=group_id, count=1000, offset=offset)['items'])
data = response.json()['response']['items']
offset += 1000
all_date.extend(data)
time.sleep(0.25)
return all_date
def pars_city():
counter_1 = 0
bd_cityy = []
while counter_1 <= quantity_group - 1:
try:
city_name = all_date[counter_1]['city']['title']
except:
city_name = '#'
counter_1 += 1
bd_cityy.append(city_name)
return bd_cityy
bd_city = pars_city()
# обьеденение 2х фреймов данных
dt_all_dataa = pd.DataFrame.from_dict(all_dataa)
dt_bd_city = pd.DataFrame.from_dict(bd_city)
dt = dt_all_dataa.join(dt_bd_city) # обьедение 2х фреймвоф
bd_1 = dt[['id', 'first_name', 'last_name', 0]]
bd = bd_1.rename(columns={0: 'city'}) # переименование столбца
bd['group_id'] = id_group
excel_data_df = pd.read_excel('.\group\groups.xlsx', sheet_name='Sheet')
lst = excel_data_df['ID'].tolist()
tm_bd_1 = [id_group, screen_name_group, quantity_group]
while counter_tm_5 <= counter_bd_city_tm:
city_tm_1 = reading_bd_city_tm[counter_tm_5]
f_tm = bd[bd.city == city_tm_1].reset_index(
drop=True)
counter_tm_4_city = f_tm.values.__len__()
enterGroup = counter_tm_4_city
tm_bd_1.append(enterGroup)
counter_tm_5 += 1
# tm_bd_2 = pd.DataFrame.from_dict(tm_bd_1, orient='index', columns=['id','name','total','Aznakaevo', 'Alkeevo', 'Almetyevsk', 'Arsk', 'Bugulma', 'Buinsk', 'Elabuga', 'Zainsk', 'Zelenodolsk', 'Kukmor', 'Leninogorsk', 'Nizhnekamsk', 'Nurlat', 'Naberezhnye Chelny', 'Kazan'])
# df3 = pd.concat([df3,tm_bd_2], axis=0)
data.append(tm_bd_1)
counter_tm_2 += 1
# df3 = pd.DataFrame({'id':[0],'name':[0],'total':[0],'Aznakaevo':[0], 'Alkeevo':[0], 'Almetyevsk':[0], 'Arsk':[0], 'Bugulma':[0], 'Buinsk':[0], 'Elabuga':[0], 'Zainsk':[0], 'Zelenodolsk':[0], 'Kukmor':[0], 'Leninogorsk':[0], 'Nizhnekamsk':[0], 'Nurlat':[0], 'Naberezhnye Chelny':[0], 'Kazan':[0]})
m_bd_2 = pd.DataFrame.from_dict(data)
m_bd_2.rename(columns={0: 'id', 1: 'sceen_name', 2: 'total', 3: 'Aznakaevo', 4: 'Alkeevo', 5: 'Almetyevsk', 6: 'Arsk', 7: 'Bugulma', 8: 'Buinsk', 9: 'Elabuga', 10: 'Zainsk', 11: 'Zelenodolsk', 12: 'Kukmor', 13: 'Leninogorsk', 14: 'Nizhnekamsk', 15: 'Nurlat', 16: 'Naberezhnye Chelny', 17: 'Kazan'}, inplace=True)
path_result = join('.', 'RESULT=.xlsx')
path_result = abspath(path_result)
# Specify a writer
writerr = pd.ExcelWriter(path_result, engine='xlsxwriter')
m_bd_2.to_excel(writerr, index=False)
writerr.save()
export = open(path_result, "rb")
print('Успешно!')