import requests
import csv
from bs4 import BeautifulSoup
import re
filename = 'links.txt'  # input file: one catalogue URL per line


def clearing_table(table):
    """Return *table*'s HTML with the stock/price/cart rows removed.

    Works on the string form of the element, one markup line at a time:
    any line whose lowercased text matches 'наличие' (stock), 'цена'
    (price) or 'корзину' (cart) is dropped.
    """
    kept_lines = []
    for raw_line in str(table).split('\n'):
        # re.search suffices: we only care whether a match exists,
        # not how many (original used re.findall for the same truth test).
        if not re.search(r'наличие|цена|корзину', raw_line.lower()):
            kept_lines.append(raw_line)
    return '\n'.join(kept_lines)


def _strip_elements(soup, tag, missing_message):
    """Remove every <tag> element from *soup* in place.

    Prints *missing_message* when the page contains none of them
    (keeps the original script's console output).
    """
    elements = soup.find_all(tag)
    if not elements:
        print(missing_message)
        return
    for element in elements:
        element.extract()


def _scrape_page(websitelink):
    """Download one page; return (last product name, list of product rows).

    Each row is [name, vendor, cleaned table HTML].  Returns (None, [])
    when the page has no product blocks.
    """
    response = requests.get(websitelink)
    print(response.status_code)
    soup = BeautifulSoup(response.text, "html.parser")

    # Strip noise elements before extracting product data.
    _strip_elements(soup, 'script', 'Скриптов не найдено')
    _strip_elements(soup, 'form', 'Форм не найдено')

    all_products = []
    last_name = None
    for product in soup.find_all("div", {"class": "site-wrap-in"}):
        name = product.find("h1", {"class": "product-name-title"}).text
        # NOTE(review): vendor is looked up on the whole page, not inside
        # the product card — original behavior kept; confirm intentional.
        vendor = soup.find('div', class_='card-box cls').find('a').text
        table = product.find("table", {"class": "card-table"})
        all_products.append([name, vendor, clearing_table(table)])
        last_name = name
    return last_name, all_products


def _write_csv(name, products):
    """Write *products* to '<name>.csv', one field per line.

    Slashes and double quotes in *name* are replaced so the result is a
    valid file name.
    """
    safe_name = name.replace('/', '-').replace('"', '-')
    # newline='' is the documented way to open files for the csv module;
    # 'w+' was unnecessary (nothing is read back).
    with open(safe_name + ".csv", "w", encoding='utf-8', newline='') as f:
        writer = csv.writer(f, delimiter='\n')
        writer.writerows(products)


def main():
    """Scrape every link listed in *filename*; dump each page to a CSV."""
    with open(filename, "r") as fp:
        lines = fp.readlines()
    for line in lines:
        print(line)
        websitelink = line.strip()
        if not websitelink:
            continue  # skip blank lines instead of requesting ''
        name, products = _scrape_page(websitelink)
        # Original bug: the output file was opened unconditionally with the
        # last product's name, raising NameError on pages with no products.
        if products:
            _write_csv(name, products)


if __name__ == "__main__":
    main()