Skip to content

Commit d729f0e

Browse files
Alexei BezborodovAlexei Bezborodov
Alexei Bezborodov
authored and
Alexei Bezborodov
committed
parse gover nnov
1 parent 663cb81 commit d729f0e

File tree

2 files changed

+61
-0
lines changed

2 files changed

+61
-0
lines changed

parse_government_nnov.py

+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# Copyright © 2022 Alexei Bezborodov. Contacts: <[email protected]>
4+
# License: Public domain: http://unlicense.org/
5+
6+
from bs4 import BeautifulSoup
7+
from urllib2 import urlopen
8+
import ssl
9+
10+
#import os; import locale; os.environ["PYTHONIOENCODING"] = "utf-8"; myLocale=locale.setlocale(category=locale.LC_ALL, locale="ru_RU.UTF-8");
11+
12+
base_url = "https://government-nnov.ru/"
13+
url = base_url + "events/interview"
14+
15+
context_ssl = ssl._create_unverified_context()
16+
html_gover_doc = urlopen(url, context=context_ssl).read()
17+
18+
soup = BeautifulSoup(html_gover_doc, "html.parser", from_encoding="utf-8")
19+
20+
titles = soup.find("div", class_ = "tiles tile-list")
21+
articles = titles.find_all("dl")
22+
num_art = 0
23+
for article in articles:
24+
num_art += 1
25+
ref = article.find("a")
26+
if ref != None:
27+
#print(base_url + ref['href'])
28+
art_url = base_url + ref['href']
29+
html_art_doc = urlopen(art_url, context=context_ssl).read()
30+
soup_article = BeautifulSoup(html_art_doc, "html.parser", from_encoding="utf-8")
31+
content = soup_article.find("div", class_ = "content")
32+
33+
header = content.find("h1", class_="h1-reduced");
34+
lead = content.find("p", class_="lead");
35+
36+
print(num_art, ":")
37+
print(header.string.encode('utf-8', errors='ignore'))
38+
print()
39+
print(num_art, ":")
40+
print(lead.string.encode('utf-8', errors='ignore'))
41+
print('----------------------------------------------')

parse_government_nnov_request.py

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# Copyright © 2022 Alexei Bezborodov. Contacts: <[email protected]>
4+
# License: Public domain: http://unlicense.org/
5+
6+
from bs4 import BeautifulSoup
7+
import requests
8+
9+
# Print the href of the first link inside every <dl> tile on the
# government-nnov.ru interview listing page.
base_url = "https://government-nnov.ru/"
url = base_url + "events/interview"

# NOTE(security): verify=False disables TLS certificate verification (and
# makes urllib3 emit InsecureRequestWarning). Kept as in the original;
# prefer verify="/path/to/ca-bundle.pem" once the proper CA chain is known.
# The timeout stops the script from hanging forever on a stalled connection.
response = requests.get(url, verify=False, timeout=30)
# Fail loudly on HTTP errors instead of parsing an error page.
response.raise_for_status()

soup = BeautifulSoup(response.text, "html.parser")

tiles = soup.find("div", class_="tiles tile-list")
# An unexpected page layout yields no articles instead of an AttributeError.
articles = tiles.find_all("dl") if tiles is not None else []
for article in articles:
    # href=True skips anchors without an href, avoiding a KeyError below.
    ref = article.find("a", href=True)
    if ref is not None:
        print(ref["href"])

0 commit comments

Comments
 (0)