-

   rss_rss_hh_new

 - e-mail

 

 -

 LiveInternet.ru:
: 17.03.2011
:
:
: 51

:


[ ] ( )

, 17 2017 . 19:03 +
- , . , , , , , , ?

:
, ,
- . ! . - (, ), - (, sqlite) - -, . , , .

, . 4543 . , , , .

, id, url:

https://www.mos.ru/otvet-stroitelstvo/itogi-golosovaniya-zhitelej-po-proektu-programmy-renovacii/?u=121

, , , - , , , , ( , , ) (? ).

? , , . , . , . - , .

- - , , , .

import requests
# Download the public voting-results page for house 121 and dump the raw body.
page_url = 'https://www.mos.ru/otvet-stroitelstvo/itogi-golosovaniya-zhitelej-po-proektu-programmy-renovacii/?u=121'
r = requests.get(page_url)
print(r.text)

, , - . , . , . , , , , , , .



! , , .

, - www.mos.ru/altmosprx/api/1/renovation/house_result/121 JSON :

{
  "execTime": 0.044450044631958,
  "errorMessage": "",
  "result": {
    "table": " ... 
0G6O4

bf659227e8e3

5f9403659209

18.05

18.05

0G6O5

3f12be5cea77

15.05

0G6V1

5acd126a410ea1a842e67066ea68fa8f

24.05

", "total": { "und": 0, "za": 100, "protocol_res": 0, "protiv": 0, "gorod_mark": 0, "protocol_date": null, "house_status": 1, "gorod": 0 }, "und_table": "
", "address": " , 63, 2" }, "request_id": "empty_requestid", "errorCode": 0 }

, , . - apartment-id ( ), , .

-, ? 4543. 0 . -1 , . . 10000 . , 4543. , - , - , : , , , 2, id: 440931. , .

loop' , concurrent.futures. , , - asyncio, . . , , , . , .. . . :

import requests
from concurrent.futures import ProcessPoolExecutor
import concurrent.futures

def check(url):
    """Probe one candidate house id against the renovation API.

    Args:
        url: Candidate house id (not a full URL, despite the name); it is
            appended to the API endpoint path.

    Returns:
        ``str(url)`` when the response body does not contain
        ``'400: Bad Request'``, ``None`` when it does, and
        ``'<url> failed'`` when the HTTP request itself raised, so the id
        can be told apart from rejected ones later.
    """
    endpoint = 'https://www.mos.ru/altmosprx/api/1/renovation/house_result/' + str(url) + '/'
    try:
        r = requests.get(endpoint, timeout=10)
        print(url)
        r.encoding = 'utf-8'
        body = r.text
    except requests.RequestException:
        # Report the failure as a value instead of raising: one bad request
        # must not abort the whole scan. Only network-level errors are
        # handled here so Ctrl-C / SystemExit still stop the worker.
        return str(url) + ' failed'
    if '400: Bad Request' not in body:
        return str(url)
    return None
    
# Fan check() out over worker processes and gather whatever each call returns.
results = []
with ProcessPoolExecutor(max_workers=6) as executor:
    # Map every pending future back to the id it was submitted for.
    # To probe only specific ids, iterate over a list instead of the range.
    future_results = {}
    for url in range(0, 1000000):
        future_results[executor.submit(check, url)] = url
    for future in concurrent.futures.as_completed(future_results):
        results.append(future.result())

# check() returns None for rejected ids; keep only truthy entries (or a literal 0).
results[:] = [item for item in results if item or item == 0]

with open('/home/deb/mosres.txt', 'w') as f:
    f.writelines('{}\n'.format(item) for item in results)

6 worker'. , , , - 70 , , 10 . , .
, , , ( ).

, , , .. . sqlite3.

. , , .. , , , .

import sqlite3

# DDL for the single `houses` table: one row per house id holding the address
# parts, the vote counters, a meeting flag and the flat count.
# The backslash continuations keep this a single string literal, so the
# leading whitespace of each continued line is part of the SQL text.
schema = "CREATE TABLE `houses` (\
	`id`	INTEGER PRIMARY KEY,\
      `street` TEXT NOT NULL ,\
	`house_nbr`	TEXT NOT NULL,\
 	`house_additional`	TEXT,\
	`total_votes`	INTEGER,\
	`total_za`	INTEGER,\
	`meeting`	INTEGER DEFAULT '0',\
      `flats` INTEGER\
);"

# Create the table in renovation.db (path relative to the working directory).
# Plain CREATE TABLE: running this against a database that already has the
# table raises sqlite3.OperationalError.
conn = sqlite3.connect('renovation.db')
cur = conn.cursor()
db = cur.execute(schema)
conn.commit()
conn.close()

, ! , , JSON- ( , JSON) ( , ), , , .

import requests
import re
import sqlite3

def gethouseinfo(idd):
    """Fetch the voting data for one house and insert it into ``houses``.

    Relies on module-level state: the open sqlite cursor ``cur`` and the
    lists ``jsonerror`` / ``unknownerror`` that collect ids which failed.

    Args:
        idd: House id; used in the API URL and as the row's primary key.

    Returns:
        None. Progress is printed; ids whose response is not valid JSON are
        appended to ``jsonerror``, every other failure to ``unknownerror``.
    """
    print(idd)
    urly = 'https://www.mos.ru/altmosprx/api/1/renovation/house_result/' + str(idd) + '/'
    try:
        # Same timeout as check(): a stalled connection lands in
        # unknownerror instead of hanging the whole import.
        r = requests.get(urly, timeout=10)
        r.encoding = 'utf-8'
        results = r.json()
        adress = results['result']['address']
        print(adress)
        # The address is either "street, house, building" or "street, house".
        three_parts = re.match('(.*), (.*), (.*)', adress)
        if three_parts:
            adress_street, adress_house, adress_building = three_parts.groups()
        else:
            adress_street, adress_house = re.match('(.*), (.*)', adress).groups()
            adress_building = ''
        # Each voted flat shows up as one 'apartment-id' marker in the two HTML tables.
        totalvotes = (results['result']['table'].count('apartment-id')
                      + results['result']['und_table'].count('apartment-id'))
        aye = results['result']['total']['za']
        meetinghappened = bool(results['result']['total']['protocol_res'])
        res = cur.execute('SELECT * FROM houses WHERE id=?', [idd]).fetchone()
        if res:
            print('already exists')
        else:
            cur.execute(
                'INSERT INTO houses (id, street, house_nbr, house_additional, total_votes, total_za, meeting) values (?, ?, ?, ?, ?, ?, ?)',
                [idd, adress_street, adress_house, adress_building, totalvotes, aye, meetinghappened],
            )
            print('added ' + str(idd))
    except ValueError:
        # r.json() raises a ValueError subclass when the body is not JSON.
        print('no data for id '+ str(idd))
        jsonerror.append(idd)
    except Exception:
        # Best-effort: record the id and carry on. Exception (not a bare
        # except) so Ctrl-C / SystemExit can still stop the run.
        print('unknown eggog')
        unknownerror.append(idd)

# Failure buckets filled by gethouseinfo(): ids with no JSON vs. any other error.
jsonerror = []
unknownerror = []
with open('/home/deb/mosres.txt') as fc:
    mosres = fc.read().splitlines()

conn = sqlite3.connect('/home/deb/renovation.db')
cur = conn.cursor()

# One API call and at most one INSERT per id listed in mosres.txt.
for house in mosres:
    gethouseinfo(house)

conn.commit()
conn.close()

# Write each non-empty failure bucket to its own file, one id per line.
for path, failed in (
    ('/home/deb/jsonerror.txt', jsonerror),
    ('/home/deb/unknownerror.txt', unknownerror),
):
    if failed:
        with open(path, 'w') as f:
            f.writelines('{}\n'.format(item) for item in failed)

, -. , . (-!) !

( , , , , , 66 , ):



:



, . . , , , . , 2, , , ? ! .

tvoyadres.ru/doma, , , , . ? ( , ), , , , , , , . , ? tvoyadres.ru/ulitsy 200 , , . , - ?

, , : tvoyadres.ru/moskovskaya-oblast/goroda/551



!
http://tvoyadres.ru/js/street.php?region=81&city=&count=2073&_=1499809159225
( ). , , . , , , , . , tvoyadres.ru/js/street.php?region=81&city=&count=2073 . , , , 100 . .

0 . 1 . 2 , . 100 100 . 200 100 . 2073 2173? , , . 2174?

Critical Error

You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '-1, 100' at line 1

-. , , count LIMIT SELECT-, 100 , , , count 2173. , sql- , , , . . . , .

, :

def getstreets(num):
    """Return the ``'string'`` field of the JSON served at ``url + str(num)``.

    ``url`` is a module-level prefix that is defined outside this snippet.
    """
    payload = requests.get(url + str(num)).json()
    return payload['string']

# Walk the street list in steps of 100 and join the returned fragments.
# Built in one expression so `totalres` is always defined here; the original
# `+=` loop needed it to be initialised beforehand.
totalres = ''.join(getstreets(i) for i in range(1, 2272, 100))


. html-. , - , .. - , .

# sids = streets with ids: (street id, link text) pairs taken from the anchors.
# Raw string so the regex escapes are not treated as (invalid) string escapes.
sids = re.findall(r'ulitsy\/(.*?)">(.*?)<\/a>', totalres)
# Link text -> street id; a repeated link text keeps the last id seen.
streetsdict = {name: street_id for street_id, name in sids}

, html. . BeautifulSoup.

, , , . , .

import re
from bs4 import BeautifulSoup

def gethouses(num):
    """Scrape the house links from one street page on tvoyadres.ru.

    Args:
        num: Street id inserted into the street page URL.

    Returns:
        A list of ``[link text, house id]`` pairs. When the page does not
        have the expected ``<ul class="next">`` / ``<li><a href=...>``
        structure, prints ``'None'`` and returns the string ``'None'``.
    """
    r = requests.get('http://tvoyadres.ru/moskovskaya-oblast/moskva/ulitsy/' + str(num) + '/')
    soup = BeautifulSoup(r.text, 'html.parser')
    ul = soup.find("ul", {"class": "next"})
    houses = []
    try:
        for li in ul.find_all("li"):
            # The house id is the path segment after "doma/" in the link target.
            urly = re.search(r'doma\/(.*)\/', li.a['href']).group(1)
            houses.append([li.get_text(), urly])
    except (AttributeError, TypeError, KeyError):
        # Missing <ul>, <li> without <a>, <a> without href, or an href that
        # does not match: treat the whole street as having no data.
        print('None')
        return 'None'
    return houses

# Street id -> result of gethouses() for that street.
totalyres = {}

# `sids` is a list of (id, name) tuples and cannot be indexed by its own
# items; the ids live in the values of the name -> id mapping `streetsdict`.
for num in streetsdict.values():
    totalyres[num] = gethouses(num)

. , , , . loop', , totre , . .

, ! .

for key in totre:
    urlo = 'http://tvoyadres.ru/moskovskaya-oblast/moskva/ulitsy/' + key + '/'
    ra = requests.get(urlo)
    try:
        streetname = re.search('
  • /docs.python.org/3/library/difflib.html">difflib, . , , , , , . , , , - , - , :

    # Read the distinct street names stored in the houses table, sorted.
    conn = sqlite3.connect('renovation.db')
    cur = conn.cursor()
    streets = cur.execute('SELECT DISTINCT street FROM houses order by street asc')
    streeets = streets.fetchall()
    conn.close()

    # DB street name -> site street id, filled interactively by glue().
    exactmatches = {}
    # Candidate names handed to difflib for fuzzy matching.
    keyslist = list(sids.keys())
    
    def glue(maxres=3, freq=0.6):
        """Interactively match DB street names to the site's street names.

        For every row in ``streeets`` whose name is not yet a key of
        ``exactmatches``, fuzzy-match it against ``keyslist`` and ask the
        operator to confirm the single candidate or pick one of several by
        index. An accepted candidate stores ``sids[candidate]`` under the DB
        street name. The mapping is written to ``exactmatches.json`` at the end.

        Args:
            maxres: Maximum number of candidates requested from difflib.
            freq: Similarity cutoff passed to ``difflib.get_close_matches``.
        """
        for each in streeets:
            street = each[0]
            if street in exactmatches:
                continue
            # NOTE(review): the replace() arguments reached this file as two
            # empty strings (a no-op). The variable name suggests the intent
            # was normalising 'ё' to 'е' before matching — confirm.
            eachnoyo = street.replace('ё', 'е')
            diffres = difflib.get_close_matches(eachnoyo, keyslist, maxres, freq)
            if not diffres:
                print('No matches for ' + street)
            elif len(diffres) == 1:
                print(street + ': ' + diffres[0])
                while True:
                    inp = input('Correct? y/n ')
                    if inp == 'y':
                        exactmatches[street] = sids[diffres[0]]
                        break
                    if inp == 'n':
                        break
                    print('Incorrect input, try again')
            else:
                print(street + ': ' + str(diffres))
                while True:
                    inp = input('List number? Or n ')
                    if inp == 'n':
                        break
                    try:
                        listnum = int(inp)
                    except ValueError:
                        listnum = None
                    if listnum is not None and 0 <= listnum < len(diffres):
                        # Store the candidate the operator picked, not always the first one.
                        exactmatches[street] = sids[diffres[listnum]]
                        break
                    print('Incorrect input, try again')
        # ensure_ascii=False writes non-ASCII names verbatim, so pin the file encoding.
        with open('exactmatches.json', 'w', encoding='utf-8') as f:
            json.dump(exactmatches, f, ensure_ascii=False)

    , , . , , glue(10, freq=0.4).



    506 700 -, , , ( ).

    , , , . .

    # Base URL for per-house pages on tvoyadres.ru.
    url2 = 'http://tvoyadres.ru/moskovskaya-oblast/moskva/'

    # Load every house row that has no flat count yet, ordered by id.
    conn = sqlite3.connect('renovation.db')
    cur = conn.cursor()
    allhouses = cur.execute('SELECT * FROM houses WHERE flats IS NULL ORDER BY id')
    allhousesres = allhouses.fetchall()
    
    
    def getnumberofflats(streetname, houseid):
        """Scrape the flat count from one house page.

        Args:
            streetname: Street path segment appended to ``url2``.
            houseid: House id placed after ``/doma/`` in the URL.

        Returns:
            The captured digits as a string (possibly empty, since the
            pattern uses ``\\d*``).

        Raises:
            AttributeError: When the pattern is not found in the page.
        """
        urlo = url2 + str(streetname) + '/doma/' + str(houseid) + '/'
        results = requests.get(urlo).text
        # Raw string so \/ and \d reach the regex engine without relying on
        # invalid string escapes.
        # NOTE(review): the pattern starts with a bare space before </span>;
        # a label may have been lost from the literal — confirm against the page.
        return re.search(r' <\/span> (\d*)<', results).group(1)
    
    def gluehousesnumbers(freq=3, ratio=0.6):
        """Interactively match DB house numbers to site houses and store flat counts.

        For every row of ``allhousesres`` whose street is a key of
        ``exactmatches``, build a lower-cased "number [additional]" label,
        fuzzy-match it against the keys of that street's ``totre`` entry and
        ask the operator to confirm the single candidate or pick one of
        several by index. For an accepted candidate the flat count is
        scraped and written to ``houses.flats`` through ``cur``.

        Args:
            freq: Maximum number of candidates requested from difflib
                (passed as its ``n`` argument).
            ratio: Similarity cutoff passed to difflib.
        """
        for house in allhousesres:
            if house[1] not in exactmatches:
                continue
            street = totre[exactmatches[house[1]]]
            # NOTE(review): replace('', '') is a no-op as written; its
            # arguments look lost from the literal — confirm the intent.
            housenbr = house[2].replace('', '')
            if house[3]:
                housenbr = housenbr + ' ' + house[3]
            housenbr = housenbr.lower()
            diffres = difflib.get_close_matches(housenbr, street.keys(), freq, ratio)
            if len(diffres) == 1:
                print(housenbr + ': ' + diffres[0])
                while True:
                    inp = input('Correct? y/n ')
                    if inp == 'y':
                        _store_flats(house, street, diffres[0])
                        break
                    if inp == 'n':
                        break
                    print('Incorrect input, try again')
            elif len(diffres) > 1:
                print(housenbr + ': ' + str(diffres))
                while True:
                    inp = input('List number? Or n ')
                    if inp == 'n':
                        break
                    try:
                        listnum = int(inp)
                    except ValueError:
                        listnum = None
                    if listnum is not None and 0 <= listnum < len(diffres):
                        # Use the candidate the operator picked, not always the first one.
                        _store_flats(house, street, diffres[listnum])
                        break
                    print('Incorrect input, try again')

    def _store_flats(house, street, matched):
        """Scrape the flat count for the matched site house and save it on the DB row."""
        try:
            flatsnumber = getnumberofflats(street['streetname'], street[matched])
            cur.execute('UPDATE houses SET flats = ? WHERE id = ?', [flatsnumber, house[0]])
        except Exception:
            # Best-effort: report the house and continue with the next one.
            print('weird, no flat number for ' + str(house))
    
    # Commit whatever was executed through `cur` and release the connection.
    conn.commit()
    conn.close()
    

    . . , .

    , . , , , , 35 35, 35 ''. , , , , , , - . , .



    : 3592 4500 ! ( ). , , .



    43 , , . , , .

    . , . , , , .. , .

    :



    , :



    , 80% , , , 30%, 40% 80%, , . 100%- 100%- . 58,7%.

    ? , . ? , .

    , - .
    Original source: habrahabr.ru (comments, light).

https://habrahabr.ru/post/333540/

:  

: [1] []
 

:
: 

: ( )

:

  URL