"""Grabbing with Python and the Grab framework.

The program uses one of the Russian court websites as its data source; the
address is set in the code as https://vytegorsky--vld.sudrf.ru. It searches
for the specified words in the list of cases scheduled for hearing, from the
current day up to a configured number of days ahead, and sends the search
result by e-mail.

The words to search for are set in the ``search_list`` list. The number of
days to look through, starting from the current day, is set in the
``DaysFuture`` variable. The settings for the SMTP server and for the sender
and recipient mailboxes are also set in the code. The execution of the
program is logged to the ``example_app.log`` file.

SearchOnTheWebsiteExample-en.py

Search for character strings on the court's website in the "To-do List"
section of the hearing days:
- with the sending of search results to email;
- with the logging of the execution process.
"""

from datetime import datetime, timedelta
import logging
import smtplib
# NOTE(review): the name `email` imported here is not used anywhere in this
# script; the import is kept only because it was present in the original.
from smtplib import email
from email.mime.text import MIMEText

from grab import Grab

# logging setting
logger = logging.getLogger("example_app")
logger.setLevel(logging.INFO)
fh = logging.FileHandler("example_app.log")
formatter = logging.Formatter('%(asctime)s %(message)s')
fh.setFormatter(formatter)
logger.addHandler(fh)

# to log
logger.info("")
logger.info("Start")

# number of days starting from current
DaysFuture = 5

# how many times a page download is attempted before the date is skipped
MAX_FETCH_ATTEMPTS = 10

# column [0]: search words as string
# column [1]: list of page dates on which the search was successful
search_list = [
    [u'SEARCH_WORD_1', []],
    [u'SEARCH_WORD_2', []],
]

# set options for sending mail messages
# SMTP server, for example smtp.gmail.com
smtp_server = "smtp.gmail.com"
port = 465
# sender, for example sender@gmail.com
email_from = "SENDER"
# password
# NOTE(review): the password is stored in plain text in the script.
password = "EMAIL_PASSWORD"
# recipient, for example recipient@gmail.com
email_dest = 'RECIPIENT'
# email subject
subject = u"Info on " + datetime.now().strftime("%Y-%m-%d")

# to log
logger.info("Grab start")

g = Grab()
# setting grab code page and timeout
# The original wrapped these settings in a module-level `prepare(self)`
# function that was never called, so they were never applied; configure the
# Grab instance directly instead.
g.setup(document_charset='windows-1251', timeout=10)

# Read the clock once so that every date in the cycle is relative to the
# same "today", even if the run crosses midnight.
today = datetime.now()

# cycle by date
for CounterDays in range(DaysFuture):
    CounterDate = (today + timedelta(days=CounterDays)).strftime("%d.%m.%Y")
    url = ('https://vytegorsky--vld.sudrf.ru/modules.php'
           '?name=sud_delo&srv_num=1&H_date=' + CounterDate)

    # after MAX_FETCH_ATTEMPTS failed attempts to get data,
    # write to log and go to the next date
    for attempt in range(1, MAX_FETCH_ATTEMPTS + 1):
        try:
            g.go(url)
        except Exception:
            # Any download error triggers a retry; only the last failure is
            # logged (with traceback).
            if attempt == MAX_FETCH_ATTEMPTS:
                logger.exception("Grab error: %s", url)
        else:
            break
    else:
        # Every attempt failed: g.doc still holds the previous date's page
        # (or nothing at all), so searching it would report wrong dates.
        continue

    # moving on the list of search words (column [0]),
    # when the word is successfully found in the text of the page,
    # we save the date of the page in the result column (column [1]),
    # as an element of the list
    for row in search_list:
        # The search is done on bytes, with the word encoded as cp1251.
        if g.doc.text_search(row[0].encode('cp1251'), byte=True):
            row[1].append(CounterDate)

# to log
logger.info("Grab completed")

# to log
logger.info("Start sending an email message")

# Generate Message text: one "word: date,date,..." line per word that was
# found on at least one date.
email_text = u'\n'.join(
    row[0] + u': ' + u','.join(row[1])
    for row in search_list
    if row[1]
)

# Send an email message
m = MIMEText(email_text)
m['Subject'] = subject
m['From'] = email_from
m['To'] = email_dest

try:
    server = smtplib.SMTP_SSL(smtp_server, port)
    try:
        server.login(email_from, password)
        server.sendmail(email_from, email_dest, m.as_string())
        server.quit()
    finally:
        # Release the connection even when login or sending fails.
        server.close()
    # to log
    logger.info("Send mail message complete")
except Exception:
    # to log
    logger.exception("Error sending mail message")

# to log
logger.info("Completed")

# en/grabbing_of_the_court_site_using_python.txt
# Last modified: 2018/06/27 22:22 by 2SRTVF