This is an old revision of the document!

Grabbing with Python and Grab framework

The program uses the website of one of the Russian courts as its data source (the address is set in the code, in the url variable).
It searches for the specified words in the list of cases scheduled for each day, from the current day up to a configured number of days ahead, and sends the search results by e-mail.

The words to search for are listed in the search_list array.
The number of days to scan, starting from the current day, is set in the DaysFuture variable.
The settings for the SMTP server and the sender and recipient mailboxes are also set in the code. The execution process is logged to the example_app.log file.
# Search for text strings on the court's website, in the
#   section "To-do List" of the hearing days:
#   - with the search results sent by email;
#   - with the execution process written to a log.
from grab import Grab
from datetime import datetime, timedelta
import smtplib
from smtplib import email
from email.mime.text import MIMEText
import logging
# logging setting: timestamped messages are written to example_app.log
logger = logging.getLogger("example_app")
# INFO level is required, otherwise the logger.info() progress messages
# used throughout the script are dropped by the default WARNING threshold
logger.setLevel(logging.INFO)
fh = logging.FileHandler("example_app.log")
formatter = logging.Formatter('%(asctime)s  %(message)s')
# attach the formatter to the handler and the handler to the logger;
# without these two calls nothing reaches the log file
fh.setFormatter(formatter)
logger.addHandler(fh)
# to log (an empty entry first, to visually separate runs in the file)
logger.info("")
logger.info("Start")
# number of days to scan, starting from the current day
DaysFuture = 5
# column [0]: search word as a string
# column [1]: list of page dates on which the search was successful
#             (filled in while the pages are scanned, so it must stay a list)
search_list = [
    [u'SEARCH_WORD_1', []],
    [u'SEARCH_WORD_2', []],
]
# set options for sending mail messages
# SMTP server host name
# NOTE(review): the value is empty in this revision of the page; it must be
# filled in with the real SMTP host before the script can send mail
smtp_server = ""
# SMTP over SSL port
port = 465
# sender mailbox (also used as the SMTP login)
email_from = "SENDER"
# password
password = "EMAIL_PASSWORD"
# recipient mailbox
email_dest = 'RECIPIENT'
# email subject: fixed prefix followed by today's date (YYYY-MM-DD)
subject = u"Info on " + datetime.now().strftime("%Y-%m-%d")
# to log
logger.info("Grab start")
g = Grab()
# Apply the 10-second network timeout to the Grab instance that is actually
# used below; the prepare() hook that was meant to do this is never called.
g.setup(timeout=10)


# setting grab timeout
def prepare(self):
    """Set a 10-second timeout through ``self.setup_grab``.

    NOTE(review): this is defined at module level and nothing in this script
    calls it, so it has no effect here. It looks like a hook intended for a
    spider class that provides ``setup_grab`` -- confirm before relying on it.
    Kept so that the module's names stay unchanged.
    """
    self.setup_grab(timeout=10)
# cycle by date: today, tomorrow, ... for DaysFuture days in total
for CounterDays in range(0, DaysFuture):
    # the site expects the date as DD.MM.YYYY
    CounterDate = (datetime.now() + timedelta(days=CounterDays)).strftime("%d.%m.%Y")
    # NOTE(review): the base URL is empty in this revision of the page; the
    # address of the court's hearing list must be put in front of the date
    url = '' + CounterDate
    counter_go = 0
    page_loaded = False
    # retry the download; once more than 10 attempts have failed,
    # write to log and go to the next date
    while True:
        try:
            g.go(url)
            page_loaded = True
            break
        except Exception:
            counter_go = counter_go + 1
            if counter_go > 10:
                logger.exception("Grab error: " + url)
                break
    if not page_loaded:
        # no page for this date: do not search the previously loaded document
        continue
    # moving on the array of search words (column [0]),
    #  when the word is successfully found in the text of the page,
    #  we save the date of the page in the result column (column [1]),
    #  as an element of the array
    for row in search_list:
        # the search is done on raw bytes, with the word encoded as cp1251
        if g.doc.text_search(row[0].encode('cp1251'), byte=True):
            row[1].append(CounterDate)
# to log
logger.info("Grab completed")
# to log
logger.info("Start sending an email message")
# Generate Message text: one line per search word that was found at least
# once, in the form "<word>: <date>,<date>,..."; words with no hits are
# omitted, so the text is empty when nothing was found
email_text = u'\n'.join(
    row[0] + u': ' + u','.join(row[1])
    for row in search_list
    if row[1]
)
# Send an email message
# utf-8 is given explicitly so that non-ASCII search words in the text
# are encoded correctly
m = MIMEText(email_text, 'plain', 'utf-8')
m['Subject'] = subject
m['From'] = email_from
m['To'] = email_dest
try:
    server = smtplib.SMTP_SSL(smtp_server, port)
    try:
        server.login(email_from, password)
        server.sendmail(email_from, email_dest, m.as_string())
    finally:
        # always close the SMTP session, even if login or sending failed
        server.quit()
    # to log
    logger.info("Send mail message complete")
except Exception:
    # to log (with traceback); a mail failure must not crash the script
    logger.exception("Error sending mail message")
# to log
logger.info("Completed")
  • en/grabbing_of_the_court_site_using_python.1527236139.txt.gz
  • Last modified: 2018/05/25 08:15
  • by 2SRTVF