-
Notifications
You must be signed in to change notification settings - Fork 0
/
ScrapingDynamicallyCreatedTables.py
48 lines (36 loc) · 1.19 KB
/
ScrapingDynamicallyCreatedTables.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Oct 2 10:12:11 2018
@author: sarfraz
"""
import io
import sys

import pandas as pd
from selenium import webdriver
# Scrape every HTML table inside the first <form> of a page, as rendered by a
# real browser, and print each one as a pandas DataFrame.

# URL of the page whose tables will be scraped.
URL = 'http://www.scstrade.com/stockscreening/SS_CompanySnapShotYR.aspx?symbol=FATIMA'

# Start a Firefox session driven by Selenium.
driver = webdriver.Firefox()
try:
    # Load the page in the browser.
    driver.get(URL)
    # Locate the first <form> element. The locator strategy is given as the
    # plain string 'xpath' (the value of By.XPATH) because the
    # find_element_by_xpath helper was removed in Selenium 4; this call form
    # is accepted by both Selenium 3 and Selenium 4.
    element = driver.find_element('xpath', '//form[1]')
    # Take the markup of that element from the live page.
    element_html = element.get_attribute('innerHTML')
finally:
    # Always end the WebDriver session, even when loading the page or
    # locating the element fails. quit() shuts down the browser and the
    # driver process, whereas close() only closes the current window.
    driver.quit()

# Dump the extracted markup (kept from the original script's output).
print(element_html)

try:
    # Parse every <table> in the markup into a list of DataFrames. The markup
    # is wrapped in StringIO because passing a literal HTML string to
    # read_html is deprecated in recent pandas releases.
    dataframes = pd.read_html(io.StringIO(element_html))
except ValueError:
    # read_html raises ValueError when the markup contains no tables. Any
    # other failure (e.g. a missing HTML parser) is a real error and is
    # allowed to propagate instead of being reported as "No table found".
    print("No table found")
    # Exit with status 0: a page without tables is treated as a normal outcome.
    sys.exit(0)

# Print each scraped table.
for dataframe in dataframes:
    print(dataframe)