import requests
url = "https://finance.yahoo.com/quote/AAPL?p=AAPL&.tsrc=fin-srch"
# Without a timeout, requests waits indefinitely on a stalled connection.
response = requests.get(url, timeout=10)
# Stop on an HTTP error status (4xx/5xx) instead of parsing an error page.
# Status codes: https://en.wikipedia.org/wiki/List_of_HTTP_status_codes
response.raise_for_status()
# One (initially empty) list of scraped values per summary-table label.
indicatorDict = {
    "Previous Close": [],
    "Open": [],
    "Bid": [],
    "Ask": [],
    "Day's Range": [],
    "52 Week Range": [],
    "Volume": [],
    "Avg. Volume": [],
    "Market Cap": [],
    "Beta": [],
    "PE Ratio (TTM)": [],
    "EPS (TTM)": [],
    "Earnings Date": [],
    "Dividend & Yield": [],
    "Ex-Dividend Date": [],
    "1y Target Est": [],
}
# Raw page source that the later snippets search with str.split().
htmlText = response.text
Positioning
But
IndexError: list index out of range
Caused by
# Using HTML Code
solution:
then
Caused by a different HTML structure
#################################################
solution test:
# Walk from the "Dividend & Yield" label to its value: take the text that
# follows the label, step past the first '">' and cut at the closing </td tag.
textAfterLabel = htmlText.split("Dividend & Yield")[1]
valueCell = textAfterLabel.split("\">")[1]
dividendAndYield = valueCell.split("</td")[0]
print(dividendAndYield)
###########################################All codes################################
import requests
import html

url = "https://finance.yahoo.com/quote/AAPL?p=AAPL&.tsrc=fin-srch"
# Without a timeout, requests waits indefinitely on a stalled connection.
response = requests.get(url, timeout=10)
# Stop on an HTTP error status (4xx/5xx) instead of parsing an error page.
# Status codes: https://en.wikipedia.org/wiki/List_of_HTTP_status_codes
response.raise_for_status()
# One (initially empty) list of scraped values per summary-table label.
indicatorDict = {
    "Previous Close": [],
    "Open": [],
    "Bid": [],
    "Ask": [],
    "Day's Range": [],
    "52 Week Range": [],
    "Volume": [],
    "Avg. Volume": [],
    "Market Cap": [],
    "Beta": [],
    "PE Ratio (TTM)": [],
    "EPS (TTM)": [],
    "Earnings Date": [],
    "Dividend & Yield": [],
    "Ex-Dividend Date": [],
    "1y Target Est": [],
}
htmlText = response.text
# Labels whose value is cut out differently: first '">' after the label and
# up to "</td", instead of the second '">' and up to "</".
specialSplitList = ["Day's Range", "52 Week Range", "Dividend & Yield"]
for indicator in indicatorDict:
    # NOTE(review): labels containing ' or & presumably appear entity-escaped
    # in the raw page source (&#x27; / &amp;) - confirm against response.text.
    # The plain label is tried first, the escaped form is the fallback.
    pageLabel = indicator if indicator in htmlText else html.escape(indicator)
    try:
        textAfterLabel = htmlText.split(pageLabel)[1]
        if indicator not in specialSplitList:
            data = textAfterLabel.split("\">")[2].split("</")[0]
        else:
            data = textAfterLabel.split("\">")[1].split("</td")[0]
    except IndexError:
        # Label or expected markup missing: report it and keep scraping the
        # remaining indicators instead of aborting the whole run.
        print("Could not locate", indicator, "in the page; skipped")
        continue
    indicatorDict[indicator].append(data)
print(indicatorDict)
output:
D:\Anaconda3\python.exe -i J:/PycharmProjects/scrapers/yahoo/apple.py
{'Previous Close': ['204.41'], 'Open': ['203.35'], 'Bid': ['203.97 x 1400'], 'Ask': ['203.95 x 1200'],
'Day's Range': ['202.90 - 205.07'], '52 Week Range': ['142.00 - 233.47'], 'Volume': ['17,265,518'],
'Avg. Volume': ['27,948,582'], 'Market Cap': ['931.955B'], 'Beta': ['1.09'], 'PE Ratio (TTM)': ['17.18'],
'EPS (TTM)': ['11.89'], 'Earnings Date': ['Jul 30, 2019'], 'Dividend & Yield': ['3.08 (1.52%)'],
'Ex-Dividend Date': ['2019-05-10'], '1y Target Est': ['212.03']}
>>>
########################Improved version#####################
import requests
url = "https://finance.yahoo.com/quote/AAPL?p=AAPL&.tsrc=fin-srch"
# Without a timeout, requests waits indefinitely on a stalled connection.
response = requests.get(url, timeout=10)
# Stop on an HTTP error status (4xx/5xx) instead of parsing an error page.
# Status codes: https://en.wikipedia.org/wiki/List_of_HTTP_status_codes
response.raise_for_status()
# One (initially empty) list of scraped values per summary-table label.
indicatorDict = {
    "Previous Close": [],
    "Open": [],
    "Bid": [],
    "Ask": [],
    "Day's Range": [],
    "52 Week Range": [],
    "Volume": [],
    "Avg. Volume": [],
    "Market Cap": [],
    "Beta": [],
    "PE Ratio (TTM)": [],
    "EPS (TTM)": [],
    "Earnings Date": [],
    "Dividend & Yield": [],
    "Ex-Dividend Date": [],
    "1y Target Est": [],
}
htmlText = response.text
# Maps a display label (dict key) to the text searched for in the raw page
# source. These labels also use the alternative cut: first '">' up to "</td".
# NOTE(review): the entity-escaped values assume the page source encodes
# ' as &#x27; and & as &amp; - confirm against response.text. The plain label
# is used as a fallback when the escaped form is absent.
specialSplitDict = {
    "Day's Range": "Day&#x27;s Range",
    "52 Week Range": "52 Week Range",
    "Dividend & Yield": "Dividend &amp; Yield",
}
for indicator in indicatorDict:
    try:
        if indicator not in specialSplitDict:
            valueCell = htmlText.split(indicator)[1].split("\">")[2]
            data = valueCell.split("</")[0]
        else:
            pageLabel = specialSplitDict[indicator]
            if pageLabel not in htmlText:
                pageLabel = indicator
            valueCell = htmlText.split(pageLabel)[1].split("\">")[1]
            data = valueCell.split("</td")[0]
    except IndexError:
        # Label or expected markup missing: report it and keep scraping the
        # remaining indicators instead of aborting the whole run.
        print("Could not locate", indicator, "in the page; skipped")
        continue
    indicatorDict[indicator].append(data)
print(indicatorDict)
output:
{'Previous Close': ['204.41'], 'Open': ['203.35'], 'Bid': ['203.97 x 1400'], 'Ask': ['203.95 x 1200'], "Day's Range": ['202.90 - 205.07'], '52 Week Range': ['142.00 - 233.47'], 'Volume': ['17,265,518'], 'Avg. Volume': ['27,948,582'], 'Market Cap': ['939.678B'], 'Beta': ['1.09'], 'PE Ratio (TTM)': ['17.18'], 'EPS (TTM)': ['11.89'], 'Earnings Date': ['Jul 30, 2019'], 'Dividend & Yield': ['3.08 (1.52%)'], 'Ex-Dividend Date': ['2019-05-10'], '1y Target Est': ['212.03']}
##########################################################################################
import requests
from lxml import etree
url = "https://finance.yahoo.com/quote/AAPL?p=AAPL&.tsrc=fin-srch"
# Without a timeout, requests waits indefinitely on a stalled connection.
req = requests.get(url, timeout=10)
# Stop on an HTTP error status (4xx/5xx) instead of parsing an error page.
req.raise_for_status()
html = req.content.decode('utf-8')
selector = etree.HTML(html)
# Start at the label <span>, climb two levels with ../../ (the grandparent,
# i.e. the <tr>), then read the text of the <span> inside that row's second
# <td>.
temp = selector.xpath('//div[@id="quote-summary"]//span[contains(text(),"Previous Close")]/../../td[2]/span/text()')
print(temp)
#############################################################################################
#!/usr/bin/python
#encoding:utf-8
"""
@author: LlQ
@contact:[email protected]
@file:appleXPath.py
@time: 7/7/2019 1:39 AM
"""
import requests
from lxml import etree
url = "https://finance.yahoo.com/quote/AAPL?p=AAPL&.tsrc=fin-srch"
# Without a timeout, requests waits indefinitely on a stalled connection.
req = requests.get(url, timeout=10)
# Stop on an HTTP error status (4xx/5xx) instead of parsing an error page.
req.raise_for_status()
html = req.content.decode('utf-8')
selector = etree.HTML(html)
# One (initially empty) list of scraped values per summary-table label.
indicatorDict = {
    "Previous Close": [],
    "Open": [],
    "Bid": [],
    "Ask": [],
    "Day's Range": [],
    "52 Week Range": [],
    "Volume": [],
    "Avg. Volume": [],
    "Market Cap": [],
    "Beta": [],
    "PE Ratio (TTM)": [],
    "EPS (TTM)": [],
    "Earnings Date": [],
    "Dividend & Yield": [],
    "Ex-Dividend Date": [],
    "1y Target Est": [],
}
# Not used by the loop below; kept because it is part of this script's
# module-level names.
specialSplitDict = {
    "Day's Range": "Day's Range",
    "52 Week Range": "52 Week Range",
    "Dividend & Yield": "Dividend & Yield",
}
for indicator in indicatorDict:
    # The label is handed to XPath as the variable $label. Writing the bare
    # word `indicator` inside the query string does not insert the Python
    # value: XPath reads it as an element name, which selects nothing, and
    # contains(x, "") is true for every span - so each query returned the
    # values of all rows.
    print(selector.xpath(
        '//div[@id="quote-summary"]//span[contains(text(), $label)]'
        '/../../td[2]/span/text()',
        label=indicator))
#print(indicatorDict)
# The same lookup spelled out by hand for the first three labels
# (../../ climbs from the label <span> to its grandparent, the <tr>).
temp = selector.xpath('//div[@id="quote-summary"]//span[contains(text(),"Previous Close")]/../../td[2]/span/text()')
print(temp)
temp = selector.xpath('//div[@id="quote-summary"]//span[contains(text(),"Open")]/../../td[2]/span/text()')
print(temp)
temp = selector.xpath('//div[@id="quote-summary"]//span[contains(text(),"Bid")]/../../td[2]/span/text()')
print(temp)
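output: if the loop variable is written directly inside the XPath string, every query returns the whole list of values; the indicator must be inserted into the query with format() (a big pitfall)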
###############################################solutions#####################################
import requests
from lxml import etree
url = "https://finance.yahoo.com/quote/AAPL?p=AAPL&.tsrc=fin-srch"
# Without a timeout, requests waits indefinitely on a stalled connection.
req = requests.get(url, timeout=10)
# Stop on an HTTP error status (4xx/5xx) instead of parsing an error page.
req.raise_for_status()
html = req.content.decode('utf-8')
selector = etree.HTML(html)
# One (initially empty) list of scraped values per summary-table label.
indicatorDict = {
    "Previous Close": [],
    "Open": [],
    "Bid": [],
    "Ask": [],
    "Day's Range": [],
    "52 Week Range": [],
    "Volume": [],
    "Avg. Volume": [],
    "Market Cap": [],
    "Beta": [],
    "PE Ratio (TTM)": [],
    "EPS (TTM)": [],
    "Earnings Date": [],
    "Dividend & Yield": [],
    "Ex-Dividend Date": [],
    "1y Target Est": [],
}
# Labels whose value is read straight from the second <td> (td[2]/text())
# rather than from a <span> inside it.
specialSplitDict = {
    "Day's Range": "Day's Range",
    "52 Week Range": "52 Week Range",
    "Dividend & Yield": "Dividend & Yield",
}
for indicator in indicatorDict:
    # The label is passed as the XPath variable $label, so quotes inside a
    # label cannot break the query.
    labelSpans = selector.xpath(
        '//div[@id="quote-summary"]//span[contains(text(), $label)]',
        label=indicator)
    if not labelSpans:
        print("Could not locate", indicator, "in the page; skipped")
        continue
    # contains() is a substring test, so "Volume" also hits "Avg. Volume" and
    # both values used to be stored under "Volume". Prefer the span whose text
    # is exactly the label; otherwise fall back to the first substring match
    # (needed when the page shows a longer label than the dict key).
    exactSpans = [span for span in labelSpans
                  if (span.text or "").strip() == indicator]
    labelSpan = (exactSpans or labelSpans)[0]
    # ../../ climbs from the label <span> to its grandparent, the <tr>.
    if indicator not in specialSplitDict:
        data = labelSpan.xpath('../../td[2]/span/text()')
    else:
        data = labelSpan.xpath('../../td[2]/text()')
    indicatorDict[indicator].extend(data)
print(indicatorDict)
{'Previous Close': ['204.41'], 'Open': ['203.35'], 'Bid': ['203.92 x 1400'], 'Ask': ['204.03 x 1200'], "Day's Range": ['202.90 - 205.07'], '52 Week Range': ['142.00 - 233.47'], 'Volume': ['17,265,518', '27,948,582'], 'Avg. Volume': ['27,948,582'], 'Market Cap': ['939.678B'], 'Beta': ['1.09'], 'PE Ratio (TTM)': ['17.18'], 'EPS (TTM)': ['11.89'], 'Earnings Date': ['Jul 30, 2019'], 'Dividend & Yield': ['3.08 (1.51%)'], 'Ex-Dividend Date': ['2019-05-10'], '1y Target Est': ['212.03']}
###############################selenium + xpath + request ##############################
from selenium import webdriver
url = "https://finance.yahoo.com/quote/AAPL?p=AAPL&.tsrc=fin-srch"
browserDriver = webdriver.Chrome(executable_path='D:/chromedriver/chromedriver')
browserDriver.get(url)
# 1. Dump the rendered page source.
print(browserDriver.page_source)
# 2. Look in the dump for a known value stored in JSON/dict form (the data
#    actually sits in a <script> of the HTML), such as 205.66 (previous close
#    price).
# 3. Find the position of the target in the page structure: print the
#    1-based position of each "html" match whose text mentions the target.
elements = browserDriver.find_elements_by_xpath("html")
for counter, element in enumerate(elements, start=1):
    if "PreviousClose" in element.get_attribute("textContent"):
        print(counter)
# 4. Write a function that finds the path automatically.
def findXPath(elementTag, target, path):
    """Return the tag path of the first <script> element containing target.

    Searches depth-first from elementTag and prints the path of every child
    element it visits.

    Args:
        elementTag: element to start from. It must provide
            get_attribute("textContent"), tag_name and
            find_elements_by_xpath() (a Selenium WebElement in this script).
        target: substring to look for in an element's textContent.
        path: path of elementTag itself; child tag names are appended to it,
            separated by "/".

    Returns:
        The "/"-separated path of the first matching <script> element, or ""
        when no such element exists below (or at) elementTag.
    """
    # A missing textContent is treated as empty text.
    textContent = elementTag.get_attribute("textContent") or ""
    if target not in textContent:
        # An element's textContent includes the text of all its descendants,
        # so no child can match either - skip the subtree instead of querying
        # every node in it.
        return ""
    if elementTag.tag_name == "script":
        return path
    # "./*" selects the direct children of the current element.
    for childNode in elementTag.find_elements_by_xpath("./*"):
        childPath = path + "/" + childNode.tag_name
        print(childPath)
        finalPath = findXPath(childNode, target, childPath)
        if finalPath != "":
            return finalPath
    return ""
# try/finally closes the browser even when one of the lookups below raises;
# otherwise the Chrome process started for this session would be left running.
try:
    uniqueElement = browserDriver.find_element_by_xpath("html")
    print("The final path is: ", findXPath(uniqueElement, "PreviousClose", "html"))
    # Reported path: html/body/script
    # Several <script> elements share that path, so print the 1-based position
    # of each one that mentions the target.
    elements = browserDriver.find_elements_by_xpath("html/body/script")
    for counter, element in enumerate(elements, start=1):
        if "PreviousClose" in element.get_attribute("textContent"):
            print(counter)
    # Reported position: 1 (the first one)
    element = browserDriver.find_element_by_xpath("html/body/script[1]")
    print(element.get_attribute("textContent"))
finally:
    browserDriver.quit()
#############################################################################
import json


def findJsonPath(jsonObject, target, path, matchType):
    """Return the key path to the first nested mapping that has key target.

    Args:
        jsonObject: decoded JSON value to search.
        target: key to look for.
        path: path of jsonObject itself; each key followed on the way down is
            appended as "," + key.
        matchType: mapping type to descend into (dict for json.loads output).

    Returns:
        path extended down to the mapping that contains target, or "" when
        target is not found. Starting with path "" the result therefore
        begins with a comma, e.g. ",context,dispatcher".
    """
    # isinstance() also accepts subclasses of matchType (e.g. OrderedDict).
    if not isinstance(jsonObject, matchType):
        return ""
    if target in jsonObject:
        return path
    for key in jsonObject:
        finalPath = findJsonPath(jsonObject[key], target, f"{path},{key}", matchType)
        if finalPath != "":
            return finalPath
    return ""


element = browserDriver.find_element_by_xpath("html/body/script[1]")
# The data is the JSON object that follows "root.App.main = " in the script.
tempData = element.get_attribute("textContent").split("root.App.main = ")[1].lstrip()
# raw_decode() parses the JSON document at the start of the string and
# ignores the JavaScript that trails it, so the text no longer has to be
# trimmed by hand with strip()/[:-3].
jsonData, _ = json.JSONDecoder().raw_decode(tempData)
#print(jsonData.keys()) #dict_keys(['context', 'plugins'])
matchType = type(jsonData)
print("Final Path is: ", findJsonPath(jsonData, "previousClose", "", matchType))
# Final Path is: ,context,dispatcher,stores,QuoteSummaryStore,summaryDetail
print(jsonData["context"]["dispatcher"]["stores"]["QuoteSummaryStore"]["summaryDetail"])
# The browser is not closed here (quit() was commented out in the original),
# presumably so that later snippets can reuse it.
#browserDriver.quit()
{'previousClose': {'raw': 202.59, 'fmt': '202.59'}, 'regularMarketOpen': {'raw': 203.65, 'fmt': '203.65'}, 'twoHundredDayAverage': {'raw': 185.24533, 'fmt': '185.25'}, 'trailingAnnualDividendYield': {'raw': 0.0144133475, 'fmt': '1.44%'}, 'payoutRatio': {'raw': 0.2446, 'fmt': '24.46%'}, 'volume24Hr': {}, 'regularMarketDayHigh': {'raw': 207.23, 'fmt': '207.23'}, 'navPrice': {}, 'averageDailyVolume10Day': {'raw': 18279083, 'fmt': '18.28M', 'longFmt': '18,279,083'}, 'totalAssets': {}, 'regularMarketPreviousClose': {'raw': 202.59, 'fmt': '202.59'}, 'fiftyDayAverage': {'raw': 197.43915, 'fmt': '197.44'}, 'trailingAnnualDividendRate': {'raw': 2.92, 'fmt': '2.92'}, 'open': {'raw': 203.65, 'fmt': '203.65'}, 'averageVolume10days': {'raw': 18279083, 'fmt': '18.28M', 'longFmt': '18,279,083'}, 'expireDate': {}, 'yield': {}, 'algorithm': None, 'dividendRate': {'raw': 3.08, 'fmt': '3.08'}, 'exDividendDate': {'raw': 1557446400, 'fmt': '2019-05-10'}, 'beta': {'raw': 1.091297, 'fmt': '1.09'}, 'circulatingSupply': {}, 'startDate': {}, 'regularMarketDayLow': {'raw': 203.62, 'fmt': '203.62'}, 'priceHint': {'raw': 2, 'fmt': '2', 'longFmt': '2'}, 'currency': 'USD', 'trailingPE': {'raw': 17.43249, 'fmt': '17.43'}, 'regularMarketVolume': {'raw': 21361692, 'fmt': '21.36M', 'longFmt': '21,361,692'}, 'lastMarket': None, 'maxSupply': {}, 'openInterest': {}, 'marketCap': {'raw': 953435750400, 'fmt': '953.44B', 'longFmt': '953,435,750,400'}, 'volumeAllCurrencies': {}, 'strikePrice': {}, 'averageVolume': {'raw': 26755958, 'fmt': '26.76M', 'longFmt': '26,755,958'}, 'priceToSalesTrailing12Months': {'raw': 3.688482, 'fmt': '3.69'}, 'dayLow': {'raw': 203.62, 'fmt': '203.62'}, 'ask': {'raw': 0, 'fmt': '0.00'}, 'ytdReturn': {}, 'askSize': {'raw': 900, 'fmt': '900', 'longFmt': '900'}, 'volume': {'raw': 21361692, 'fmt': '21.36M', 'longFmt': '21,361,692'}, 'fiftyTwoWeekHigh': {'raw': 233.47, 'fmt': '233.47'}, 'forwardPE': {'raw': 16.537909, 'fmt': '16.54'}, 'maxAge': 1, 'fromCurrency': None, 
'fiveYearAvgDividendYield': {'raw': 1.66, 'fmt': '1.66'}, 'fiftyTwoWeekLow': {'raw': 142, 'fmt': '142.00'}, 'bid': {'raw': 0, 'fmt': '0.00'}, 'tradeable': True, 'dividendYield': {'raw': 0.0152, 'fmt': '1.52%'}, 'bidSize': {'raw': 4000, 'fmt': '4k', 'longFmt': '4,000'}, 'dayHigh': {'raw': 207.23, 'fmt': '207.23'}}
Previous Close
'previousClose': {'raw': 202.59, 'fmt': '202.59'},
Open
'open': {'raw': 203.65, 'fmt': '203.65'},
Bid
'bid': {'raw': 207.25, 'fmt': '207.25'},
'bidSize': {'raw': 4000, 'fmt': '4k', 'longFmt': '4,000'},
Ask
'ask': {'raw': 207.16, 'fmt': '207.16'},
'askSize': {'raw': 900, 'fmt': '900', 'longFmt': '900'},
Day's Range
'dayLow': {'raw': 203.62, 'fmt': '203.62'},
'dayHigh': {'raw': 207.23, 'fmt': '207.23'}
52 Week Range
'fiftyTwoWeekLow': {'raw': 142, 'fmt': '142.00'},
'fiftyTwoWeekHigh': {'raw': 233.47, 'fmt': '233.47'},
Volume
'volume': {'raw': 21361692, 'fmt': '21.36M', 'longFmt': '21,361,692'},
Avg. Volume
'averageVolume': {'raw': 26711182, 'fmt': '26.71M', 'longFmt': '26,711,182'},
Market Cap #953.436B
'marketCap': {'raw': 953435750400, 'fmt': '953.44B', 'longFmt': '953,435,750,400'},
Beta
'beta': {'raw': 1.091297, 'fmt': '1.09'},
PE Ratio
'trailingPE': {'raw': 17.43249, 'fmt': '17.43'}
Forward Dividend & Yield
'dividendRate': {'raw': 3.08, 'fmt': '3.08'},
'dividendYield': {'raw': 0.0152, 'fmt': '1.52%'},
Ex-Dividend Date
'exDividendDate': {'raw': 1557446400, 'fmt': '2019-05-10'},
#############################################################################
EPS (TTM) is stored in a different place in the JSON dictionary
import json


def findJsonPath(jsonObject, target, path, matchType):
    """Return the key path to the first nested mapping that has key target.

    Args:
        jsonObject: decoded JSON value to search.
        target: key to look for.
        path: path of jsonObject itself; each key followed on the way down is
            appended as "," + key.
        matchType: mapping type to descend into (dict for json.loads output).

    Returns:
        path extended down to the mapping that contains target, or "" when
        target is not found. Starting with path "" the result therefore
        begins with a comma, e.g. ",context,dispatcher".
    """
    # isinstance() also accepts subclasses of matchType (e.g. OrderedDict),
    # which an exact type() comparison silently skipped.
    if not isinstance(jsonObject, matchType):
        return ""
    if target in jsonObject:
        return path
    for key in jsonObject:
        # The f-string keeps the path buildable for non-string keys as well.
        finalPath = findJsonPath(jsonObject[key], target, f"{path},{key}", matchType)
        if finalPath != "":
            return finalPath
    return ""
# try/finally closes the browser even when the lookup or the parsing fails.
try:
    element = browserDriver.find_element_by_xpath("html/body/script[1]")
    # The data is the JSON object that follows "root.App.main = ".
    tempData = element.get_attribute("textContent").split("root.App.main = ")[1].lstrip()
    # raw_decode() parses the JSON document at the start of the string and
    # reports where it ends, so the trailing JavaScript no longer has to be
    # trimmed by hand with strip()/[:-3].
    jsonData, jsonEnd = json.JSONDecoder().raw_decode(tempData)
    tempData = tempData[:jsonEnd]
    print(tempData)
    #print(jsonData.keys()) #dict_keys(['context', 'plugins'])
    matchType = type(jsonData)
    print("Final Path is: ", findJsonPath(jsonData, "trailingEps", "", matchType))
    # Final Path is: ,context,dispatcher,stores,QuoteSummaryStore,defaultKeyStatistics
    print(jsonData["context"]["dispatcher"]["stores"]["QuoteSummaryStore"]["defaultKeyStatistics"])
finally:
    browserDriver.quit()
{'annualHoldingsTurnover': {}, 'enterpriseToRevenue': {'raw': 3.732, 'fmt': '3.73'}, 'beta3Year': {}, 'profitMargins': {'raw': 0.22117001, 'fmt': '22.12%'}, 'enterpriseToEbitda': {'raw': 12.472, 'fmt': '12.47'}, '52WeekChange': {'raw': 0.049689054, 'fmt': '4.97%'}, 'morningStarRiskRating': {}, 'forwardEps': {'raw': 12.53, 'fmt': '12.53'}, 'revenueQuarterlyGrowth': {}, 'sharesOutstanding': {'raw': 4601079808, 'fmt': '4.6B', 'longFmt': '4,601,079,808'}, 'fundInceptionDate': {}, 'annualReportExpenseRatio': {}, 'totalAssets': {}, 'bookValue': {'raw': 22.977, 'fmt': '22.98'}, 'sharesShort': {'raw': 43448528, 'fmt': '43.45M', 'longFmt': '43,448,528'}, 'sharesPercentSharesOut': {'raw': 0.0094, 'fmt': '0.94%'}, 'fundFamily': None, 'lastFiscalYearEnd': {'raw': 1538179200, 'fmt': '2018-09-29'}, 'heldPercentInstitutions': {'raw': 0.60946, 'fmt': '60.95%'}, 'netIncomeToCommon': {'raw': 57170001920, 'fmt': '57.17B', 'longFmt': '57,170,001,920'}, 'trailingEps': {'raw': 11.887, 'fmt': '11.89'}, 'lastDividendValue': {}, 'SandP52WeekChange': {'raw': 0.055385828, 'fmt': '5.54%'}, 'priceToBook': {'raw': 9.018584, 'fmt': '9.02'}, 'heldPercentInsiders': {'raw': 0.00074, 'fmt': '0.07%'}, 'nextFiscalYearEnd': {'raw': 1601337600, 'fmt': '2020-09-29'}, 'yield': {}, 'mostRecentQuarter': {'raw': 1553904000, 'fmt': '2019-03-30'}, 'shortRatio': {'raw': 1.68, 'fmt': '1.68'}, 'sharesShortPreviousMonthDate': {'raw': 1559260800, 'fmt': '2019-05-31'}, 'floatShares': {'raw': 4348475982, 'fmt': '4.35B', 'longFmt': '4,348,475,982'}, 'beta': {'raw': 1.091297, 'fmt': '1.09'}, 'enterpriseValue': {'raw': 964669800448, 'fmt': '964.67B', 'longFmt': '964,669,800,448'}, 'priceHint': {'raw': 2, 'fmt': '2', 'longFmt': '2'}, 'threeYearAverageReturn': {}, 'lastSplitDate': {'raw': 1402272000, 'fmt': '2014-06-09'}, 'lastSplitFactor': '1/7', 'legalType': None, 'morningStarOverallRating': {}, 'earningsQuarterlyGrowth': {'raw': -0.164, 'fmt': '-16.40%'}, 'priceToSalesTrailing12Months': {}, 'dateShortInterest': {'raw': 
1561680000, 'fmt': '2019-06-28'}, 'pegRatio': {'raw': 1.51, 'fmt': '1.51'}, 'ytdReturn': {}, 'forwardPE': {'raw': 16.537909, 'fmt': '16.54'}, 'maxAge': 1, 'lastCapGain': {}, 'shortPercentOfFloat': {'raw': 0.0092, 'fmt': '0.92%'}, 'sharesShortPriorMonth': {'raw': 51257104, 'fmt': '51.26M', 'longFmt': '51,257,104'}, 'category': None, 'fiveYearAverageReturn': {}}
# Print the "defaultKeyStatistics" store, the part of the JSON that holds
# trailingEps (see the path reported by findJsonPath).
print(jsonData["context"]["dispatcher"]["stores"]["QuoteSummaryStore"]["defaultKeyStatistics"])
########################################################################
import pandas as pd

# Both result dictionaries live in the same QuoteSummaryStore.
quoteSummaryStore = jsonData["context"]["dispatcher"]["stores"]["QuoteSummaryStore"]
finalData1 = quoteSummaryStore["summaryDetail"]
finalData2 = quoteSummaryStore["defaultKeyStatistics"]
# Build one DataFrame from each dictionary.
fd1_DF = pd.DataFrame(data=finalData1)
fd2_DF = pd.DataFrame(data=finalData2)
print(fd1_DF)
print(fd2_DF)
browserDriver.quit()
#######################All codes##############################
import json

from selenium import webdriver


def findXPath(elementTag, target, path):
    """Return the tag path of the first <script> element containing target.

    Searches depth-first from elementTag and prints the path of every child
    element it visits. Returns "" when no matching <script> is found.
    """
    # A missing textContent is treated as empty text.
    textContent = elementTag.get_attribute("textContent") or ""
    if target not in textContent:
        # An element's textContent includes the text of all its descendants,
        # so no child can match either.
        return ""
    if elementTag.tag_name == "script":
        return path
    # "./*" selects the direct children of the current element.
    for childNode in elementTag.find_elements_by_xpath("./*"):
        childPath = path + "/" + childNode.tag_name
        print(childPath)
        finalPath = findXPath(childNode, target, childPath)
        if finalPath != "":
            return finalPath
    return ""


def findJsonPath(jsonObject, target, path, matchType):
    """Return the key path to the first nested mapping that has key target.

    Each key followed on the way down is appended to path as "," + key.
    Returns "" when target is not found.
    """
    if not isinstance(jsonObject, matchType):
        return ""
    if target in jsonObject:
        return path
    for key in jsonObject:
        finalPath = findJsonPath(jsonObject[key], target, f"{path},{key}", matchType)
        if finalPath != "":
            return finalPath
    return ""


url = "https://finance.yahoo.com/quote/AAPL?p=AAPL&.tsrc=fin-srch"
browserDriver = webdriver.Chrome(executable_path='D:/chromedriver/chromedriver')
# The original never closed the browser; try/finally quits it once the page
# data has been read, also when a step fails.
try:
    browserDriver.get(url)
    # Steps 1-3 (dump page_source, spot the data in a <script>, locate it with
    # findXPath) led to html/body/script[1], which is read directly here.
    element = browserDriver.find_element_by_xpath("html/body/script[1]")
    # The data is the JSON object that follows "root.App.main = ".
    tempData = element.get_attribute("textContent").split("root.App.main = ")[1].lstrip()
finally:
    browserDriver.quit()
# raw_decode() parses the JSON document at the start of the string and ignores
# the JavaScript that trails it, so the text no longer has to be trimmed by
# hand with strip()/[:-3].
jsonData, _ = json.JSONDecoder().raw_decode(tempData)
#print(jsonData.keys()) #dict_keys(['context', 'plugins'])
matchType = type(jsonData)
# findJsonPath(jsonData, "previousClose", "", matchType) reported
#   ,context,dispatcher,stores,QuoteSummaryStore,summaryDetail
# findJsonPath(jsonData, "trailingEps", "", matchType) reported
#   ,context,dispatcher,stores,QuoteSummaryStore,defaultKeyStatistics
finalData1 = jsonData["context"]["dispatcher"]["stores"]["QuoteSummaryStore"]["summaryDetail"]
finalData2 = jsonData["context"]["dispatcher"]["stores"]["QuoteSummaryStore"]["defaultKeyStatistics"]
#############################################################
Done!