You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
What's the phone number of Capilano Suspension Bridge Park?
fromdataprep.connectorimportconnect# You can get ”yelp_access_token“ by following https://www.yelp.com/developers/documentation/v3/authenticationconn_yelp=connect("yelp", _auth={"access_token":yelp_access_token}, _concurrency=5)
df=awaitconn_yelp.query("businesses", term="Capilano Suspension Bridge Park", location="Vancouver", _count=1)
df[["name","phone"]]
id
name
phone
0
Capilano Suspension Bridge Park
+1 604-985-7474
Which yoga store has the highest review count in Vancouver?
fromdataprep.connectorimportconnect# You can get ”yelp_access_token“ by following https://www.yelp.com/developers/documentation/v3/authenticationconn_yelp=connect("yelp", _auth={"access_token":yelp_access_token}, _concurrency=1)
# Check all supported categories: https://www.yelp.ca/developers/documentation/v3/all_category_listdf=awaitconn_yelp.query("businesses", categories="yoga", location="Vancouver", sort_by="review_count", _count=1)
df[["name", "review_count"]]
id
name
review_count
0
YYOGA Downtown Flow
107
How many Starbucks stores in Seattle and where are they?
fromdataprep.connectorimportconnect# You can get ”yelp_access_token“ by following https://www.yelp.com/developers/documentation/v3/authenticationconn_yelp=connect("yelp", _auth={"access_token":yelp_access_token}, _concurrency=5)
df=awaitconn_yelp.query("businesses", term="Starbucks", location="Seattle", _count=1000)
# Remove irrelevant datadf=df[(df['city'] =='Seattle') & (df['name'] =='Starbucks')]
df[['name', 'address1', 'city', 'state', 'country', 'zip_code']].reset_index(drop=True)
id
name
address1
city
state
country
zip_code
0
Starbucks
515 Westlake Ave N
Seattle
WA
US
98109
1
Starbucks
442 Terry Avenue N
Seattle
WA
US
98109
...
.......
.......
......
..
..
....
126
Starbucks
17801 International Blvd
Seattle
WA
US
98158
What are the ratings for a list of resturants?
fromdataprep.connectorimportconnectimportpandasaspdimportasyncio# You can get ”yelp_access_token“ by following https://www.yelp.com/developers/documentation/v3/authenticationconn_yelp=connect("yelp", _auth={"access_token":yelp_access_token}, _concurrency=5)
names= ["Miku", "Boulevard", "NOTCH 8", "Chambar", "VIJ’S", "Fable", "Kirin Restaurant", "Cafe Medina", \
"Ask for Luigi", "Savio Volpe", "Nicli Pizzeria", "Annalena", "Edible Canada", "Nuba", "The Acorn", \
"Lee's Donuts", "Le Crocodile", "Cioppinos", "Six Acres", "St. Lawrence", "Hokkaido Santouka Ramen"]
query_list= [conn_yelp.query("businesses", term=name, location="Vancouver", _count=1) fornameinnames]
results=asyncio.gather(*query_list)
df=pd.concat(awaitresults)
df[["name", "rating", "city"]].reset_index(drop=True)
ID
Name
Rating
City
0
Miku
4.5
Vancouver
1
Boulevard Kitchen & Oyster Bar
4.0
Vancouver
...
...
...
...
20
Hokkaido Ramen Santouka
4.0
Vancouver
Hunter -- Collect and Verify Professional Email Addresses
Who are executives of Asana and what are their emails?
fromdataprep.connectorimportconnect# You can get ”hunter_access_token“ by registering as a developer https://hunter.io/users/sign_upconn_hunter=connect("hunter", _auth={"access_token":'hunter_access_token'})
df=awaitconn_hunter.query('all_emails', domain='asana.com', _count=10)
df[df['department']=='executive']
fromdataprep.connectorimportconnect# You can get ”hunter_access_token“ by registering as a developer https://hunter.io/users/sign_upconn_hunter=connect("hunter", _auth={"access_token":'hunter_access_token'})
df=awaitconn_hunter.query("individual_email", full_name='dustin moskovitz', domain='asana.com')
df
fromdataprep.connectorimportconnect# You can get ”hunter_access_token“ by registering as a developer https://hunter.io/users/sign_upconn_hunter=connect("hunter", _auth={"access_token":'hunter_access_token'})
employees=awaitconn_hunter.query("all_emails", domain='asana.com', _count=10)
executives=employees.loc[employees['department']=='executive']
emails=executives[['email']]
foremailinemails.iterrows():
status=awaitconn_hunter.query("email_verifier", email=email[1][0])
emails['status'] =statusemails
fromdataprep.connectorimportconnect# You can get ”hunter_access_token“ by registering as a developer https://hunter.io/users/sign_upconn_hunter=connect("hunter", _auth={"access_token":'hunter_access_token'})
df=awaitconn_hunter.query("account")
df
requests available
0
19475
What are the counts of each level of seniority of Intercom employees?
fromdataprep.connectorimportconnect# You can get ”hunter_access_token“ by registering as a developer https://hunter.io/users/sign_upconn_hunter=connect("hunter", _auth={"access_token":'hunter_access_token'})
df=awaitconn_hunter.query("email_count", domain='intercom.io')
df.drop('total', axis=1)
junior
senior
executive
0
0
2
2
Finance
Finnhub -- Collect Financial, Market, Economic Data
How to get a list of cryptocurrencies and their exchanges
importpandasaspdfromdataprep.connectorimportconnect# You can get ”finnhub_access_token“ by following https://finnhub.io/conn_finnhub=connect("finnhub", _auth={"access_token":finnhub_access_token}, update=True)
df=awaitconn_finnhub.query('crypto_exchange')
exchanges=df['exchange'].to_list()
symbols= []
forexinexchanges:
data=awaitdf.query('crypto_symbols', exchange=ex)
symbols.append(data)
df_symbols=pd.concat(symbols)
df_symbols
id
description
displaySymbol
symbol
0
Binance FRONT/ETH
FRONT/ETH
BINANCE:FRONTETH
1
Binance ATOM/BUSD
ATOM/BUSD
BINANCE:ATOMBUSD
...
...
...
...
281
Poloniex AKRO/BTC
AKRO/BTC
POLONIEX:BTC_AKRO
Which ipo in the current month has the highest total share values?
importcalendarfromdatetimeimportdatetimefromdataprep.connectorimportconnect# You can get ”finnhub_access_token“ by following https://finnhub.io/conn_finnhub=connect("finnhub", _auth={"access_token":finnhub_access_token}, update=True)
today=datetime.today()
days_in_month=calendar.monthrange(today.year, today.month)[1]
date_from=today.replace(day=1).strftime('%Y-%m-%d')
date_to=today.replace(day=days_in_month).strftime('%Y-%m-%d')
ipo_df=awaitconn_finnhub.query('ipo_calender', from_=date_from, to=date_to)
ipo_df[ipo_df['totalSharesValue'] ==ipo_df['totalSharesValue'].max()]
id
date
exchange
name
numberOfShares
...
totalSharesValue
5
2021-02-03
NYSE
TELUS International (Cda) Inc.
33333333
...
9.58333e+08
What are the average acutal earnings from the last 4 seasons of a list of 10 popular stocks?
importasyncioimportpandasaspdfromdataprep.connectorimportconnect# You can get ”finnhub_access_token“ by following https://finnhub.io/conn_finnhub=connect("finnhub", _auth={"access_token":finnhub_access_token}, update=True)
stock_list= ['TSLA', 'AAPL', 'WMT', 'GOOGL', 'FB', 'MSFT', 'COST', 'NVDA', 'JPM', 'AMZN']
query_list= [conn_finnhub.query('earnings', symbol=symbol) forsymbolinstock_list]
query_results=asyncio.gather(*query_list)
stocks_df=pd.concat(awaitquery_results)
stocks_df=stocks_df.groupby('symbol', as_index=False).agg({'actual': ['mean']})
stocks_df.columns=stocks_df.columns.get_level_values(0)
stocks_df=stocks_df.sort_values(by='actual', ascending=False).rename(columns={'actual': 'avg_actual'})
stocks_df.reset_index(drop=True)
id
symbol
avg_actual
0
GOOGL
12.9375
1
AMZN
8.5375
2
FB
2.4475
..
...
...
9
TSLA
0.556
What is the earnings of last 4 quarters of a given company? (e.g. TSLA)
fromdataprep.connectorimportconnectfromdatetimeimportdatetime, timedelta, timezone# You can get ”finnhub_access_token“ by following https://finnhub.io/conn_finnhub=connect("finnhub", _auth={"access_token":finnhub_access_token}, update=True)
today=datetime.now(tz=timezone.utc)
oneyear=today-timedelta(days=365)
start=int(round(oneyear.timestamp()))
result=awaitconn_finnhub.query('earnings_calender', symbol='TSLA', from_=start, to=today)
result=result.set_index('date')
result
id
date
epsActual
epsEstimate
hour
quarter
...
symbol
year
0
2021-01-27
0.8
1.37675
amc
4
...
TSLA
2020
1
2020-10-21
0.76
0.600301
amc
3
...
TSLA
2020
2
2020-07-22
0.436
-0.0267036
amc
2
...
TSLA
2020
..
...
...
...
...
...
...
...
...
3
2011-02-15
-0.094
-0.101592
amc
4
...
TSLA
2010
Geocoding
MapQuest -- Collect Driving Directions, Maps, Traffic Data
Where is the Simon Fraser University? Give all the places if there is more than one campus.
fromdataprep.connectorimportconnect# You can get ”mapquest_access_token“ by following https://developer.mapquest.com/conn_map=connect("mapquest", _auth={"access_token": mapquest_access_token}, _concurrency=10)
BC_BBOX="-139.06,48.30,-114.03,60.00"campus=awaitconn_map.query("place", q="Simon Fraser University", sort="relevance", bbox=BC_BBOX, _count=50)
campus=campus[campus["name"] =="Simon Fraser University"].reset_index()
id
index
name
country
state
city
address
postalCode
coordinates
details
0
0
Simon Fraser University
CA
BC
Burnaby
8888 University Drive E
V5A 1S6
[-122.90416, 49.27647]
...
1
2
Simon Fraser University
CA
BC
Vancouver
602 Hastings St W
V6B 1P2
[-123.113431, 49.284626]
...
How many KFC are there in Burnaby? What are their address?
fromdataprep.connectorimportconnect# You can get ”mapquest_access_token“ by following https://developer.mapquest.com/conn_map=connect("mapquest", _auth={"access_token": mapquest_access_token}, _concurrency=10)
BC_BBOX="-139.06,48.30,-114.03,60.00"kfc=awaitconn_map.query("place", q="KFC", sort="relevance", bbox=BC_BBOX, _count=500)
kfc=kfc[(kfc["name"] =="KFC") & (kfc["city"] =="Burnaby")].reset_index()
print("There are %d KFCs in Burnaby"%len(kfc))
print("Their addresses are:")
kfc['address']
There are 1 KFCs in Burnaby
Their addresses are:
id
address
0
5094 Kingsway
The ratio of Starbucks to Tim Hortons in Vancouver?
fromdataprep.connectorimportconnect# You can get ”mapquest_access_token“ by following https://developer.mapquest.com/conn_map=connect("mapquest", _auth={"access_token": mapquest_access_token}, _concurrency=10)
VAN_BBOX='-123.27,49.195,-123.020,49.315'starbucks=awaitconn_map.query('place', q='starbucks', sort='relevance', bbox=VAN_BBOX, page='1', pageSize='50', _count=200)
timmys=awaitconn_map.query('place', q='Tim Hortons', sort='relevance', bbox=VAN_BBOX, page='1', pageSize='50', _count=200)
is_vancouver_sb=starbucks['city'] =='Vancouver'is_vancouver_tim=timmys['city'] =='Vancouver'sb_in_van=starbucks[is_vancouver_sb]
tim_in_van=timmys[is_vancouver_tim]
print('The ratio of Starbucks:Tim Hortons in Vancouver is %d:%d'% (len(sb_in_van), len(tim_in_van)))
The ratio of Starbucks:Tim Hortons in Vancouver is 188:120
What is the closest gas station from Metropolist and how far is it?
fromdataprep.connectorimportconnectfromnumpyimportradians, sin, cos, arctan2, sqrtdefdistance_in_km(cord1, cord2):
R=6373.0lat1=radians(cord1[1])
lon1=radians(cord1[0])
lat2=radians(cord2[1])
lon2=radians(cord2[0])
dlon=lon2-lon1dlat=lat2-lat1a=sin(dlat/2)**2+cos(lat1) *cos(lat2) *sin(dlon/2)**2c=2*arctan2(sqrt(a), sqrt(1-a))
distance=R*creturn(distance)
# You can get ”mapquest_access_token“ by following https://developer.mapquest.com/conn_map=connect("mapquest", _auth={"access_token": mapquest_access_token}, _concurrency=10)
METRO_TOWN= [-122.9987, 49.2250]
METRO_TOWN_string='%f,%f'% (METRO_TOWN[0], METRO_TOWN[1])
nearest_petro=awaitconn_map.query('place', q='gas station', sort='distance', location=METRO_TOWN_string, page='1', pageSize='1')
print('Metropolist is %fkm from the nearest gas station'%distance_in_km(METRO_TOWN, nearest_petro['coordinates'][0]))
print('The gas station is %s at %s'% (nearest_petro['name'][0], nearest_petro['address'][0]))
Metropolist is 0.376580km from the nearest gas station
The gas station is Chevron at 4692 Imperial St
In BC, which city has the most amount of shopping centers?
fromdataprep.connectorimportconnect# You can get ”mapquest_access_token“ by following https://developer.mapquest.com/conn_map=connect("mapquest", _auth={"access_token": mapquest_access_token}, _concurrency=10)
BC_BBOX="-139.06,48.30,-114.03,60.00"GROCERY='sic:541105'shop_list=awaitconn_map.query("place", sort="relevance", bbox=BC_BBOX, category=GROCERY, _count=500)
shop_list=shop_list[shop_list["state"] =="BC"]
shop_list.groupby('city')['name'].count().sort_values(ascending=False).head(10)
city
count
Vancouver
42
Victoria
24
Surrey
15
Burnaby
14
...
...
North Vancouver
8
Where is the nearest grocery of SFU? How many miles far? And how much time estimated for driving?
fromdataprep.connectorimportconnect# You can get ”mapquest_access_token“ by following https://developer.mapquest.com/conn_map=connect("mapquest", _auth={"access_token": mapquest_access_token}, _concurrency=10)
SFU_LOC='-122.90416, 49.27647'GROCERY='sic:541105'nearest_grocery=awaitconn_map.query("place", location=SFU_LOC, sort="distance", category=GROCERY)
destination=nearest_grocery.iloc[0]['details']
name=nearest_grocery.iloc[0]['name']
route=awaitconn_map.query("route", from_='8888 University Drive E, Burnaby', to=destination)
total_distance=sum([float(i)foriinroute.iloc[:]['distance']])
total_time=sum([int(i)foriinroute.iloc[:]['time']])
print('The nearest grocery of SFU is '+name+'. It is '+str(total_distance) +' miles far, and It is expected to take '+str(total_time//60) +'m'+str(total_time%60)+'s of driving.')
route
The nearest grocery of SFU is Nesters Market. It is 1.234 miles far, and It is expected to take 3m21s of driving.
id
index
narrative
distance
time
0
0
Start out going east on University Dr toward Arts Rd.
0.348
57
1
1
Turn left to stay on University Dr.
0.606
84
2
2
Enter next roundabout and take the 1st exit onto University High St.
0.28
60
3
3
9000 UNIVERSITY HIGH STREET is on the left.
0
0
Lifestyle
Spoonacular -- Collect Recipe, Food, and Nutritional Information Data
Which foods are unhealthy, i.e.,have high carbs and high fat content?
fromdataprep.connectorimportconnectimportpandasaspddc=connect('spoonacular', _auth={'access_token': API_key}, concurrency=3, update=True)
df=awaitdc.query('recipes_by_nutrients', minFat=65, maxFat=100, minCarbs=75, maxCarbs=100, _count=20)
df["calories"] =pd.to_numeric(df["calories"]) # convert string type to numericdf=df[df['calories']>1100] # considering foods with more than 1100 calories per serving to be unhealthydf[["title","calories","fat","carbs"]].sort_values(by=['calories'], ascending=False)
id
title
calories
fat
carbs
2
Brownie Chocolate Chip Cheesecake
1210
92g
79g
8
Potato-Cheese Pie
1208
80g
96g
0
Stuffed Shells with Beef and Broc
1192
72g
81g
3
Coconut Crusted Rockfish
1187
72g
92g
4
Grilled Ratatouille
1143
82g
88g
7
Pecan Bars
1121
84g
91g
Which meat dishes are rich in proteins?
fromdataprep.connectorimportconnectdc=connect('spoonacular', _auth={'access_token': API_key}, concurrency=3, update=True)
df=awaitdc.query('recipes', query='beef', diet='keto', minProtein=25, maxProtein=60, _count=5)
df=df[["title","nutrients"]]
# Output of 'nutrients' column : [{'title': 'Protein', 'amount': 22.3768, 'unit': 'g'}]g= [] # to extract the exact amount of Proteins in grams and store as listforiindf["nutrients"]:
z=i[0]
g.append(z['amount'])
df.insert(1,'Protein(g)',g)
df[["title","Protein(g)"]].sort_values(by='Protein(g)',ascending=False)
fromdataprep.connectorimportconnect# You can get ”musixmatch_access_token“ by registering as a developer https://developer.musixmatch.com/signupconn_musixmatch=connect("musixmatch", _auth={"access_token":musixmatch_access_token})
df=awaitconn_musixmatch.query("artist_info", artist_mbid="122d63fc-8671-43e4-9752-34e846d62a9c")
df[['name', 'twitter_url']]
fromdataprep.connectorimportconnect# You can get ”musixmatch_access_token“ by registering as a developer https://developer.musixmatch.com/signupconn_musixmatch=connect("musixmatch", _auth={"access_token":musixmatch_access_token})
df=awaitconn_musixmatch.query("track_matches", q_track="Gone, Gone, Gone")
df[['name', 'album_name']]
name
album_name
0
Gone, Gone, Gone
The World From the Side of the Moon
Which artist/artists group is most popular in Canada?
fromdataprep.connectorimportconnect# You can get ”musixmatch_access_token“ by registering as a developer https://developer.musixmatch.com/signupconn_musixmatch=connect("musixmatch", _auth={"access_token":musixmatch_access_token})
df=awaitconn_musixmatch.query("top_artists", country="Canada")
df['name'][0]
'BTS'
How many genres are in the Musixmatch database?
fromdataprep.connectorimportconnect# You can get ”musixmatch_access_token“ by registering as a developer https://developer.musixmatch.com/signupconn_musixmatch=connect("musixmatch", _auth={"access_token":musixmatch_access_token})
df=awaitconn_musixmatch.query("genres")
len(df)
362
Who is the most popular American artist named Michael?
fromdataprep.connectorimportconnect# You can get ”musixmatch_access_token“ by registering as a developer https://developer.musixmatch.com/signupconn_musixmatch=connect("musixmatch", _auth={"access_token":musixmatch_access_token}, _concurrency=5)
df=awaitconn_musixmatch.query("artists", q_artist="Michael")
df=df[df['country'] =="US"].sort_values('rating', ascending=False)
df['name'].iloc[0]
'Michael Jackson'
What is the genre of the album "Atlas"?
fromdataprep.connectorimportconnect# You can get ”musixmatch_access_token“ by registering as a developer https://developer.musixmatch.com/signupconn_musixmatch=connect("musixmatch", _auth={"access_token":musixmatch_access_token})
album=awaitconn_musixmatch.query("album_info", album_id=11339785)
genres=awaitconn_musixmatch.query("genres")
album_genre=genres[genres['id'] ==album['genre_id'][0][0]]['name']
album_genre.iloc[0]
'Soundtrack'
What is the link to lyrics of the most popular song in the album "Yellow"?
fromdataprep.connectorimportconnect# You can get ”musixmatch_access_token“ by registering as a developer https://developer.musixmatch.com/signupconn_musixmatch=connect("musixmatch", _auth={"access_token":musixmatch_access_token}, _concurrency=5)
df=awaitconn_musixmatch.query("album_tracks", album_id=10266231)
df=df.sort_values('rating', ascending=False)
df['track_share_url'].iloc[0]
What are Lady Gaga's albums from most to least recent?
fromdataprep.connectorimportconnect# You can get ”musixmatch_access_token“ by registering as a developer https://developer.musixmatch.com/signupconn_musixmatch=connect("musixmatch", _auth={"access_token":musixmatch_access_token}, update=True)
df=awaitconn_musixmatch.query("artist_albums", artist_mbid="650e7db6-b795-4eb5-a702-5ea2fc46c848", s_release_date="desc")
df.name.unique()
array(['Chromatica', 'Stupid Love',
'A Star Is Born (Original Motion Picture Soundtrack)', 'Your Song'],
dtype=object)
Which artists are similar to Lady Gaga?
fromdataprep.connectorimportconnect# You can get ”musixmatch_access_token“ by registering as a developer https://developer.musixmatch.com/signupconn_musixmatch=connect("musixmatch", _auth={"access_token":musixmatch_access_token})
df=awaitconn_musixmatch.query("related_artists", artist_mbid="650e7db6-b795-4eb5-a702-5ea2fc46c848")
df
What are the highest rated songs in Canada from highest to lowest popularity?
fromdataprep.connectorimportconnect# You can get ”musixmatch_access_token“ by registering as a developer https://developer.musixmatch.com/signupconn_musixmatch=connect("musixmatch", _auth={"access_token":musixmatch_access_token}, _concurrency=5)
df=awaitconn_musixmatch.query("top_tracks", country='CA')
df[df['is_explicit'] ==0].sort_values('rating', ascending=False).reset_index()
What are other songs in the same album as the song "Before You Go"?
fromdataprep.connectorimportconnect# You can get ”musixmatch_access_token“ by registering as a developer https://developer.musixmatch.com/signupconn_musixmatch=connect("musixmatch", _auth={"access_token":musixmatch_access_token})
song=awaitconn_musixmatch.query("track_info", commontrack_id=103153140)
album=awaitconn_musixmatch.query("album_tracks", album_id=song["album_id"][0])
album
Spotify -- Collect Albums, Artists, and Tracks Metadata
How many followers does Eminem have?
fromdataprep.connectorimportconnect# You can get ”spotify_client_id“ and "spotify_client_secret" by registering as a developer https://developer.spotify.com/dashboard/#conn_spotify=connect("spotify", _auth={"client_id":spotify_client_id, "client_secret":spotify_client_secret}, _concurrency=3)
df=awaitconn_spotify.query("artist", q="Eminem", _count=500)
df.loc[df['# followers'].idxmax(), '# followers']
41157398
How many singles does Pink Floyd have that are available in Canada?
fromdataprep.connectorimportconnect# You can get ”spotify_client_id“ and "spotify_client_secret" by registering as a developer https://developer.spotify.com/dashboard/#conn_spotify=connect("spotify", _auth={"client_id":spotify_client_id, "client_secret":spotify_client_secret}, _concurrency=3)
artist_name="Pink Floyd"df=awaitconn_spotify.query("album", q=artist_name, _count=500)
df=df.loc[[(artist_nameinx) forxindf['artist']]]
df=df.loc[[('CA'inx) forxindf['available_markets']]]
df=df.loc[df['total_tracks'] =='1']
df.shape[0]
12
In the last quarter of 2020, which artist released the album with the most tracks?
fromdataprep.connectorimportconnectimportpandasaspd# You can get ”spotify_client_id“ and "spotify_client_secret" by registering as a developer https://developer.spotify.com/dashboard/#conn_spotify=connect("spotify", _auth={"client_id":spotify_client_id, "client_secret":spotify_client_secret}, _concurrency=3)
df=awaitconn_spotify.query("album", q="2020", _count=500)
df['date'] =pd.to_datetime(df['release_date'])
df=df[df['date'] >'2020-10-01'].drop(columns= ['image url', 'external urls', 'release_date'])
df['total_tracks'] =df['total_tracks'].astype(int)
df=df.loc[df['total_tracks'].idxmax()]
print(df['album_name'] +", by "+df['artist'][0] +", tracks: "+str(df['total_tracks']))
ASOT 996 - A State Of Trance Episode 996 (Top 50 Of 2020 Special), by Armin van Buuren ASOT Radio, tracks: 172
Who is the most popular artist: Eminem, Beyonce, Pink Floyd and Led Zeppelin
# and what are their popularity ratings?fromdataprep.connectorimportconnect# You can get ”spotify_client_id“ and "spotify_client_secret" by registering as a developer https://developer.spotify.com/dashboard/#conn_spotify=connect("spotify", _auth={"client_id":spotify_client_id, "client_secret":spotify_client_secret}, _concurrency=3)
artists_and_num_followers= []
forartistin ['Beyonce', 'Pink Floyd', 'Eminem', 'Led Zeppelin']:
df=awaitconn_spotify.query("artist", q=artist, _count=500)
num_followers=df.loc[df['# followers'].idxmax(), 'popularity']
artists_and_num_followers.append((artist, num_followers))
print(sorted(artists_and_num_followers, key=lambdax: x[1], reverse=True))
Who is the author of article 'Yellen Outlines Economic Priorities, and Republicans Draw Battle Lines'
fromdataprep.connectorimportconnect# You can get ”times_access_token“ by following https://developer.nytimes.com/apisconn_times=connect("times", _auth={"access_token":times_access_token})
df=awaitconn_times.query('ac',q='Yellen Outlines Economic Priorities, and Republicans Draw Battle Lines')
df[["authors"]]
id
authors
0
By Alan Rappeport
What is the newest news from Ottawa
fromdataprep.connectorimportconnect# You can get ”times_access_token“ by following https://developer.nytimes.com/apisconn_times=connect("times", _auth={"access_token":times_access_token})
df=awaitconn_times.query('ac',q="ottawa",sort='newest')
df[['headline','authors','abstract','url','pub_date']].head(1)
headline
...
pub_date
0
21 Men Accuse Lincoln Project Co-Founder of Online Harassment
...
2021-01-31T14:48:35+0000
What are Headlines of articles where Trump was mentioned in the last 6 months of 2020 in the technology news section
fromdataprep.connectorimportconnect# You can get ”times_access_token“ by following https://developer.nytimes.com/apisconn_times=connect("times", _auth={"access_token":times_access_token})
df=awaitconn_times.query('ac',q="Trump",fq='section_name:("technology")',begin_date='20200630',end_date='20201231',sort='newest', _count=50)
print(df['headline'])
print("Trump was mentioned in "+str(len(df)) +" articles")
id
headline
0
No, Trump cannot win Georgia’s electoral votes through a write-in Senate campaign.
1
How Misinformation ‘Superspreaders’ Seed False Election Theories
2
No, Trump’s sister did not publicly back him. He was duped by a fake account.
..
...
49
Trump Official’s Tweet, and Its Removal, Set Off Flurry of Anti-Mask Posts
Trump was mentioned in 50 articles
What is the ranking of times a celebrity is mentioned in a headline in latter half of 2020?
fromdataprep.connectorimportconnectimportpandasaspd# You can get ”times_access_token“ by following https://developer.nytimes.com/apisconn_times=connect("times", _auth={"access_token":times_access_token})
celeb_list= ['Katy Perry', 'Taylor Swift', 'Lady Gaga', 'BTS', 'Rihanna', 'Kim Kardashian']
number_of_mentions= []
foriinceleb_list:
df1=awaitconn_times.query('ac',q=i,begin_date='20200630',end_date='20201231')
df1=df1[df1['headline'].str.contains(i)]
a=len(df1['headline'])
number_of_mentions.append(a)
print(number_of_mentions)
ranking_df=pd.DataFrame({'name': celeb_list, 'number of mentions': number_of_mentions})
ranking_df=ranking_df.sort_values(by=['number of mentions'], ascending=False)
ranking_df
What are the products I can get when I search for "winter jackets"?
fromdataprep.connectorimportconnect# You can get ”etsy_access_key“ by following https://www.etsy.com/developers/documentation/getting_started/oauthconn_etsy=connect("etsy", _auth={'access_token': etsy_access_key}, _concurrency=5)
# Item searchdf=awaitconn_etsy.query("items", keywords="winter jackets")
df[['title',"url","description","price","currency"]]
~~ Welcome to our shop ~~\n\nSet include:\n1 Vin...
52.00
SGD
1
What's the favorites for the shop “CrazedGaming”?
fromdataprep.connectorimportconnect# You can get ”etsy_access_key“ by following https://www.etsy.com/developers/documentation/getting_started/oauthconn_etsy=connect("etsy", _auth={'access_token': etsy_access_key}, _concurrency=5)
# Shop searchdf=awaitconn_etsy.query("shops", shop_name="CrazedGaming", _count=1)
df[["name", "url", "favorites"]]
What are the top 10 custom photo pillows ranked by number of favorites?
fromdataprep.connectorimportconnect# You can get ”etsy_access_key“ by following https://www.etsy.com/developers/documentation/getting_started/oauthconn_etsy=connect("etsy", _auth= {"access_token": etsy_access_key}, _concurrency=5)
# Item search sort by favoritesdf_cp_pillow=awaitconn_etsy.query("items", keywords="custom photo pillow", _count=7000)
df_cp_pillow=df_cp_pillow.sort_values(by= ['favorites'], ascending=False)
df_top10_cp_pillow=df_cp_pillow.iloc[:10]
df_top10_cp_pillow[['title', 'price', 'currency', 'favorites', 'quantity']]
id
title
price
currency
favorites
quantity
68
Custom Pet Photo Pillow, Valentines Day Gift, ...
29.99
USD
9619.0
320.0
193
Custom Shaped Dog Photo Pillow Personalized Mo...
29.99
USD
5523.0
941.0
374
Custom PILLOW Pet Portrait - Pet Portrait Pill...
49.95
USD
5007.0
74.0
196
Personalized Cat Pillow Mothers Day Gift for M...
29.99
USD
3839.0
939.0
69
Photo Sequin Pillow Case, Personalized Sequin ...
25.49
USD
3662.0
675.0
637
Family photo sequin pillow | custom image reve...
28.50
USD
3272.0
540.0
44
Custom Pet Pillow Custom Cat Pillow best cat l...
20.95
USD
2886.0
14.0
646
Sequin Pillow with Photo Personalized Photo Re...
32.00
USD
2823.0
1432.0
633
Personalized Name Pillow, Baby shower gift, Ba...
16.00
USD
2511.0
6.0
4416
Letter C pillow Custom letter Alphabet pillow ...
24.00
USD
2284.0
4.0
What are the prices of active products for quantities (>10) for a particular searched keyword "blue 2021 weekly spiral planner"?
fromdataprep.connectorimportconnect# You can get ”etsy_access_key“ by following https://www.etsy.com/developers/documentation/getting_started/oauthconn_etsy=connect("etsy", _auth={'access_token': etsy_access_key}, _concurrency=5)
# Item search and filtersplanner_df=awaitconn_etsy.query("items", keywords="blue 2021 weekly spiral planner", _count=100)
result_df=planner_df[((planner_df['state'] =='active') & (planner_df['quantity'] >10))]
result_df
What's the average price for blue denim frayed jacket on Etsy selling in USD currency?
fromdataprep.connectorimportconnect# You can get ”etsy_access_key“ by following https://www.etsy.com/developers/documentation/getting_started/oauthconn_etsy=connect("etsy", _auth= {"access_token": etsy_access_key}, _concurrency=5)
# Item search and filters df_dbfjacket=awaitconn_etsy.query("items", keywords="blue denim frayed jacket", _count=500)
df_dbfjacket=df_dbfjacket[df_dbfjacket['currency'] =='USD'].astype(float)
# Calculate average priceaverage_price=round(df_dbfjacket['price'].mean(), 2)
print("The average price for blue denim frayed jacket is: $", average_price)
The average price for blue denim frayed jacket is: $ 58.82
What are the top 10 viewed for keyword “ceramic wind chimes” with a given word “handmade” present in the description?
fromdataprep.connectorimportconnect# You can get ”etsy_access_key“ by following https://www.etsy.com/developers/documentation/getting_started/oauthconn_etsy=connect("etsy", _auth= {"access_token": etsy_access_key}, _concurrency=5)
# Item searchdf=awaitconn_etsy.query("items", keywords="ceramic wind chimes", _count=2000)
# Filter and sortingdf=df[(df["description"].str.contains('handmade'))]
new_df=df[["title", "url", "views"]]
new_df.sort_values(by="views", ascending=False).reset_index(drop=True).head(10)
Twitch -- Collect Twitch Streams and Channels Information
How many followers does the Twitch user "Logic" have?
fromdataprep.connectorimportconnect# You can get ”twitch_access_token“ by registering https://www.twitch.tv/signupconn_twitch=connect("twitch", _auth={"access_token":twitch_access_token}, _concurrency=3)
df=awaitconn_twitch.query("channels", query="logic", _count=1000)
df=df.where(df['name'] =='logic').dropna()
df=df[['name', 'followers']]
df.reset_index()
index
name
followers
0
0
logic
540274.0
Which 5 Twitch users that speak English have the most views and what games do they play?
fromdataprep.connectorimportconnect# You can get ”twitch_access_token“ by registering https://www.twitch.tv/signupconn_twitch=connect("twitch", _auth={"access_token":twitch_access_token}, _concurrency=3)
df=awaitconn_twitch.query("channels",query="%", _count=1000)
df=df[df['language'] =='en']
df=df.sort_values('views', ascending=False)
df=df[['name', 'views', 'game', 'language']]
df=df.head(5)
df.reset_index()
index
name
views
game
language
0
495
Fextralife
1280705870
The Elder Scrolls Online
en
1
9
Riot Games
1265668908
League of Legends
en
2
16
ESL_CSGO
548559390
Counter-Strike: Global Offensive
en
3
160
BeyondTheSummit
462493560
Dota 2
en
4
1
shroud
433902453
Rust
en
Which channel has the most viewers for each of the top 10 games?
fromdataprep.connectorimportconnect# You can get ”twitch_access_token“ by registering https://www.twitch.tv/signupconn_twitch=connect("twitch", _auth={"access_token":twitch_access_token}, _concurrency=3)
df=awaitconn_twitch.query("streams", query="%", _count=1000)
# Group by games, sum viewers and sort by total viewersdf_new=df.groupby(['game'], as_index=False)['viewers'].agg('sum').rename(columns= {'game':'games', 'viewers':'total_viewers'})
df_new=df_new.sort_values('total_viewers',ascending=False)
# Select the channel with most viewers from each game df_2=df.loc[df.groupby(['game'])['viewers'].idxmax()]
# Select the most popular channels for each of the 10 most popular gamesdf_new=df_new.head(10)['games']
best_games=df_new.tolist()
result_df=df_2[df_2['game'].isin(best_games)]
result_df=result_df.head(10)
result_df=result_df[['game','channel_name', 'viewers']]
result_df.reset_index()
index
game
channel_name
viewers
0
3
seonghwazip
32126
1
21
Call of Duty: Warzone
FaZeBlaze
7521
2
9
Dota 2
dota2mc_ru
16118
3
2
Escape From Tarkov
summit1g
33768
4
15
Fortnite
Fresh
10371
5
8
Hearthstone
SilverName
16765
6
22
Just Chatting
Trainwreckstv
6927
7
0
League of Legends
LCK_Korea
77613
8
10
Minecraft
Tfue
15209
9
11
VALORANT
TenZ
13617
(1) What is the number of Fortnite and Valorant streams in the past 24 hours?
(2) Is there any relationship between viewers and channel followers?
fromdataprep.connectorimportconnectimportpandasaspd# You can get ”twitch_access_token“ by registering https://www.twitch.tv/signupconn_twitch=connect("twitch", _auth= {"access_token":twitch_access_token}, _concurrency=3)
df=awaitconn_twitch.query("streams", query="%fortnite%VALORANT%", _count=1000)
df=df[['stream_created_at', 'game', 'viewers', 'channel_followers']]
df['stream_created_at'] =df['stream_created_at'].astype('str') # Convert date to stringforidx, valueinenumerate(df['stream_created_at']):
df.loc[idx,'stream_created_at'] =value[0:9] +' '+value[-9:-1] # Extract datetimedf['stream_created_at'] =pd.to_datetime(df['stream_created_at'])
df['diff'] =pd.Timestamp.now().normalize() -df['stream_created_at']
df['diff'] =df['diff'].dt.total_seconds().astype('int')
df2=df[['channel_followers', 'viewers']].corr(method='pearson') # Find correlation (part 2)df=df[df['diff'] >864000] # Find streams in last 24 hoursoptions= ['Fortnite', 'VALORANT']
df=df[df['game'].isin(options)]
df=df.groupby(['game'], as_index=False)['diff'].agg('count').rename(columns={'diff':'count'})
# Print correlation part 2print("Correlation between viewers and channel followers:")
print(df2)
# Print part 1print('Number of streams in the past 24 hours:')
df
Correlation between viewers and channel followers:
channel_followers viewers
channel_followers 1.000000 0.851698
viewers 0.851698 1.000000
What are the 10 latest english tweets by SFU handle (@SFU) ?
fromdataprep.connectorimportconnectdc=connect('twitter', _auth={'client_id':client_id, 'client_secret':client_secret})
# Querying 100 tweets from @SFUdf=awaitdc.query("tweets", _q="from:@SFU -is:retweet", _count=100)
# Filtering english language tweetsdf=df[df['iso_language_code'] =='en'][['created_at', 'text']]
# Displaying latest 10 tweetsdf=df.iloc[0:10,]
print('-----------')
forindex, rowindf.iterrows():
print(row['created_at'], row['text'])
print('-----------')
-----------
Mon Feb 01 23:59:16 +0000 2021 Thank you to these #SFU student athletes for sharing their insights. #BlackHistoryMonth2021 https://t.co/WGCvGrQOzu
-----------
Mon Feb 01 23:00:56 +0000 2021 How can #SFU address issues of inclusion & access for #Indigenous students & work with them to support their educat… https://t.co/knEM0SSHYu
-----------
Mon Feb 01 21:37:30 +0000 2021 DYK: New #SFU research shows media gender bias; men are quoted 3 times more often than women. #GenderGapTracker loo… https://t.co/c77PsNUIqV
-----------
Mon Feb 01 19:55:03 +0000 2021 With the temperatures dropping, how will you keep warm this winter? Check out our tips on what to wear (and footwea… https://t.co/EOCuYbio4P
-----------
Mon Feb 01 18:06:49 +0000 2021 COVID-19 has affected different groups in unique ways. #SFU researchers looked at the stresses facing “younger” old… https://t.co/gMvcxOlWvb
-----------
Mon Feb 01 16:18:51 +0000 2021 Please follow @TransLink for updates. https://t.co/nQDZQ5JYlt
-----------
Fri Jan 29 23:00:02 +0000 2021 #SFU researchers Caroline Colijn and Paul Tupper performed a modelling exercise to see if screening with rapid test… https://t.co/07aU3SP0j2
-----------
Fri Jan 29 19:01:32 +0000 2021 un/settled, a towering photo-poetic piece at #SFU's Belzberg Library, aims to centre Blackness & celebrate Black th… https://t.co/F6kp0Lwu5A
-----------
Fri Jan 29 17:02:34 +0000 2021 Learning that it’s okay to ask for help is an important part of self-care—and so is recognizing when you don't have… https://t.co/QARn1CRLyp
-----------
Fri Jan 29 00:44:11 +0000 2021 @shashjayy @shashjayy Hi Shashwat, I've spoken to my colleagues in Admissions. They're looking into it and will respond to you directly.
-----------
What are top 10 users based on retweet count ?
fromdataprep.connectorimportconnectdc=connect('twitter', _auth={'client_id':client_id, 'client_secret':client_secret})
# Querying 1000 retweets and filtering only english language tweetsdf=awaitdc.query("tweets", q='RT AND is:retweet', _count=1000)
df=df[df['iso_language_code'] =='en']
# Iterating over tweets to get users and Retweet Countretweets= {}
forindex, rowindf.iterrows():
ifrow['text'].startswith('RT'):
# Eg. tweet 'RT @Crazyhotboye: NMS?\nLeveled up to 80' user_retweeted=row['text'][4:row['text'].find(':')]
ifuser_retweetedinretweets:
retweets[user_retweeted] +=1else:
retweets[user_retweeted] =1# Sorting and displaying top 10 userscols= ['User', 'RT_Count']
retweets_df=pd.DataFrame(list(retweets.items()), columns=cols)
retweets_df=retweets_df.sort_values(by=['RT_Count'], ascending=False).reset_index(drop=True).iloc[0:10,:]
retweets_df
id
User
RT_Count
0
John_Greed
195
1
uEatCrayons
85
2
Demo2020cracy
78
3
store_pup
75
4
miknitem_oasis
61
5
MarkCrypto23
54
6
realmamivee
52
7
trailblazers
50
8
devilsvalentine
40
9
SharingforCari1
38
What are the trending topics (Top 10) in twitter now based on hashtags count?
fromdataprep.connectorimportconnectimportpandasaspdimportjsondc=connect('twitter', _auth={'client_id':client_id, 'client_secret':client_secret})
pd.options.mode.chained_assignment=Nonedf=awaitdc.query("tweets", q=False, _count=2000)
defextract_tags(tags):
tags_tolist=json.loads(tags.replace("'", '"'))
only_tag= [str(t['text']) fortintags_tolist]
returnonly_tag# remove tweets which do not have hashtaghas_hashtags=df[df['hashtags'].str.len() >2]
# only 'en' tweets are our interestshas_hashtags=has_hashtags[has_hashtags['iso_language_code'] =='en']
has_hashtags['tag_list'] =has_hashtags['hashtags'].apply(lambdat: extract_tags(t))
tags_and_text=has_hashtags[['text','tag_list']]
tag_count=tags_and_text.explode('tag_list').groupby(['tag_list']).agg(tag_count=('tag_list', 'count'))
# remove tag with only one occurencetag_count=tag_count[tag_count['tag_count'] >1]
tag_count=tag_count.sort_values(by=['tag_count'], ascending=False).reset_index()
# Top 10 hashtagstag_count=tag_count.iloc[0:10,:]
tag_count