Examples
Here are some non-obvious usage examples that might be useful. For basic usage of concrete classes, see the docstrings of the scraping classes on the API page.
Grouping climbs by stages
from pprint import pprint
from procyclingstats import Race, RaceClimbs, Stage
# Any valid stage race URL can be substituted for RACE_URL.
RACE_URL = "race/tour-de-france/2022"

race = Race(f"{RACE_URL}/overview")
race_climbs = RaceClimbs(f"{RACE_URL}/route/climbs")
stages = race.stages()
climbs_table = race_climbs.climbs()

# Index the race climbs by URL so that each stage's climbs can be looked up
# directly in the full climbs table.
climbs = {}
for climb in climbs_table:
    climbs[climb['climb_url']] = climb

# Map every stage URL to the list of full climb rows belonging to that stage.
stages_climbs = {}
for stage_info in stages:
    stage_url = stage_info['stage_url']
    stages_climbs[stage_url] = [
        climbs[stage_climb['climb_url']]
        for stage_climb in Stage(stage_url).climbs()
    ]

pprint(stages_climbs)
Async usage
# Example of using the procyclingstats package asynchronously. The third-party
# "requests_futures" package is needed to run the example.
import time
from concurrent.futures import as_completed
from pprint import pprint
from requests_futures.sessions import FuturesSession
from procyclingstats import Ranking, Rider
def main():
    """
    Fetch the individual season ranking, look up rider heights with both
    approaches and pretty-print the heights obtained with concurrent requests.
    """
    ranking_page = Ranking("rankings/me/individual-season")
    season_ranking = ranking_page.individual_ranking()

    # heights of the first 50 ranked riders, requested concurrently
    concurrent_heights = ranking_heights_future(season_ranking)
    # the same lookup done one request at a time; the result is not printed,
    # the call is kept so that its elapsed time is reported for comparison
    sequential_heights = ranking_heights(season_ranking)

    pprint(concurrent_heights)
def ranking_heights_future(ranking):
    """
    Get heights of the first 50 riders of a ranking using concurrent requests.

    Requests to all rider pages are started up front through a
    ``FuturesSession`` and the responses are then consumed in the order the
    requests were submitted, so the returned dict follows the ranking order.
    The elapsed time is printed before returning.

    :param ranking: ranking table, each row is indexed with ``'rider_url'``
    :return: dict mapping ``rider.relative_url()`` to ``rider.height()``
    """
    t1 = time.time()
    top_rows = ranking[:50]
    riders_heights = {}
    # use the session as a context manager so it is closed once all
    # responses have been consumed
    with FuturesSession() as session:
        # create an absolute URL from rider_url of every row and start
        # making requests to all rider pages without waiting for responses
        future_responses = [
            session.get("https://www.procyclingstats.com/" + row['rider_url'])
            for row in top_rows
        ]
        # create rider objects from obtained HTMLs and store their heights,
        # make sure you don't iterate with
        # concurrent.futures.as_completed(future_responses) here, because in
        # that case the order of riders won't be preserved
        for row, future_response in zip(top_rows, future_responses):
            html = future_response.result().text
            rider = Rider(row['rider_url'], html=html, update_html=False)
            riders_heights[rider.relative_url()] = rider.height()
    print("With requests_futures package:", time.time() - t1)
    return riders_heights
def ranking_heights(ranking):
    """
    Get heights of the first 50 riders of a ranking, one request at a time.

    A ``Rider`` object is created for every row in turn and the elapsed time
    is printed before returning.

    :param ranking: ranking table, each row is indexed with ``'rider_url'``
    :return: dict mapping ``rider.relative_url()`` to ``rider.height()``
    """
    started_at = time.time()
    heights_by_url = {}
    for entry in ranking[:50]:
        rider = Rider(entry['rider_url'])
        height = rider.height()
        heights_by_url[rider.relative_url()] = height
    elapsed = time.time() - started_at
    print("Without requests_futures package:", elapsed)
    return heights_by_url
# run the comparison only when the file is executed as a script
if __name__ == "__main__":
    main()