Examples

Here are some non-obvious usage examples that might be useful. For basic usage of individual classes, see the docstrings of the scraping classes on the API page.

Grouping climbs by stages

from pprint import pprint

from procyclingstats import Race, RaceClimbs, Stage

# RACE_URL can be replaced with any valid stage race URL
RACE_URL = "race/tour-de-france/2022"
race = Race(f"{RACE_URL}/overview")
race_climbs = RaceClimbs(f"{RACE_URL}/route/climbs")

stages = race.stages()
# index the rows of the race climbs table by their climb URL, so a climb
# can be looked up directly
climbs = {}
for climb in race_climbs.climbs():
    climbs[climb['climb_url']] = climb

# for every stage, collect the race climbs table rows of the climbs listed
# on that stage's page, keyed by the stage URL
stages_climbs = {}
for stage_info in stages:
    stage_url = stage_info['stage_url']
    stage_climb_urls = [c['climb_url'] for c in Stage(stage_url).climbs()]
    stages_climbs[stage_url] = [climbs[url] for url in stage_climb_urls]

pprint(stages_climbs)

Async usage

# Example of using the procyclingstats package asynchronously. The third-party
# "requests_futures" package is needed to run the example.
import time
from concurrent.futures import as_completed
from pprint import pprint

from requests_futures.sessions import FuturesSession

from procyclingstats import Ranking, Rider


def main():
    """Fetch rider heights asynchronously and synchronously for comparison.

    Loads the individual season ranking, runs both height-collecting
    helpers on it (each prints its own elapsed time) and pretty-prints the
    heights returned by the asynchronous helper.
    """
    ranking = Ranking("rankings/me/individual-season").individual_ranking()
    # get heights of first 50 riders from the ranking asynchronously
    futures_heights = ranking_heights_future(ranking)
    # get heights of first 50 riders from the ranking synchronously; the
    # returned dict is not used, the call is made for its timing printout
    ranking_heights(ranking)
    pprint(futures_heights)

def ranking_heights_future(ranking):
    """Get heights of the first 50 riders in the ranking using futures.

    Requests for all rider pages are started through a ``FuturesSession``
    before any response is read. The elapsed time is printed.

    :param ranking: ranking table rows, each having a ``rider_url`` key
    :return: dict mapping each rider's relative URL to the rider's height
    """
    rows = ranking[:50]
    t1 = time.perf_counter()
    riders_heights = {}
    # use the session as a context manager so it is closed when the block
    # is left, even if one of the requests fails
    with FuturesSession() as session:
        # start making requests to all rider pages asynchronously; the
        # absolute URL is created from rider_url in the table
        futures = [
            session.get("https://www.procyclingstats.com/" + row['rider_url'])
            for row in rows
        ]

        # create rider objects from obtained HTMLs and store heights in
        # riders_heights dict, make sure you don't use
        # concurrent.futures.as_completed(futures)
        # in the for loop, because in that case the order of riders won't
        # be preserved
        for row, future in zip(rows, futures):
            html = future.result().text
            rider = Rider(row['rider_url'], html=html, update_html=False)
            riders_heights[rider.relative_url()] = rider.height()
    print("With requests_futures package:", time.perf_counter() - t1)
    return riders_heights
    
def ranking_heights(ranking):
    """Get heights of the first 50 riders in the ranking one by one.

    Every rider is processed in turn inside a plain loop. The elapsed time
    is printed.

    :param ranking: ranking table rows, each having a ``rider_url`` key
    :return: dict mapping each rider's relative URL to the rider's height
    """
    # perf_counter is a monotonic clock meant for measuring durations
    t1 = time.perf_counter()
    riders_heights = {}
    for row in ranking[:50]:
        # the rider is created from its URL alone, without pre-fetched HTML
        rider = Rider(row['rider_url'])
        riders_heights[rider.relative_url()] = rider.height()
    print("Without requests_futures package:", time.perf_counter() - t1)
    return riders_heights

# run the comparison only when the file is executed as a script
if __name__ == "__main__":
    main()