Datasets Ookla

Download Ookla data


list_ookla_files


def list_ookla_files(
    
)->dict:

Get list of Ookla data files

ookla_files = list_ookla_files()
assert ookla_files.get(OoklaFile("fixed", "2021", "2"), None) is not None

download_ookla_file


def download_ookla_file(
    type_:str, # Internet connection type: 'fixed' or 'mobile'
    year:str, # Year (e.g. '2020')
    quarter:str, # Quarter (valid values: '1','2','3','4')
    directory:str='data/', # Download directory
    overwrite:bool=False, # Overwrite if existing
    show_progress:bool=True, # show progress bar
    chunksize:int=8192, # Download chunksize
    reporthook:NoneType=None, # Use custom progress bar
)->Optional:

Download ookla file to path


parallel_download


def parallel_download(
    item
):

download_ookla_parallel


def download_ookla_parallel(
    num_expected_ookla_files, type_, year, directory, overwrite, show_progress, chunksize, reporthook
):

download_ookla_year_data


def download_ookla_year_data(
    type_, year, cache_dir, use_cache:bool=True, show_progress:bool=True, chunksize:int=8192,
    reporthook:NoneType=None
):

Download Ookla data for a specified type (fixed or mobile) and year. Data for all 4 quarters will be downloaded.


lookup_ookla_file


def lookup_ookla_file(
    filename
):

Get OoklaFile for the given filename

assert lookup_ookla_file("2021-04-01_performance_fixed_tiles.parquet") == OoklaFile(
    "fixed", "2021", "2"
)

compute_datakey


def compute_datakey(
    aoi_bounds, type_, year, return_geometry
):

write_ookla_metajson


def write_ookla_metajson(
    cache_dir, data_key, total_bounds, type_, year, return_geometry
):

OoklaDataManager


def OoklaDataManager(
    cache_dir:str='~/.cache/geowrangler'
):

An instance of this class provides convenience functions for loading and caching Ookla data.


OoklaDataManager.reinitialize_processed_cache


def reinitialize_processed_cache(
    
):

Reinitialize processed_cache_dir to start over from scratch.


OoklaDataManager.reinitialize_aggregated_cache


def reinitialize_aggregated_cache(
    
):

Reinitialize aggregated_cache_dir to start over from scratch.


OoklaDataManager.load_type_year_data


def load_type_year_data(
    aoi:GeoDataFrame, # area of interest
    type_:str, # ookla data type: fixed or mobile
    year:str, # year
    use_cache:bool=True, # use cache dir
    return_geometry:bool=False, # include geometry in returned values
    show_progress:bool=True, # display progress bar
    chunksize:int=8192, # download buffer size
    reporthook:NoneType=None, # custom progress bar
):

Load Ookla data across all quarters for a specified aoi, type (fixed, mobile) and year


OoklaDataManager.aggregate_ookla_features


def aggregate_ookla_features(
    aoi:GeoDataFrame, # Area of interest
    type_:str, # Ookla speed type: 'fixed' or 'mobile'
    year:str, # Year to aggregate (over 4 quarters)
    use_cache:bool=True, # Use cached data in cache dir as specified in ookla_data_manager
    return_geometry:bool=False, # Save aggregated data as geojson
    output_crs:str='epsg:4326', # crs to use in creating aggregated geodataframe
    aggregations:Dict={'mean_avg_d_kbps': ('avg_d_kbps', 'mean'), 'mean_avg_u_kbps': ('avg_u_kbps', 'mean'), 'mean_avg_lat_ms': ('avg_lat_ms', 'mean'), 'mean_num_tests': ('tests', 'mean'), 'mean_num_devices': ('devices', 'mean')}, # Aggregation functions on ookla data (see https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.agg.html)
    show_progress:bool=True, # display progress bar
    chunksize:int=8192, # download buffer size
    reporthook:NoneType=None, # custom progress bar
):

Generates yearly aggregate features for the AOI based on Ookla data for a given type (fixed, mobile) and year.