From e25fbf2e98a900c600ef2804786728f33b30af1c Mon Sep 17 00:00:00 2001 From: Andrey Date: Mon, 29 Apr 2024 13:30:38 +0300 Subject: [PATCH] Add changes. --- .../mooncrawl/leaderboards_generator/cli.py | 2 +- .../mooncrawl/leaderboards_generator/utils.py | 24 +++++++++++++++++++ crawlers/mooncrawl/mooncrawl/settings.py | 2 +- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/crawlers/mooncrawl/mooncrawl/leaderboards_generator/cli.py b/crawlers/mooncrawl/mooncrawl/leaderboards_generator/cli.py index fdec37de..7ad2932e 100644 --- a/crawlers/mooncrawl/mooncrawl/leaderboards_generator/cli.py +++ b/crawlers/mooncrawl/mooncrawl/leaderboards_generator/cli.py @@ -191,7 +191,7 @@ def handle_leaderboards(args: argparse.Namespace) -> None: records = [ {key: to_json_types(value) for key, value in row._asdict().items()} - for row in db_session.execute(text(query["query"]), params).all() + for row in results.all() ] leaderboard_push_api_url = f"{MOONSTREAM_ENGINE_URL}/leaderboard/{leaderboard_id}/scores?normalize_addresses={leaderboard_data['normalize_addresses']}&overwrite=true" diff --git a/crawlers/mooncrawl/mooncrawl/leaderboards_generator/utils.py b/crawlers/mooncrawl/mooncrawl/leaderboards_generator/utils.py index 78cc4c91..311b58c2 100644 --- a/crawlers/mooncrawl/mooncrawl/leaderboards_generator/utils.py +++ b/crawlers/mooncrawl/mooncrawl/leaderboards_generator/utils.py @@ -158,8 +158,13 @@ def get_data_from_url(url): def send_data_to_endpoint(chunks, endpoint_url, headers, timeout=10): for index, chunk in enumerate(chunks): + retry = 0 try: logger.info(f"Pushing chunk {index} to leaderboard API") + # calculate MB size of the chunk + + logger.info(f"Chunk size: {len(json.dumps(chunk))/1024/1024} MB") + response = requests.put( endpoint_url, headers=headers, json=chunk, timeout=timeout ) @@ -169,6 +174,22 @@ def send_data_to_endpoint(chunks, endpoint_url, headers, timeout=10): logger.error( f"Could not push results to leaderboard API: {http_error.response.text} with status code {http_error.response.status_code}" ) + + logger.error("Chunk size is too big, reducing the batch size and retrying") + + while retry < 3: + try: + response = requests.put( + endpoint_url, headers=headers, json=chunk, timeout=timeout + ) + response.raise_for_status() + break + except requests.exceptions.HTTPError as http_error: + logger.error( + f"Could not push results to leaderboard API: {http_error.response.text} with status code {http_error.response.status_code}" + ) + retry += 1 + raise http_error @@ -257,4 +278,7 @@ def leaderboard_push_batch( logger.error( f"Could not delete leaderboard version: {http_error.response.text} with status code {http_error.response.status_code}" ) + logger.error( + f"Leaderboard version {leaderboard_version_delete_api_url} was not deleted" + ) return diff --git a/crawlers/mooncrawl/mooncrawl/settings.py b/crawlers/mooncrawl/mooncrawl/settings.py index bc304043..eeba9246 100644 --- a/crawlers/mooncrawl/mooncrawl/settings.py +++ b/crawlers/mooncrawl/mooncrawl/settings.py @@ -383,5 +383,5 @@ if MOONSTREAM_LEADERBOARD_GENERATOR_JOURNAL_ID == "": ) -MOONSTREAM_LEADERBOARD_GENERATOR_BATCH_SIZE = 10000 +MOONSTREAM_LEADERBOARD_GENERATOR_BATCH_SIZE = 1000 MOONSTREAM_LEADERBOARD_GENERATOR_PUSH_TIMEOUT_SECONDS = 60