diff --git a/crawlers/mooncrawl/mooncrawl/moonworm_crawler/cli.py b/crawlers/mooncrawl/mooncrawl/moonworm_crawler/cli.py
index 04fc2ed9..d78ee5ea 100644
--- a/crawlers/mooncrawl/mooncrawl/moonworm_crawler/cli.py
+++ b/crawlers/mooncrawl/mooncrawl/moonworm_crawler/cli.py
@@ -28,6 +28,9 @@ from .crawler import (
 from .db import get_first_labeled_block_number, get_last_labeled_block_number
 from .historical_crawler import historical_crawler
 
+from .reorg_recrawl import clear_reorg_labels, reorg_scan, update_reorg_labels
+
+
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
@@ -341,6 +344,56 @@ def handle_historical_crawl(args: argparse.Namespace) -> None:
     )
 
 
+def handle_reorg_scan(args: argparse.Namespace) -> None:
+    """
+    Run a reorg scan for one blockchain and optionally recrawl the affected
+    block range.
+
+    Reads blockchain_type and recrawl from args; access_id and poa are only
+    read when recrawl is set, to build the historical crawl arguments.
+    """
+    blockchain_type = AvailableBlockchainType(args.blockchain_type)
+
+    with yield_db_session_ctx() as db_session:
+        reorg_labels = reorg_scan(db_session, blockchain_type)
+
+        # An empty result has no "start"/"end" to relabel or recrawl, and
+        # indexing it below would raise, so stop here.
+        if not reorg_labels:
+            logger.info("Reorg scan finished. No reorgs detected")
+            return
+
+        update_reorg_labels(db_session, blockchain_type, reorg_labels)
+
+        if args.recrawl:
+            handle_historical_crawl(
+                args=argparse.Namespace(
+                    access_id=args.access_id,
+                    blockchain_type=blockchain_type.value,
+                    web3=None,
+                    poa=args.poa,
+                    max_blocks_batch=80,
+                    min_sleep_time=0.1,
+                    force=True,
+                    only_events=False,
+                    only_functions=False,
+                    find_deployed_blocks=False,
+                    tasks_journal=False,
+                    address=None,
+                    start=reorg_labels["start"],
+                    end=reorg_labels["end"],
+                )
+            )
+
+            logger.info(
+                f"Reorg scan finished. Recrawled from {reorg_labels['start']} to {reorg_labels['end']}"
+            )
+
+            # NOTE(review): the original nesting of this call was lost; it is
+            # kept after a completed recrawl - confirm that is the intent.
+            clear_reorg_labels(db_session, blockchain_type)
+
+
 def main() -> None:
     parser = argparse.ArgumentParser()
     parser.set_defaults(func=lambda _: parser.print_help())
@@ -536,6 +589,32 @@ def main() -> None:
     )
     historical_crawl_parser.set_defaults(func=handle_historical_crawl)
 
+    reorg_parser = subparsers.add_parser(
+        "reorg-scan", help="Detect reorgs in the database"
+    )
+
+    reorg_parser.add_argument(
+        "--blockchain-type",
+        "-b",
+        type=str,
+        help=f"Available blockchain types: {[member.value for member in AvailableBlockchainType]}",
+    )
+    reorg_parser.add_argument(
+        "--poa",
+        action="store_true",
+        default=False,
+        help="Use PoA middleware",
+    )
+
+    reorg_parser.add_argument(
+        "--recrawl",
+        action="store_true",
+        default=False,
+        help="Recrawl blocks that were affected by reorg",
+    )
+
+    reorg_parser.set_defaults(func=handle_reorg_scan)
+
     args = parser.parse_args()
     args.func(args)
 
diff --git a/crawlers/mooncrawl/mooncrawl/moonworm_crawler/reorg_recrawl.py b/crawlers/mooncrawl/mooncrawl/moonworm_crawler/reorg_recrawl.py
new file mode 100644
index 00000000..50e31455
--- /dev/null
+++ b/crawlers/mooncrawl/mooncrawl/moonworm_crawler/reorg_recrawl.py
@@ -0,0 +1,52 @@
+import logging
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional, Tuple
+
+from moonstreamdb.blockchain import AvailableBlockchainType, get_block_model
+from moonworm.crawler.log_scanner import _fetch_events_chunk, _crawl_events as moonworm_autoscale_crawl_events  # type: ignore
+from sqlalchemy.orm.session import Session
+from sqlalchemy.sql.expression import and_
+from web3 import Web3
+
+from .crawler import EventCrawlJob
+
+
+def reorg_scan(
+    db_session,
+    blockchain_type: AvailableBlockchainType,
+):
+    """
+    Checks for reorg labels in database
+
+    Placeholder: not implemented yet, so it currently returns None.
+    NOTE(review): the CLI handler indexes the result with "start" and "end",
+    so the finished implementation presumably returns a mapping with those
+    keys - confirm.
+    """
+    pass
+
+
+def update_reorg_labels(
+    db_session,
+    blockchain_type: AvailableBlockchainType,
+    reorg_labels: Any,
+):
+    """
+    Change label to reorg
+
+    Placeholder: not implemented yet, currently a no-op.
+    """
+    pass
+
+
+def clear_reorg_labels(
+    db_session,
+    blockchain_type: AvailableBlockchainType,
+):
+    """
+    Clear reorg labels once the affected blocks have been recrawled
+
+    Placeholder: not implemented yet, currently a no-op. Added so the call in
+    cli.handle_reorg_scan resolves instead of raising NameError.
+    """
+    pass