mirror of https://github.com/bugout-dev/moonstream
Merge pull request #919 from moonstream-to/migrate-moonworm-tasks
Migrate moonworm tasks (pull/1011/head)
commit
38947e1379
|
@ -473,7 +473,7 @@ def upload_abi_to_s3(
|
||||||
|
|
||||||
|
|
||||||
def get_all_entries_from_search(
|
def get_all_entries_from_search(
|
||||||
journal_id: str, search_query: str, limit: int, token: str
|
journal_id: str, search_query: str, limit: int, token: str, content: bool = False
|
||||||
) -> List[BugoutSearchResult]:
|
) -> List[BugoutSearchResult]:
|
||||||
"""
|
"""
|
||||||
Get all required entries from journal using search interface
|
Get all required entries from journal using search interface
|
||||||
|
@ -486,7 +486,7 @@ def get_all_entries_from_search(
|
||||||
token=token,
|
token=token,
|
||||||
journal_id=journal_id,
|
journal_id=journal_id,
|
||||||
query=search_query,
|
query=search_query,
|
||||||
content=False,
|
content=content,
|
||||||
timeout=10.0,
|
timeout=10.0,
|
||||||
limit=limit,
|
limit=limit,
|
||||||
offset=offset,
|
offset=offset,
|
||||||
|
@ -499,7 +499,7 @@ def get_all_entries_from_search(
|
||||||
token=token,
|
token=token,
|
||||||
journal_id=journal_id,
|
journal_id=journal_id,
|
||||||
query=search_query,
|
query=search_query,
|
||||||
content=False,
|
content=content,
|
||||||
timeout=10.0,
|
timeout=10.0,
|
||||||
limit=limit,
|
limit=limit,
|
||||||
offset=offset,
|
offset=offset,
|
||||||
|
@ -529,47 +529,45 @@ def apply_moonworm_tasks(
|
||||||
token=MOONSTREAM_ADMIN_ACCESS_TOKEN,
|
token=MOONSTREAM_ADMIN_ACCESS_TOKEN,
|
||||||
)
|
)
|
||||||
|
|
||||||
# create historical crawl task in journal
|
|
||||||
|
|
||||||
# will use create_entries_pack for creating entries in journal
|
# will use create_entries_pack for creating entries in journal
|
||||||
|
|
||||||
existing_tags = [entry.tags for entry in entries]
|
existing_tags = [entry.tags for entry in entries]
|
||||||
|
|
||||||
existing_hashes = [
|
existing_selectors = [
|
||||||
tag.split(":")[-1]
|
tag.split(":")[-1] for tag in chain(*existing_tags) if "abi_selector" in tag
|
||||||
for tag in chain(*existing_tags)
|
|
||||||
if "abi_method_hash" in tag
|
|
||||||
]
|
]
|
||||||
|
|
||||||
abi_hashes_dict = {
|
abi_selectors_dict = {
|
||||||
hashlib.md5(json.dumps(method).encode("utf-8")).hexdigest(): method
|
Web3.keccak(
|
||||||
|
text=method["name"]
|
||||||
|
+ "("
|
||||||
|
+ ",".join(map(lambda x: x["type"], method["inputs"]))
|
||||||
|
+ ")"
|
||||||
|
)[:4].hex(): method
|
||||||
for method in abi
|
for method in abi
|
||||||
if (method["type"] in ("event", "function"))
|
if (method["type"] in ("event", "function"))
|
||||||
and (method.get("stateMutability", "") != "view")
|
and (method.get("stateMutability", "") != "view")
|
||||||
}
|
}
|
||||||
|
|
||||||
for hash in abi_hashes_dict:
|
for abi_selector in abi_selectors_dict:
|
||||||
if hash not in existing_hashes:
|
if abi_selector not in existing_selectors:
|
||||||
abi_selector = Web3.keccak(
|
hash = hashlib.md5(
|
||||||
text=abi_hashes_dict[hash]["name"]
|
json.dumps(abi_selectors_dict[abi_selector]).encode("utf-8")
|
||||||
+ "("
|
).hexdigest()
|
||||||
+ ",".join(
|
|
||||||
map(lambda x: x["type"], abi_hashes_dict[hash]["inputs"])
|
|
||||||
)
|
|
||||||
+ ")"
|
|
||||||
)[:4].hex()
|
|
||||||
|
|
||||||
moonworm_abi_tasks_entries_pack.append(
|
moonworm_abi_tasks_entries_pack.append(
|
||||||
{
|
{
|
||||||
"title": address,
|
"title": address,
|
||||||
"content": json.dumps(abi_hashes_dict[hash], indent=4),
|
"content": json.dumps(
|
||||||
|
abi_selectors_dict[abi_selector], indent=4
|
||||||
|
),
|
||||||
"tags": [
|
"tags": [
|
||||||
f"address:{address}",
|
f"address:{address}",
|
||||||
f"type:{abi_hashes_dict[hash]['type']}",
|
f"type:{abi_selectors_dict[abi_selector]['type']}",
|
||||||
f"abi_method_hash:{hash}",
|
f"abi_method_hash:{hash}",
|
||||||
f"abi_selector:{abi_selector}",
|
f"abi_selector:{abi_selector}",
|
||||||
f"subscription_type:{subscription_type}",
|
f"subscription_type:{subscription_type}",
|
||||||
f"abi_name:{abi_hashes_dict[hash]['name']}",
|
f"abi_name:{abi_selectors_dict[abi_selector]['name']}",
|
||||||
f"status:active",
|
f"status:active",
|
||||||
f"task_type:moonworm",
|
f"task_type:moonworm",
|
||||||
f"moonworm_task_pickedup:False", # True if task picked up by moonworm-crawler(default each 120 sec)
|
f"moonworm_task_pickedup:False", # True if task picked up by moonworm-crawler(default each 120 sec)
|
||||||
|
|
|
@ -12,7 +12,12 @@ from sqlalchemy.orm import with_expression
|
||||||
|
|
||||||
from moonstreamdb.db import SessionLocal
|
from moonstreamdb.db import SessionLocal
|
||||||
|
|
||||||
from ..settings import BUGOUT_BROOD_URL, BUGOUT_SPIRE_URL, MOONSTREAM_APPLICATION_ID
|
from ..settings import (
|
||||||
|
BUGOUT_BROOD_URL,
|
||||||
|
BUGOUT_SPIRE_URL,
|
||||||
|
MOONSTREAM_APPLICATION_ID,
|
||||||
|
MOONSTREAM_MOONWORM_TASKS_JOURNAL,
|
||||||
|
)
|
||||||
from ..web3_provider import yield_web3_provider
|
from ..web3_provider import yield_web3_provider
|
||||||
|
|
||||||
from . import subscription_types, subscriptions, moonworm_tasks, queries
|
from . import subscription_types, subscriptions, moonworm_tasks, queries
|
||||||
|
@ -20,6 +25,7 @@ from .migrations import (
|
||||||
checksum_address,
|
checksum_address,
|
||||||
update_dashboard_subscription_key,
|
update_dashboard_subscription_key,
|
||||||
generate_entity_subscriptions,
|
generate_entity_subscriptions,
|
||||||
|
add_selectors,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -87,6 +93,9 @@ steps:
|
||||||
- id: 20230501
|
- id: 20230501
|
||||||
name: fix_duplicates_keys_in_entity_subscription
|
name: fix_duplicates_keys_in_entity_subscription
|
||||||
description: Fix entity duplicates keys for all subscriptions introduced in 20230213
|
description: Fix entity duplicates keys for all subscriptions introduced in 20230213
|
||||||
|
- id: 20230904
|
||||||
|
name: fill_missing_selectors_in_moonworm_tasks
|
||||||
|
description: Get all moonworm jobs from the moonworm journal and add a selector tag if it is not present
|
||||||
"""
|
"""
|
||||||
logger.info(entity_migration_overview)
|
logger.info(entity_migration_overview)
|
||||||
|
|
||||||
|
@ -117,6 +126,30 @@ def migrations_run(args: argparse.Namespace) -> None:
|
||||||
web3_session = yield_web3_provider()
|
web3_session = yield_web3_provider()
|
||||||
db_session = SessionLocal()
|
db_session = SessionLocal()
|
||||||
try:
|
try:
|
||||||
|
if args.id == 20230904:
|
||||||
|
step_order = [
|
||||||
|
"fill_missing_selectors_in_moonworm_tasks",
|
||||||
|
"deduplicate_moonworm_tasks",
|
||||||
|
]
|
||||||
|
step_map: Dict[str, Dict[str, Any]] = {
|
||||||
|
"upgrade": {
|
||||||
|
"fill_missing_selectors_in_moonworm_tasks": {
|
||||||
|
"action": add_selectors.fill_missing_selectors_in_moonworm_tasks,
|
||||||
|
"description": "Get all moonworm jobs from moonworm journal and add selector tag if it not represent",
|
||||||
|
},
|
||||||
|
"deduplicate_moonworm_tasks": {
|
||||||
|
"action": add_selectors.deduplicate_moonworm_task_by_selector,
|
||||||
|
"description": "Deduplicate moonworm tasks by selector",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"downgrade": {},
|
||||||
|
}
|
||||||
|
if args.command not in ["upgrade", "downgrade"]:
|
||||||
|
logger.info("Wrong command. Please use upgrade or downgrade")
|
||||||
|
step = args.step
|
||||||
|
|
||||||
|
migration_run(step_map, args.command, step, step_order)
|
||||||
|
|
||||||
if args.id == 20230501:
|
if args.id == 20230501:
|
||||||
# fix entity duplicates keys for all subscriptions introduced in 20230213
|
# fix entity duplicates keys for all subscriptions introduced in 20230213
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,187 @@
|
||||||
|
"""
|
||||||
|
Add selectors to all moonworm tasks.
|
||||||
|
"""
|
||||||
|
import logging
|
||||||
|
import json
|
||||||
|
|
||||||
|
|
||||||
|
from bugout.exceptions import BugoutResponseException
|
||||||
|
from web3 import Web3
|
||||||
|
|
||||||
|
from ...settings import (
|
||||||
|
BUGOUT_REQUEST_TIMEOUT_SECONDS,
|
||||||
|
MOONSTREAM_ADMIN_ACCESS_TOKEN,
|
||||||
|
MOONSTREAM_MOONWORM_TASKS_JOURNAL,
|
||||||
|
)
|
||||||
|
from ...settings import bugout_client as bc
|
||||||
|
from ...actions import get_all_entries_from_search
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def fill_missing_selectors_in_moonworm_tasks() -> None:
    """
    Add selectors to all moonworm tasks.

    Searches the moonworm tasks journal for entries matching
    "#task_type:moonworm !#version:2.0" (content included), then, for every
    entry whose content parses as a JSON object with a "name" key:

    - always adds the tag "version:2.0";
    - adds "abi_selector:<selector>" when the entry has no "abi_selector:"
      tag yet. The selector is the first 4 bytes of
      keccak("name(type1,type2,...)"), hex encoded.

    Tags are sent to the journal in batches of 100 entries. Entries that
    cannot be parsed are logged and skipped; a failed batch update is logged
    and the remaining batches are still processed.
    """

    batch_size = 100

    moonworm_tasks = get_all_entries_from_search(
        journal_id=MOONSTREAM_MOONWORM_TASKS_JOURNAL,
        token=MOONSTREAM_ADMIN_ACCESS_TOKEN,
        search_query="#task_type:moonworm !#version:2.0",
        limit=batch_size,
        content=True,
    )

    logger.info(f"Found {len(moonworm_tasks)} moonworm tasks versions 1.0")

    for start in range(0, len(moonworm_tasks), batch_size):
        task_batch = moonworm_tasks[start : start + batch_size]

        # Build a fresh payload for every batch: sharing one list across
        # batches would re-send the tags of all previous batches each time.
        entries_tags = []
        count = 0

        for task in task_batch:
            # The entry id is the last path segment of the entry URL.
            entry_id = task.entry_url.split("/")[-1]
            tags = ["version:2.0"]

            try:
                abi = json.loads(task.content)
            except (TypeError, ValueError) as e:
                # TypeError: content is None / not a string;
                # ValueError: content is not valid JSON.
                logger.warning(f"Unable to parse abi from task: {entry_id}: {e}")
                continue

            if not isinstance(abi, dict) or "name" not in abi:
                logger.warning(f"Unable to find abi name in task: {entry_id}")
                continue

            has_selector = any(tag.startswith("abi_selector:") for tag in task.tags)
            if not has_selector:
                try:
                    signature = (
                        abi["name"]
                        + "("
                        + ",".join(item["type"] for item in abi["inputs"])
                        + ")"
                    )
                except (KeyError, TypeError) as e:
                    # Malformed ABI item (missing "inputs"/"type"): skip this
                    # entry instead of aborting the whole migration.
                    logger.warning(
                        f"Unable to build abi selector for task: {entry_id}: {e}"
                    )
                    continue

                abi_selector = Web3.keccak(text=signature)[:4].hex()
                tags.append(f"abi_selector:{abi_selector}")
                count += 1

            entries_tags.append({"entry_id": entry_id, "tags": tags})

        logger.info(f"Found {count} missing selectors in batch {len(task_batch)} tasks")

        if not entries_tags:
            # Nothing valid in this batch; avoid an empty API call.
            continue

        try:
            bc.create_entries_tags(
                journal_id=MOONSTREAM_MOONWORM_TASKS_JOURNAL,
                token=MOONSTREAM_ADMIN_ACCESS_TOKEN,
                entries_tags=entries_tags,
                timeout=15,
            )
        except BugoutResponseException as e:
            logger.error(f"Unable to update entries tags: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
def deduplicate_moonworm_task_by_selector() -> None:
    """
    Find moonworm tasks with the same address and selector and remove duplicates.

    Searches the moonworm tasks journal for entries matching
    "#task_type:moonworm #version:2.0" and groups them by the values of their
    "address:" and "abi_selector:" tags. For every group with more than one
    entry, the entry with the smallest ``created_at`` (the earliest one) is
    kept and every other entry in the group is deleted from the journal.

    Entries without a selector or address tag are logged and ignored.
    A failed deletion is logged and the remaining entries are still processed.
    """

    moonworm_tasks = get_all_entries_from_search(
        journal_id=MOONSTREAM_MOONWORM_TASKS_JOURNAL,
        token=MOONSTREAM_ADMIN_ACCESS_TOKEN,
        search_query="#task_type:moonworm #version:2.0",
        limit=100,
        content=False,
    )

    logger.info(f"Found {len(moonworm_tasks)} moonworm tasks versions 2.0")

    # address -> selector -> {"entries": {entry_id: created_at}}
    selectors = {}

    for task in moonworm_tasks:
        # The entry id is the last path segment of the entry URL.
        entry_id = task.entry_url.split("/")[-1]

        selector_tags = [tag for tag in task.tags if tag.startswith("abi_selector:")]
        address_tags = [tag for tag in task.tags if tag.startswith("address:")]

        if not selector_tags:
            logger.warning(f"Unable to find selector in task: {entry_id}")
            continue

        if not address_tags:
            logger.warning(f"Unable to find address in task: {entry_id}")
            continue

        selector = selector_tags[0].split(":")[1]
        address = address_tags[0].split(":")[1]

        group = selectors.setdefault(address, {}).setdefault(
            selector, {"entries": {}}
        )
        group["entries"][entry_id] = task.created_at

    logger.info(f"Found {len(selectors)} addresses")

    for address, selectors_dict in selectors.items():
        for selector, tasks_dict in selectors_dict.items():
            entries = tasks_dict["entries"]
            if len(entries) == 1:
                continue

            # The entry created first is the one that survives.
            earliest_task_id = min(entries, key=entries.get)

            logger.info(
                f"Found {len(entries)} tasks with selector {selector} earliest task {earliest_task_id} with created_at: {entries[earliest_task_id]}"
            )

            # Remove every task in the group except the earliest one.
            for task_id in entries:
                if task_id == earliest_task_id:
                    continue

                try:
                    bc.delete_entry(
                        journal_id=MOONSTREAM_MOONWORM_TASKS_JOURNAL,
                        entry_id=task_id,
                        token=MOONSTREAM_ADMIN_ACCESS_TOKEN,
                    )
                except BugoutResponseException as e:
                    logger.error(f"Unable to delete entry with id {task_id} : {e}")
                else:
                    logger.info(f"Deleted entry: {task_id}")
|
Loading…
Reference in New Issue