kopia lustrzana https://github.com/bugout-dev/moonstream
Python implementation of Ethereum Signature Database crawler
rodzic
8acebb7c8a
commit
ef9c73c554
|
@ -1,197 +0,0 @@
|
|||
|
||||
# Created by https://www.toptal.com/developers/gitignore/api/node,linux,windows,osx
|
||||
# Edit at https://www.toptal.com/developers/gitignore?templates=node,linux,windows,osx
|
||||
|
||||
### Linux ###
|
||||
*~
|
||||
|
||||
# temporary files which can be created if a process still has a handle open of a deleted file
|
||||
.fuse_hidden*
|
||||
|
||||
# KDE directory preferences
|
||||
.directory
|
||||
|
||||
# Linux trash folder which might appear on any partition or disk
|
||||
.Trash-*
|
||||
|
||||
# .nfs files are created when an open file is removed but is still being accessed
|
||||
.nfs*
|
||||
|
||||
### Node ###
|
||||
# Logs
|
||||
logs
|
||||
*.log
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
lerna-debug.log*
|
||||
.pnpm-debug.log*
|
||||
|
||||
# Diagnostic reports (https://nodejs.org/api/report.html)
|
||||
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
|
||||
|
||||
# Runtime data
|
||||
pids
|
||||
*.pid
|
||||
*.seed
|
||||
*.pid.lock
|
||||
|
||||
# Directory for instrumented libs generated by jscoverage/JSCover
|
||||
lib-cov
|
||||
|
||||
# Coverage directory used by tools like istanbul
|
||||
coverage
|
||||
*.lcov
|
||||
|
||||
# nyc test coverage
|
||||
.nyc_output
|
||||
|
||||
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
|
||||
.grunt
|
||||
|
||||
# Bower dependency directory (https://bower.io/)
|
||||
bower_components
|
||||
|
||||
# node-waf configuration
|
||||
.lock-wscript
|
||||
|
||||
# Compiled binary addons (https://nodejs.org/api/addons.html)
|
||||
build/Release
|
||||
|
||||
# Dependency directories
|
||||
node_modules/
|
||||
jspm_packages/
|
||||
|
||||
# Snowpack dependency directory (https://snowpack.dev/)
|
||||
web_modules/
|
||||
|
||||
# TypeScript cache
|
||||
*.tsbuildinfo
|
||||
|
||||
# Optional npm cache directory
|
||||
.npm
|
||||
|
||||
# Optional eslint cache
|
||||
.eslintcache
|
||||
|
||||
# Microbundle cache
|
||||
.rpt2_cache/
|
||||
.rts2_cache_cjs/
|
||||
.rts2_cache_es/
|
||||
.rts2_cache_umd/
|
||||
|
||||
# Optional REPL history
|
||||
.node_repl_history
|
||||
|
||||
# Output of 'npm pack'
|
||||
*.tgz
|
||||
|
||||
# Yarn Integrity file
|
||||
.yarn-integrity
|
||||
|
||||
# dotenv environment variables file
|
||||
.env
|
||||
.env.test
|
||||
.env.production
|
||||
|
||||
# parcel-bundler cache (https://parceljs.org/)
|
||||
.cache
|
||||
.parcel-cache
|
||||
|
||||
# Next.js build output
|
||||
.next
|
||||
out
|
||||
|
||||
# Nuxt.js build / generate output
|
||||
.nuxt
|
||||
dist
|
||||
|
||||
# Gatsby files
|
||||
.cache/
|
||||
# Comment in the public line in if your project uses Gatsby and not Next.js
|
||||
# https://nextjs.org/blog/next-9-1#public-directory-support
|
||||
# public
|
||||
|
||||
# vuepress build output
|
||||
.vuepress/dist
|
||||
|
||||
# Serverless directories
|
||||
.serverless/
|
||||
|
||||
# FuseBox cache
|
||||
.fusebox/
|
||||
|
||||
# DynamoDB Local files
|
||||
.dynamodb/
|
||||
|
||||
# TernJS port file
|
||||
.tern-port
|
||||
|
||||
# Stores VSCode versions used for testing VSCode extensions
|
||||
.vscode-test
|
||||
|
||||
# yarn v2
|
||||
.yarn/cache
|
||||
.yarn/unplugged
|
||||
.yarn/build-state.yml
|
||||
.yarn/install-state.gz
|
||||
.pnp.*
|
||||
|
||||
### OSX ###
|
||||
# General
|
||||
.DS_Store
|
||||
.AppleDouble
|
||||
.LSOverride
|
||||
|
||||
# Icon must end with two \r
|
||||
Icon
|
||||
|
||||
|
||||
# Thumbnails
|
||||
._*
|
||||
|
||||
# Files that might appear in the root of a volume
|
||||
.DocumentRevisions-V100
|
||||
.fseventsd
|
||||
.Spotlight-V100
|
||||
.TemporaryItems
|
||||
.Trashes
|
||||
.VolumeIcon.icns
|
||||
.com.apple.timemachine.donotpresent
|
||||
|
||||
# Directories potentially created on remote AFP share
|
||||
.AppleDB
|
||||
.AppleDesktop
|
||||
Network Trash Folder
|
||||
Temporary Items
|
||||
.apdisk
|
||||
|
||||
### Windows ###
|
||||
# Windows thumbnail cache files
|
||||
Thumbs.db
|
||||
Thumbs.db:encryptable
|
||||
ehthumbs.db
|
||||
ehthumbs_vista.db
|
||||
|
||||
# Dump file
|
||||
*.stackdump
|
||||
|
||||
# Folder config file
|
||||
[Dd]esktop.ini
|
||||
|
||||
# Recycle Bin used on file shares
|
||||
$RECYCLE.BIN/
|
||||
|
||||
# Windows Installer files
|
||||
*.cab
|
||||
*.msi
|
||||
*.msix
|
||||
*.msm
|
||||
*.msp
|
||||
|
||||
# Windows shortcuts
|
||||
*.lnk
|
||||
|
||||
# End of https://www.toptal.com/developers/gitignore/api/node,linux,windows,osx
|
||||
|
||||
package-lock.json
|
|
@ -1 +0,0 @@
|
|||
v14.17.3
|
|
@ -1,42 +0,0 @@
|
|||
const fetch = require('node-fetch')
|
||||
const fs = require("fs")
|
||||
|
||||
|
||||
/**
 * Build a promise that settles once the given delay has elapsed.
 *
 * @param {number} ms - Delay in milliseconds, handed straight to setTimeout.
 * @returns {Promise<void>} Promise fulfilled (with no value) by the timer.
 */
function sleep(ms) {
    return new Promise((done) => {
        setTimeout(done, ms);
    });
}
|
||||
|
||||
/**
 * Fetch `url` and return its body parsed as JSON.
 *
 * @param {string} url - Address to request.
 * @param {number} [sleepTime] - Optional delay in milliseconds to wait
 *     before the request is sent. Falsy values (including 0) mean no delay.
 * @returns {Promise<any>} The parsed JSON body.
 * @throws {Error} When the server answers with a non-2xx status, or when the
 *     body is not valid JSON.
 */
async function makeRequest(url, sleepTime) {
    if (sleepTime) {
        // The delay must be awaited; a dropped promise would let the request
        // fire immediately and the pause would have no effect.
        await sleep(sleepTime);
    }
    const response = await fetch(url);
    if (!response.ok) {
        // Fail loudly instead of handing an error payload to the caller as
        // if it were a regular page of results.
        throw new Error(`Request to ${url} failed with HTTP status ${response.status}`);
    }
    return response.json();
}
|
||||
|
||||
/**
 * Walk every page of the 4byte.directory function-signature listing and
 * dump the collected entries into "function_signatures.json".
 *
 * Pages are followed through the `next` link of each response until it is
 * falsy; nothing is written until the whole listing has been collected.
 *
 * @returns {Promise<void>}
 */
async function crawlFunctionSignatures() {
    const firstPageUrl = "https://www.4byte.directory/api/v1/signatures/";
    let page = await makeRequest(firstPageUrl);
    let collected = page.results;
    for (let nextUrl = page.next; nextUrl; nextUrl = page.next) {
        page = await makeRequest(nextUrl);
        collected = collected.concat(page.results);
        console.log(`already crawled : ${collected.length}`);
    }
    fs.writeFileSync("function_signatures.json", JSON.stringify(collected));
}
|
||||
|
||||
/**
 * Walk every page of the 4byte.directory event-signature listing and dump
 * the collected entries into "event_signatures.json".
 *
 * Pages are followed through the `next` link of each response until it is
 * falsy; nothing is written until the whole listing has been collected.
 *
 * @returns {Promise<void>}
 */
async function crawlEventSignatures() {
    const firstPageUrl = "https://www.4byte.directory/api/v1/event-signatures/";
    let page = await makeRequest(firstPageUrl);
    let collected = page.results;
    for (let nextUrl = page.next; nextUrl; nextUrl = page.next) {
        page = await makeRequest(nextUrl);
        collected = collected.concat(page.results);
        console.log(`already crawled : ${collected.length}`);
    }
    fs.writeFileSync("event_signatures.json", JSON.stringify(collected));
}
|
||||
|
||||
//crawlFunctionSignatures()
|
||||
//crawlEventSignatures();
|
|
@ -1,57 +0,0 @@
|
|||
const sqlite3 = require('sqlite3').verbose()
|
||||
const fs = require('fs')
|
||||
// Shared handle on ./signatures.db, opened read/write. A failure to open is
// only reported on stderr; the handle is still assigned either way.
let db = new sqlite3.Database('./signatures.db', sqlite3.OPEN_READWRITE, (openError) => {
    if (!openError) {
        return;
    }
    console.error(openError.message);
});
|
||||
|
||||
/**
 * Load ./function_signatures.json and insert every entry into the
 * `function_signatures` table, creating the table first if needed.
 *
 * Each entry is expected to expose `id`, `text_signature` and
 * `hex_signature`. A row that fails to insert is logged together with its
 * error and the remaining rows are still attempted.
 */
function put_func_signatures_to_db() {
    db.serialize(() => {
        db.run('create table if not exists '
            + 'function_signatures('
            + 'id numeric primary key,'
            + 'text_signature text,'
            + 'hex_signature text)');

        // Read the input before preparing the statement so that a missing or
        // malformed file does not leave an unfinalized statement behind.
        const signatures = JSON.parse(fs.readFileSync("./function_signatures.json"));
        const stmt = db.prepare('insert into function_signatures values (?, ?, ?)');

        signatures.forEach((item) => {
            // Insert errors are reported asynchronously through the callback;
            // a surrounding try/catch would never see them.
            stmt.run([item.id, item.text_signature, item.hex_signature], (err) => {
                if (err) {
                    console.log(item);
                    console.log(err);
                }
            });
        });

        // Release the prepared statement once all queued inserts have run.
        stmt.finalize();
    });
}
|
||||
|
||||
/**
 * Load ./event_signatures.json and insert every entry into the
 * `event_signatures` table, creating the table first if needed.
 *
 * Each entry is expected to expose `id`, `text_signature` and
 * `hex_signature`. A row that fails to insert is logged together with its
 * error and the remaining rows are still attempted.
 */
function put_event_signatures_to_db() {
    db.serialize(() => {
        db.run('create table if not exists '
            + 'event_signatures('
            + 'id numeric primary key,'
            + 'text_signature text,'
            + 'hex_signature text)');

        // Read the input before preparing the statement so that a missing or
        // malformed file does not leave an unfinalized statement behind.
        const eventSignatures = JSON.parse(fs.readFileSync("./event_signatures.json"));
        const stmt = db.prepare('insert into event_signatures values (?, ?, ?)');

        eventSignatures.forEach((item) => {
            // Insert errors are reported asynchronously through the callback;
            // a surrounding try/catch would never see them.
            stmt.run([item.id, item.text_signature, item.hex_signature], (err) => {
                if (err) {
                    console.log(item);
                    console.log(err);
                }
            });
        });

        // Release the prepared statement once all queued inserts have run.
        stmt.finalize();
    });
}
|
||||
|
||||
//put_func_signatures_to_db()
|
||||
//put_event_signatures_to_db()
|
|
@ -1,16 +0,0 @@
|
|||
{
|
||||
"name": "@bugout/4byteCrawler",
|
||||
"version": "1.0.0",
|
||||
"description": "",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"csv-parser": "^3.0.0",
|
||||
"node-fetch": "^2.6.1",
|
||||
"sqlite3": "^5.0.2"
|
||||
}
|
||||
}
|
Plik binarny nie jest wyświetlany.
|
@ -0,0 +1,171 @@
|
|||
|
||||
# Created by https://www.toptal.com/developers/gitignore/api/python,visualstudiocode
|
||||
# Edit at https://www.toptal.com/developers/gitignore?templates=python,visualstudiocode
|
||||
|
||||
### Python ###
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
### VisualStudioCode ###
|
||||
.vscode/*
|
||||
!.vscode/settings.json
|
||||
!.vscode/tasks.json
|
||||
!.vscode/launch.json
|
||||
!.vscode/extensions.json
|
||||
*.code-workspace
|
||||
|
||||
# Local History for Visual Studio Code
|
||||
.history/
|
||||
|
||||
### VisualStudioCode Patch ###
|
||||
# Ignore all local history of files
|
||||
.history
|
||||
.ionide
|
||||
|
||||
# End of https://www.toptal.com/developers/gitignore/api/python,visualstudiocode
|
||||
|
||||
# Custom
|
||||
dev.env
|
||||
prod.env
|
||||
alembic.dev.ini
|
||||
alembic.prod.ini
|
||||
.db/
|
||||
.venv/
|
||||
.esd/
|
||||
.secrets/
|
|
@ -0,0 +1,5 @@
|
|||
# Crawler: Ethereum Signature Database
|
||||
|
||||
This crawler retrieves Ethereum function and event signatures from the Ethereum Signature Database at
|
||||
[https://4byte.directory](https://4byte.directory).
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
import argparse
|
||||
import sys
|
||||
import time
|
||||
from typing import Optional, Union
|
||||
|
||||
from moonstreamdb.db import yield_db_session_ctx
|
||||
from moonstreamdb.models import ESDEventSignature, ESDFunctionSignature
|
||||
from sqlalchemy.orm import Session
|
||||
import requests
|
||||
|
||||
# First-page API endpoint for each supported crawl type. The keys double as
# the accepted values of the `crawl_type` command-line argument.
CRAWL_URLS = dict(
    functions="https://www.4byte.directory/api/v1/signatures/",
    events="https://www.4byte.directory/api/v1/event-signatures/",
)
|
||||
|
||||
# Database model class used to store rows for each crawl type; keyed the
# same way as CRAWL_URLS.
DB_MODELS = dict(
    functions=ESDFunctionSignature,
    events=ESDEventSignature,
)
|
||||
|
||||
def crawl_step(db_session: Session, crawl_url: str, db_model: Union[ESDEventSignature, ESDFunctionSignature]) -> Optional[str]:
    """
    Fetch a single page of signatures from `crawl_url`, store its rows and
    return the URL of the following page.

    The request is tried up to 3 times, waiting 4 and then 8 seconds between
    attempts. Only a response that passed `raise_for_status()` is parsed.

    Args:
        db_session: Session used to bulk-save and commit the rows.
        crawl_url: URL of the page to fetch.
        db_model: Model used to build rows. It is called as a constructor
            with `id`, `text_signature`, `hex_signature` and `created_at`
            keyword arguments, so callers pass the class itself.

    Returns:
        The value of the page's "next" key (None when it is absent), or None
        if the page could not be retrieved after all attempts.
    """
    max_attempts = 3
    current_interval = 2
    # Without a timeout a stalled connection would block the crawl forever.
    request_timeout_seconds = 30

    response: Optional[requests.Response] = None
    for attempt in range(1, max_attempts + 1):
        try:
            candidate = requests.get(crawl_url, timeout=request_timeout_seconds)
            candidate.raise_for_status()
        except requests.exceptions.RequestException:
            # Catch request failures only, so KeyboardInterrupt and
            # programming errors still propagate.
            if attempt < max_attempts:
                # Back off before retrying; no point sleeping after the
                # final attempt.
                current_interval *= 2
                time.sleep(current_interval)
        else:
            # Keep only a response that is known to be successful, so an
            # HTTP error page is never parsed and stored.
            response = candidate
            break

    if response is None:
        print(f"Could not process URL: {crawl_url}", file=sys.stderr)
        return None

    page = response.json()
    results = page.get("results", [])

    rows = [
        db_model(
            id=row.get("id"),
            text_signature=row.get("text_signature"),
            hex_signature=row.get("hex_signature"),
            created_at=row.get("created_at"),
        )
        for row in results
    ]
    db_session.bulk_save_objects(rows)
    db_session.commit()

    return page.get("next")
|
||||
|
||||
def crawl(crawl_type: str, interval: float) -> None:
    """
    Crawl every page for `crawl_type`, pausing `interval` seconds after each
    page.

    `crawl_type` must be a key of CRAWL_URLS and DB_MODELS. Crawling stops as
    soon as `crawl_step` returns None.
    """
    next_url: Optional[str] = CRAWL_URLS[crawl_type]
    model = DB_MODELS[crawl_type]
    with yield_db_session_ctx() as db_session:
        while True:
            if next_url is None:
                break
            print(f"Crawling: {next_url}")
            next_url = crawl_step(db_session, next_url, model)
            time.sleep(interval)
|
||||
|
||||
def main():
    """Command-line entry point: parse the arguments and run the crawl."""
    parser = argparse.ArgumentParser(
        description="Crawls function and event signatures from the Ethereum Signature Database (https://www.4byte.directory/)"
    )
    parser.add_argument(
        "crawl_type",
        choices=CRAWL_URLS,
        help="Specifies whether to crawl function signatures or event signatures",
    )
    parser.add_argument(
        "--interval",
        type=float,
        default=0.1,
        help="Number of seconds to wait between requests to the Ethereum Signature Database API",
    )
    parsed = parser.parse_args()

    crawl(parsed.crawl_type, parsed.interval)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -0,0 +1,15 @@
|
|||
alembic==1.6.5
|
||||
certifi==2021.5.30
|
||||
charset-normalizer==2.0.3
|
||||
greenlet==1.1.0
|
||||
idna==3.2
|
||||
Mako==1.1.4
|
||||
MarkupSafe==2.0.1
|
||||
-e git+ssh://git@github.com/zomglings/moonstock.git@8acebb7c8a1872cd0a9c2b663f86be3877a20636#egg=moonstreamdb&subdirectory=db
|
||||
psycopg2-binary==2.9.1
|
||||
python-dateutil==2.8.2
|
||||
python-editor==1.0.4
|
||||
requests==2.26.0
|
||||
six==1.16.0
|
||||
SQLAlchemy==1.4.22
|
||||
urllib3==1.26.6
|
|
@ -0,0 +1 @@
|
|||
export EXPLORATION_DB_URI="postgresql://<username>:<password>@<db_host>:<db_port>/<db_name>"
|
Ładowanie…
Reference in New Issue