Replaced postgresql by elastic search

main
Štěpán Škorpil 2022-09-14 21:16:04 +02:00
rodzic 7eb28e5d84
commit f6f9d758a1
81 zmienionych plików z 5764 dodań i 8406 usunięć

Wyświetl plik

@ -1,20 +1,22 @@
FROM node:16-bullseye AS build FROM node:16-bullseye AS build
ENV POSTGRES_URL='postgresql://fedisearch:passwd@postgres:5432/fedisearch?schema=public' \ ENV ELASTIC_URL='http://elastic:9200' \
ELASTIC_USER='elastic' \
ELASTIC_PASSWORD='' \
SEED_NODE_DOMAIN='mastodon.social' \ SEED_NODE_DOMAIN='mastodon.social' \
REATTEMPT_MINUTES='60' \ REATTEMPT_MINUTES='60' \
REFRESH_HOURS='120' \ REFRESH_HOURS='120' \
WAIT_FOR_JOB_MINUTES='60' \ WAIT_FOR_JOB_MINUTES='60' \
DEFAULT_TIMEOUT_MILLISECONDS='10000' \ DEFAULT_TIMEOUT_MILLISECONDS='10000' \
BANNED_DOMAINS='' \
TZ='UTC' TZ='UTC'
WORKDIR /srv WORKDIR /srv
COPY application/package*.json ./ COPY application/package*.json ./
COPY application/prisma ./prisma/
RUN npm install RUN npm install
COPY application/. . COPY application/. .
RUN npm run build RUN npm run build
FROM build AS dev FROM build AS dev
CMD npm run dev CMD npx tsc --watch
FROM node:16-bullseye AS prod FROM node:16-bullseye AS prod
RUN groupadd -g 1001 nodejs RUN groupadd -g 1001 nodejs
@ -22,7 +24,6 @@ RUN useradd -u 1001 -g 1001 nextjs
WORKDIR /srv WORKDIR /srv
USER nextjs USER nextjs
COPY --from=build /srv/node_modules ./node_modules COPY --from=build /srv/node_modules ./node_modules
COPY --from=build /srv/prisma ./prisma
COPY --from=build /srv/package*.json ./ COPY --from=build /srv/package*.json ./
COPY --from=build /srv/dist ./dist COPY --from=build /srv/dist ./dist
CMD npm run start:deploy CMD npm run start:deploy

Wyświetl plik

@ -24,15 +24,18 @@ Data providers for more apps will be probably added soon (Pull requests are welc
Configuration is done using environmental variables: Configuration is done using environmental variables:
| Variable | Description | Default value / Example value | | Variable | Description | Default value / Example value |
|--------------------------------|--------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------| |--------------------------------|--------------------------------------------------------------------------------------------------|-------------------------------|
| `POSTGRES_URL` | Postgres database uri | `postgresql://fedisearch:passwd@postgres:5432/fedisearch?schema=public` | | `ELASTIC_URL` | Url address of ElasticSearch server | `http://elastic:9200` |
| `SEED_NODE_DOMAIN` | Domain of the first node to search users and other nodes on | `mastodon.social` | | `ELASTIC_USER` | Username for EalsticSearch server | `elastic` |
| `REATTEMPT_MINUTES` | _Optional_, How many minutes should be waited for next node refresh attempt if the refresh fails | `60 ` | | `ELASTIC_PASSWORD` | Username for EalsticSearch server | empty |
| `REFRESH_HOURS` | _Optional_, How often (in hours) should be node info refreshed | `120` | | `SEED_NODE_DOMAIN` | Domain of the first node to search users and other nodes on | `mastodon.social` |
| `WAIT_FOR_JOB_MINUTES` | _Optional_, How many minutes should the thread sleep if there are no nodes to refresh | `60` | | `REATTEMPT_MINUTES` | _Optional_, How many minutes should be waited for next node refresh attempt if the refresh fails | `60 ` |
| `DEFAULT_TIMEOUT_MILLISECONDS` | _Optional_, How many milliseconds should http wait for node api response on refresh | `10000` | | `REFRESH_HOURS` | _Optional_, How often (in hours) should be node info refreshed | `120` |
| `TZ` | _Optional_, Timezone | `UTC` | | `WAIT_FOR_JOB_MINUTES` | _Optional_, How many minutes should the thread sleep if there are no nodes to refresh | `60` |
| `DEFAULT_TIMEOUT_MILLISECONDS` | _Optional_, How many milliseconds should http wait for node api response on refresh | `10000` |
| `BANNED_DOMAINS` | _Optional_, Domains not to index (even with subdomains) | _empty_ |
| `TZ` | _Optional_, Timezone | `UTC` |
## Deploy ## Deploy
App is designed to be run in docker container and deployed using docker-compose. App is designed to be run in docker container and deployed using docker-compose.
More info can be found in [FediSearch example docker-compose](https://github.com/Stopka/fedisearch-compose) project More info can be found in [FediSearch example docker-compose](https://github.com/Stopka/fedisearch-compose) project

12062
application/package-lock.json wygenerowano

Plik diff jest za duży Load Diff

Wyświetl plik

@ -19,18 +19,12 @@
"test:cov": "jest --coverage", "test:cov": "jest --coverage",
"test:debug": "node --inspect-brk -r tsconfig-paths/register -r ts-node/register node_modules/.bin/jest --runInBand", "test:debug": "node --inspect-brk -r tsconfig-paths/register -r ts-node/register node_modules/.bin/jest --runInBand",
"test:e2e": "jest --config ./test/jest-e2e.json", "test:e2e": "jest --config ./test/jest-e2e.json",
"migrate:dev": "prisma migrate dev",
"migrate:dev:create": "prisma migrate dev --create-only",
"migrate:deploy": "npx prisma migrate deploy",
"migrate:reset": "npx prisma migrate reset",
"migrate:resolve": "npx prisma migrate resolve",
"prisma:generate": "npx prisma generate",
"prisma:studio": "npx prisma studio",
"start:deploy": "npm run migrate:deploy && npm run start" "start:deploy": "npm run migrate:deploy && npm run start"
}, },
"dependencies": { "dependencies": {
"@prisma/client": "^3.6.0", "@elastic/elasticsearch": "^8.2.1",
"axios": "^0.21.1", "axios": "^0.21.1",
"geoip-lite": "^1.4.6",
"npmlog": "^6.0.0", "npmlog": "^6.0.0",
"rimraf": "^3.0.2", "rimraf": "^3.0.2",
"striptags": "^3.2.0", "striptags": "^3.2.0",
@ -38,6 +32,7 @@
"zod": "^3.11.6" "zod": "^3.11.6"
}, },
"devDependencies": { "devDependencies": {
"@types/geoip-lite": "^1.4.1",
"@types/jest": "^27.0.2", "@types/jest": "^27.0.2",
"@types/node": "^16.11.10", "@types/node": "^16.11.10",
"@types/npmlog": "^4.1.3", "@types/npmlog": "^4.1.3",
@ -50,7 +45,6 @@
"eslint-plugin-promise": "^5.1.1", "eslint-plugin-promise": "^5.1.1",
"eslint-plugin-react": "^7.27.1", "eslint-plugin-react": "^7.27.1",
"jest": "^27.3.0", "jest": "^27.3.0",
"prisma": "^3.6.0",
"standard": "*", "standard": "*",
"ts-jest": "^27.0.7", "ts-jest": "^27.0.7",
"typescript": "^4.3.5" "typescript": "^4.3.5"

Wyświetl plik

@ -1,174 +0,0 @@
-- CreateEnum
CREATE TYPE "FeedType" AS ENUM ('account', 'channel');
-- CreateTable
CREATE TABLE "Tag" (
"id" UUID NOT NULL,
"name" TEXT NOT NULL,
CONSTRAINT "Tag_pkey" PRIMARY KEY ("id")
);
-- CreateTable
CREATE TABLE "Email" (
"id" UUID NOT NULL,
"address" TEXT NOT NULL,
"feedId" UUID NOT NULL,
CONSTRAINT "Email_pkey" PRIMARY KEY ("id")
);
-- CreateTable
CREATE TABLE "FeedToTag" (
"feedId" UUID NOT NULL,
"tagId" UUID NOT NULL,
CONSTRAINT "FeedToTag_pkey" PRIMARY KEY ("feedId","tagId")
);
-- CreateTable
CREATE TABLE "Field" (
"id" UUID NOT NULL,
"name" TEXT NOT NULL,
"value" TEXT NOT NULL,
"feedId" UUID NOT NULL,
CONSTRAINT "Field_pkey" PRIMARY KEY ("id")
);
-- CreateTable
CREATE TABLE "Feed" (
"id" UUID NOT NULL,
"nodeId" UUID NOT NULL,
"foundAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"refreshedAt" TIMESTAMP(3) NOT NULL,
"name" TEXT NOT NULL,
"displayName" TEXT NOT NULL,
"description" TEXT NOT NULL,
"followersCount" INTEGER NOT NULL,
"followingCount" INTEGER NOT NULL,
"statusesCount" INTEGER,
"bot" BOOLEAN,
"url" TEXT NOT NULL,
"avatar" TEXT,
"locked" BOOLEAN NOT NULL,
"lastStatusAt" TIMESTAMP(3),
"createdAt" TIMESTAMP(3) NOT NULL,
"type" "FeedType" NOT NULL DEFAULT E'account',
"parentFeedName" TEXT,
"parentFeedDomain" TEXT,
"fulltext" TEXT NOT NULL DEFAULT E'',
CONSTRAINT "Feed_pkey" PRIMARY KEY ("id")
);
-- CreateTable
CREATE TABLE "Node" (
"id" UUID NOT NULL,
"softwareName" TEXT,
"softwareVersion" TEXT,
"totalUserCount" INTEGER,
"monthActiveUserCount" INTEGER,
"halfYearActiveUserCount" INTEGER,
"statusesCount" INTEGER,
"openRegistrations" BOOLEAN,
"foundAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"refreshedAt" TIMESTAMP(3),
"refreshAttemptedAt" TIMESTAMP(3),
"domain" TEXT NOT NULL,
CONSTRAINT "Node_pkey" PRIMARY KEY ("id")
);
-- CreateIndex
CREATE UNIQUE INDEX "Tag_name_key" ON "Tag"("name");
-- CreateIndex
CREATE INDEX "Email_address_idx" ON "Email"("address");
-- CreateIndex
CREATE INDEX "Field_name_idx" ON "Field"("name");
-- CreateIndex
CREATE INDEX "Field_value_idx" ON "Field"("value");
-- CreateIndex
CREATE INDEX "Feed_displayName_idx" ON "Feed"("displayName");
-- CreateIndex
CREATE INDEX "Feed_description_idx" ON "Feed"("description");
-- CreateIndex
CREATE INDEX "Feed_bot_idx" ON "Feed"("bot");
-- CreateIndex
CREATE INDEX "Feed_locked_idx" ON "Feed"("locked");
-- CreateIndex
CREATE INDEX "Feed_lastStatusAt_idx" ON "Feed"("lastStatusAt");
-- CreateIndex
CREATE INDEX "Feed_createdAt_idx" ON "Feed"("createdAt");
-- CreateIndex
CREATE INDEX "Feed_refreshedAt_idx" ON "Feed"("refreshedAt");
-- CreateIndex
CREATE INDEX "Feed_parentFeedName_parentFeedDomain_idx" ON "Feed"("parentFeedName", "parentFeedDomain");
-- CreateIndex
CREATE INDEX "Feed_type_idx" ON "Feed"("type");
-- CreateIndex
CREATE INDEX "Feed_fulltext_idx" ON "Feed"("fulltext");
-- CreateIndex
CREATE UNIQUE INDEX "Feed_name_nodeId_key" ON "Feed"("name", "nodeId");
-- CreateIndex
CREATE UNIQUE INDEX "Node_domain_key" ON "Node"("domain");
-- CreateIndex
CREATE INDEX "Node_softwareName_idx" ON "Node"("softwareName");
-- CreateIndex
CREATE INDEX "Node_softwareVersion_idx" ON "Node"("softwareVersion");
-- CreateIndex
CREATE INDEX "Node_totalUserCount_idx" ON "Node"("totalUserCount");
-- CreateIndex
CREATE INDEX "Node_monthActiveUserCount_idx" ON "Node"("monthActiveUserCount");
-- CreateIndex
CREATE INDEX "Node_halfYearActiveUserCount_idx" ON "Node"("halfYearActiveUserCount");
-- CreateIndex
CREATE INDEX "Node_statusesCount_idx" ON "Node"("statusesCount");
-- CreateIndex
CREATE INDEX "Node_openRegistrations_idx" ON "Node"("openRegistrations");
-- CreateIndex
CREATE INDEX "Node_refreshedAt_idx" ON "Node"("refreshedAt");
-- CreateIndex
CREATE INDEX "Node_refreshAttemptedAt_idx" ON "Node"("refreshAttemptedAt");
-- CreateIndex
CREATE INDEX "Node_foundAt_idx" ON "Node"("foundAt");
-- AddForeignKey
ALTER TABLE "Email" ADD CONSTRAINT "Email_feedId_fkey" FOREIGN KEY ("feedId") REFERENCES "Feed"("id") ON DELETE RESTRICT ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "FeedToTag" ADD CONSTRAINT "FeedToTag_feedId_fkey" FOREIGN KEY ("feedId") REFERENCES "Feed"("id") ON DELETE RESTRICT ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "FeedToTag" ADD CONSTRAINT "FeedToTag_tagId_fkey" FOREIGN KEY ("tagId") REFERENCES "Tag"("id") ON DELETE RESTRICT ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "Field" ADD CONSTRAINT "Field_feedId_fkey" FOREIGN KEY ("feedId") REFERENCES "Feed"("id") ON DELETE RESTRICT ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "Feed" ADD CONSTRAINT "Feed_nodeId_fkey" FOREIGN KEY ("nodeId") REFERENCES "Node"("id") ON DELETE RESTRICT ON UPDATE CASCADE;

Wyświetl plik

@ -1,163 +0,0 @@
/*
Warnings:
- A unique constraint covering the columns `[name,nodeId]` on the table `Feed` will be added. If there are existing duplicate values, this will fail.
- A unique constraint covering the columns `[domain]` on the table `Node` will be added. If there are existing duplicate values, this will fail.
- A unique constraint covering the columns `[name]` on the table `Tag` will be added. If there are existing duplicate values, this will fail.
*/
-- DropIndex
DROP INDEX "Email_address_idx";
-- DropIndex
DROP INDEX "Feed_bot_idx";
-- DropIndex
DROP INDEX "Feed_createdAt_idx";
-- DropIndex
DROP INDEX "Feed_description_idx";
-- DropIndex
DROP INDEX "Feed_displayName_idx";
-- DropIndex
DROP INDEX "Feed_fulltext_idx";
-- DropIndex
DROP INDEX "Feed_lastStatusAt_idx";
-- DropIndex
DROP INDEX "Feed_locked_idx";
-- DropIndex
DROP INDEX "Feed_name_nodeId_key";
-- DropIndex
DROP INDEX "Feed_parentFeedName_parentFeedDomain_idx";
-- DropIndex
DROP INDEX "Feed_refreshedAt_idx";
-- DropIndex
DROP INDEX "Feed_type_idx";
-- DropIndex
DROP INDEX "Field_name_idx";
-- DropIndex
DROP INDEX "Field_value_idx";
-- DropIndex
DROP INDEX "Node_domain_key";
-- DropIndex
DROP INDEX "Node_foundAt_idx";
-- DropIndex
DROP INDEX "Node_halfYearActiveUserCount_idx";
-- DropIndex
DROP INDEX "Node_monthActiveUserCount_idx";
-- DropIndex
DROP INDEX "Node_openRegistrations_idx";
-- DropIndex
DROP INDEX "Node_refreshAttemptedAt_idx";
-- DropIndex
DROP INDEX "Node_refreshedAt_idx";
-- DropIndex
DROP INDEX "Node_softwareName_idx";
-- DropIndex
DROP INDEX "Node_softwareVersion_idx";
-- DropIndex
DROP INDEX "Node_statusesCount_idx";
-- DropIndex
DROP INDEX "Node_totalUserCount_idx";
-- DropIndex
DROP INDEX "Tag_name_key";
-- CreateIndex
CREATE INDEX "Email_address_idx" ON "Email"("address");
-- CreateIndex
CREATE INDEX "Feed_displayName_idx" ON "Feed"("displayName");
-- CreateIndex
CREATE INDEX "Feed_description_idx" ON "Feed"("description");
-- CreateIndex
CREATE INDEX "Feed_bot_idx" ON "Feed"("bot");
-- CreateIndex
CREATE INDEX "Feed_locked_idx" ON "Feed"("locked");
-- CreateIndex
CREATE INDEX "Feed_lastStatusAt_idx" ON "Feed"("lastStatusAt");
-- CreateIndex
CREATE INDEX "Feed_createdAt_idx" ON "Feed"("createdAt");
-- CreateIndex
CREATE INDEX "Feed_refreshedAt_idx" ON "Feed"("refreshedAt");
-- CreateIndex
CREATE INDEX "Feed_parentFeedName_parentFeedDomain_idx" ON "Feed"("parentFeedName", "parentFeedDomain");
-- CreateIndex
CREATE INDEX "Feed_type_idx" ON "Feed"("type");
-- CreateIndex
CREATE INDEX "Feed_fulltext_idx" ON "Feed"("fulltext");
-- CreateIndex
CREATE UNIQUE INDEX "Feed_name_nodeId_key" ON "Feed"("name", "nodeId");
-- CreateIndex
CREATE INDEX "Field_name_idx" ON "Field"("name");
-- CreateIndex
CREATE INDEX "Field_value_idx" ON "Field"("value");
-- CreateIndex
CREATE UNIQUE INDEX "Node_domain_key" ON "Node"("domain");
-- CreateIndex
CREATE INDEX "Node_softwareName_idx" ON "Node"("softwareName");
-- CreateIndex
CREATE INDEX "Node_softwareVersion_idx" ON "Node"("softwareVersion");
-- CreateIndex
CREATE INDEX "Node_totalUserCount_idx" ON "Node"("totalUserCount");
-- CreateIndex
CREATE INDEX "Node_monthActiveUserCount_idx" ON "Node"("monthActiveUserCount");
-- CreateIndex
CREATE INDEX "Node_halfYearActiveUserCount_idx" ON "Node"("halfYearActiveUserCount");
-- CreateIndex
CREATE INDEX "Node_statusesCount_idx" ON "Node"("statusesCount");
-- CreateIndex
CREATE INDEX "Node_openRegistrations_idx" ON "Node"("openRegistrations");
-- CreateIndex
CREATE INDEX "Node_refreshedAt_idx" ON "Node"("refreshedAt");
-- CreateIndex
CREATE INDEX "Node_refreshAttemptedAt_idx" ON "Node"("refreshAttemptedAt");
-- CreateIndex
CREATE INDEX "Node_foundAt_idx" ON "Node"("foundAt");
-- CreateIndex
CREATE UNIQUE INDEX "Tag_name_key" ON "Tag"("name");

Wyświetl plik

@ -1,167 +0,0 @@
/*
Warnings:
- A unique constraint covering the columns `[name,nodeId]` on the table `Feed` will be added. If there are existing duplicate values, this will fail.
- A unique constraint covering the columns `[domain]` on the table `Node` will be added. If there are existing duplicate values, this will fail.
- A unique constraint covering the columns `[name]` on the table `Tag` will be added. If there are existing duplicate values, this will fail.
*/
-- DropIndex
DROP INDEX "Email_address_idx";
-- DropIndex
DROP INDEX "Feed_bot_idx";
-- DropIndex
DROP INDEX "Feed_createdAt_idx";
-- DropIndex
DROP INDEX "Feed_description_idx";
-- DropIndex
DROP INDEX "Feed_displayName_idx";
-- DropIndex
DROP INDEX "Feed_fulltext_idx";
-- DropIndex
DROP INDEX "Feed_lastStatusAt_idx";
-- DropIndex
DROP INDEX "Feed_locked_idx";
-- DropIndex
DROP INDEX "Feed_name_nodeId_key";
-- DropIndex
DROP INDEX "Feed_parentFeedName_parentFeedDomain_idx";
-- DropIndex
DROP INDEX "Feed_refreshedAt_idx";
-- DropIndex
DROP INDEX "Feed_type_idx";
-- DropIndex
DROP INDEX "Field_name_idx";
-- DropIndex
DROP INDEX "Field_value_idx";
-- DropIndex
DROP INDEX "Node_domain_key";
-- DropIndex
DROP INDEX "Node_foundAt_idx";
-- DropIndex
DROP INDEX "Node_halfYearActiveUserCount_idx";
-- DropIndex
DROP INDEX "Node_monthActiveUserCount_idx";
-- DropIndex
DROP INDEX "Node_openRegistrations_idx";
-- DropIndex
DROP INDEX "Node_refreshAttemptedAt_idx";
-- DropIndex
DROP INDEX "Node_refreshedAt_idx";
-- DropIndex
DROP INDEX "Node_softwareName_idx";
-- DropIndex
DROP INDEX "Node_softwareVersion_idx";
-- DropIndex
DROP INDEX "Node_statusesCount_idx";
-- DropIndex
DROP INDEX "Node_totalUserCount_idx";
-- DropIndex
DROP INDEX "Tag_name_key";
-- AlterTable
ALTER TABLE "Feed" ALTER COLUMN "followersCount" DROP NOT NULL,
ALTER COLUMN "followingCount" DROP NOT NULL;
-- CreateIndex
CREATE INDEX "Email_address_idx" ON "Email"("address");
-- CreateIndex
CREATE INDEX "Feed_displayName_idx" ON "Feed"("displayName");
-- CreateIndex
CREATE INDEX "Feed_description_idx" ON "Feed"("description");
-- CreateIndex
CREATE INDEX "Feed_bot_idx" ON "Feed"("bot");
-- CreateIndex
CREATE INDEX "Feed_locked_idx" ON "Feed"("locked");
-- CreateIndex
CREATE INDEX "Feed_lastStatusAt_idx" ON "Feed"("lastStatusAt");
-- CreateIndex
CREATE INDEX "Feed_createdAt_idx" ON "Feed"("createdAt");
-- CreateIndex
CREATE INDEX "Feed_refreshedAt_idx" ON "Feed"("refreshedAt");
-- CreateIndex
CREATE INDEX "Feed_parentFeedName_parentFeedDomain_idx" ON "Feed"("parentFeedName", "parentFeedDomain");
-- CreateIndex
CREATE INDEX "Feed_type_idx" ON "Feed"("type");
-- CreateIndex
CREATE INDEX "Feed_fulltext_idx" ON "Feed"("fulltext");
-- CreateIndex
CREATE UNIQUE INDEX "Feed_name_nodeId_key" ON "Feed"("name", "nodeId");
-- CreateIndex
CREATE INDEX "Field_name_idx" ON "Field"("name");
-- CreateIndex
CREATE INDEX "Field_value_idx" ON "Field"("value");
-- CreateIndex
CREATE UNIQUE INDEX "Node_domain_key" ON "Node"("domain");
-- CreateIndex
CREATE INDEX "Node_softwareName_idx" ON "Node"("softwareName");
-- CreateIndex
CREATE INDEX "Node_softwareVersion_idx" ON "Node"("softwareVersion");
-- CreateIndex
CREATE INDEX "Node_totalUserCount_idx" ON "Node"("totalUserCount");
-- CreateIndex
CREATE INDEX "Node_monthActiveUserCount_idx" ON "Node"("monthActiveUserCount");
-- CreateIndex
CREATE INDEX "Node_halfYearActiveUserCount_idx" ON "Node"("halfYearActiveUserCount");
-- CreateIndex
CREATE INDEX "Node_statusesCount_idx" ON "Node"("statusesCount");
-- CreateIndex
CREATE INDEX "Node_openRegistrations_idx" ON "Node"("openRegistrations");
-- CreateIndex
CREATE INDEX "Node_refreshedAt_idx" ON "Node"("refreshedAt");
-- CreateIndex
CREATE INDEX "Node_refreshAttemptedAt_idx" ON "Node"("refreshAttemptedAt");
-- CreateIndex
CREATE INDEX "Node_foundAt_idx" ON "Node"("foundAt");
-- CreateIndex
CREATE UNIQUE INDEX "Tag_name_key" ON "Tag"("name");

Wyświetl plik

@ -1,4 +0,0 @@
update "Node"
set "refreshedAt"=NULL,
"refreshAttemptedAt"=NULL
where "Node"."softwareName" like 'pleroma';

Wyświetl plik

@ -1,4 +0,0 @@
update "Node"
set "refreshedAt"=NULL,
"refreshAttemptedAt"=NULL
where "Node"."softwareName" like 'misskey';

Wyświetl plik

@ -1,3 +0,0 @@
update "Node"
set "softwareName"=lower("softwareName")
where true;

Wyświetl plik

@ -1,4 +0,0 @@
update "Node"
set "refreshedAt"=NULL,
"refreshAttemptedAt"=NULL
where "Node"."softwareName" like 'hometown';

Wyświetl plik

@ -1,4 +0,0 @@
update "Node"
set "refreshedAt"=NULL,
"refreshAttemptedAt"=NULL
where "Node"."softwareName" like 'ecko';

Wyświetl plik

@ -1,193 +0,0 @@
/*
Warnings:
- A unique constraint covering the columns `[name,nodeId]` on the table `Feed` will be added. If there are existing duplicate values, this will fail.
- A unique constraint covering the columns `[domain]` on the table `Node` will be added. If there are existing duplicate values, this will fail.
- A unique constraint covering the columns `[name]` on the table `Tag` will be added. If there are existing duplicate values, this will fail.
*/
-- DropForeignKey
ALTER TABLE "Email" DROP CONSTRAINT "Email_feedId_fkey";
-- DropForeignKey
ALTER TABLE "Feed" DROP CONSTRAINT "Feed_nodeId_fkey";
-- DropForeignKey
ALTER TABLE "FeedToTag" DROP CONSTRAINT "FeedToTag_feedId_fkey";
-- DropForeignKey
ALTER TABLE "FeedToTag" DROP CONSTRAINT "FeedToTag_tagId_fkey";
-- DropForeignKey
ALTER TABLE "Field" DROP CONSTRAINT "Field_feedId_fkey";
-- DropIndex
DROP INDEX "Email_address_idx";
-- DropIndex
DROP INDEX "Feed_bot_idx";
-- DropIndex
DROP INDEX "Feed_createdAt_idx";
-- DropIndex
DROP INDEX "Feed_description_idx";
-- DropIndex
DROP INDEX "Feed_displayName_idx";
-- DropIndex
DROP INDEX "Feed_fulltext_idx";
-- DropIndex
DROP INDEX "Feed_lastStatusAt_idx";
-- DropIndex
DROP INDEX "Feed_locked_idx";
-- DropIndex
DROP INDEX "Feed_name_nodeId_key";
-- DropIndex
DROP INDEX "Feed_parentFeedName_parentFeedDomain_idx";
-- DropIndex
DROP INDEX "Feed_refreshedAt_idx";
-- DropIndex
DROP INDEX "Feed_type_idx";
-- DropIndex
DROP INDEX "Field_name_idx";
-- DropIndex
DROP INDEX "Field_value_idx";
-- DropIndex
DROP INDEX "Node_domain_key";
-- DropIndex
DROP INDEX "Node_foundAt_idx";
-- DropIndex
DROP INDEX "Node_halfYearActiveUserCount_idx";
-- DropIndex
DROP INDEX "Node_monthActiveUserCount_idx";
-- DropIndex
DROP INDEX "Node_openRegistrations_idx";
-- DropIndex
DROP INDEX "Node_refreshAttemptedAt_idx";
-- DropIndex
DROP INDEX "Node_refreshedAt_idx";
-- DropIndex
DROP INDEX "Node_softwareName_idx";
-- DropIndex
DROP INDEX "Node_softwareVersion_idx";
-- DropIndex
DROP INDEX "Node_statusesCount_idx";
-- DropIndex
DROP INDEX "Node_totalUserCount_idx";
-- DropIndex
DROP INDEX "Tag_name_key";
-- CreateIndex
CREATE INDEX "Email_address_idx" ON "Email"("address");
-- CreateIndex
CREATE INDEX "Feed_displayName_idx" ON "Feed"("displayName");
-- CreateIndex
CREATE INDEX "Feed_description_idx" ON "Feed"("description");
-- CreateIndex
CREATE INDEX "Feed_bot_idx" ON "Feed"("bot");
-- CreateIndex
CREATE INDEX "Feed_locked_idx" ON "Feed"("locked");
-- CreateIndex
CREATE INDEX "Feed_lastStatusAt_idx" ON "Feed"("lastStatusAt");
-- CreateIndex
CREATE INDEX "Feed_createdAt_idx" ON "Feed"("createdAt");
-- CreateIndex
CREATE INDEX "Feed_refreshedAt_idx" ON "Feed"("refreshedAt");
-- CreateIndex
CREATE INDEX "Feed_parentFeedName_parentFeedDomain_idx" ON "Feed"("parentFeedName", "parentFeedDomain");
-- CreateIndex
CREATE INDEX "Feed_type_idx" ON "Feed"("type");
-- CreateIndex
CREATE INDEX "Feed_fulltext_idx" ON "Feed"("fulltext");
-- CreateIndex
CREATE UNIQUE INDEX "Feed_name_nodeId_key" ON "Feed"("name", "nodeId");
-- CreateIndex
CREATE INDEX "Field_name_idx" ON "Field"("name");
-- CreateIndex
CREATE INDEX "Field_value_idx" ON "Field"("value");
-- CreateIndex
CREATE UNIQUE INDEX "Node_domain_key" ON "Node"("domain");
-- CreateIndex
CREATE INDEX "Node_softwareName_idx" ON "Node"("softwareName");
-- CreateIndex
CREATE INDEX "Node_softwareVersion_idx" ON "Node"("softwareVersion");
-- CreateIndex
CREATE INDEX "Node_totalUserCount_idx" ON "Node"("totalUserCount");
-- CreateIndex
CREATE INDEX "Node_monthActiveUserCount_idx" ON "Node"("monthActiveUserCount");
-- CreateIndex
CREATE INDEX "Node_halfYearActiveUserCount_idx" ON "Node"("halfYearActiveUserCount");
-- CreateIndex
CREATE INDEX "Node_statusesCount_idx" ON "Node"("statusesCount");
-- CreateIndex
CREATE INDEX "Node_openRegistrations_idx" ON "Node"("openRegistrations");
-- CreateIndex
CREATE INDEX "Node_refreshedAt_idx" ON "Node"("refreshedAt");
-- CreateIndex
CREATE INDEX "Node_refreshAttemptedAt_idx" ON "Node"("refreshAttemptedAt");
-- CreateIndex
CREATE INDEX "Node_foundAt_idx" ON "Node"("foundAt");
-- CreateIndex
CREATE UNIQUE INDEX "Tag_name_key" ON "Tag"("name");
-- AddForeignKey
ALTER TABLE "Email" ADD CONSTRAINT "Email_feedId_fkey" FOREIGN KEY ("feedId") REFERENCES "Feed"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "FeedToTag" ADD CONSTRAINT "FeedToTag_feedId_fkey" FOREIGN KEY ("feedId") REFERENCES "Feed"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "FeedToTag" ADD CONSTRAINT "FeedToTag_tagId_fkey" FOREIGN KEY ("tagId") REFERENCES "Tag"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "Field" ADD CONSTRAINT "Field_feedId_fkey" FOREIGN KEY ("feedId") REFERENCES "Feed"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "Feed" ADD CONSTRAINT "Feed_nodeId_fkey" FOREIGN KEY ("nodeId") REFERENCES "Node"("id") ON DELETE CASCADE ON UPDATE CASCADE;

Wyświetl plik

@ -1,4 +0,0 @@
update "Node"
set "refreshedAt"=NULL,
"refreshAttemptedAt"=NULL
where "Node"."softwareName" like 'friendica';

Wyświetl plik

@ -1,3 +0,0 @@
# Please do not edit this file manually
# It should be added in your version-control system (i.e. Git)
provider = "postgresql"

Wyświetl plik

@ -1,115 +0,0 @@
datasource db {
url = env("POSTGRES_URL")
provider = "postgresql"
}
generator client {
provider = "prisma-client-js"
previewFeatures = ["extendedIndexes","fullTextSearch","referentialActions"]
}
model Tag {
id String @id @default(uuid()) @db.Uuid
name String @unique
feedToTag FeedToTag[]
}
model Email {
id String @id @default(uuid()) @db.Uuid
address String
feed Feed @relation(fields: [feedId], references: [id], onDelete: Cascade)
feedId String @db.Uuid
@@index([address])
}
model FeedToTag {
feed Feed @relation(fields: [feedId], references: [id], onDelete: Cascade)
feedId String @db.Uuid
tag Tag @relation(fields: [tagId], references: [id], onDelete: Cascade)
tagId String @db.Uuid
@@id([feedId, tagId])
}
model Field {
id String @id @default(uuid()) @db.Uuid
name String
value String
feed Feed @relation(fields: [feedId], references: [id], onDelete: Cascade)
feedId String @db.Uuid
@@index([name])
@@index([value])
}
enum FeedType{
account
channel
}
model Feed {
id String @id @default(uuid()) @db.Uuid
node Node @relation(fields: [nodeId], references: [id], onDelete: Cascade)
nodeId String @db.Uuid
foundAt DateTime @default(now())
refreshedAt DateTime @updatedAt
name String
displayName String
description String
feedToTags FeedToTag[]
fields Field[]
emails Email[]
followersCount Int?
followingCount Int?
statusesCount Int?
bot Boolean?
url String
avatar String?
locked Boolean
lastStatusAt DateTime?
createdAt DateTime
type FeedType @default(account)
parentFeedName String?
parentFeedDomain String?
fulltext String @default("")
@@index([displayName])
@@index([description])
@@index([bot])
@@index([locked])
@@index([lastStatusAt])
@@index([createdAt])
@@index([refreshedAt])
@@index([parentFeedName,parentFeedDomain])
@@index([type])
@@index([fulltext])
@@unique([name, nodeId])
}
model Node {
id String @id @default(uuid()) @db.Uuid
softwareName String?
softwareVersion String?
totalUserCount Int?
monthActiveUserCount Int?
halfYearActiveUserCount Int?
statusesCount Int?
openRegistrations Boolean?
foundAt DateTime @default(now())
refreshedAt DateTime?
refreshAttemptedAt DateTime?
domain String @unique
feeds Feed[]
@@index([softwareName])
@@index([softwareVersion])
@@index([totalUserCount])
@@index([monthActiveUserCount])
@@index([halfYearActiveUserCount])
@@index([statusesCount])
@@index([openRegistrations])
@@index([refreshedAt])
@@index([refreshAttemptedAt])
@@index([foundAt])
}

Wyświetl plik

@ -4,6 +4,7 @@ import { assertSuccessJsonResponse } from '../assertSuccessJsonResponse'
import { getDefaultTimeoutMilliseconds } from '../getDefaultTimeoutMilliseconds' import { getDefaultTimeoutMilliseconds } from '../getDefaultTimeoutMilliseconds'
const schema = z.object({ const schema = z.object({
name: z.string().optional(),
software: z.object({ software: z.object({
name: z.string(), name: z.string(),
version: z.string() version: z.string()

Wyświetl plik

@ -6,17 +6,17 @@ export interface FeedData {
description:string, description:string,
followersCount: number, followersCount: number,
followingCount:number, followingCount:number,
statusesCount:number, statusesCount?:number,
bot:boolean, bot?:boolean,
url: string, url: string,
avatar:string|null, avatar?:string,
locked:boolean, locked:boolean,
lastStatusAt:Date|null, lastStatusAt?:Date,
createdAt:Date createdAt:Date
fields: FieldData[], fields: FieldData[],
type: 'account'|'channel' type: 'account'|'channel'
parentFeed: { parentFeed?: {
name:string name:string
hostDomain:string hostDomain:string
}|null }
} }

Wyświetl plik

@ -1,5 +1,5 @@
export interface FieldData{ export interface FieldData{
name: string, name: string,
value: string, value: string,
verifiedAt: Date|null verifiedAt: Date|undefined
} }

Wyświetl plik

@ -1,9 +1,7 @@
import axios from 'axios' import axios from 'axios'
import { assertSuccessJsonResponse } from '../../assertSuccessJsonResponse' import { assertSuccessJsonResponse } from '../../assertSuccessJsonResponse'
import { FeedData } from '../FeedData'
import { z } from 'zod' import { z } from 'zod'
import { getDefaultTimeoutMilliseconds } from '../../getDefaultTimeoutMilliseconds' import { getDefaultTimeoutMilliseconds } from '../../getDefaultTimeoutMilliseconds'
import { NodeProviderMethod } from '../NodeProviderMethod'
import { NoMoreFeedsError } from '../NoMoreFeedsError' import { NoMoreFeedsError } from '../NoMoreFeedsError'
import { FeedProviderMethod } from '../FeedProviderMethod' import { FeedProviderMethod } from '../FeedProviderMethod'
@ -91,24 +89,24 @@ export const retrieveUsersPage:FeedProviderMethod = async (domain, page) => {
url: `https://${domain}/@${item.username}`, url: `https://${domain}/@${item.username}`,
avatar: item.avatarUrl, avatar: item.avatarUrl,
locked: item.isLocked, locked: item.isLocked,
lastStatusAt: item.updatedAt !== null ? new Date(item.updatedAt) : null, lastStatusAt: item.updatedAt !== null ? new Date(item.updatedAt) : undefined,
createdAt: new Date(item.createdAt), createdAt: new Date(item.createdAt),
fields: [ fields: [
...item.fields.map(field => { ...item.fields.map(field => {
return { return {
name: replaceEmojis(field.name, item.emojis), name: replaceEmojis(field.name, item.emojis),
value: replaceEmojis(field.value, item.emojis), value: replaceEmojis(field.value, item.emojis),
verifiedAt: null verifiedAt: undefined
} }
}), }),
...[ ...[
{ name: 'Location', value: item.location, verifiedAt: null }, { name: 'Location', value: item.location, verifiedAt: undefined },
{ name: 'Birthday', value: item.birthday, verifiedAt: null }, { name: 'Birthday', value: item.birthday, verifiedAt: undefined },
{ name: 'Language', value: item.lang, verifiedAt: null } { name: 'Language', value: item.lang, verifiedAt: undefined }
].filter(field => field.value !== null) ].filter(field => field.value !== null)
], ],
type: 'account', type: 'account',
parentFeed: null parentFeed: undefined
} }
} }
) )

Wyświetl plik

@ -1,7 +1,7 @@
import { z } from 'zod' import { z } from 'zod'
export const avatarSchema = z.nullable(z.object({ export const avatarSchema = z.optional(z.nullable(z.object({
path: z.string() path: z.string()
})) })))
export type Avatar = z.infer<typeof avatarSchema> export type Avatar = z.infer<typeof avatarSchema>

Wyświetl plik

@ -1,8 +1,8 @@
import { Avatar } from './Avatar' import { Avatar } from './Avatar'
export const parseAvatarUrl = (data:Avatar, domain:string):string|null => { export const parseAvatarUrl = (data:Avatar, domain:string):string|undefined => {
if (data === null) { if (data === null) {
return null return undefined
} }
return `https://${domain}${data.path}` return `https://${domain}${data.path}`
} }

Wyświetl plik

@ -57,11 +57,11 @@ export const retrieveAccounts:FeedProviderMethod = async (domain, page) => {
followersCount: item.followersCount, followersCount: item.followersCount,
followingCount: item.followingCount, followingCount: item.followingCount,
createdAt: new Date(item.createdAt), createdAt: new Date(item.createdAt),
bot: null, bot: undefined,
lastStatusAt: null, lastStatusAt: undefined,
statusesCount: null, statusesCount: undefined,
type: 'account', type: 'account',
parentFeed: null parentFeed: undefined
} }
}) })
} }

Wyświetl plik

@ -54,7 +54,7 @@ export const retrieveVideoChannels:FeedProviderMethod = async (domain, page) =>
.filter(item => item.host === domain) .filter(item => item.host === domain)
.map((item):FeedData => { .map((item):FeedData => {
const fields:FieldData[] = item.support const fields:FieldData[] = item.support
? [{ name: 'support', value: item.support, verifiedAt: null }] ? [{ name: 'support', value: item.support, verifiedAt: undefined }]
: [] : []
return { return {
name: item.name, name: item.name,
@ -67,9 +67,9 @@ export const retrieveVideoChannels:FeedProviderMethod = async (domain, page) =>
followersCount: item.followersCount, followersCount: item.followersCount,
followingCount: item.followingCount, followingCount: item.followingCount,
createdAt: new Date(item.createdAt), createdAt: new Date(item.createdAt),
bot: null, bot: undefined,
lastStatusAt: null, lastStatusAt: undefined,
statusesCount: null, statusesCount: undefined,
type: 'channel', type: 'channel',
parentFeed: { parentFeed: {
name: item.ownerAccount.name, name: item.ownerAccount.name,

Wyświetl plik

@ -0,0 +1,19 @@
import { lookup } from 'dns/promises'
import { updateNodeIps } from '../../Storage/Nodes/updateNodeIps'
import { ElasticClient } from '../../Storage/ElasticClient'
import Node from '../../Storage/Definitions/Node'
const refreshNodeIps = async (elastic: ElasticClient, node:Node):Promise<Node> => {
console.info('Looking up node ip addresses', {
nodeDomain: node.domain
})
try {
const addresses = await lookup(node.domain, { all: true })
return updateNodeIps(elastic, node, addresses.map(resolution => resolution.address))
} catch (error) {
console.warn('Could not lookup the domain', { node, error })
return node
}
}
export default refreshNodeIps

Wyświetl plik

@ -1,28 +1,15 @@
import { Node, PrismaClient, Feed } from '@prisma/client'
import { FeedData } from '../../Fediverse/Providers/FeedData' import { FeedData } from '../../Fediverse/Providers/FeedData'
import { createMissingTags } from '../../Storage/Tags/createMissingTags'
import { createFeedTags } from '../../Storage/Tags/createFeedTags'
import { fetchTags } from '../../Storage/Tags/fetchTags'
import { extractTags } from '../../StringTools/extractTags' import { extractTags } from '../../StringTools/extractTags'
import { extractEmails } from '../../StringTools/extractEmails' import { extractEmails } from '../../StringTools/extractEmails'
import { createFeedFields } from '../../Storage/Fields/createFeedFields'
import { createFeedEmails } from '../../Storage/Emails/createFeedEmails'
import { createFeed } from '../../Storage/Feeds/createFeed' import { createFeed } from '../../Storage/Feeds/createFeed'
import prepareFulltext from './prepareFulltext' import prepareFulltext from './prepareFulltext'
import Feed from '../../Storage/Definitions/Feed'
import Node from '../../Storage/Definitions/Node'
import { ElasticClient } from '../../Storage/ElasticClient'
export const addFeed = async (prisma: PrismaClient, node: Node, feedData: FeedData): Promise<Feed> => { export const addFeed = async (elastic: ElasticClient, node: Node, feedData: FeedData): Promise<Feed> => {
const fulltext = prepareFulltext(feedData, node) const fulltext = prepareFulltext(feedData, node)
const feed = await createFeed(prisma, { ...feedData, fulltext }, node) const extractedTags = extractTags(fulltext)
const extractedEmails = extractEmails(fulltext)
await createFeedFields(prisma, feed, feedData.fields) return await createFeed(elastic, { ...feedData, extractedTags, extractedEmails }, node)
const tagNames = extractTags(fulltext)
await createMissingTags(prisma, tagNames)
const tags = await fetchTags(prisma, tagNames)
await createFeedTags(prisma, feed, tags)
const emails = extractEmails(fulltext)
await createFeedEmails(prisma, feed, emails)
return feed
} }

Wyświetl plik

@ -1,6 +1,6 @@
import { FeedData } from '../../Fediverse/Providers/FeedData' import { FeedData } from '../../Fediverse/Providers/FeedData'
import striptags from 'striptags' import striptags from 'striptags'
import { Node } from '@prisma/client' import Node from '../../Storage/Definitions/Node'
export default function (feedData: FeedData, node: Node):string { export default function (feedData: FeedData, node: Node):string {
return striptags( return striptags(

Wyświetl plik

@ -1,33 +1,17 @@
import { PrismaClient, Feed, Node } from '@prisma/client'
import { FeedData } from '../../Fediverse/Providers/FeedData' import { FeedData } from '../../Fediverse/Providers/FeedData'
import { createMissingTags } from '../../Storage/Tags/createMissingTags'
import { createFeedTags } from '../../Storage/Tags/createFeedTags'
import { fetchTags } from '../../Storage/Tags/fetchTags'
import { extractTags } from '../../StringTools/extractTags' import { extractTags } from '../../StringTools/extractTags'
import { extractEmails } from '../../StringTools/extractEmails' import { extractEmails } from '../../StringTools/extractEmails'
import { createFeedFields } from '../../Storage/Fields/createFeedFields'
import { createFeedEmails } from '../../Storage/Emails/createFeedEmails'
import { deleteAllFeedFields } from '../../Storage/Fields/deleteAllFeedFields'
import { deleteAllFeedTags } from '../../Storage/Tags/deleteAllFeedTags'
import { deleteAllFeedEmails } from '../../Storage/Emails/deleteAllFeedEmails'
import { updateFeed } from '../../Storage/Feeds/updateFeed' import { updateFeed } from '../../Storage/Feeds/updateFeed'
import Feed from '../../Storage/Definitions/Feed'
import Node from '../../Storage/Definitions/Node'
import prepareFulltext from './prepareFulltext' import prepareFulltext from './prepareFulltext'
import { ElasticClient } from '../../Storage/ElasticClient'
export const refreshFeed = async (prisma: PrismaClient, feed:Feed, feedData: FeedData, node: Node): Promise<Feed> => { export const refreshFeed = async (elastic: ElasticClient, feed:Feed, feedData: FeedData, node: Node): Promise<Feed> => {
const fulltext = prepareFulltext(feedData, node) const fulltext = prepareFulltext(feedData, node)
await deleteAllFeedFields(prisma, feed) const extractedTags = extractTags(fulltext)
await createFeedFields(prisma, feed, feedData.fields) const extractedEmails = extractEmails(fulltext)
const tagNames = extractTags(fulltext) return await updateFeed(elastic, feed, { ...feedData, extractedTags, extractedEmails })
await createMissingTags(prisma, tagNames)
const tags = await fetchTags(prisma, tagNames)
await deleteAllFeedTags(prisma, feed)
await createFeedTags(prisma, feed, tags)
const emails = extractEmails(fulltext)
await deleteAllFeedEmails(prisma, feed)
await createFeedEmails(prisma, feed, emails)
return await updateFeed(prisma, feed, { ...feedData, fulltext })
} }

Wyświetl plik

@ -1,12 +1,13 @@
import { refreshFeedsOnPage } from './refreshFeedsOnPage' import { refreshFeedsOnPage } from './refreshFeedsOnPage'
import { FeedProvider } from '../../Fediverse/Providers/FeedProvider' import { FeedProvider } from '../../Fediverse/Providers/FeedProvider'
import { Node, PrismaClient } from '@prisma/client' import Node from '../../Storage/Definitions/Node'
import { ElasticClient } from '../../Storage/ElasticClient'
export const refreshFeeds = async (prisma:PrismaClient, provider:FeedProvider, node:Node):Promise<void> => { export const refreshFeeds = async (elastic: ElasticClient, provider:FeedProvider, node:Node):Promise<void> => {
try { try {
for (let page = 0; true; page++) { for (let page = 0; true; page++) {
console.info('Retrieve feeds page', { nodeDomain: node.domain, provider: provider.getKey(), page: page }) console.info('Retrieve feeds page', { nodeDomain: node.domain, provider: provider.getKey(), page: page })
await refreshFeedsOnPage(prisma, provider, node, page) await refreshFeedsOnPage(elastic, provider, node, page)
} }
} catch (e) { } catch (e) {
console.info('Feed search finished: ' + e, { nodeDomain: node.domain, provider: provider.getKey() }) console.info('Feed search finished: ' + e, { nodeDomain: node.domain, provider: provider.getKey() })

Wyświetl plik

@ -1,11 +1,13 @@
import { Node, PrismaClient, Feed } from '@prisma/client'
import { refreshOrAddFeed } from './refreshOrAddFeed' import { refreshOrAddFeed } from './refreshOrAddFeed'
import { FeedProvider } from '../../Fediverse/Providers/FeedProvider' import { FeedProvider } from '../../Fediverse/Providers/FeedProvider'
import Node from '../../Storage/Definitions/Node'
import Feed from '../../Storage/Definitions/Feed'
import { ElasticClient } from '../../Storage/ElasticClient'
export const refreshFeedsOnPage = async (prisma: PrismaClient, provider:FeedProvider, node:Node, page:number):Promise<Feed[]> => { export const refreshFeedsOnPage = async (elastic: ElasticClient, provider:FeedProvider, node:Node, page:number):Promise<Feed[]> => {
const feedData = await provider.retrieveFeeds(node.domain, page) const feedData = await provider.retrieveFeeds(node.domain, page)
console.info('Retrieved feeds', { count: feedData.length, domain: node.domain, provider: provider.getKey(), page: page }) console.info('Retrieved feeds', { count: feedData.length, domain: node.domain, provider: provider.getKey(), page: page })
return Promise.all(feedData.map( return Promise.all(feedData.map(
feedDataItem => refreshOrAddFeed(prisma, node, feedDataItem) feedDataItem => refreshOrAddFeed(elastic, node, feedDataItem)
)) ))
} }

Wyświetl plik

@ -1,15 +1,22 @@
import { FeedData } from '../../Fediverse/Providers/FeedData' import { FeedData } from '../../Fediverse/Providers/FeedData'
import { fetchFeedByNodeAndName } from '../../Storage/Feeds/fetchFeedByNodeAndName'
import { refreshFeed } from './refreshFeed' import { refreshFeed } from './refreshFeed'
import { addFeed } from './addFeed' import { addFeed } from './addFeed'
import { Node, PrismaClient, Feed } from '@prisma/client' import Feed from '../../Storage/Definitions/Feed'
import Node from '../../Storage/Definitions/Node'
import { ElasticClient } from '../../Storage/ElasticClient'
import getFeed from '../../Storage/Feeds/getFeed'
export const refreshOrAddFeed = async (prisma:PrismaClient, node:Node, feedData:FeedData):Promise<Feed> => { export const refreshOrAddFeed = async (elastic: ElasticClient, node:Node, feedData:FeedData):Promise<Feed> => {
const feed = await fetchFeedByNodeAndName(prisma, node, feedData.name) let feed:Feed|null = null
try {
feed = await getFeed(elastic, `${feedData.name}@${node.domain}`)
} catch (e) {
}
if (feed) { if (feed) {
console.info('Refreshing feed', { nodeDomain: node.domain, feedName: feedData.name, feedType: feedData.type }) console.info('Refreshing feed', { nodeDomain: node.domain, feedName: feedData.name, feedType: feedData.type })
return await refreshFeed(prisma, feed, feedData, node) return await refreshFeed(elastic, feed, feedData, node)
} }
console.info('Adding feed', { nodeDomain: node.domain, feedName: feedData.name, feedType: feedData.type }) console.info('Adding feed', { nodeDomain: node.domain, feedName: feedData.name, feedType: feedData.type })
return await addFeed(prisma, node, feedData) return await addFeed(elastic, node, feedData)
} }

Wyświetl plik

@ -1,12 +1,13 @@
import { Node, PrismaClient } from '@prisma/client'
import { retrieveDomainNodeInfo } from '../../Fediverse/NodeInfo/retrieveDomainNodeInfo' import { retrieveDomainNodeInfo } from '../../Fediverse/NodeInfo/retrieveDomainNodeInfo'
import { updateNode } from '../../Storage/Nodes/updateNode' import { updateNodeInfo } from '../../Storage/Nodes/updateNodeInfo'
import Node from '../../Storage/Definitions/Node'
import { ElasticClient } from '../../Storage/ElasticClient'
export const refreshNodeInfo = async (prisma: PrismaClient, node:Node):Promise<Node> => { export const refreshNodeInfo = async (elastic: ElasticClient, node:Node):Promise<Node> => {
console.info('Updating info of node', { nodeDomain: node.domain }) console.info('Updating info of node', { nodeDomain: node.domain })
try { try {
const nodeInfo = await retrieveDomainNodeInfo(node.domain) const nodeInfo = await retrieveDomainNodeInfo(node.domain)
return await updateNode(prisma, node, nodeInfo) return await updateNodeInfo(elastic, node, nodeInfo)
} catch (error) { } catch (error) {
console.warn('Failed to update node info: ' + error) console.warn('Failed to update node info: ' + error)
return node return node

Wyświetl plik

@ -1,12 +1,13 @@
import { Node, PrismaClient } from '@prisma/client'
import { NodeProvider } from '../../Fediverse/Providers/NodeProvider' import { NodeProvider } from '../../Fediverse/Providers/NodeProvider'
import { findNewNodesOnPage } from './findNewNodesOnPage' import { findNewNodesOnPage } from './findNewNodesOnPage'
import Node from '../../Storage/Definitions/Node'
import { ElasticClient } from '../../Storage/ElasticClient'
export const findNewNodes = async (prisma: PrismaClient, provider:NodeProvider, node:Node):Promise<void> => { export const findNewNodes = async (elastic: ElasticClient, provider:NodeProvider, node:Node):Promise<void> => {
try { try {
for (let page = 0; true; page++) { for (let page = 0; true; page++) {
console.info('Retrieve node page', { domain: node.domain, provider: provider.getKey() }) console.info('Retrieve node page', { domain: node.domain, provider: provider.getKey() })
await findNewNodesOnPage(prisma, provider, node, page) await findNewNodesOnPage(elastic, provider, node, page)
} }
} catch (e) { } catch (e) {
console.info('Node search finished: ' + e, { domain: node.domain, provider: provider.getKey() }) console.info('Node search finished: ' + e, { domain: node.domain, provider: provider.getKey() })

Wyświetl plik

@ -1,11 +1,14 @@
import { PrismaClient, Node } from '@prisma/client'
import { createMissingNodes } from '../../Storage/Nodes/createMissingNodes' import { createMissingNodes } from '../../Storage/Nodes/createMissingNodes'
import { NodeProvider } from '../../Fediverse/Providers/NodeProvider' import { NodeProvider } from '../../Fediverse/Providers/NodeProvider'
import Node from '../../Storage/Definitions/Node'
import { ElasticClient } from '../../Storage/ElasticClient'
import isDomainNotBanned from '../../Storage/Nodes/isDomainNotBanned'
export const findNewNodesOnPage = async ( export const findNewNodesOnPage = async (
prisma:PrismaClient, provider: NodeProvider, node:Node, page:number elastic: ElasticClient, provider: NodeProvider, node:Node, page:number
):Promise<number> => { ):Promise<number> => {
const domains = await provider.retrieveNodes(node.domain, page) let domains = await provider.retrieveNodes(node.domain, page)
domains = domains.filter(isDomainNotBanned)
console.log('Found nodes', { count: domains.length, domain: node.domain, provider: provider.getKey(), page: page }) console.log('Found nodes', { count: domains.length, domain: node.domain, provider: provider.getKey(), page: page })
return await createMissingNodes(prisma, domains) return await createMissingNodes(elastic, domains, node.domain)
} }

Wyświetl plik

@ -0,0 +1,17 @@
import countNodeFeeds from '../../Storage/Feeds/countNodeFeeds'
import { setNodeStats } from '../../Storage/Nodes/setNodeStats'
import { ElasticClient } from '../../Storage/ElasticClient'
import Node from '../../Storage/Definitions/Node'
export type NodeStats ={
account: number,
channel: number,
}
export default async function updateNodeFeedStats (elasticClient: ElasticClient, node: Node) {
await setNodeStats(
elasticClient,
node,
await countNodeFeeds(elasticClient, node.domain)
)
}

Wyświetl plik

@ -1,8 +1,8 @@
import { PrismaClient } from '@prisma/client'
import { createMissingNodes } from '../../Storage/Nodes/createMissingNodes' import { createMissingNodes } from '../../Storage/Nodes/createMissingNodes'
import { ElasticClient } from '../../Storage/ElasticClient'
export const addNodeSeed = async (prisma:PrismaClient, domain:string):Promise<boolean> => { export const addNodeSeed = async (elastic: ElasticClient, domains:string[]):Promise<boolean> => {
console.info('Trying to add seed domain node', { domain: domain }) console.info('Trying to add seed domain nodes', { domains: domains })
const result = await createMissingNodes(prisma, [domain]) const result = await createMissingNodes(elastic, domains, undefined)
return result > 0 return result > 0
} }

Wyświetl plik

@ -0,0 +1,11 @@
import { deleteDomainFeeds } from '../../Storage/Feeds/deleteDomainFeeds'
import { deleteDomainNodes } from '../../Storage/Nodes/deleteDomainNodes'
import { ElasticClient } from '../../Storage/ElasticClient'
export default async function deleteDomains (elastic: ElasticClient, domains:string[]):Promise<number> {
if (domains === []) {
return
}
await deleteDomainFeeds(elastic, domains)
return deleteDomainNodes(elastic, domains)
}

Wyświetl plik

@ -0,0 +1,7 @@
export default function getBannedDomains ():string[] {
const domains = process.env.BANNED_DOMAINS ?? ''
if (domains === '') {
return []
}
return domains.split(',').map(domain => domain.toLowerCase())
}

Wyświetl plik

@ -1,9 +0,0 @@
import { PrismaClient, Tag } from '@prisma/client'
import { fetchTags } from '../../Storage/Tags/fetchTags'
import { createMissingTags } from '../../Storage/Tags/createMissingTags'
export const fetchOrCreateTags = async (prisma:PrismaClient, tagNames:string[]):Promise<Tag[]> => {
console.log('Searching for tags', { count: tagNames.length })
await createMissingTags(prisma, tagNames)
return await fetchTags(prisma, tagNames)
}

Wyświetl plik

@ -1,4 +1,3 @@
import { PrismaClient } from '@prisma/client'
import { fetchNodeToProcess } from '../Storage/Nodes/fetchNodeToProcess' import { fetchNodeToProcess } from '../Storage/Nodes/fetchNodeToProcess'
import { ProviderRegistry } from '../Fediverse/Providers/ProviderRegistry' import { ProviderRegistry } from '../Fediverse/Providers/ProviderRegistry'
import { setNodeRefreshed } from '../Storage/Nodes/setNodeRefreshed' import { setNodeRefreshed } from '../Storage/Nodes/setNodeRefreshed'
@ -9,18 +8,22 @@ import { NodeProvider } from '../Fediverse/Providers/NodeProvider'
import { FeedProvider } from '../Fediverse/Providers/FeedProvider' import { FeedProvider } from '../Fediverse/Providers/FeedProvider'
import { refreshFeeds } from './Feeds/refreshFeeds' import { refreshFeeds } from './Feeds/refreshFeeds'
import { deleteOldFeeds } from '../Storage/Feeds/deleteOldFeeds' import { deleteOldFeeds } from '../Storage/Feeds/deleteOldFeeds'
import refreshNodeIps from './Dns/refreshNodeIps'
import { ElasticClient } from '../Storage/ElasticClient'
import updateNodeFeedStats from './Nodes/updateNodeFeedStats'
export const processNextNode = async (prisma:PrismaClient, providerRegistry:ProviderRegistry):Promise<void> => { export const processNextNode = async (elastic: ElasticClient, providerRegistry:ProviderRegistry):Promise<void> => {
console.info('#############################################') console.info('#############################################')
let node = await fetchNodeToProcess(prisma) let node = await fetchNodeToProcess(elastic)
node = await setNodeRefreshAttempted(prisma, node) node = await setNodeRefreshAttempted(elastic, node)
node = await refreshNodeInfo(prisma, node) node = await refreshNodeIps(elastic, node)
node = await refreshNodeInfo(elastic, node)
if (!providerRegistry.containsKey(node.softwareName)) { if (!providerRegistry.containsKey(node.softwareName)) {
console.warn('Unknown software', { domain: node.domain, software: node.softwareName }) console.warn('Unknown software', { domain: node.domain, software: node.softwareName })
await deleteOldFeeds(prisma, node) await deleteOldFeeds(elastic, node)
await setNodeRefreshed(prisma, node) await setNodeRefreshed(elastic, node)
return return
} }
const provider = providerRegistry.getProviderByKey(node.softwareName) const provider = providerRegistry.getProviderByKey(node.softwareName)
@ -28,18 +31,19 @@ export const processNextNode = async (prisma:PrismaClient, providerRegistry:Prov
await Promise.all( await Promise.all(
provider.getNodeProviders().map((nodeProvider:NodeProvider) => { provider.getNodeProviders().map((nodeProvider:NodeProvider) => {
console.info('Searching for nodes', { domain: node.domain, provider: nodeProvider.getKey() }) console.info('Searching for nodes', { domain: node.domain, provider: nodeProvider.getKey() })
return findNewNodes(prisma, nodeProvider, node) return findNewNodes(elastic, nodeProvider, node)
}) })
) )
await Promise.all( await Promise.all(
provider.getFeedProviders().map((feedProvider:FeedProvider) => { provider.getFeedProviders().map((feedProvider:FeedProvider) => {
console.info('Searching for feeds', { domain: node.domain, provider: feedProvider.getKey() }) console.info('Searching for feeds', { domain: node.domain, provider: feedProvider.getKey() })
return refreshFeeds(prisma, feedProvider, node) return refreshFeeds(elastic, feedProvider, node)
}) })
) )
await deleteOldFeeds(prisma, node) await deleteOldFeeds(elastic, node)
await updateNodeFeedStats(elastic, node)
await setNodeRefreshed(prisma, node) await setNodeRefreshed(elastic, node)
} }

Wyświetl plik

@ -0,0 +1,29 @@
import Field from './Field'
interface Feed {
domain: string
foundAt: number,
refreshedAt?: number,
name: string,
fullName: string,
displayName: string,
description: string,
strippedDescription?: string,
followersCount?: number,
followingCount?: number,
statusesCount?: number,
lastStatusAt?: number,
createdAt?: number,
bot?: boolean,
locked: boolean,
url: string,
avatar?: string,
type: 'account' | 'channel',
parentFeedName?: string,
parentFeedDomain?: string
fields: Field[],
extractedEmails: string[],
extractedTags: string[]
}
export default Feed

Wyświetl plik

@ -0,0 +1,8 @@
interface Field {
name: string,
value: string
strippedName?: string
strippedValue?: string
}
export default Field

Wyświetl plik

@ -0,0 +1,13 @@
interface Geo {
cityName?: string,
continentName?: string,
countryIsoCode?: string,
countryName?: string,
latitude: number
longitude: number
location?: string,
regionIsoCode?: string,
regionName?: string
}
export default Geo

Wyświetl plik

@ -0,0 +1,25 @@
import Geo from './Geo'
interface Node {
name?:string,
strippedName?:string,
foundAt: number,
refreshAttemptedAt?: number
refreshedAt?: number
openRegistrations?: boolean
domain: string,
serverIps?: string[],
geoip?: Geo[],
softwareName?: string;
softwareVersion?: string
standardizedSoftwareVersion?: string
halfYearActiveUserCount?: number,
monthActiveUserCount?: number,
statusesCount?: number,
totalUserCount?: number,
discoveredByDomain?:string,
accountFeedCount?: number,
channelFeedCount?: number,
}
export default Node

Wyświetl plik

@ -0,0 +1,3 @@
const feedIndex = 'feed'
export default feedIndex

Wyświetl plik

@ -0,0 +1,3 @@
const nodeIndex = 'node'
export default nodeIndex

Wyświetl plik

@ -0,0 +1,15 @@
import { Client } from '@elastic/elasticsearch'
const elasticClient = new Client({
node: {
url: new URL(process.env.ELASTIC_URL ?? 'http://elastic:9200')
},
auth: {
username: process.env.ELASTIC_USER ?? 'elastic',
password: process.env.ELASTIC_PASSWORD
}
})
export type ElasticClient = typeof elasticClient
export default elasticClient

Wyświetl plik

@ -1,15 +0,0 @@
import { PrismaClient, Feed } from '@prisma/client'
export const createFeedEmails = async (prisma:PrismaClient, feed:Feed, emails:string[]):Promise<number> => {
const result = await prisma.email.createMany({
data: emails.map(email => {
return {
feedId: feed.id,
address: email
}
}),
skipDuplicates: true
})
console.log('Added emails to feed', { count: result.count, feedName: feed.name, nodeId: feed.nodeId })
return result.count
}

Wyświetl plik

@ -1,11 +0,0 @@
import { PrismaClient, Feed } from '@prisma/client'
export const deleteAllFeedEmails = async (prisma:PrismaClient, feed:Feed):Promise<number> => {
const result = await prisma.email.deleteMany({
where: {
feedId: feed.id
}
})
console.log('Removed all emails from feed', { count: result.count, feedName: feed.name, nodeId: feed.nodeId })
return result.count
}

Wyświetl plik

@ -1,5 +1,6 @@
import { FeedData } from '../../Fediverse/Providers/FeedData' import { FeedData } from '../../Fediverse/Providers/FeedData'
export default interface StorageFeedData extends FeedData{ export default interface StorageFeedData extends FeedData{
fulltext:string extractedTags:string[],
extractedEmails:string[],
} }

Wyświetl plik

@ -0,0 +1,84 @@
import { ElasticClient } from '../ElasticClient'
import feedIndex from '../Definitions/feedIndex'
import dateProperty from '../Properties/dateProperty'
import { IngestProcessorContainer } from '@elastic/elasticsearch/lib/api/types'
const assertFeedIndex = async (elastic: ElasticClient): Promise<void> => {
console.info('Setting feed pipeline')
const processors: IngestProcessorContainer[] = []
await elastic.ingest.putPipeline({
id: 'feed',
description: 'Default feed pipeline',
processors: processors
})
console.info('Checking feed index')
const exists = await elastic.indices.exists({
index: feedIndex
})
if (exists) {
console.info('Feed index exists')
} else {
console.info('Creating feed index')
await elastic.indices.create({
index: feedIndex,
settings: {
default_pipeline: 'feed',
analysis: {
analyzer: {
standard: {
type: 'standard'
},
html: {
type: 'custom',
tokenizer: 'standard',
filter: [
'lowercase'
],
char_filter: [
'html_strip'
]
}
}
}
}
})
}
console.info('Setting feed mapping')
await elastic.indices.putMapping({
index: feedIndex,
properties: {
domain: { type: 'text', analyzer: 'standard' },
foundAt: dateProperty,
refreshedAt: dateProperty,
name: { type: 'text', analyzer: 'standard' },
fullName: { type: 'text', analyzer: 'standard' },
displayName: { type: 'text', analyzer: 'html' },
description: { type: 'text', analyzer: 'html' },
followersCount: { type: 'integer' },
followingCount: { type: 'integer' },
statusesCount: { type: 'integer' },
lastStatusAt: dateProperty,
createdAt: dateProperty,
bot: { type: 'boolean' },
locked: { type: 'boolean' },
url: { type: 'text' },
avatar: { type: 'text' },
type: { type: 'keyword' },
parentFeedName: { type: 'text', analyzer: 'standard' },
parentFeedDomain: { type: 'text', analyzer: 'standard' },
fields: {
type: 'nested',
properties: {
name: { type: 'text', analyzer: 'html' },
value: { type: 'text', analyzer: 'html' }
}
},
extractedEmails: { type: 'text', analyzer: 'standard' },
extractedTags: { type: 'text', analyzer: 'standard' }
}
})
await elastic.indices.refresh({ index: feedIndex })
}
export default assertFeedIndex

Wyświetl plik

@ -0,0 +1,39 @@
import { ElasticClient } from '../ElasticClient'
import Feed from '../Definitions/Feed'
import feedIndex from '../Definitions/feedIndex'
import { NodeStats } from '../../Jobs/Nodes/updateNodeFeedStats'
type Aggregation = {
buckets:{
key:'account'|'channel',
// eslint-disable-next-line camelcase
doc_count:number
}[]
}
export default async function countNodeFeeds (elastic: ElasticClient, domain:string):Promise<NodeStats> {
await elastic.indices.refresh({ index: feedIndex })
const response = await elastic.search<Feed>({
index: feedIndex,
query: {
term: { domain }
},
size: 0,
aggs: {
types: {
terms: {
field: 'type'
}
}
}
})
const types = response.aggregations.types as Aggregation
const result:NodeStats = {
channel: 0,
account: 0
}
types.buckets.forEach(item => {
result[item.key] += item.doc_count
})
return result
}

Wyświetl plik

@ -1,9 +1,18 @@
import { Node, PrismaClient, Feed } from '@prisma/client'
import StorageFeedData from './StorageFeedData' import StorageFeedData from './StorageFeedData'
import { ElasticClient } from '../ElasticClient'
import feedIndex from '../Definitions/feedIndex'
import getFeed from './getFeed'
import Feed from '../Definitions/Feed'
import Node from '../Definitions/Node'
export const createFeed = async (prisma: PrismaClient, feedData: StorageFeedData, node: Node): Promise<Feed> => { export const createFeed = async (elastic: ElasticClient, feedData: StorageFeedData, node: Node): Promise<Feed> => {
const feed = await prisma.feed.create({ const fullName = `${feedData.name}@${node.domain}`
data: { await elastic.create<Feed>({
index: feedIndex,
id: fullName,
document: {
fullName,
domain: node.domain,
url: feedData.url, url: feedData.url,
name: feedData.name, name: feedData.name,
bot: feedData.bot, bot: feedData.bot,
@ -11,16 +20,22 @@ export const createFeed = async (prisma: PrismaClient, feedData: StorageFeedData
followersCount: feedData.followersCount, followersCount: feedData.followersCount,
followingCount: feedData.followingCount, followingCount: feedData.followingCount,
statusesCount: feedData.statusesCount, statusesCount: feedData.statusesCount,
lastStatusAt: feedData.lastStatusAt, lastStatusAt: feedData.lastStatusAt?.getTime(),
description: feedData.description, description: feedData.description,
displayName: feedData.displayName, displayName: feedData.displayName,
locked: feedData.locked, locked: feedData.locked,
nodeId: node.id, createdAt: feedData.createdAt.getTime(),
createdAt: feedData.createdAt, foundAt: (new Date()).getTime(),
fulltext: feedData.fulltext, fields: feedData.fields.map(field => {
return { name: field.name, value: field.value }
}),
extractedEmails: feedData.extractedEmails,
extractedTags: feedData.extractedTags,
parentFeedName: feedData.parentFeed?.name,
parentFeedDomain: feedData.parentFeed?.hostDomain,
type: feedData.type type: feedData.type
} }
}) })
console.info('Created new feed', { feedName: feed.name, nodeDomain: node.domain }) console.info('Created new feed', { feedName: feedData.name, nodeDomain: node.domain })
return feed return getFeed(elastic, fullName)
} }

Wyświetl plik

@ -0,0 +1,28 @@
import { ElasticClient } from '../ElasticClient'
import feedIndex from '../Definitions/feedIndex'
export const deleteDomainFeeds = async (elastic: ElasticClient, domains:string[]): Promise<number> => {
await elastic.indices.refresh({ index: feedIndex })
const result = await elastic.deleteByQuery({
index: feedIndex,
query: {
bool: {
should: domains.map(domain => {
return {
regexp: {
domain: {
value: '(.*\\.)?' + domain,
case_insensitive: true
}
}
}
}),
minimum_should_match: 1
}
}
})
console.info('Deleted domain feeds', {
count: result.deleted, domains
})
return result.deleted
}

Wyświetl plik

@ -1,18 +1,22 @@
import { Node, PrismaClient } from '@prisma/client' import { ElasticClient } from '../ElasticClient'
import Node from '../Definitions/Node'
import feedIndex from '../Definitions/feedIndex'
export const deleteOldFeeds = async (prisma: PrismaClient, node: Node): Promise<number> => { export const deleteOldFeeds = async (elastic: ElasticClient, node: Node): Promise<number> => {
const result = await prisma.feed.deleteMany({ await elastic.indices.refresh({ index: feedIndex })
where: { const result = await elastic.deleteByQuery({
nodeId: { index: feedIndex,
equals: node.id query: {
}, bool: {
refreshedAt: { must: [
lt: node.refreshAttemptedAt { match: { domain: node.domain } },
{ range: { refreshedAt: { lt: node.refreshAttemptedAt } } }
]
} }
} }
}) })
console.info('Deleted old feeds', { console.info('Deleted old feeds', {
count: result.count, olderThen: node.refreshAttemptedAt, nodeDomain: node.domain count: result.deleted, olderThen: node.refreshAttemptedAt, nodeDomain: node.domain
}) })
return result.count return result.deleted
} }

Wyświetl plik

@ -1,10 +0,0 @@
import { Node, PrismaClient, Feed } from '@prisma/client'
export const fetchFeedByNodeAndName = async (prisma:PrismaClient, node:Node, name:string):Promise<Feed> => {
return await prisma.feed.findFirst({
where: {
node: node,
name: name
}
})
}

Wyświetl plik

@ -0,0 +1,13 @@
import Feed from '../Definitions/Feed'
import { ElasticClient } from '../ElasticClient'
import feedIndex from '../Definitions/feedIndex'
const getFeed = async (elastic: ElasticClient, feedFullName:string):Promise<Feed> => {
const result = await elastic.get<Feed>({
index: feedIndex,
id: feedFullName
})
return result._source
}
export default getFeed

Wyświetl plik

@ -1,9 +1,14 @@
import { Feed, PrismaClient } from '@prisma/client'
import StorageFeedData from './StorageFeedData' import StorageFeedData from './StorageFeedData'
import Feed from '../Definitions/Feed'
import { ElasticClient } from '../ElasticClient'
import feedIndex from '../Definitions/feedIndex'
import getFeed from './getFeed'
export const updateFeed = async (prisma:PrismaClient, feed:Feed, feedData:StorageFeedData):Promise<Feed> => { export const updateFeed = async (elastic: ElasticClient, feed:Feed, feedData:StorageFeedData):Promise<Feed> => {
const updatedFeed = await prisma.feed.update({ await elastic.update<Feed>({
data: { index: feedIndex,
id: feed.fullName,
doc: {
url: feedData.url, url: feedData.url,
bot: feedData.bot, bot: feedData.bot,
avatar: feedData.avatar, avatar: feedData.avatar,
@ -15,14 +20,15 @@ export const updateFeed = async (prisma:PrismaClient, feed:Feed, feedData:Storag
displayName: feedData.displayName, displayName: feedData.displayName,
locked: feedData.locked, locked: feedData.locked,
createdAt: feedData.createdAt, createdAt: feedData.createdAt,
refreshedAt: new Date(), refreshedAt: (new Date()).getTime(),
fulltext: feedData.fulltext, type: feedData.type,
type: feedData.type fields: feedData.fields.map(field => {
}, return { name: field.name, value: field.value }
where: { }),
id: feed.id extractedEmails: feedData.extractedEmails,
extractedTags: feedData.extractedTags
} }
}) })
console.info('Updated feed', { feedName: feed.name, nodeId: feed.nodeId }) console.info('Updated feed', { feedName: feed.name, nodeDomain: feed.domain })
return updatedFeed return getFeed(elastic, feed.fullName)
} }

Wyświetl plik

@ -1,17 +0,0 @@
import { PrismaClient, Feed } from '@prisma/client'
import { FieldData } from '../../Fediverse/Providers/FieldData'
export const createFeedFields = async (prisma:PrismaClient, feed:Feed, fieldData:FieldData[]):Promise<number> => {
const result = await prisma.field.createMany({
data: fieldData.map(item => {
return {
feedId: feed.id,
name: item.name,
value: item.value
}
}),
skipDuplicates: true
})
console.log('Added fields to feed', { count: result.count, feedName: feed.name, nodeId: feed.nodeId })
return result.count
}

Wyświetl plik

@ -1,11 +0,0 @@
import { PrismaClient, Feed } from '@prisma/client'
export const deleteAllFeedFields = async (prisma:PrismaClient, feed:Feed):Promise<number> => {
const result = await prisma.field.deleteMany({
where: {
feedId: feed.id
}
})
console.log('Removed all fields from feed', { count: result.count, feedName: feed.name, nodeId: feed.nodeId })
return result.count
}

Wyświetl plik

@ -0,0 +1,95 @@
import { ElasticClient } from '../ElasticClient'
import nodeIndex from '../Definitions/nodeIndex'
import dateProperty from '../Properties/dateProperty'
const assertNodeIndex = async (elastic: ElasticClient):Promise<void> => {
console.info('Setting node pipeline')
await elastic.ingest.putPipeline({
id: 'node',
description: 'Default node pipeline',
processors: [
{
// @ts-ignore
geoip: {
ignore_missing: true,
field: 'serverIps',
properties: [
'location',
'continent_name',
'country_name',
'country_iso_code',
'region_iso_code',
'region_name',
'city_name'
],
target_field: 'geoip'
}
},
{
grok: {
ignore_missing: true,
field: 'softwareVersion',
patterns: ['%{VERSION:standardizedSoftwareVersion}'],
pattern_definitions: {
VERSION: '(?:[0-9]+\\.?[0-9]+\\.?[0-9]+)'
}
}
}
]
})
console.info('Checking node index')
const exists = await elastic.indices.exists({
index: nodeIndex
})
if (exists) {
console.info('Node index exists')
} else {
console.info('Creating node index')
await elastic.indices.create({
index: nodeIndex,
settings: {
default_pipeline: 'node'
}
})
}
console.info('Setting node mapping')
await elastic.indices.putMapping({
index: nodeIndex,
properties: {
name: { type: 'text' },
domain: { type: 'text' },
softwareName: { type: 'keyword' },
softwareVersion: { type: 'text' },
standardizedSoftwareVersion: { type: 'keyword' },
totalUserCount: { type: 'integer' },
monthActiveUserCount: { type: 'integer' },
halfYearActiveUserCount: { type: 'integer' },
statusesCount: { type: 'integer' },
foundAt: dateProperty,
refreshedAt: dateProperty,
refreshAttemptedAt: dateProperty,
openRegistrations: { type: 'boolean' },
serverIps: { type: 'ip' },
discoveredByDomain: { type: 'text' },
geoip: {
type: 'nested',
properties: {
location: { type: 'geo_point' },
continent_name: { type: 'keyword' },
country_name: { type: 'keyword' },
country_iso_code: { type: 'keyword' },
region_name: { type: 'keyword' },
region_iso_code: { type: 'keyword' },
city_name: { type: 'keyword' }
}
},
accountFeedCount: { type: 'integer' },
channelFeedCount: { type: 'integer' }
}
})
await elastic.indices.refresh({ index: nodeIndex })
}
export default assertNodeIndex

Wyświetl plik

@ -1,15 +1,24 @@
import { PrismaClient, Node } from '@prisma/client' import { ElasticClient } from '../ElasticClient'
export const createMissingNodes = async (client:PrismaClient, domains:string[]):Promise<number> => { import nodeIndex from '../Definitions/nodeIndex'
const result = await client.node.createMany( export const createMissingNodes = async (elastic: ElasticClient, domains:string[], discoveredByDomain:string|undefined):Promise<number> => {
{ const response = await elastic.bulk({
data: domains.map(domain => { index: nodeIndex,
return { body: domains.flatMap(domain => [
domain: domain {
} create: { _id: domain }
}), },
skipDuplicates: true {
} domain: domain,
) discoveredByDomain,
console.info('Created new nodes', { count: result.count }) foundAt: (new Date()).getTime()
return result.count }
])
})
const createdCount = response.items.filter(item => item.create.status === 201).length
console.warn('Created new nodes', {
requestedCount: domains.length,
createdCount: createdCount,
errors: response.items.filter(item => item.create.status !== 201).map(item => item.create.error.reason)
})
return createdCount
} }

Wyświetl plik

@ -0,0 +1,28 @@
import { ElasticClient } from '../ElasticClient'
import nodeIndex from '../Definitions/nodeIndex'
export const deleteDomainNodes = async (elastic: ElasticClient, domains:string[]): Promise<number> => {
await elastic.indices.refresh({ index: nodeIndex })
const result = await elastic.deleteByQuery({
index: nodeIndex,
query: {
bool: {
should: domains.map(domain => {
return {
regexp: {
domain: {
value: '(.*\\.)?' + domain,
case_insensitive: true
}
}
}
}),
minimum_should_match: 1
}
}
})
console.info('Deleted domain nodes', {
count: result.deleted, domains
})
return result.deleted
}

Wyświetl plik

@ -1,66 +1,20 @@
import { Node, PrismaClient } from '@prisma/client'
import { NoNodeFoundError } from './NoNodeFoundError' import { NoNodeFoundError } from './NoNodeFoundError'
import findNotProcessedNodeWithAttemptLimit from './findNotProcessedNodeWithAttemptLimit'
import findNodeWithOldestRefreshWithLimits from './findNodeWithOldestRefreshWithLimits'
import Node from '../Definitions/Node'
import { ElasticClient } from '../ElasticClient'
import nodeIndex from '../Definitions/nodeIndex'
export const fetchNodeToProcess = async (prisma: PrismaClient): Promise<Node> => { export const fetchNodeToProcess = async (elastic: ElasticClient): Promise<Node> => {
const currentTimestamp = Date.now() await elastic.indices.refresh({ index: nodeIndex })
const attemptLimitMilliseconds = parseInt(process.env.REATTEMPT_MINUTES ?? '60') * 60 * 1000 let node = await findNotProcessedNodeWithAttemptLimit(elastic)
const attemptLimitDate = new Date(currentTimestamp - attemptLimitMilliseconds) if (node !== null) {
console.log('Searching for not yet processed node not attempted before attemptLimit', { attemptLimitDate, attemptLimitMilliseconds }) return node
const newNode = await prisma.node.findFirst({
orderBy: {
foundAt: 'asc'
},
where: {
refreshedAt: null,
OR: [
{
refreshAttemptedAt: {
lt: attemptLimitDate
}
},
{
refreshAttemptedAt: null
}
]
}
})
if (newNode) {
console.log('Found not yet processed node', { domain: newNode.domain })
return newNode
} }
const refreshLimitMilliseconds = parseInt(process.env.REFRESH_HOURS ?? '168') * 60 * 60 * 1000 node = await findNodeWithOldestRefreshWithLimits(elastic)
const refreshLimitDate = new Date(currentTimestamp - refreshLimitMilliseconds) if (node !== null) {
console.log('Searching instance not refreshed for longest time and before refreshLimit and attemptLimit', { return node
refreshLimitMilliseconds,
refreshLimitDate,
attemptLimitDate,
attemptLimitMilliseconds
})
const node = await prisma.node.findFirst({
orderBy: {
refreshedAt: 'asc'
},
where: {
refreshedAt: {
lt: refreshLimitDate
},
OR: [
{
refreshAttemptedAt: {
lt: attemptLimitDate
}
},
{
refreshAttemptedAt: null
}
]
}
})
if (!node) {
throw new NoNodeFoundError()
} }
console.log('Found oldest node', { domain: node.domain }) throw new NoNodeFoundError()
return node
} }

Wyświetl plik

@ -0,0 +1,45 @@
import { ElasticClient } from '../ElasticClient'
import nodeIndex from '../Definitions/nodeIndex'
import Node from '../Definitions/Node'
const findNodeWithOldestRefreshWithLimits = async (elastic: ElasticClient): Promise<Node | null> => {
const currentTimestamp = Date.now()
const attemptLimitMilliseconds = parseInt(process.env.REATTEMPT_MINUTES ?? '60') * 60 * 1000
const attemptLimitDate = new Date(currentTimestamp - attemptLimitMilliseconds)
const refreshLimitMilliseconds = parseInt(process.env.REFRESH_HOURS ?? '168') * 60 * 60 * 1000
const refreshLimitDate = new Date(currentTimestamp - refreshLimitMilliseconds)
console.log('Searching instance not refreshed for longest time and before refreshLimit and attemptLimit', {
refreshLimitMilliseconds,
refreshLimitDate,
attemptLimitDate,
attemptLimitMilliseconds
})
const result = await elastic.search<Node>({
index: nodeIndex,
body: {
size: 1,
sort: [{
refreshedAt: { order: 'asc' }
}],
query: {
bool: {
must: [
{ range: { refreshedAt: { lt: refreshLimitDate.getTime() } } }
],
should: [
{ range: { refreshAttemptedAt: { lt: attemptLimitDate.getTime() } } },
{ bool: { must_not: [{ exists: { field: 'refreshAttemptedAt' } }] } }
],
minimum_should_match: 1
}
}
}
})
if (result.hits.hits.length > 0) {
const node = result.hits.hits[0]._source
console.log('Found oldest node', { node })
return node
}
return null
}
export default findNodeWithOldestRefreshWithLimits

Wyświetl plik

@ -0,0 +1,37 @@
import { ElasticClient } from '../ElasticClient'
import nodeIndex from '../Definitions/nodeIndex'
import Node from '../Definitions/Node'
const findNotProcessedNodeWithAttemptLimit = async (elastic: ElasticClient): Promise<Node|null> => {
const currentTimestamp = Date.now()
const attemptLimitMilliseconds = parseInt(process.env.REATTEMPT_MINUTES ?? '60') * 60 * 1000
const attemptLimitDate = new Date(currentTimestamp - attemptLimitMilliseconds)
console.log('Searching for not yet processed node not attempted before attemptLimit', { attemptLimitDate, attemptLimitMilliseconds })
const result = await elastic.search<Node>({
index: nodeIndex,
body: {
size: 1,
sort: [{
foundAt: { order: 'asc' }
}],
query: {
bool: {
must_not: [
{ exists: { field: 'refreshedAt' } }],
should: [
{ bool: { must_not: [{ exists: { field: 'refreshAttemptedAt' } }] } },
{ range: { refreshAttemptedAt: { lt: attemptLimitDate.getTime() } } }
],
minimum_should_match: 1
}
}
}
})
if (result.hits.hits.length > 0) {
const node = result.hits.hits[0]._source
console.log('Found not yet processed node', { node })
return node
}
return null
}
export default findNotProcessedNodeWithAttemptLimit

Wyświetl plik

@ -0,0 +1,13 @@
import { ElasticClient } from '../ElasticClient'
import Node from '../Definitions/Node'
import nodeIndex from '../Definitions/nodeIndex'
const getNode = async (elastic:ElasticClient, domain:string):Promise<Node> => {
const result = await elastic.get<Node>({
index: nodeIndex,
id: domain
})
return result._source
}
export default getNode

Wyświetl plik

@ -0,0 +1,7 @@
import getBannedDomains from '../../Jobs/Seed/getBannedDomains'
export default function isDomainNotBanned (domain):boolean {
return getBannedDomains().filter(
banned => domain.match(new RegExp('(.*\\.)?' + banned, 'gi')) !== null
).length === 0
}

Wyświetl plik

@ -1,14 +1,17 @@
import { Node, PrismaClient } from '@prisma/client' import { ElasticClient } from '../ElasticClient'
import nodeIndex from '../Definitions/nodeIndex'
import Node from '../Definitions/Node'
import getNode from './getNode'
export const setNodeRefreshAttempted = async (prisma:PrismaClient, node:Node):Promise<Node> => { export const setNodeRefreshAttempted = async (elastic: ElasticClient, node:Node):Promise<Node> => {
const date = new Date() const date = new Date()
console.info('Setting node refresh attempt', { domain: node.domain, date: date }) console.info('Setting node refresh attempt', { domain: node.domain, date: date })
return await prisma.node.update({ await elastic.update<Node>({
data: { index: nodeIndex,
refreshAttemptedAt: date id: node.domain,
}, doc: {
where: { refreshAttemptedAt: date.getTime()
id: node.id
} }
}) })
return getNode(elastic, node.domain)
} }

Wyświetl plik

@ -1,14 +1,17 @@
import { Node, PrismaClient } from '@prisma/client' import { ElasticClient } from '../ElasticClient'
import nodeIndex from '../Definitions/nodeIndex'
import Node from '../Definitions/Node'
import getNode from './getNode'
export const setNodeRefreshed = async (prisma:PrismaClient, node:Node):Promise<Node> => { export const setNodeRefreshed = async (elastic: ElasticClient, node:Node):Promise<Node> => {
const date = new Date() const date = new Date()
console.info('Setting node refreshed', { domain: node.domain, date: date }) console.info('Setting node refreshed', { domain: node.domain, date: date })
return await prisma.node.update({ await elastic.update<Node>({
data: { index: nodeIndex,
refreshedAt: date id: node.domain,
}, doc: {
where: { refreshedAt: date.getTime()
id: node.id
} }
}) })
return getNode(elastic, node.domain)
} }

Wyświetl plik

@ -0,0 +1,18 @@
import { ElasticClient } from '../ElasticClient'
import nodeIndex from '../Definitions/nodeIndex'
import Node from '../Definitions/Node'
import getNode from './getNode'
import { NodeStats } from '../../Jobs/Nodes/updateNodeFeedStats'
export const setNodeStats = async (elastic: ElasticClient, node:Node, stats: NodeStats):Promise<Node> => {
console.info('Setting node stats', { domain: node.domain, stats })
await elastic.update<Node>({
index: nodeIndex,
id: node.domain,
doc: {
accountFeedCount: stats.account,
channelFeedCount: stats.channel
}
})
return getNode(elastic, node.domain)
}

Wyświetl plik

@ -1,21 +0,0 @@
import { Node, PrismaClient } from '@prisma/client'
import { NodeInfo } from '../../Fediverse/NodeInfo/retrieveNodeInfo'
export const updateNode = async (prisma: PrismaClient, node: Node, nodeInfo:NodeInfo):Promise<Node> => {
const updated = await prisma.node.update({
where: {
id: node.id
},
data: {
softwareName: nodeInfo.software.name.toLocaleLowerCase(),
softwareVersion: nodeInfo.software.version,
totalUserCount: nodeInfo.usage?.users?.total ?? null,
monthActiveUserCount: nodeInfo.usage?.users?.activeMonth ?? null,
halfYearActiveUserCount: nodeInfo.usage?.users?.activeHalfyear ?? null,
statusesCount: nodeInfo.usage?.localPosts ?? null,
openRegistrations: nodeInfo.openRegistrations ?? null
}
})
console.info('Updated node info', { domain: updated.domain, software: updated.softwareName })
return updated
}

Wyświetl plik

@ -0,0 +1,33 @@
import { NodeInfo } from '../../Fediverse/NodeInfo/retrieveNodeInfo'
import Node from '../Definitions/Node'
import { ElasticClient } from '../ElasticClient'
import nodeIndex from '../Definitions/nodeIndex'
import getNode from './getNode'
const assertPositiveInt = (number:number|undefined):number|undefined => {
if (number === undefined) {
return undefined
}
return Math.max(0, Math.round(number))
}
export const updateNodeInfo = async (elastic: ElasticClient, node: Node, nodeInfo:NodeInfo):Promise<Node> => {
await elastic.update<Node>({
index: nodeIndex,
id: node.domain,
doc: {
name: nodeInfo?.name,
openRegistrations: nodeInfo?.openRegistrations,
softwareName: nodeInfo?.software?.name?.toLocaleLowerCase(),
softwareVersion: nodeInfo?.software?.version,
halfYearActiveUserCount: assertPositiveInt(nodeInfo?.usage?.users?.activeHalfyear),
monthActiveUserCount: assertPositiveInt(nodeInfo?.usage?.users.activeMonth),
statusesCount: assertPositiveInt(nodeInfo?.usage?.localPosts),
totalUserCount: assertPositiveInt(nodeInfo?.usage?.users?.total)
}
})
const resultNode = await getNode(elastic, node.domain)
console.info('Updated node info', { node })
return resultNode
}

Wyświetl plik

@ -0,0 +1,17 @@
import Node from '../Definitions/Node'
import { ElasticClient } from '../ElasticClient'
import nodeIndex from '../Definitions/nodeIndex'
import getNode from './getNode'
export const updateNodeIps = async (elastic:ElasticClient, node: Node, ips:string[]):Promise<Node> => {
await elastic.update<Node>({
index: nodeIndex,
id: node.domain,
doc: {
serverIps: ips
}
})
const resultNode = await getNode(elastic, node.domain)
console.info('Updated node ips', { resultNode })
return resultNode
}

Wyświetl plik

@ -1,5 +0,0 @@
import { PrismaClient } from '@prisma/client'
const prismaClient = new PrismaClient()
export default prismaClient

Wyświetl plik

@ -0,0 +1,5 @@
import { MappingProperty } from '@elastic/elasticsearch/lib/api/types'
const dateProperty:MappingProperty = { type: 'date', format: 'epoch_millis' }
export default dateProperty

Wyświetl plik

@ -1,15 +0,0 @@
import { PrismaClient, Tag, Feed } from '@prisma/client'
export const createFeedTags = async (prisma:PrismaClient, feed:Feed, tags:Tag[]):Promise<number> => {
const result = await prisma.feedToTag.createMany({
data: tags.map(tag => {
return {
feedId: feed.id,
tagId: tag.id
}
}),
skipDuplicates: true
})
console.log('Added tags to feed', { count: result.count, feedName: feed.name, nodeId: feed.nodeId })
return result.count
}

Wyświetl plik

@ -1,15 +0,0 @@
import { PrismaClient } from '@prisma/client'
export const createMissingTags = async (prisma:PrismaClient, tagNames:string[]):Promise<number> => {
const result = await prisma.tag.createMany({
data: tagNames.map(tagName => {
return {
name: tagName
}
}
),
skipDuplicates: true
})
console.log('Created missing tags', { count: result.count })
return result.count
}

Wyświetl plik

@ -1,11 +0,0 @@
import { PrismaClient, Feed } from '@prisma/client'
export const deleteAllFeedTags = async (prisma:PrismaClient, feed:Feed):Promise<number> => {
const result = await prisma.feedToTag.deleteMany({
where: {
feedId: feed.id
}
})
console.log('Removed all tags from feed', { count: result.count, feedName: feed.name, nodeId: feed.nodeId })
return result.count
}

Wyświetl plik

@ -1,11 +0,0 @@
import { PrismaClient, Tag } from '@prisma/client'
export const fetchTags = async (prisma:PrismaClient, tagNames:string[]):Promise<Tag[]> => {
return await prisma.tag.findMany({
where: {
name: {
in: tagNames
}
}
})
}

Wyświetl plik

@ -1,12 +1,16 @@
import providerRegistry from './Fediverse/Providers' import providerRegistry from './Fediverse/Providers'
import prismaClient from './Storage/PrismaClient'
import { addNodeSeed } from './Jobs/Seed/addNodeSeed' import { addNodeSeed } from './Jobs/Seed/addNodeSeed'
import { processNextNode } from './Jobs/processNextNode' import { processNextNode } from './Jobs/processNextNode'
import assertNodeIndex from './Storage/Nodes/assertNodeIndex'
import assertFeedIndex from './Storage/Feeds/assertFeedIndex'
import elasticClient from './Storage/ElasticClient'
import deleteDomains from './Jobs/Seed/deleteBannedNodes'
import getBannedDomains from './Jobs/Seed/getBannedDomains'
const loop = async (): Promise<void> => { const loop = async (): Promise<void> => {
while (true) { while (true) {
try { try {
await processNextNode(prismaClient, providerRegistry) await processNextNode(elasticClient, providerRegistry)
} catch (err) { } catch (err) {
console.warn(err) console.warn(err)
const waitForJobMilliseconds = parseInt(process.env.WAIT_FOR_JOB_MINUTES ?? '60') * 60 * 1000 const waitForJobMilliseconds = parseInt(process.env.WAIT_FOR_JOB_MINUTES ?? '60') * 60 * 1000
@ -18,8 +22,11 @@ const loop = async (): Promise<void> => {
} }
const app = async (): Promise<void> => { const app = async (): Promise<void> => {
const seedDomain = process.env.SEED_NODE_DOMAIN ?? 'mastodon.social' await assertNodeIndex(elasticClient)
await addNodeSeed(prismaClient, seedDomain) await assertFeedIndex(elasticClient)
await deleteDomains(elasticClient, getBannedDomains())
const seedDomains = (process.env.SEED_NODE_DOMAIN ?? 'mastodon.social').split(',')
await addNodeSeed(elasticClient, seedDomains)
setTimeout(loop) setTimeout(loop)
} }