kopia lustrzana https://github.com/Stopka/fedicrawl
Replaced postgresql by elastic search
rodzic
7eb28e5d84
commit
f6f9d758a1
|
@ -1,20 +1,22 @@
|
|||
FROM node:16-bullseye AS build
|
||||
ENV POSTGRES_URL='postgresql://fedisearch:passwd@postgres:5432/fedisearch?schema=public' \
|
||||
ENV ELASTIC_URL='http://elastic:9200' \
|
||||
ELASTIC_USER='elastic' \
|
||||
ELASTIC_PASSWORD='' \
|
||||
SEED_NODE_DOMAIN='mastodon.social' \
|
||||
REATTEMPT_MINUTES='60' \
|
||||
REFRESH_HOURS='120' \
|
||||
WAIT_FOR_JOB_MINUTES='60' \
|
||||
DEFAULT_TIMEOUT_MILLISECONDS='10000' \
|
||||
BANNED_DOMAINS='' \
|
||||
TZ='UTC'
|
||||
WORKDIR /srv
|
||||
COPY application/package*.json ./
|
||||
COPY application/prisma ./prisma/
|
||||
RUN npm install
|
||||
COPY application/. .
|
||||
RUN npm run build
|
||||
|
||||
FROM build AS dev
|
||||
CMD npm run dev
|
||||
CMD npx tsc --watch
|
||||
|
||||
FROM node:16-bullseye AS prod
|
||||
RUN groupadd -g 1001 nodejs
|
||||
|
@ -22,7 +24,6 @@ RUN useradd -u 1001 -g 1001 nextjs
|
|||
WORKDIR /srv
|
||||
USER nextjs
|
||||
COPY --from=build /srv/node_modules ./node_modules
|
||||
COPY --from=build /srv/prisma ./prisma
|
||||
COPY --from=build /srv/package*.json ./
|
||||
COPY --from=build /srv/dist ./dist
|
||||
CMD npm run start:deploy
|
||||
|
|
21
README.md
21
README.md
|
@ -24,15 +24,18 @@ Data providers for more apps will be probably added soon (Pull requests are welc
|
|||
|
||||
Configuration is done using environmental variables:
|
||||
|
||||
| Variable | Description | Default value / Example value |
|
||||
|--------------------------------|--------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------|
|
||||
| `POSTGRES_URL` | Postgres database uri | `postgresql://fedisearch:passwd@postgres:5432/fedisearch?schema=public` |
|
||||
| `SEED_NODE_DOMAIN` | Domain of the first node to search users and other nodes on | `mastodon.social` |
|
||||
| `REATTEMPT_MINUTES` | _Optional_, How many minutes should be waited for next node refresh attempt if the refresh fails | `60 ` |
|
||||
| `REFRESH_HOURS` | _Optional_, How often (in hours) should be node info refreshed | `120` |
|
||||
| `WAIT_FOR_JOB_MINUTES` | _Optional_, How many minutes should the thread sleep if there are no nodes to refresh | `60` |
|
||||
| `DEFAULT_TIMEOUT_MILLISECONDS` | _Optional_, How many milliseconds should http wait for node api response on refresh | `10000` |
|
||||
| `TZ` | _Optional_, Timezone | `UTC` |
|
||||
| Variable | Description | Default value / Example value |
|
||||
|--------------------------------|--------------------------------------------------------------------------------------------------|-------------------------------|
|
||||
| `ELASTIC_URL` | Url address of ElasticSearch server | `http://elastic:9200` |
|
||||
| `ELASTIC_USER` | Username for EalsticSearch server | `elastic` |
|
||||
| `ELASTIC_PASSWORD` | Username for EalsticSearch server | empty |
|
||||
| `SEED_NODE_DOMAIN` | Domain of the first node to search users and other nodes on | `mastodon.social` |
|
||||
| `REATTEMPT_MINUTES` | _Optional_, How many minutes should be waited for next node refresh attempt if the refresh fails | `60 ` |
|
||||
| `REFRESH_HOURS` | _Optional_, How often (in hours) should be node info refreshed | `120` |
|
||||
| `WAIT_FOR_JOB_MINUTES` | _Optional_, How many minutes should the thread sleep if there are no nodes to refresh | `60` |
|
||||
| `DEFAULT_TIMEOUT_MILLISECONDS` | _Optional_, How many milliseconds should http wait for node api response on refresh | `10000` |
|
||||
| `BANNED_DOMAINS` | _Optional_, Domains not to index (even with subdomains) | _empty_ |
|
||||
| `TZ` | _Optional_, Timezone | `UTC` |
|
||||
## Deploy
|
||||
App is designed to be run in docker container and deployed using docker-compose.
|
||||
More info can be found in [FediSearch example docker-compose](https://github.com/Stopka/fedisearch-compose) project
|
||||
|
|
Plik diff jest za duży
Load Diff
|
@ -19,18 +19,12 @@
|
|||
"test:cov": "jest --coverage",
|
||||
"test:debug": "node --inspect-brk -r tsconfig-paths/register -r ts-node/register node_modules/.bin/jest --runInBand",
|
||||
"test:e2e": "jest --config ./test/jest-e2e.json",
|
||||
"migrate:dev": "prisma migrate dev",
|
||||
"migrate:dev:create": "prisma migrate dev --create-only",
|
||||
"migrate:deploy": "npx prisma migrate deploy",
|
||||
"migrate:reset": "npx prisma migrate reset",
|
||||
"migrate:resolve": "npx prisma migrate resolve",
|
||||
"prisma:generate": "npx prisma generate",
|
||||
"prisma:studio": "npx prisma studio",
|
||||
"start:deploy": "npm run migrate:deploy && npm run start"
|
||||
},
|
||||
"dependencies": {
|
||||
"@prisma/client": "^3.6.0",
|
||||
"@elastic/elasticsearch": "^8.2.1",
|
||||
"axios": "^0.21.1",
|
||||
"geoip-lite": "^1.4.6",
|
||||
"npmlog": "^6.0.0",
|
||||
"rimraf": "^3.0.2",
|
||||
"striptags": "^3.2.0",
|
||||
|
@ -38,6 +32,7 @@
|
|||
"zod": "^3.11.6"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/geoip-lite": "^1.4.1",
|
||||
"@types/jest": "^27.0.2",
|
||||
"@types/node": "^16.11.10",
|
||||
"@types/npmlog": "^4.1.3",
|
||||
|
@ -50,7 +45,6 @@
|
|||
"eslint-plugin-promise": "^5.1.1",
|
||||
"eslint-plugin-react": "^7.27.1",
|
||||
"jest": "^27.3.0",
|
||||
"prisma": "^3.6.0",
|
||||
"standard": "*",
|
||||
"ts-jest": "^27.0.7",
|
||||
"typescript": "^4.3.5"
|
||||
|
|
|
@ -1,174 +0,0 @@
|
|||
-- CreateEnum
|
||||
CREATE TYPE "FeedType" AS ENUM ('account', 'channel');
|
||||
|
||||
-- CreateTable
|
||||
CREATE TABLE "Tag" (
|
||||
"id" UUID NOT NULL,
|
||||
"name" TEXT NOT NULL,
|
||||
|
||||
CONSTRAINT "Tag_pkey" PRIMARY KEY ("id")
|
||||
);
|
||||
|
||||
-- CreateTable
|
||||
CREATE TABLE "Email" (
|
||||
"id" UUID NOT NULL,
|
||||
"address" TEXT NOT NULL,
|
||||
"feedId" UUID NOT NULL,
|
||||
|
||||
CONSTRAINT "Email_pkey" PRIMARY KEY ("id")
|
||||
);
|
||||
|
||||
-- CreateTable
|
||||
CREATE TABLE "FeedToTag" (
|
||||
"feedId" UUID NOT NULL,
|
||||
"tagId" UUID NOT NULL,
|
||||
|
||||
CONSTRAINT "FeedToTag_pkey" PRIMARY KEY ("feedId","tagId")
|
||||
);
|
||||
|
||||
-- CreateTable
|
||||
CREATE TABLE "Field" (
|
||||
"id" UUID NOT NULL,
|
||||
"name" TEXT NOT NULL,
|
||||
"value" TEXT NOT NULL,
|
||||
"feedId" UUID NOT NULL,
|
||||
|
||||
CONSTRAINT "Field_pkey" PRIMARY KEY ("id")
|
||||
);
|
||||
|
||||
-- CreateTable
|
||||
CREATE TABLE "Feed" (
|
||||
"id" UUID NOT NULL,
|
||||
"nodeId" UUID NOT NULL,
|
||||
"foundAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
"refreshedAt" TIMESTAMP(3) NOT NULL,
|
||||
"name" TEXT NOT NULL,
|
||||
"displayName" TEXT NOT NULL,
|
||||
"description" TEXT NOT NULL,
|
||||
"followersCount" INTEGER NOT NULL,
|
||||
"followingCount" INTEGER NOT NULL,
|
||||
"statusesCount" INTEGER,
|
||||
"bot" BOOLEAN,
|
||||
"url" TEXT NOT NULL,
|
||||
"avatar" TEXT,
|
||||
"locked" BOOLEAN NOT NULL,
|
||||
"lastStatusAt" TIMESTAMP(3),
|
||||
"createdAt" TIMESTAMP(3) NOT NULL,
|
||||
"type" "FeedType" NOT NULL DEFAULT E'account',
|
||||
"parentFeedName" TEXT,
|
||||
"parentFeedDomain" TEXT,
|
||||
"fulltext" TEXT NOT NULL DEFAULT E'',
|
||||
|
||||
CONSTRAINT "Feed_pkey" PRIMARY KEY ("id")
|
||||
);
|
||||
|
||||
-- CreateTable
|
||||
CREATE TABLE "Node" (
|
||||
"id" UUID NOT NULL,
|
||||
"softwareName" TEXT,
|
||||
"softwareVersion" TEXT,
|
||||
"totalUserCount" INTEGER,
|
||||
"monthActiveUserCount" INTEGER,
|
||||
"halfYearActiveUserCount" INTEGER,
|
||||
"statusesCount" INTEGER,
|
||||
"openRegistrations" BOOLEAN,
|
||||
"foundAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
"refreshedAt" TIMESTAMP(3),
|
||||
"refreshAttemptedAt" TIMESTAMP(3),
|
||||
"domain" TEXT NOT NULL,
|
||||
|
||||
CONSTRAINT "Node_pkey" PRIMARY KEY ("id")
|
||||
);
|
||||
|
||||
-- CreateIndex
|
||||
CREATE UNIQUE INDEX "Tag_name_key" ON "Tag"("name");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Email_address_idx" ON "Email"("address");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Field_name_idx" ON "Field"("name");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Field_value_idx" ON "Field"("value");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_displayName_idx" ON "Feed"("displayName");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_description_idx" ON "Feed"("description");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_bot_idx" ON "Feed"("bot");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_locked_idx" ON "Feed"("locked");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_lastStatusAt_idx" ON "Feed"("lastStatusAt");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_createdAt_idx" ON "Feed"("createdAt");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_refreshedAt_idx" ON "Feed"("refreshedAt");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_parentFeedName_parentFeedDomain_idx" ON "Feed"("parentFeedName", "parentFeedDomain");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_type_idx" ON "Feed"("type");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_fulltext_idx" ON "Feed"("fulltext");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE UNIQUE INDEX "Feed_name_nodeId_key" ON "Feed"("name", "nodeId");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE UNIQUE INDEX "Node_domain_key" ON "Node"("domain");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_softwareName_idx" ON "Node"("softwareName");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_softwareVersion_idx" ON "Node"("softwareVersion");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_totalUserCount_idx" ON "Node"("totalUserCount");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_monthActiveUserCount_idx" ON "Node"("monthActiveUserCount");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_halfYearActiveUserCount_idx" ON "Node"("halfYearActiveUserCount");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_statusesCount_idx" ON "Node"("statusesCount");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_openRegistrations_idx" ON "Node"("openRegistrations");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_refreshedAt_idx" ON "Node"("refreshedAt");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_refreshAttemptedAt_idx" ON "Node"("refreshAttemptedAt");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_foundAt_idx" ON "Node"("foundAt");
|
||||
|
||||
-- AddForeignKey
|
||||
ALTER TABLE "Email" ADD CONSTRAINT "Email_feedId_fkey" FOREIGN KEY ("feedId") REFERENCES "Feed"("id") ON DELETE RESTRICT ON UPDATE CASCADE;
|
||||
|
||||
-- AddForeignKey
|
||||
ALTER TABLE "FeedToTag" ADD CONSTRAINT "FeedToTag_feedId_fkey" FOREIGN KEY ("feedId") REFERENCES "Feed"("id") ON DELETE RESTRICT ON UPDATE CASCADE;
|
||||
|
||||
-- AddForeignKey
|
||||
ALTER TABLE "FeedToTag" ADD CONSTRAINT "FeedToTag_tagId_fkey" FOREIGN KEY ("tagId") REFERENCES "Tag"("id") ON DELETE RESTRICT ON UPDATE CASCADE;
|
||||
|
||||
-- AddForeignKey
|
||||
ALTER TABLE "Field" ADD CONSTRAINT "Field_feedId_fkey" FOREIGN KEY ("feedId") REFERENCES "Feed"("id") ON DELETE RESTRICT ON UPDATE CASCADE;
|
||||
|
||||
-- AddForeignKey
|
||||
ALTER TABLE "Feed" ADD CONSTRAINT "Feed_nodeId_fkey" FOREIGN KEY ("nodeId") REFERENCES "Node"("id") ON DELETE RESTRICT ON UPDATE CASCADE;
|
|
@ -1,163 +0,0 @@
|
|||
/*
|
||||
Warnings:
|
||||
|
||||
- A unique constraint covering the columns `[name,nodeId]` on the table `Feed` will be added. If there are existing duplicate values, this will fail.
|
||||
- A unique constraint covering the columns `[domain]` on the table `Node` will be added. If there are existing duplicate values, this will fail.
|
||||
- A unique constraint covering the columns `[name]` on the table `Tag` will be added. If there are existing duplicate values, this will fail.
|
||||
|
||||
*/
|
||||
-- DropIndex
|
||||
DROP INDEX "Email_address_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_bot_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_createdAt_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_description_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_displayName_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_fulltext_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_lastStatusAt_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_locked_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_name_nodeId_key";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_parentFeedName_parentFeedDomain_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_refreshedAt_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_type_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Field_name_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Field_value_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_domain_key";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_foundAt_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_halfYearActiveUserCount_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_monthActiveUserCount_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_openRegistrations_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_refreshAttemptedAt_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_refreshedAt_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_softwareName_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_softwareVersion_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_statusesCount_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_totalUserCount_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Tag_name_key";
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Email_address_idx" ON "Email"("address");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_displayName_idx" ON "Feed"("displayName");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_description_idx" ON "Feed"("description");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_bot_idx" ON "Feed"("bot");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_locked_idx" ON "Feed"("locked");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_lastStatusAt_idx" ON "Feed"("lastStatusAt");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_createdAt_idx" ON "Feed"("createdAt");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_refreshedAt_idx" ON "Feed"("refreshedAt");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_parentFeedName_parentFeedDomain_idx" ON "Feed"("parentFeedName", "parentFeedDomain");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_type_idx" ON "Feed"("type");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_fulltext_idx" ON "Feed"("fulltext");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE UNIQUE INDEX "Feed_name_nodeId_key" ON "Feed"("name", "nodeId");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Field_name_idx" ON "Field"("name");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Field_value_idx" ON "Field"("value");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE UNIQUE INDEX "Node_domain_key" ON "Node"("domain");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_softwareName_idx" ON "Node"("softwareName");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_softwareVersion_idx" ON "Node"("softwareVersion");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_totalUserCount_idx" ON "Node"("totalUserCount");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_monthActiveUserCount_idx" ON "Node"("monthActiveUserCount");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_halfYearActiveUserCount_idx" ON "Node"("halfYearActiveUserCount");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_statusesCount_idx" ON "Node"("statusesCount");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_openRegistrations_idx" ON "Node"("openRegistrations");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_refreshedAt_idx" ON "Node"("refreshedAt");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_refreshAttemptedAt_idx" ON "Node"("refreshAttemptedAt");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_foundAt_idx" ON "Node"("foundAt");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE UNIQUE INDEX "Tag_name_key" ON "Tag"("name");
|
|
@ -1,167 +0,0 @@
|
|||
/*
|
||||
Warnings:
|
||||
|
||||
- A unique constraint covering the columns `[name,nodeId]` on the table `Feed` will be added. If there are existing duplicate values, this will fail.
|
||||
- A unique constraint covering the columns `[domain]` on the table `Node` will be added. If there are existing duplicate values, this will fail.
|
||||
- A unique constraint covering the columns `[name]` on the table `Tag` will be added. If there are existing duplicate values, this will fail.
|
||||
|
||||
*/
|
||||
-- DropIndex
|
||||
DROP INDEX "Email_address_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_bot_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_createdAt_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_description_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_displayName_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_fulltext_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_lastStatusAt_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_locked_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_name_nodeId_key";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_parentFeedName_parentFeedDomain_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_refreshedAt_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_type_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Field_name_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Field_value_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_domain_key";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_foundAt_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_halfYearActiveUserCount_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_monthActiveUserCount_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_openRegistrations_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_refreshAttemptedAt_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_refreshedAt_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_softwareName_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_softwareVersion_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_statusesCount_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_totalUserCount_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Tag_name_key";
|
||||
|
||||
-- AlterTable
|
||||
ALTER TABLE "Feed" ALTER COLUMN "followersCount" DROP NOT NULL,
|
||||
ALTER COLUMN "followingCount" DROP NOT NULL;
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Email_address_idx" ON "Email"("address");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_displayName_idx" ON "Feed"("displayName");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_description_idx" ON "Feed"("description");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_bot_idx" ON "Feed"("bot");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_locked_idx" ON "Feed"("locked");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_lastStatusAt_idx" ON "Feed"("lastStatusAt");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_createdAt_idx" ON "Feed"("createdAt");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_refreshedAt_idx" ON "Feed"("refreshedAt");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_parentFeedName_parentFeedDomain_idx" ON "Feed"("parentFeedName", "parentFeedDomain");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_type_idx" ON "Feed"("type");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_fulltext_idx" ON "Feed"("fulltext");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE UNIQUE INDEX "Feed_name_nodeId_key" ON "Feed"("name", "nodeId");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Field_name_idx" ON "Field"("name");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Field_value_idx" ON "Field"("value");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE UNIQUE INDEX "Node_domain_key" ON "Node"("domain");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_softwareName_idx" ON "Node"("softwareName");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_softwareVersion_idx" ON "Node"("softwareVersion");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_totalUserCount_idx" ON "Node"("totalUserCount");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_monthActiveUserCount_idx" ON "Node"("monthActiveUserCount");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_halfYearActiveUserCount_idx" ON "Node"("halfYearActiveUserCount");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_statusesCount_idx" ON "Node"("statusesCount");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_openRegistrations_idx" ON "Node"("openRegistrations");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_refreshedAt_idx" ON "Node"("refreshedAt");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_refreshAttemptedAt_idx" ON "Node"("refreshAttemptedAt");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_foundAt_idx" ON "Node"("foundAt");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE UNIQUE INDEX "Tag_name_key" ON "Tag"("name");
|
|
@ -1,4 +0,0 @@
|
|||
update "Node"
|
||||
set "refreshedAt"=NULL,
|
||||
"refreshAttemptedAt"=NULL
|
||||
where "Node"."softwareName" like 'pleroma';
|
|
@ -1,4 +0,0 @@
|
|||
update "Node"
|
||||
set "refreshedAt"=NULL,
|
||||
"refreshAttemptedAt"=NULL
|
||||
where "Node"."softwareName" like 'misskey';
|
|
@ -1,3 +0,0 @@
|
|||
update "Node"
|
||||
set "softwareName"=lower("softwareName")
|
||||
where true;
|
|
@ -1,4 +0,0 @@
|
|||
update "Node"
|
||||
set "refreshedAt"=NULL,
|
||||
"refreshAttemptedAt"=NULL
|
||||
where "Node"."softwareName" like 'hometown';
|
|
@ -1,4 +0,0 @@
|
|||
update "Node"
|
||||
set "refreshedAt"=NULL,
|
||||
"refreshAttemptedAt"=NULL
|
||||
where "Node"."softwareName" like 'ecko';
|
|
@ -1,193 +0,0 @@
|
|||
/*
|
||||
Warnings:
|
||||
|
||||
- A unique constraint covering the columns `[name,nodeId]` on the table `Feed` will be added. If there are existing duplicate values, this will fail.
|
||||
- A unique constraint covering the columns `[domain]` on the table `Node` will be added. If there are existing duplicate values, this will fail.
|
||||
- A unique constraint covering the columns `[name]` on the table `Tag` will be added. If there are existing duplicate values, this will fail.
|
||||
|
||||
*/
|
||||
-- DropForeignKey
|
||||
ALTER TABLE "Email" DROP CONSTRAINT "Email_feedId_fkey";
|
||||
|
||||
-- DropForeignKey
|
||||
ALTER TABLE "Feed" DROP CONSTRAINT "Feed_nodeId_fkey";
|
||||
|
||||
-- DropForeignKey
|
||||
ALTER TABLE "FeedToTag" DROP CONSTRAINT "FeedToTag_feedId_fkey";
|
||||
|
||||
-- DropForeignKey
|
||||
ALTER TABLE "FeedToTag" DROP CONSTRAINT "FeedToTag_tagId_fkey";
|
||||
|
||||
-- DropForeignKey
|
||||
ALTER TABLE "Field" DROP CONSTRAINT "Field_feedId_fkey";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Email_address_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_bot_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_createdAt_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_description_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_displayName_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_fulltext_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_lastStatusAt_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_locked_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_name_nodeId_key";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_parentFeedName_parentFeedDomain_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_refreshedAt_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Feed_type_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Field_name_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Field_value_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_domain_key";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_foundAt_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_halfYearActiveUserCount_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_monthActiveUserCount_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_openRegistrations_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_refreshAttemptedAt_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_refreshedAt_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_softwareName_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_softwareVersion_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_statusesCount_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Node_totalUserCount_idx";
|
||||
|
||||
-- DropIndex
|
||||
DROP INDEX "Tag_name_key";
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Email_address_idx" ON "Email"("address");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_displayName_idx" ON "Feed"("displayName");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_description_idx" ON "Feed"("description");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_bot_idx" ON "Feed"("bot");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_locked_idx" ON "Feed"("locked");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_lastStatusAt_idx" ON "Feed"("lastStatusAt");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_createdAt_idx" ON "Feed"("createdAt");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_refreshedAt_idx" ON "Feed"("refreshedAt");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_parentFeedName_parentFeedDomain_idx" ON "Feed"("parentFeedName", "parentFeedDomain");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_type_idx" ON "Feed"("type");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Feed_fulltext_idx" ON "Feed"("fulltext");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE UNIQUE INDEX "Feed_name_nodeId_key" ON "Feed"("name", "nodeId");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Field_name_idx" ON "Field"("name");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Field_value_idx" ON "Field"("value");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE UNIQUE INDEX "Node_domain_key" ON "Node"("domain");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_softwareName_idx" ON "Node"("softwareName");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_softwareVersion_idx" ON "Node"("softwareVersion");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_totalUserCount_idx" ON "Node"("totalUserCount");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_monthActiveUserCount_idx" ON "Node"("monthActiveUserCount");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_halfYearActiveUserCount_idx" ON "Node"("halfYearActiveUserCount");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_statusesCount_idx" ON "Node"("statusesCount");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_openRegistrations_idx" ON "Node"("openRegistrations");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_refreshedAt_idx" ON "Node"("refreshedAt");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_refreshAttemptedAt_idx" ON "Node"("refreshAttemptedAt");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE INDEX "Node_foundAt_idx" ON "Node"("foundAt");
|
||||
|
||||
-- CreateIndex
|
||||
CREATE UNIQUE INDEX "Tag_name_key" ON "Tag"("name");
|
||||
|
||||
-- AddForeignKey
|
||||
ALTER TABLE "Email" ADD CONSTRAINT "Email_feedId_fkey" FOREIGN KEY ("feedId") REFERENCES "Feed"("id") ON DELETE CASCADE ON UPDATE CASCADE;
|
||||
|
||||
-- AddForeignKey
|
||||
ALTER TABLE "FeedToTag" ADD CONSTRAINT "FeedToTag_feedId_fkey" FOREIGN KEY ("feedId") REFERENCES "Feed"("id") ON DELETE CASCADE ON UPDATE CASCADE;
|
||||
|
||||
-- AddForeignKey
|
||||
ALTER TABLE "FeedToTag" ADD CONSTRAINT "FeedToTag_tagId_fkey" FOREIGN KEY ("tagId") REFERENCES "Tag"("id") ON DELETE CASCADE ON UPDATE CASCADE;
|
||||
|
||||
-- AddForeignKey
|
||||
ALTER TABLE "Field" ADD CONSTRAINT "Field_feedId_fkey" FOREIGN KEY ("feedId") REFERENCES "Feed"("id") ON DELETE CASCADE ON UPDATE CASCADE;
|
||||
|
||||
-- AddForeignKey
|
||||
ALTER TABLE "Feed" ADD CONSTRAINT "Feed_nodeId_fkey" FOREIGN KEY ("nodeId") REFERENCES "Node"("id") ON DELETE CASCADE ON UPDATE CASCADE;
|
|
@ -1,4 +0,0 @@
|
|||
update "Node"
|
||||
set "refreshedAt"=NULL,
|
||||
"refreshAttemptedAt"=NULL
|
||||
where "Node"."softwareName" like 'friendica';
|
|
@ -1,3 +0,0 @@
|
|||
# Please do not edit this file manually
|
||||
# It should be added in your version-control system (i.e. Git)
|
||||
provider = "postgresql"
|
|
@ -1,115 +0,0 @@
|
|||
datasource db {
|
||||
url = env("POSTGRES_URL")
|
||||
provider = "postgresql"
|
||||
}
|
||||
|
||||
generator client {
|
||||
provider = "prisma-client-js"
|
||||
previewFeatures = ["extendedIndexes","fullTextSearch","referentialActions"]
|
||||
}
|
||||
|
||||
model Tag {
|
||||
id String @id @default(uuid()) @db.Uuid
|
||||
name String @unique
|
||||
feedToTag FeedToTag[]
|
||||
}
|
||||
|
||||
model Email {
|
||||
id String @id @default(uuid()) @db.Uuid
|
||||
address String
|
||||
feed Feed @relation(fields: [feedId], references: [id], onDelete: Cascade)
|
||||
feedId String @db.Uuid
|
||||
|
||||
@@index([address])
|
||||
}
|
||||
|
||||
model FeedToTag {
|
||||
feed Feed @relation(fields: [feedId], references: [id], onDelete: Cascade)
|
||||
feedId String @db.Uuid
|
||||
tag Tag @relation(fields: [tagId], references: [id], onDelete: Cascade)
|
||||
tagId String @db.Uuid
|
||||
|
||||
@@id([feedId, tagId])
|
||||
}
|
||||
|
||||
model Field {
|
||||
id String @id @default(uuid()) @db.Uuid
|
||||
name String
|
||||
value String
|
||||
feed Feed @relation(fields: [feedId], references: [id], onDelete: Cascade)
|
||||
feedId String @db.Uuid
|
||||
|
||||
@@index([name])
|
||||
@@index([value])
|
||||
}
|
||||
|
||||
enum FeedType{
|
||||
account
|
||||
channel
|
||||
}
|
||||
|
||||
model Feed {
|
||||
id String @id @default(uuid()) @db.Uuid
|
||||
node Node @relation(fields: [nodeId], references: [id], onDelete: Cascade)
|
||||
nodeId String @db.Uuid
|
||||
foundAt DateTime @default(now())
|
||||
refreshedAt DateTime @updatedAt
|
||||
name String
|
||||
displayName String
|
||||
description String
|
||||
feedToTags FeedToTag[]
|
||||
fields Field[]
|
||||
emails Email[]
|
||||
followersCount Int?
|
||||
followingCount Int?
|
||||
statusesCount Int?
|
||||
bot Boolean?
|
||||
url String
|
||||
avatar String?
|
||||
locked Boolean
|
||||
lastStatusAt DateTime?
|
||||
createdAt DateTime
|
||||
type FeedType @default(account)
|
||||
parentFeedName String?
|
||||
parentFeedDomain String?
|
||||
fulltext String @default("")
|
||||
|
||||
@@index([displayName])
|
||||
@@index([description])
|
||||
@@index([bot])
|
||||
@@index([locked])
|
||||
@@index([lastStatusAt])
|
||||
@@index([createdAt])
|
||||
@@index([refreshedAt])
|
||||
@@index([parentFeedName,parentFeedDomain])
|
||||
@@index([type])
|
||||
@@index([fulltext])
|
||||
@@unique([name, nodeId])
|
||||
}
|
||||
|
||||
model Node {
|
||||
id String @id @default(uuid()) @db.Uuid
|
||||
softwareName String?
|
||||
softwareVersion String?
|
||||
totalUserCount Int?
|
||||
monthActiveUserCount Int?
|
||||
halfYearActiveUserCount Int?
|
||||
statusesCount Int?
|
||||
openRegistrations Boolean?
|
||||
foundAt DateTime @default(now())
|
||||
refreshedAt DateTime?
|
||||
refreshAttemptedAt DateTime?
|
||||
domain String @unique
|
||||
feeds Feed[]
|
||||
|
||||
@@index([softwareName])
|
||||
@@index([softwareVersion])
|
||||
@@index([totalUserCount])
|
||||
@@index([monthActiveUserCount])
|
||||
@@index([halfYearActiveUserCount])
|
||||
@@index([statusesCount])
|
||||
@@index([openRegistrations])
|
||||
@@index([refreshedAt])
|
||||
@@index([refreshAttemptedAt])
|
||||
@@index([foundAt])
|
||||
}
|
|
@ -4,6 +4,7 @@ import { assertSuccessJsonResponse } from '../assertSuccessJsonResponse'
|
|||
import { getDefaultTimeoutMilliseconds } from '../getDefaultTimeoutMilliseconds'
|
||||
|
||||
const schema = z.object({
|
||||
name: z.string().optional(),
|
||||
software: z.object({
|
||||
name: z.string(),
|
||||
version: z.string()
|
||||
|
|
|
@ -6,17 +6,17 @@ export interface FeedData {
|
|||
description:string,
|
||||
followersCount: number,
|
||||
followingCount:number,
|
||||
statusesCount:number,
|
||||
bot:boolean,
|
||||
statusesCount?:number,
|
||||
bot?:boolean,
|
||||
url: string,
|
||||
avatar:string|null,
|
||||
avatar?:string,
|
||||
locked:boolean,
|
||||
lastStatusAt:Date|null,
|
||||
lastStatusAt?:Date,
|
||||
createdAt:Date
|
||||
fields: FieldData[],
|
||||
type: 'account'|'channel'
|
||||
parentFeed: {
|
||||
parentFeed?: {
|
||||
name:string
|
||||
hostDomain:string
|
||||
}|null
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
export interface FieldData{
|
||||
name: string,
|
||||
value: string,
|
||||
verifiedAt: Date|null
|
||||
verifiedAt: Date|undefined
|
||||
}
|
||||
|
|
|
@ -1,9 +1,7 @@
|
|||
import axios from 'axios'
|
||||
import { assertSuccessJsonResponse } from '../../assertSuccessJsonResponse'
|
||||
import { FeedData } from '../FeedData'
|
||||
import { z } from 'zod'
|
||||
import { getDefaultTimeoutMilliseconds } from '../../getDefaultTimeoutMilliseconds'
|
||||
import { NodeProviderMethod } from '../NodeProviderMethod'
|
||||
import { NoMoreFeedsError } from '../NoMoreFeedsError'
|
||||
import { FeedProviderMethod } from '../FeedProviderMethod'
|
||||
|
||||
|
@ -91,24 +89,24 @@ export const retrieveUsersPage:FeedProviderMethod = async (domain, page) => {
|
|||
url: `https://${domain}/@${item.username}`,
|
||||
avatar: item.avatarUrl,
|
||||
locked: item.isLocked,
|
||||
lastStatusAt: item.updatedAt !== null ? new Date(item.updatedAt) : null,
|
||||
lastStatusAt: item.updatedAt !== null ? new Date(item.updatedAt) : undefined,
|
||||
createdAt: new Date(item.createdAt),
|
||||
fields: [
|
||||
...item.fields.map(field => {
|
||||
return {
|
||||
name: replaceEmojis(field.name, item.emojis),
|
||||
value: replaceEmojis(field.value, item.emojis),
|
||||
verifiedAt: null
|
||||
verifiedAt: undefined
|
||||
}
|
||||
}),
|
||||
...[
|
||||
{ name: 'Location', value: item.location, verifiedAt: null },
|
||||
{ name: 'Birthday', value: item.birthday, verifiedAt: null },
|
||||
{ name: 'Language', value: item.lang, verifiedAt: null }
|
||||
{ name: 'Location', value: item.location, verifiedAt: undefined },
|
||||
{ name: 'Birthday', value: item.birthday, verifiedAt: undefined },
|
||||
{ name: 'Language', value: item.lang, verifiedAt: undefined }
|
||||
].filter(field => field.value !== null)
|
||||
],
|
||||
type: 'account',
|
||||
parentFeed: null
|
||||
parentFeed: undefined
|
||||
}
|
||||
}
|
||||
)
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
import { z } from 'zod'
|
||||
|
||||
export const avatarSchema = z.nullable(z.object({
|
||||
export const avatarSchema = z.optional(z.nullable(z.object({
|
||||
path: z.string()
|
||||
}))
|
||||
})))
|
||||
|
||||
export type Avatar = z.infer<typeof avatarSchema>
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
import { Avatar } from './Avatar'
|
||||
|
||||
export const parseAvatarUrl = (data:Avatar, domain:string):string|null => {
|
||||
export const parseAvatarUrl = (data:Avatar, domain:string):string|undefined => {
|
||||
if (data === null) {
|
||||
return null
|
||||
return undefined
|
||||
}
|
||||
return `https://${domain}${data.path}`
|
||||
}
|
||||
|
|
|
@ -57,11 +57,11 @@ export const retrieveAccounts:FeedProviderMethod = async (domain, page) => {
|
|||
followersCount: item.followersCount,
|
||||
followingCount: item.followingCount,
|
||||
createdAt: new Date(item.createdAt),
|
||||
bot: null,
|
||||
lastStatusAt: null,
|
||||
statusesCount: null,
|
||||
bot: undefined,
|
||||
lastStatusAt: undefined,
|
||||
statusesCount: undefined,
|
||||
type: 'account',
|
||||
parentFeed: null
|
||||
parentFeed: undefined
|
||||
}
|
||||
})
|
||||
}
|
||||
|
|
|
@ -54,7 +54,7 @@ export const retrieveVideoChannels:FeedProviderMethod = async (domain, page) =>
|
|||
.filter(item => item.host === domain)
|
||||
.map((item):FeedData => {
|
||||
const fields:FieldData[] = item.support
|
||||
? [{ name: 'support', value: item.support, verifiedAt: null }]
|
||||
? [{ name: 'support', value: item.support, verifiedAt: undefined }]
|
||||
: []
|
||||
return {
|
||||
name: item.name,
|
||||
|
@ -67,9 +67,9 @@ export const retrieveVideoChannels:FeedProviderMethod = async (domain, page) =>
|
|||
followersCount: item.followersCount,
|
||||
followingCount: item.followingCount,
|
||||
createdAt: new Date(item.createdAt),
|
||||
bot: null,
|
||||
lastStatusAt: null,
|
||||
statusesCount: null,
|
||||
bot: undefined,
|
||||
lastStatusAt: undefined,
|
||||
statusesCount: undefined,
|
||||
type: 'channel',
|
||||
parentFeed: {
|
||||
name: item.ownerAccount.name,
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
import { lookup } from 'dns/promises'
|
||||
import { updateNodeIps } from '../../Storage/Nodes/updateNodeIps'
|
||||
import { ElasticClient } from '../../Storage/ElasticClient'
|
||||
import Node from '../../Storage/Definitions/Node'
|
||||
|
||||
const refreshNodeIps = async (elastic: ElasticClient, node:Node):Promise<Node> => {
|
||||
console.info('Looking up node ip addresses', {
|
||||
nodeDomain: node.domain
|
||||
})
|
||||
try {
|
||||
const addresses = await lookup(node.domain, { all: true })
|
||||
return updateNodeIps(elastic, node, addresses.map(resolution => resolution.address))
|
||||
} catch (error) {
|
||||
console.warn('Could not lookup the domain', { node, error })
|
||||
return node
|
||||
}
|
||||
}
|
||||
|
||||
export default refreshNodeIps
|
|
@ -1,28 +1,15 @@
|
|||
import { Node, PrismaClient, Feed } from '@prisma/client'
|
||||
import { FeedData } from '../../Fediverse/Providers/FeedData'
|
||||
import { createMissingTags } from '../../Storage/Tags/createMissingTags'
|
||||
import { createFeedTags } from '../../Storage/Tags/createFeedTags'
|
||||
import { fetchTags } from '../../Storage/Tags/fetchTags'
|
||||
import { extractTags } from '../../StringTools/extractTags'
|
||||
import { extractEmails } from '../../StringTools/extractEmails'
|
||||
import { createFeedFields } from '../../Storage/Fields/createFeedFields'
|
||||
import { createFeedEmails } from '../../Storage/Emails/createFeedEmails'
|
||||
import { createFeed } from '../../Storage/Feeds/createFeed'
|
||||
import prepareFulltext from './prepareFulltext'
|
||||
import Feed from '../../Storage/Definitions/Feed'
|
||||
import Node from '../../Storage/Definitions/Node'
|
||||
import { ElasticClient } from '../../Storage/ElasticClient'
|
||||
|
||||
export const addFeed = async (prisma: PrismaClient, node: Node, feedData: FeedData): Promise<Feed> => {
|
||||
export const addFeed = async (elastic: ElasticClient, node: Node, feedData: FeedData): Promise<Feed> => {
|
||||
const fulltext = prepareFulltext(feedData, node)
|
||||
const feed = await createFeed(prisma, { ...feedData, fulltext }, node)
|
||||
|
||||
await createFeedFields(prisma, feed, feedData.fields)
|
||||
|
||||
const tagNames = extractTags(fulltext)
|
||||
await createMissingTags(prisma, tagNames)
|
||||
const tags = await fetchTags(prisma, tagNames)
|
||||
await createFeedTags(prisma, feed, tags)
|
||||
|
||||
const emails = extractEmails(fulltext)
|
||||
await createFeedEmails(prisma, feed, emails)
|
||||
|
||||
return feed
|
||||
const extractedTags = extractTags(fulltext)
|
||||
const extractedEmails = extractEmails(fulltext)
|
||||
return await createFeed(elastic, { ...feedData, extractedTags, extractedEmails }, node)
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import { FeedData } from '../../Fediverse/Providers/FeedData'
|
||||
import striptags from 'striptags'
|
||||
import { Node } from '@prisma/client'
|
||||
import Node from '../../Storage/Definitions/Node'
|
||||
|
||||
export default function (feedData: FeedData, node: Node):string {
|
||||
return striptags(
|
||||
|
|
|
@ -1,33 +1,17 @@
|
|||
import { PrismaClient, Feed, Node } from '@prisma/client'
|
||||
import { FeedData } from '../../Fediverse/Providers/FeedData'
|
||||
import { createMissingTags } from '../../Storage/Tags/createMissingTags'
|
||||
import { createFeedTags } from '../../Storage/Tags/createFeedTags'
|
||||
import { fetchTags } from '../../Storage/Tags/fetchTags'
|
||||
import { extractTags } from '../../StringTools/extractTags'
|
||||
import { extractEmails } from '../../StringTools/extractEmails'
|
||||
import { createFeedFields } from '../../Storage/Fields/createFeedFields'
|
||||
import { createFeedEmails } from '../../Storage/Emails/createFeedEmails'
|
||||
import { deleteAllFeedFields } from '../../Storage/Fields/deleteAllFeedFields'
|
||||
import { deleteAllFeedTags } from '../../Storage/Tags/deleteAllFeedTags'
|
||||
import { deleteAllFeedEmails } from '../../Storage/Emails/deleteAllFeedEmails'
|
||||
import { updateFeed } from '../../Storage/Feeds/updateFeed'
|
||||
import Feed from '../../Storage/Definitions/Feed'
|
||||
import Node from '../../Storage/Definitions/Node'
|
||||
import prepareFulltext from './prepareFulltext'
|
||||
import { ElasticClient } from '../../Storage/ElasticClient'
|
||||
|
||||
export const refreshFeed = async (prisma: PrismaClient, feed:Feed, feedData: FeedData, node: Node): Promise<Feed> => {
|
||||
export const refreshFeed = async (elastic: ElasticClient, feed:Feed, feedData: FeedData, node: Node): Promise<Feed> => {
|
||||
const fulltext = prepareFulltext(feedData, node)
|
||||
|
||||
await deleteAllFeedFields(prisma, feed)
|
||||
await createFeedFields(prisma, feed, feedData.fields)
|
||||
const extractedTags = extractTags(fulltext)
|
||||
const extractedEmails = extractEmails(fulltext)
|
||||
|
||||
const tagNames = extractTags(fulltext)
|
||||
await createMissingTags(prisma, tagNames)
|
||||
const tags = await fetchTags(prisma, tagNames)
|
||||
await deleteAllFeedTags(prisma, feed)
|
||||
await createFeedTags(prisma, feed, tags)
|
||||
|
||||
const emails = extractEmails(fulltext)
|
||||
await deleteAllFeedEmails(prisma, feed)
|
||||
await createFeedEmails(prisma, feed, emails)
|
||||
|
||||
return await updateFeed(prisma, feed, { ...feedData, fulltext })
|
||||
return await updateFeed(elastic, feed, { ...feedData, extractedTags, extractedEmails })
|
||||
}
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
import { refreshFeedsOnPage } from './refreshFeedsOnPage'
|
||||
import { FeedProvider } from '../../Fediverse/Providers/FeedProvider'
|
||||
import { Node, PrismaClient } from '@prisma/client'
|
||||
import Node from '../../Storage/Definitions/Node'
|
||||
import { ElasticClient } from '../../Storage/ElasticClient'
|
||||
|
||||
export const refreshFeeds = async (prisma:PrismaClient, provider:FeedProvider, node:Node):Promise<void> => {
|
||||
export const refreshFeeds = async (elastic: ElasticClient, provider:FeedProvider, node:Node):Promise<void> => {
|
||||
try {
|
||||
for (let page = 0; true; page++) {
|
||||
console.info('Retrieve feeds page', { nodeDomain: node.domain, provider: provider.getKey(), page: page })
|
||||
await refreshFeedsOnPage(prisma, provider, node, page)
|
||||
await refreshFeedsOnPage(elastic, provider, node, page)
|
||||
}
|
||||
} catch (e) {
|
||||
console.info('Feed search finished: ' + e, { nodeDomain: node.domain, provider: provider.getKey() })
|
||||
|
|
|
@ -1,11 +1,13 @@
|
|||
import { Node, PrismaClient, Feed } from '@prisma/client'
|
||||
import { refreshOrAddFeed } from './refreshOrAddFeed'
|
||||
import { FeedProvider } from '../../Fediverse/Providers/FeedProvider'
|
||||
import Node from '../../Storage/Definitions/Node'
|
||||
import Feed from '../../Storage/Definitions/Feed'
|
||||
import { ElasticClient } from '../../Storage/ElasticClient'
|
||||
|
||||
export const refreshFeedsOnPage = async (prisma: PrismaClient, provider:FeedProvider, node:Node, page:number):Promise<Feed[]> => {
|
||||
export const refreshFeedsOnPage = async (elastic: ElasticClient, provider:FeedProvider, node:Node, page:number):Promise<Feed[]> => {
|
||||
const feedData = await provider.retrieveFeeds(node.domain, page)
|
||||
console.info('Retrieved feeds', { count: feedData.length, domain: node.domain, provider: provider.getKey(), page: page })
|
||||
return Promise.all(feedData.map(
|
||||
feedDataItem => refreshOrAddFeed(prisma, node, feedDataItem)
|
||||
feedDataItem => refreshOrAddFeed(elastic, node, feedDataItem)
|
||||
))
|
||||
}
|
||||
|
|
|
@ -1,15 +1,22 @@
|
|||
import { FeedData } from '../../Fediverse/Providers/FeedData'
|
||||
import { fetchFeedByNodeAndName } from '../../Storage/Feeds/fetchFeedByNodeAndName'
|
||||
import { refreshFeed } from './refreshFeed'
|
||||
import { addFeed } from './addFeed'
|
||||
import { Node, PrismaClient, Feed } from '@prisma/client'
|
||||
import Feed from '../../Storage/Definitions/Feed'
|
||||
import Node from '../../Storage/Definitions/Node'
|
||||
import { ElasticClient } from '../../Storage/ElasticClient'
|
||||
import getFeed from '../../Storage/Feeds/getFeed'
|
||||
|
||||
export const refreshOrAddFeed = async (prisma:PrismaClient, node:Node, feedData:FeedData):Promise<Feed> => {
|
||||
const feed = await fetchFeedByNodeAndName(prisma, node, feedData.name)
|
||||
export const refreshOrAddFeed = async (elastic: ElasticClient, node:Node, feedData:FeedData):Promise<Feed> => {
|
||||
let feed:Feed|null = null
|
||||
try {
|
||||
feed = await getFeed(elastic, `${feedData.name}@${node.domain}`)
|
||||
} catch (e) {
|
||||
|
||||
}
|
||||
if (feed) {
|
||||
console.info('Refreshing feed', { nodeDomain: node.domain, feedName: feedData.name, feedType: feedData.type })
|
||||
return await refreshFeed(prisma, feed, feedData, node)
|
||||
return await refreshFeed(elastic, feed, feedData, node)
|
||||
}
|
||||
console.info('Adding feed', { nodeDomain: node.domain, feedName: feedData.name, feedType: feedData.type })
|
||||
return await addFeed(prisma, node, feedData)
|
||||
return await addFeed(elastic, node, feedData)
|
||||
}
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
import { Node, PrismaClient } from '@prisma/client'
|
||||
import { retrieveDomainNodeInfo } from '../../Fediverse/NodeInfo/retrieveDomainNodeInfo'
|
||||
import { updateNode } from '../../Storage/Nodes/updateNode'
|
||||
import { updateNodeInfo } from '../../Storage/Nodes/updateNodeInfo'
|
||||
import Node from '../../Storage/Definitions/Node'
|
||||
import { ElasticClient } from '../../Storage/ElasticClient'
|
||||
|
||||
export const refreshNodeInfo = async (prisma: PrismaClient, node:Node):Promise<Node> => {
|
||||
export const refreshNodeInfo = async (elastic: ElasticClient, node:Node):Promise<Node> => {
|
||||
console.info('Updating info of node', { nodeDomain: node.domain })
|
||||
try {
|
||||
const nodeInfo = await retrieveDomainNodeInfo(node.domain)
|
||||
return await updateNode(prisma, node, nodeInfo)
|
||||
return await updateNodeInfo(elastic, node, nodeInfo)
|
||||
} catch (error) {
|
||||
console.warn('Failed to update node info: ' + error)
|
||||
return node
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
import { Node, PrismaClient } from '@prisma/client'
|
||||
import { NodeProvider } from '../../Fediverse/Providers/NodeProvider'
|
||||
import { findNewNodesOnPage } from './findNewNodesOnPage'
|
||||
import Node from '../../Storage/Definitions/Node'
|
||||
import { ElasticClient } from '../../Storage/ElasticClient'
|
||||
|
||||
export const findNewNodes = async (prisma: PrismaClient, provider:NodeProvider, node:Node):Promise<void> => {
|
||||
export const findNewNodes = async (elastic: ElasticClient, provider:NodeProvider, node:Node):Promise<void> => {
|
||||
try {
|
||||
for (let page = 0; true; page++) {
|
||||
console.info('Retrieve node page', { domain: node.domain, provider: provider.getKey() })
|
||||
await findNewNodesOnPage(prisma, provider, node, page)
|
||||
await findNewNodesOnPage(elastic, provider, node, page)
|
||||
}
|
||||
} catch (e) {
|
||||
console.info('Node search finished: ' + e, { domain: node.domain, provider: provider.getKey() })
|
||||
|
|
|
@ -1,11 +1,14 @@
|
|||
import { PrismaClient, Node } from '@prisma/client'
|
||||
import { createMissingNodes } from '../../Storage/Nodes/createMissingNodes'
|
||||
import { NodeProvider } from '../../Fediverse/Providers/NodeProvider'
|
||||
import Node from '../../Storage/Definitions/Node'
|
||||
import { ElasticClient } from '../../Storage/ElasticClient'
|
||||
import isDomainNotBanned from '../../Storage/Nodes/isDomainNotBanned'
|
||||
|
||||
export const findNewNodesOnPage = async (
|
||||
prisma:PrismaClient, provider: NodeProvider, node:Node, page:number
|
||||
elastic: ElasticClient, provider: NodeProvider, node:Node, page:number
|
||||
):Promise<number> => {
|
||||
const domains = await provider.retrieveNodes(node.domain, page)
|
||||
let domains = await provider.retrieveNodes(node.domain, page)
|
||||
domains = domains.filter(isDomainNotBanned)
|
||||
console.log('Found nodes', { count: domains.length, domain: node.domain, provider: provider.getKey(), page: page })
|
||||
return await createMissingNodes(prisma, domains)
|
||||
return await createMissingNodes(elastic, domains, node.domain)
|
||||
}
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
import countNodeFeeds from '../../Storage/Feeds/countNodeFeeds'
|
||||
import { setNodeStats } from '../../Storage/Nodes/setNodeStats'
|
||||
import { ElasticClient } from '../../Storage/ElasticClient'
|
||||
import Node from '../../Storage/Definitions/Node'
|
||||
|
||||
export type NodeStats ={
|
||||
account: number,
|
||||
channel: number,
|
||||
}
|
||||
|
||||
export default async function updateNodeFeedStats (elasticClient: ElasticClient, node: Node) {
|
||||
await setNodeStats(
|
||||
elasticClient,
|
||||
node,
|
||||
await countNodeFeeds(elasticClient, node.domain)
|
||||
)
|
||||
}
|
|
@ -1,8 +1,8 @@
|
|||
import { PrismaClient } from '@prisma/client'
|
||||
import { createMissingNodes } from '../../Storage/Nodes/createMissingNodes'
|
||||
import { ElasticClient } from '../../Storage/ElasticClient'
|
||||
|
||||
export const addNodeSeed = async (prisma:PrismaClient, domain:string):Promise<boolean> => {
|
||||
console.info('Trying to add seed domain node', { domain: domain })
|
||||
const result = await createMissingNodes(prisma, [domain])
|
||||
export const addNodeSeed = async (elastic: ElasticClient, domains:string[]):Promise<boolean> => {
|
||||
console.info('Trying to add seed domain nodes', { domains: domains })
|
||||
const result = await createMissingNodes(elastic, domains, undefined)
|
||||
return result > 0
|
||||
}
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
import { deleteDomainFeeds } from '../../Storage/Feeds/deleteDomainFeeds'
|
||||
import { deleteDomainNodes } from '../../Storage/Nodes/deleteDomainNodes'
|
||||
import { ElasticClient } from '../../Storage/ElasticClient'
|
||||
|
||||
export default async function deleteDomains (elastic: ElasticClient, domains:string[]):Promise<number> {
|
||||
if (domains === []) {
|
||||
return
|
||||
}
|
||||
await deleteDomainFeeds(elastic, domains)
|
||||
return deleteDomainNodes(elastic, domains)
|
||||
}
|
|
@ -0,0 +1,7 @@
|
|||
export default function getBannedDomains ():string[] {
|
||||
const domains = process.env.BANNED_DOMAINS ?? ''
|
||||
if (domains === '') {
|
||||
return []
|
||||
}
|
||||
return domains.split(',').map(domain => domain.toLowerCase())
|
||||
}
|
|
@ -1,9 +0,0 @@
|
|||
import { PrismaClient, Tag } from '@prisma/client'
import { fetchTags } from '../../Storage/Tags/fetchTags'
import { createMissingTags } from '../../Storage/Tags/createMissingTags'

/**
 * Ensures every given tag name exists in the database, then returns
 * the corresponding Tag rows.
 *
 * @param tagNames tag names to look up / create
 * @returns the Tag rows for all requested names
 */
export const fetchOrCreateTags = async (prisma:PrismaClient, tagNames:string[]):Promise<Tag[]> => {
  console.log('Searching for tags', { count: tagNames.length })
  // Insert any names that are not present yet, then read them all back.
  await createMissingTags(prisma, tagNames)
  return await fetchTags(prisma, tagNames)
}
|
|
@ -1,4 +1,3 @@
|
|||
import { PrismaClient } from '@prisma/client'
|
||||
import { fetchNodeToProcess } from '../Storage/Nodes/fetchNodeToProcess'
|
||||
import { ProviderRegistry } from '../Fediverse/Providers/ProviderRegistry'
|
||||
import { setNodeRefreshed } from '../Storage/Nodes/setNodeRefreshed'
|
||||
|
@ -9,18 +8,22 @@ import { NodeProvider } from '../Fediverse/Providers/NodeProvider'
|
|||
import { FeedProvider } from '../Fediverse/Providers/FeedProvider'
|
||||
import { refreshFeeds } from './Feeds/refreshFeeds'
|
||||
import { deleteOldFeeds } from '../Storage/Feeds/deleteOldFeeds'
|
||||
import refreshNodeIps from './Dns/refreshNodeIps'
|
||||
import { ElasticClient } from '../Storage/ElasticClient'
|
||||
import updateNodeFeedStats from './Nodes/updateNodeFeedStats'
|
||||
|
||||
export const processNextNode = async (prisma:PrismaClient, providerRegistry:ProviderRegistry):Promise<void> => {
|
||||
export const processNextNode = async (elastic: ElasticClient, providerRegistry:ProviderRegistry):Promise<void> => {
|
||||
console.info('#############################################')
|
||||
let node = await fetchNodeToProcess(prisma)
|
||||
node = await setNodeRefreshAttempted(prisma, node)
|
||||
let node = await fetchNodeToProcess(elastic)
|
||||
node = await setNodeRefreshAttempted(elastic, node)
|
||||
|
||||
node = await refreshNodeInfo(prisma, node)
|
||||
node = await refreshNodeIps(elastic, node)
|
||||
node = await refreshNodeInfo(elastic, node)
|
||||
|
||||
if (!providerRegistry.containsKey(node.softwareName)) {
|
||||
console.warn('Unknown software', { domain: node.domain, software: node.softwareName })
|
||||
await deleteOldFeeds(prisma, node)
|
||||
await setNodeRefreshed(prisma, node)
|
||||
await deleteOldFeeds(elastic, node)
|
||||
await setNodeRefreshed(elastic, node)
|
||||
return
|
||||
}
|
||||
const provider = providerRegistry.getProviderByKey(node.softwareName)
|
||||
|
@ -28,18 +31,19 @@ export const processNextNode = async (prisma:PrismaClient, providerRegistry:Prov
|
|||
await Promise.all(
|
||||
provider.getNodeProviders().map((nodeProvider:NodeProvider) => {
|
||||
console.info('Searching for nodes', { domain: node.domain, provider: nodeProvider.getKey() })
|
||||
return findNewNodes(prisma, nodeProvider, node)
|
||||
return findNewNodes(elastic, nodeProvider, node)
|
||||
})
|
||||
)
|
||||
|
||||
await Promise.all(
|
||||
provider.getFeedProviders().map((feedProvider:FeedProvider) => {
|
||||
console.info('Searching for feeds', { domain: node.domain, provider: feedProvider.getKey() })
|
||||
return refreshFeeds(prisma, feedProvider, node)
|
||||
return refreshFeeds(elastic, feedProvider, node)
|
||||
})
|
||||
)
|
||||
|
||||
await deleteOldFeeds(prisma, node)
|
||||
await deleteOldFeeds(elastic, node)
|
||||
await updateNodeFeedStats(elastic, node)
|
||||
|
||||
await setNodeRefreshed(prisma, node)
|
||||
await setNodeRefreshed(elastic, node)
|
||||
}
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
import Field from './Field'

/**
 * Elasticsearch document shape for a feed (account or channel).
 * Timestamps are stored as epoch milliseconds (see createFeed, which
 * writes `Date.getTime()` values). Optional members are simply absent
 * when the source platform does not provide them.
 */
interface Feed {
  // Domain of the node the feed lives on.
  domain: string
  foundAt: number,
  refreshedAt?: number,
  // Local handle; fullName is `${name}@${domain}` and doubles as the
  // document id (see createFeed).
  name: string,
  fullName: string,
  displayName: string,
  description: string,
  // HTML-stripped copy — presumably filled by the ingest pipeline;
  // TODO confirm.
  strippedDescription?: string,
  followersCount?: number,
  followingCount?: number,
  statusesCount?: number,
  lastStatusAt?: number,
  createdAt?: number,
  bot?: boolean,
  locked: boolean,
  url: string,
  avatar?: string,
  type: 'account' | 'channel',
  // Owning feed for channels (name + domain), absent for accounts.
  parentFeedName?: string,
  parentFeedDomain?: string
  fields: Field[],
  // Emails/tags extracted from the feed's fulltext.
  extractedEmails: string[],
  extractedTags: string[]
}

export default Feed
|
|
@ -0,0 +1,8 @@
|
|||
interface Field {
|
||||
name: string,
|
||||
value: string
|
||||
strippedName?: string
|
||||
strippedValue?: string
|
||||
}
|
||||
|
||||
export default Field
|
|
@ -0,0 +1,13 @@
|
|||
interface Geo {
|
||||
cityName?: string,
|
||||
continentName?: string,
|
||||
countryIsoCode?: string,
|
||||
countryName?: string,
|
||||
latitude: number
|
||||
longitude: number
|
||||
location?: string,
|
||||
regionIsoCode?: string,
|
||||
regionName?: string
|
||||
}
|
||||
|
||||
export default Geo
|
|
@ -0,0 +1,25 @@
|
|||
import Geo from './Geo'

/**
 * Elasticsearch document shape for a fediverse node. Timestamps are
 * epoch milliseconds (numbers); optional members are absent until the
 * corresponding crawl step has run.
 */
interface Node {
  name?:string,
  // HTML-stripped name — presumably set by the ingest pipeline; confirm.
  strippedName?:string,
  foundAt: number,
  refreshAttemptedAt?: number
  refreshedAt?: number
  openRegistrations?: boolean
  domain: string,
  // Resolved IPs and their geo lookups (see refreshNodeIps).
  serverIps?: string[],
  geoip?: Geo[],
  softwareName?: string;
  softwareVersion?: string
  standardizedSoftwareVersion?: string
  // Usage statistics from the node's nodeinfo endpoint.
  halfYearActiveUserCount?: number,
  monthActiveUserCount?: number,
  statusesCount?: number,
  totalUserCount?: number,
  // Domain of the node that led the crawler here; absent for seeds
  // (see addNodeSeed, which passes undefined).
  discoveredByDomain?:string,
  // Per-type feed counters (see updateNodeFeedStats).
  accountFeedCount?: number,
  channelFeedCount?: number,
}

export default Node
|
|
@ -0,0 +1,3 @@
|
|||
const feedIndex = 'feed'
|
||||
|
||||
export default feedIndex
|
|
@ -0,0 +1,3 @@
|
|||
const nodeIndex = 'node'
|
||||
|
||||
export default nodeIndex
|
|
@ -0,0 +1,15 @@
|
|||
import { Client } from '@elastic/elasticsearch'
|
||||
|
||||
const elasticClient = new Client({
|
||||
node: {
|
||||
url: new URL(process.env.ELASTIC_URL ?? 'http://elastic:9200')
|
||||
},
|
||||
auth: {
|
||||
username: process.env.ELASTIC_USER ?? 'elastic',
|
||||
password: process.env.ELASTIC_PASSWORD
|
||||
}
|
||||
})
|
||||
|
||||
export type ElasticClient = typeof elasticClient
|
||||
|
||||
export default elasticClient
|
|
@ -1,15 +0,0 @@
|
|||
import { PrismaClient, Feed } from '@prisma/client'

/**
 * Bulk-inserts extracted email addresses for a feed. Duplicates are
 * skipped at the database level via `skipDuplicates`.
 *
 * @param emails addresses to attach to the feed
 * @returns number of rows actually inserted
 */
export const createFeedEmails = async (prisma:PrismaClient, feed:Feed, emails:string[]):Promise<number> => {
  const result = await prisma.email.createMany({
    data: emails.map(email => {
      return {
        feedId: feed.id,
        address: email
      }
    }),
    skipDuplicates: true
  })
  console.log('Added emails to feed', { count: result.count, feedName: feed.name, nodeId: feed.nodeId })
  return result.count
}
|
|
@ -1,11 +0,0 @@
|
|||
import { PrismaClient, Feed } from '@prisma/client'

/**
 * Removes every email row attached to the given feed (used before
 * re-inserting the freshly extracted set).
 *
 * @returns number of rows deleted
 */
export const deleteAllFeedEmails = async (prisma:PrismaClient, feed:Feed):Promise<number> => {
  const result = await prisma.email.deleteMany({
    where: {
      feedId: feed.id
    }
  })
  console.log('Removed all emails from feed', { count: result.count, feedName: feed.name, nodeId: feed.nodeId })
  return result.count
}
|
|
@ -1,5 +1,6 @@
|
|||
import { FeedData } from '../../Fediverse/Providers/FeedData'
|
||||
|
||||
export default interface StorageFeedData extends FeedData{
|
||||
fulltext:string
|
||||
extractedTags:string[],
|
||||
extractedEmails:string[],
|
||||
}
|
||||
|
|
|
@ -0,0 +1,84 @@
|
|||
import { ElasticClient } from '../ElasticClient'
import feedIndex from '../Definitions/feedIndex'
import dateProperty from '../Properties/dateProperty'
import { IngestProcessorContainer } from '@elastic/elasticsearch/lib/api/types'

/**
 * Idempotently prepares the feed index: (re)puts the ingest pipeline,
 * creates the index with its analyzers if it does not exist yet, and
 * (re)applies the field mapping. Safe to call on every startup —
 * putPipeline and putMapping overwrite/extend in place.
 */
const assertFeedIndex = async (elastic: ElasticClient): Promise<void> => {
  console.info('Setting feed pipeline')
  // Currently no processors; the pipeline exists as an extension point.
  const processors: IngestProcessorContainer[] = []
  await elastic.ingest.putPipeline({
    id: 'feed',
    description: 'Default feed pipeline',
    processors: processors
  })
  console.info('Checking feed index')
  const exists = await elastic.indices.exists({
    index: feedIndex
  })
  if (exists) {
    console.info('Feed index exists')
  } else {
    console.info('Creating feed index')
    await elastic.indices.create({
      index: feedIndex,
      settings: {
        // Route all writes through the pipeline created above.
        default_pipeline: 'feed',
        analysis: {
          analyzer: {
            standard: {
              type: 'standard'
            },
            // Lower-cases and strips HTML tags before tokenizing; used
            // for fields that may contain markup (descriptions, names).
            html: {
              type: 'custom',
              tokenizer: 'standard',
              filter: [
                'lowercase'
              ],
              char_filter: [
                'html_strip'
              ]
            }
          }
        }
      }
    })
  }

  // Mapping is applied unconditionally so new fields reach existing
  // indices as well.
  console.info('Setting feed mapping')
  await elastic.indices.putMapping({
    index: feedIndex,
    properties: {
      domain: { type: 'text', analyzer: 'standard' },
      foundAt: dateProperty,
      refreshedAt: dateProperty,
      name: { type: 'text', analyzer: 'standard' },
      fullName: { type: 'text', analyzer: 'standard' },
      displayName: { type: 'text', analyzer: 'html' },
      description: { type: 'text', analyzer: 'html' },
      followersCount: { type: 'integer' },
      followingCount: { type: 'integer' },
      statusesCount: { type: 'integer' },
      lastStatusAt: dateProperty,
      createdAt: dateProperty,
      bot: { type: 'boolean' },
      locked: { type: 'boolean' },
      url: { type: 'text' },
      avatar: { type: 'text' },
      // keyword so the type can be term-filtered and aggregated.
      type: { type: 'keyword' },
      parentFeedName: { type: 'text', analyzer: 'standard' },
      parentFeedDomain: { type: 'text', analyzer: 'standard' },
      fields: {
        type: 'nested',
        properties: {
          name: { type: 'text', analyzer: 'html' },
          value: { type: 'text', analyzer: 'html' }
        }
      },
      extractedEmails: { type: 'text', analyzer: 'standard' },
      extractedTags: { type: 'text', analyzer: 'standard' }
    }
  })
  // Make the changes visible to searches immediately.
  await elastic.indices.refresh({ index: feedIndex })
}

export default assertFeedIndex
|
|
@ -0,0 +1,39 @@
|
|||
import { ElasticClient } from '../ElasticClient'
|
||||
import Feed from '../Definitions/Feed'
|
||||
import feedIndex from '../Definitions/feedIndex'
|
||||
import { NodeStats } from '../../Jobs/Nodes/updateNodeFeedStats'
|
||||
|
||||
type Aggregation = {
|
||||
buckets:{
|
||||
key:'account'|'channel',
|
||||
// eslint-disable-next-line camelcase
|
||||
doc_count:number
|
||||
}[]
|
||||
}
|
||||
|
||||
export default async function countNodeFeeds (elastic: ElasticClient, domain:string):Promise<NodeStats> {
|
||||
await elastic.indices.refresh({ index: feedIndex })
|
||||
const response = await elastic.search<Feed>({
|
||||
index: feedIndex,
|
||||
query: {
|
||||
term: { domain }
|
||||
},
|
||||
size: 0,
|
||||
aggs: {
|
||||
types: {
|
||||
terms: {
|
||||
field: 'type'
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
const types = response.aggregations.types as Aggregation
|
||||
const result:NodeStats = {
|
||||
channel: 0,
|
||||
account: 0
|
||||
}
|
||||
types.buckets.forEach(item => {
|
||||
result[item.key] += item.doc_count
|
||||
})
|
||||
return result
|
||||
}
|
|
@ -1,9 +1,18 @@
|
|||
import { Node, PrismaClient, Feed } from '@prisma/client'
|
||||
import StorageFeedData from './StorageFeedData'
|
||||
import { ElasticClient } from '../ElasticClient'
|
||||
import feedIndex from '../Definitions/feedIndex'
|
||||
import getFeed from './getFeed'
|
||||
import Feed from '../Definitions/Feed'
|
||||
import Node from '../Definitions/Node'
|
||||
|
||||
export const createFeed = async (prisma: PrismaClient, feedData: StorageFeedData, node: Node): Promise<Feed> => {
|
||||
const feed = await prisma.feed.create({
|
||||
data: {
|
||||
export const createFeed = async (elastic: ElasticClient, feedData: StorageFeedData, node: Node): Promise<Feed> => {
|
||||
const fullName = `${feedData.name}@${node.domain}`
|
||||
await elastic.create<Feed>({
|
||||
index: feedIndex,
|
||||
id: fullName,
|
||||
document: {
|
||||
fullName,
|
||||
domain: node.domain,
|
||||
url: feedData.url,
|
||||
name: feedData.name,
|
||||
bot: feedData.bot,
|
||||
|
@ -11,16 +20,22 @@ export const createFeed = async (prisma: PrismaClient, feedData: StorageFeedData
|
|||
followersCount: feedData.followersCount,
|
||||
followingCount: feedData.followingCount,
|
||||
statusesCount: feedData.statusesCount,
|
||||
lastStatusAt: feedData.lastStatusAt,
|
||||
lastStatusAt: feedData.lastStatusAt?.getTime(),
|
||||
description: feedData.description,
|
||||
displayName: feedData.displayName,
|
||||
locked: feedData.locked,
|
||||
nodeId: node.id,
|
||||
createdAt: feedData.createdAt,
|
||||
fulltext: feedData.fulltext,
|
||||
createdAt: feedData.createdAt.getTime(),
|
||||
foundAt: (new Date()).getTime(),
|
||||
fields: feedData.fields.map(field => {
|
||||
return { name: field.name, value: field.value }
|
||||
}),
|
||||
extractedEmails: feedData.extractedEmails,
|
||||
extractedTags: feedData.extractedTags,
|
||||
parentFeedName: feedData.parentFeed?.name,
|
||||
parentFeedDomain: feedData.parentFeed?.hostDomain,
|
||||
type: feedData.type
|
||||
}
|
||||
})
|
||||
console.info('Created new feed', { feedName: feed.name, nodeDomain: node.domain })
|
||||
return feed
|
||||
console.info('Created new feed', { feedName: feedData.name, nodeDomain: node.domain })
|
||||
return getFeed(elastic, fullName)
|
||||
}
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
import { ElasticClient } from '../ElasticClient'
|
||||
import feedIndex from '../Definitions/feedIndex'
|
||||
|
||||
export const deleteDomainFeeds = async (elastic: ElasticClient, domains:string[]): Promise<number> => {
|
||||
await elastic.indices.refresh({ index: feedIndex })
|
||||
const result = await elastic.deleteByQuery({
|
||||
index: feedIndex,
|
||||
query: {
|
||||
bool: {
|
||||
should: domains.map(domain => {
|
||||
return {
|
||||
regexp: {
|
||||
domain: {
|
||||
value: '(.*\\.)?' + domain,
|
||||
case_insensitive: true
|
||||
}
|
||||
}
|
||||
}
|
||||
}),
|
||||
minimum_should_match: 1
|
||||
}
|
||||
}
|
||||
})
|
||||
console.info('Deleted domain feeds', {
|
||||
count: result.deleted, domains
|
||||
})
|
||||
return result.deleted
|
||||
}
|
|
@ -1,18 +1,22 @@
|
|||
import { Node, PrismaClient } from '@prisma/client'
|
||||
import { ElasticClient } from '../ElasticClient'
|
||||
import Node from '../Definitions/Node'
|
||||
import feedIndex from '../Definitions/feedIndex'
|
||||
|
||||
export const deleteOldFeeds = async (prisma: PrismaClient, node: Node): Promise<number> => {
|
||||
const result = await prisma.feed.deleteMany({
|
||||
where: {
|
||||
nodeId: {
|
||||
equals: node.id
|
||||
},
|
||||
refreshedAt: {
|
||||
lt: node.refreshAttemptedAt
|
||||
export const deleteOldFeeds = async (elastic: ElasticClient, node: Node): Promise<number> => {
|
||||
await elastic.indices.refresh({ index: feedIndex })
|
||||
const result = await elastic.deleteByQuery({
|
||||
index: feedIndex,
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{ match: { domain: node.domain } },
|
||||
{ range: { refreshedAt: { lt: node.refreshAttemptedAt } } }
|
||||
]
|
||||
}
|
||||
}
|
||||
})
|
||||
console.info('Deleted old feeds', {
|
||||
count: result.count, olderThen: node.refreshAttemptedAt, nodeDomain: node.domain
|
||||
count: result.deleted, olderThen: node.refreshAttemptedAt, nodeDomain: node.domain
|
||||
})
|
||||
return result.count
|
||||
return result.deleted
|
||||
}
|
||||
|
|
|
@ -1,10 +0,0 @@
|
|||
import { Node, PrismaClient, Feed } from '@prisma/client'
|
||||
|
||||
export const fetchFeedByNodeAndName = async (prisma:PrismaClient, node:Node, name:string):Promise<Feed> => {
|
||||
return await prisma.feed.findFirst({
|
||||
where: {
|
||||
node: node,
|
||||
name: name
|
||||
}
|
||||
})
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
import Feed from '../Definitions/Feed'
|
||||
import { ElasticClient } from '../ElasticClient'
|
||||
import feedIndex from '../Definitions/feedIndex'
|
||||
|
||||
const getFeed = async (elastic: ElasticClient, feedFullName:string):Promise<Feed> => {
|
||||
const result = await elastic.get<Feed>({
|
||||
index: feedIndex,
|
||||
id: feedFullName
|
||||
})
|
||||
return result._source
|
||||
}
|
||||
|
||||
export default getFeed
|
|
@ -1,9 +1,14 @@
|
|||
import { Feed, PrismaClient } from '@prisma/client'
|
||||
import StorageFeedData from './StorageFeedData'
|
||||
import Feed from '../Definitions/Feed'
|
||||
import { ElasticClient } from '../ElasticClient'
|
||||
import feedIndex from '../Definitions/feedIndex'
|
||||
import getFeed from './getFeed'
|
||||
|
||||
export const updateFeed = async (prisma:PrismaClient, feed:Feed, feedData:StorageFeedData):Promise<Feed> => {
|
||||
const updatedFeed = await prisma.feed.update({
|
||||
data: {
|
||||
export const updateFeed = async (elastic: ElasticClient, feed:Feed, feedData:StorageFeedData):Promise<Feed> => {
|
||||
await elastic.update<Feed>({
|
||||
index: feedIndex,
|
||||
id: feed.fullName,
|
||||
doc: {
|
||||
url: feedData.url,
|
||||
bot: feedData.bot,
|
||||
avatar: feedData.avatar,
|
||||
|
@ -15,14 +20,15 @@ export const updateFeed = async (prisma:PrismaClient, feed:Feed, feedData:Storag
|
|||
displayName: feedData.displayName,
|
||||
locked: feedData.locked,
|
||||
createdAt: feedData.createdAt,
|
||||
refreshedAt: new Date(),
|
||||
fulltext: feedData.fulltext,
|
||||
type: feedData.type
|
||||
},
|
||||
where: {
|
||||
id: feed.id
|
||||
refreshedAt: (new Date()).getTime(),
|
||||
type: feedData.type,
|
||||
fields: feedData.fields.map(field => {
|
||||
return { name: field.name, value: field.value }
|
||||
}),
|
||||
extractedEmails: feedData.extractedEmails,
|
||||
extractedTags: feedData.extractedTags
|
||||
}
|
||||
})
|
||||
console.info('Updated feed', { feedName: feed.name, nodeId: feed.nodeId })
|
||||
return updatedFeed
|
||||
console.info('Updated feed', { feedName: feed.name, nodeDomain: feed.domain })
|
||||
return getFeed(elastic, feed.fullName)
|
||||
}
|
||||
|
|
|
@ -1,17 +0,0 @@
|
|||
import { PrismaClient, Feed } from '@prisma/client'
|
||||
import { FieldData } from '../../Fediverse/Providers/FieldData'
|
||||
|
||||
export const createFeedFields = async (prisma:PrismaClient, feed:Feed, fieldData:FieldData[]):Promise<number> => {
|
||||
const result = await prisma.field.createMany({
|
||||
data: fieldData.map(item => {
|
||||
return {
|
||||
feedId: feed.id,
|
||||
name: item.name,
|
||||
value: item.value
|
||||
}
|
||||
}),
|
||||
skipDuplicates: true
|
||||
})
|
||||
console.log('Added fields to feed', { count: result.count, feedName: feed.name, nodeId: feed.nodeId })
|
||||
return result.count
|
||||
}
|
|
@ -1,11 +0,0 @@
|
|||
import { PrismaClient, Feed } from '@prisma/client'
|
||||
|
||||
export const deleteAllFeedFields = async (prisma:PrismaClient, feed:Feed):Promise<number> => {
|
||||
const result = await prisma.field.deleteMany({
|
||||
where: {
|
||||
feedId: feed.id
|
||||
}
|
||||
})
|
||||
console.log('Removed all fields from feed', { count: result.count, feedName: feed.name, nodeId: feed.nodeId })
|
||||
return result.count
|
||||
}
|
|
@ -0,0 +1,95 @@
|
|||
import { ElasticClient } from '../ElasticClient'
|
||||
import nodeIndex from '../Definitions/nodeIndex'
|
||||
import dateProperty from '../Properties/dateProperty'
|
||||
|
||||
const assertNodeIndex = async (elastic: ElasticClient):Promise<void> => {
|
||||
console.info('Setting node pipeline')
|
||||
await elastic.ingest.putPipeline({
|
||||
id: 'node',
|
||||
description: 'Default node pipeline',
|
||||
processors: [
|
||||
{
|
||||
// @ts-ignore
|
||||
geoip: {
|
||||
ignore_missing: true,
|
||||
field: 'serverIps',
|
||||
properties: [
|
||||
'location',
|
||||
'continent_name',
|
||||
'country_name',
|
||||
'country_iso_code',
|
||||
'region_iso_code',
|
||||
'region_name',
|
||||
'city_name'
|
||||
],
|
||||
target_field: 'geoip'
|
||||
}
|
||||
},
|
||||
{
|
||||
grok: {
|
||||
ignore_missing: true,
|
||||
field: 'softwareVersion',
|
||||
patterns: ['%{VERSION:standardizedSoftwareVersion}'],
|
||||
pattern_definitions: {
|
||||
VERSION: '(?:[0-9]+\\.?[0-9]+\\.?[0-9]+)'
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
})
|
||||
|
||||
console.info('Checking node index')
|
||||
const exists = await elastic.indices.exists({
|
||||
index: nodeIndex
|
||||
})
|
||||
if (exists) {
|
||||
console.info('Node index exists')
|
||||
} else {
|
||||
console.info('Creating node index')
|
||||
await elastic.indices.create({
|
||||
index: nodeIndex,
|
||||
settings: {
|
||||
default_pipeline: 'node'
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
console.info('Setting node mapping')
|
||||
await elastic.indices.putMapping({
|
||||
index: nodeIndex,
|
||||
properties: {
|
||||
name: { type: 'text' },
|
||||
domain: { type: 'text' },
|
||||
softwareName: { type: 'keyword' },
|
||||
softwareVersion: { type: 'text' },
|
||||
standardizedSoftwareVersion: { type: 'keyword' },
|
||||
totalUserCount: { type: 'integer' },
|
||||
monthActiveUserCount: { type: 'integer' },
|
||||
halfYearActiveUserCount: { type: 'integer' },
|
||||
statusesCount: { type: 'integer' },
|
||||
foundAt: dateProperty,
|
||||
refreshedAt: dateProperty,
|
||||
refreshAttemptedAt: dateProperty,
|
||||
openRegistrations: { type: 'boolean' },
|
||||
serverIps: { type: 'ip' },
|
||||
discoveredByDomain: { type: 'text' },
|
||||
geoip: {
|
||||
type: 'nested',
|
||||
properties: {
|
||||
location: { type: 'geo_point' },
|
||||
continent_name: { type: 'keyword' },
|
||||
country_name: { type: 'keyword' },
|
||||
country_iso_code: { type: 'keyword' },
|
||||
region_name: { type: 'keyword' },
|
||||
region_iso_code: { type: 'keyword' },
|
||||
city_name: { type: 'keyword' }
|
||||
}
|
||||
},
|
||||
accountFeedCount: { type: 'integer' },
|
||||
channelFeedCount: { type: 'integer' }
|
||||
}
|
||||
})
|
||||
await elastic.indices.refresh({ index: nodeIndex })
|
||||
}
|
||||
|
||||
export default assertNodeIndex
|
|
@ -1,15 +1,24 @@
|
|||
import { PrismaClient, Node } from '@prisma/client'
|
||||
export const createMissingNodes = async (client:PrismaClient, domains:string[]):Promise<number> => {
|
||||
const result = await client.node.createMany(
|
||||
{
|
||||
data: domains.map(domain => {
|
||||
return {
|
||||
domain: domain
|
||||
}
|
||||
}),
|
||||
skipDuplicates: true
|
||||
}
|
||||
)
|
||||
console.info('Created new nodes', { count: result.count })
|
||||
return result.count
|
||||
import { ElasticClient } from '../ElasticClient'
|
||||
import nodeIndex from '../Definitions/nodeIndex'
|
||||
export const createMissingNodes = async (elastic: ElasticClient, domains:string[], discoveredByDomain:string|undefined):Promise<number> => {
|
||||
const response = await elastic.bulk({
|
||||
index: nodeIndex,
|
||||
body: domains.flatMap(domain => [
|
||||
{
|
||||
create: { _id: domain }
|
||||
},
|
||||
{
|
||||
domain: domain,
|
||||
discoveredByDomain,
|
||||
foundAt: (new Date()).getTime()
|
||||
}
|
||||
])
|
||||
})
|
||||
const createdCount = response.items.filter(item => item.create.status === 201).length
|
||||
console.warn('Created new nodes', {
|
||||
requestedCount: domains.length,
|
||||
createdCount: createdCount,
|
||||
errors: response.items.filter(item => item.create.status !== 201).map(item => item.create.error.reason)
|
||||
})
|
||||
return createdCount
|
||||
}
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
import { ElasticClient } from '../ElasticClient'
|
||||
import nodeIndex from '../Definitions/nodeIndex'
|
||||
|
||||
export const deleteDomainNodes = async (elastic: ElasticClient, domains:string[]): Promise<number> => {
|
||||
await elastic.indices.refresh({ index: nodeIndex })
|
||||
const result = await elastic.deleteByQuery({
|
||||
index: nodeIndex,
|
||||
query: {
|
||||
bool: {
|
||||
should: domains.map(domain => {
|
||||
return {
|
||||
regexp: {
|
||||
domain: {
|
||||
value: '(.*\\.)?' + domain,
|
||||
case_insensitive: true
|
||||
}
|
||||
}
|
||||
}
|
||||
}),
|
||||
minimum_should_match: 1
|
||||
}
|
||||
}
|
||||
})
|
||||
console.info('Deleted domain nodes', {
|
||||
count: result.deleted, domains
|
||||
})
|
||||
return result.deleted
|
||||
}
|
|
@ -1,66 +1,20 @@
|
|||
import { Node, PrismaClient } from '@prisma/client'
|
||||
import { NoNodeFoundError } from './NoNodeFoundError'
|
||||
import findNotProcessedNodeWithAttemptLimit from './findNotProcessedNodeWithAttemptLimit'
|
||||
import findNodeWithOldestRefreshWithLimits from './findNodeWithOldestRefreshWithLimits'
|
||||
import Node from '../Definitions/Node'
|
||||
import { ElasticClient } from '../ElasticClient'
|
||||
import nodeIndex from '../Definitions/nodeIndex'
|
||||
|
||||
export const fetchNodeToProcess = async (prisma: PrismaClient): Promise<Node> => {
|
||||
const currentTimestamp = Date.now()
|
||||
const attemptLimitMilliseconds = parseInt(process.env.REATTEMPT_MINUTES ?? '60') * 60 * 1000
|
||||
const attemptLimitDate = new Date(currentTimestamp - attemptLimitMilliseconds)
|
||||
console.log('Searching for not yet processed node not attempted before attemptLimit', { attemptLimitDate, attemptLimitMilliseconds })
|
||||
const newNode = await prisma.node.findFirst({
|
||||
orderBy: {
|
||||
foundAt: 'asc'
|
||||
},
|
||||
where: {
|
||||
refreshedAt: null,
|
||||
OR: [
|
||||
{
|
||||
refreshAttemptedAt: {
|
||||
lt: attemptLimitDate
|
||||
}
|
||||
},
|
||||
{
|
||||
refreshAttemptedAt: null
|
||||
}
|
||||
]
|
||||
|
||||
}
|
||||
})
|
||||
if (newNode) {
|
||||
console.log('Found not yet processed node', { domain: newNode.domain })
|
||||
return newNode
|
||||
export const fetchNodeToProcess = async (elastic: ElasticClient): Promise<Node> => {
|
||||
await elastic.indices.refresh({ index: nodeIndex })
|
||||
let node = await findNotProcessedNodeWithAttemptLimit(elastic)
|
||||
if (node !== null) {
|
||||
return node
|
||||
}
|
||||
const refreshLimitMilliseconds = parseInt(process.env.REFRESH_HOURS ?? '168') * 60 * 60 * 1000
|
||||
const refreshLimitDate = new Date(currentTimestamp - refreshLimitMilliseconds)
|
||||
console.log('Searching instance not refreshed for longest time and before refreshLimit and attemptLimit', {
|
||||
refreshLimitMilliseconds,
|
||||
refreshLimitDate,
|
||||
attemptLimitDate,
|
||||
attemptLimitMilliseconds
|
||||
})
|
||||
const node = await prisma.node.findFirst({
|
||||
orderBy: {
|
||||
refreshedAt: 'asc'
|
||||
},
|
||||
where: {
|
||||
refreshedAt: {
|
||||
lt: refreshLimitDate
|
||||
},
|
||||
OR: [
|
||||
{
|
||||
refreshAttemptedAt: {
|
||||
lt: attemptLimitDate
|
||||
}
|
||||
},
|
||||
{
|
||||
refreshAttemptedAt: null
|
||||
}
|
||||
]
|
||||
}
|
||||
})
|
||||
if (!node) {
|
||||
throw new NoNodeFoundError()
|
||||
node = await findNodeWithOldestRefreshWithLimits(elastic)
|
||||
if (node !== null) {
|
||||
return node
|
||||
}
|
||||
|
||||
console.log('Found oldest node', { domain: node.domain })
|
||||
return node
|
||||
throw new NoNodeFoundError()
|
||||
}
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
import { ElasticClient } from '../ElasticClient'
|
||||
import nodeIndex from '../Definitions/nodeIndex'
|
||||
import Node from '../Definitions/Node'
|
||||
|
||||
const findNodeWithOldestRefreshWithLimits = async (elastic: ElasticClient): Promise<Node | null> => {
|
||||
const currentTimestamp = Date.now()
|
||||
const attemptLimitMilliseconds = parseInt(process.env.REATTEMPT_MINUTES ?? '60') * 60 * 1000
|
||||
const attemptLimitDate = new Date(currentTimestamp - attemptLimitMilliseconds)
|
||||
const refreshLimitMilliseconds = parseInt(process.env.REFRESH_HOURS ?? '168') * 60 * 60 * 1000
|
||||
const refreshLimitDate = new Date(currentTimestamp - refreshLimitMilliseconds)
|
||||
console.log('Searching instance not refreshed for longest time and before refreshLimit and attemptLimit', {
|
||||
refreshLimitMilliseconds,
|
||||
refreshLimitDate,
|
||||
attemptLimitDate,
|
||||
attemptLimitMilliseconds
|
||||
})
|
||||
const result = await elastic.search<Node>({
|
||||
index: nodeIndex,
|
||||
body: {
|
||||
size: 1,
|
||||
sort: [{
|
||||
refreshedAt: { order: 'asc' }
|
||||
}],
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{ range: { refreshedAt: { lt: refreshLimitDate.getTime() } } }
|
||||
],
|
||||
should: [
|
||||
{ range: { refreshAttemptedAt: { lt: attemptLimitDate.getTime() } } },
|
||||
{ bool: { must_not: [{ exists: { field: 'refreshAttemptedAt' } }] } }
|
||||
],
|
||||
minimum_should_match: 1
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
if (result.hits.hits.length > 0) {
|
||||
const node = result.hits.hits[0]._source
|
||||
console.log('Found oldest node', { node })
|
||||
return node
|
||||
}
|
||||
return null
|
||||
}
|
||||
export default findNodeWithOldestRefreshWithLimits
|
|
@ -0,0 +1,37 @@
|
|||
import { ElasticClient } from '../ElasticClient'
|
||||
import nodeIndex from '../Definitions/nodeIndex'
|
||||
import Node from '../Definitions/Node'
|
||||
|
||||
const findNotProcessedNodeWithAttemptLimit = async (elastic: ElasticClient): Promise<Node|null> => {
|
||||
const currentTimestamp = Date.now()
|
||||
const attemptLimitMilliseconds = parseInt(process.env.REATTEMPT_MINUTES ?? '60') * 60 * 1000
|
||||
const attemptLimitDate = new Date(currentTimestamp - attemptLimitMilliseconds)
|
||||
console.log('Searching for not yet processed node not attempted before attemptLimit', { attemptLimitDate, attemptLimitMilliseconds })
|
||||
const result = await elastic.search<Node>({
|
||||
index: nodeIndex,
|
||||
body: {
|
||||
size: 1,
|
||||
sort: [{
|
||||
foundAt: { order: 'asc' }
|
||||
}],
|
||||
query: {
|
||||
bool: {
|
||||
must_not: [
|
||||
{ exists: { field: 'refreshedAt' } }],
|
||||
should: [
|
||||
{ bool: { must_not: [{ exists: { field: 'refreshAttemptedAt' } }] } },
|
||||
{ range: { refreshAttemptedAt: { lt: attemptLimitDate.getTime() } } }
|
||||
],
|
||||
minimum_should_match: 1
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
if (result.hits.hits.length > 0) {
|
||||
const node = result.hits.hits[0]._source
|
||||
console.log('Found not yet processed node', { node })
|
||||
return node
|
||||
}
|
||||
return null
|
||||
}
|
||||
export default findNotProcessedNodeWithAttemptLimit
|
|
@ -0,0 +1,13 @@
|
|||
import { ElasticClient } from '../ElasticClient'
|
||||
import Node from '../Definitions/Node'
|
||||
import nodeIndex from '../Definitions/nodeIndex'
|
||||
|
||||
const getNode = async (elastic:ElasticClient, domain:string):Promise<Node> => {
|
||||
const result = await elastic.get<Node>({
|
||||
index: nodeIndex,
|
||||
id: domain
|
||||
})
|
||||
return result._source
|
||||
}
|
||||
|
||||
export default getNode
|
|
@ -0,0 +1,7 @@
|
|||
import getBannedDomains from '../../Jobs/Seed/getBannedDomains'
|
||||
|
||||
export default function isDomainNotBanned (domain):boolean {
|
||||
return getBannedDomains().filter(
|
||||
banned => domain.match(new RegExp('(.*\\.)?' + banned, 'gi')) !== null
|
||||
).length === 0
|
||||
}
|
|
@ -1,14 +1,17 @@
|
|||
import { Node, PrismaClient } from '@prisma/client'
|
||||
import { ElasticClient } from '../ElasticClient'
|
||||
import nodeIndex from '../Definitions/nodeIndex'
|
||||
import Node from '../Definitions/Node'
|
||||
import getNode from './getNode'
|
||||
|
||||
export const setNodeRefreshAttempted = async (prisma:PrismaClient, node:Node):Promise<Node> => {
|
||||
export const setNodeRefreshAttempted = async (elastic: ElasticClient, node:Node):Promise<Node> => {
|
||||
const date = new Date()
|
||||
console.info('Setting node refresh attempt', { domain: node.domain, date: date })
|
||||
return await prisma.node.update({
|
||||
data: {
|
||||
refreshAttemptedAt: date
|
||||
},
|
||||
where: {
|
||||
id: node.id
|
||||
await elastic.update<Node>({
|
||||
index: nodeIndex,
|
||||
id: node.domain,
|
||||
doc: {
|
||||
refreshAttemptedAt: date.getTime()
|
||||
}
|
||||
})
|
||||
return getNode(elastic, node.domain)
|
||||
}
|
||||
|
|
|
@ -1,14 +1,17 @@
|
|||
import { Node, PrismaClient } from '@prisma/client'
|
||||
import { ElasticClient } from '../ElasticClient'
|
||||
import nodeIndex from '../Definitions/nodeIndex'
|
||||
import Node from '../Definitions/Node'
|
||||
import getNode from './getNode'
|
||||
|
||||
export const setNodeRefreshed = async (prisma:PrismaClient, node:Node):Promise<Node> => {
|
||||
export const setNodeRefreshed = async (elastic: ElasticClient, node:Node):Promise<Node> => {
|
||||
const date = new Date()
|
||||
console.info('Setting node refreshed', { domain: node.domain, date: date })
|
||||
return await prisma.node.update({
|
||||
data: {
|
||||
refreshedAt: date
|
||||
},
|
||||
where: {
|
||||
id: node.id
|
||||
await elastic.update<Node>({
|
||||
index: nodeIndex,
|
||||
id: node.domain,
|
||||
doc: {
|
||||
refreshedAt: date.getTime()
|
||||
}
|
||||
})
|
||||
return getNode(elastic, node.domain)
|
||||
}
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
import { ElasticClient } from '../ElasticClient'
|
||||
import nodeIndex from '../Definitions/nodeIndex'
|
||||
import Node from '../Definitions/Node'
|
||||
import getNode from './getNode'
|
||||
import { NodeStats } from '../../Jobs/Nodes/updateNodeFeedStats'
|
||||
|
||||
export const setNodeStats = async (elastic: ElasticClient, node:Node, stats: NodeStats):Promise<Node> => {
|
||||
console.info('Setting node stats', { domain: node.domain, stats })
|
||||
await elastic.update<Node>({
|
||||
index: nodeIndex,
|
||||
id: node.domain,
|
||||
doc: {
|
||||
accountFeedCount: stats.account,
|
||||
channelFeedCount: stats.channel
|
||||
}
|
||||
})
|
||||
return getNode(elastic, node.domain)
|
||||
}
|
|
@ -1,21 +0,0 @@
|
|||
import { Node, PrismaClient } from '@prisma/client'
|
||||
import { NodeInfo } from '../../Fediverse/NodeInfo/retrieveNodeInfo'
|
||||
|
||||
export const updateNode = async (prisma: PrismaClient, node: Node, nodeInfo:NodeInfo):Promise<Node> => {
|
||||
const updated = await prisma.node.update({
|
||||
where: {
|
||||
id: node.id
|
||||
},
|
||||
data: {
|
||||
softwareName: nodeInfo.software.name.toLocaleLowerCase(),
|
||||
softwareVersion: nodeInfo.software.version,
|
||||
totalUserCount: nodeInfo.usage?.users?.total ?? null,
|
||||
monthActiveUserCount: nodeInfo.usage?.users?.activeMonth ?? null,
|
||||
halfYearActiveUserCount: nodeInfo.usage?.users?.activeHalfyear ?? null,
|
||||
statusesCount: nodeInfo.usage?.localPosts ?? null,
|
||||
openRegistrations: nodeInfo.openRegistrations ?? null
|
||||
}
|
||||
})
|
||||
console.info('Updated node info', { domain: updated.domain, software: updated.softwareName })
|
||||
return updated
|
||||
}
|
|
@ -0,0 +1,33 @@
|
|||
import { NodeInfo } from '../../Fediverse/NodeInfo/retrieveNodeInfo'
|
||||
import Node from '../Definitions/Node'
|
||||
import { ElasticClient } from '../ElasticClient'
|
||||
import nodeIndex from '../Definitions/nodeIndex'
|
||||
import getNode from './getNode'
|
||||
|
||||
const assertPositiveInt = (number:number|undefined):number|undefined => {
|
||||
if (number === undefined) {
|
||||
return undefined
|
||||
}
|
||||
return Math.max(0, Math.round(number))
|
||||
}
|
||||
|
||||
export const updateNodeInfo = async (elastic: ElasticClient, node: Node, nodeInfo:NodeInfo):Promise<Node> => {
|
||||
await elastic.update<Node>({
|
||||
index: nodeIndex,
|
||||
id: node.domain,
|
||||
doc: {
|
||||
name: nodeInfo?.name,
|
||||
openRegistrations: nodeInfo?.openRegistrations,
|
||||
softwareName: nodeInfo?.software?.name?.toLocaleLowerCase(),
|
||||
softwareVersion: nodeInfo?.software?.version,
|
||||
halfYearActiveUserCount: assertPositiveInt(nodeInfo?.usage?.users?.activeHalfyear),
|
||||
monthActiveUserCount: assertPositiveInt(nodeInfo?.usage?.users.activeMonth),
|
||||
statusesCount: assertPositiveInt(nodeInfo?.usage?.localPosts),
|
||||
totalUserCount: assertPositiveInt(nodeInfo?.usage?.users?.total)
|
||||
}
|
||||
})
|
||||
|
||||
const resultNode = await getNode(elastic, node.domain)
|
||||
console.info('Updated node info', { node })
|
||||
return resultNode
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
import Node from '../Definitions/Node'
|
||||
import { ElasticClient } from '../ElasticClient'
|
||||
import nodeIndex from '../Definitions/nodeIndex'
|
||||
import getNode from './getNode'
|
||||
|
||||
export const updateNodeIps = async (elastic:ElasticClient, node: Node, ips:string[]):Promise<Node> => {
|
||||
await elastic.update<Node>({
|
||||
index: nodeIndex,
|
||||
id: node.domain,
|
||||
doc: {
|
||||
serverIps: ips
|
||||
}
|
||||
})
|
||||
const resultNode = await getNode(elastic, node.domain)
|
||||
console.info('Updated node ips', { resultNode })
|
||||
return resultNode
|
||||
}
|
|
@ -1,5 +0,0 @@
|
|||
import { PrismaClient } from '@prisma/client'
|
||||
|
||||
const prismaClient = new PrismaClient()
|
||||
|
||||
export default prismaClient
|
|
@ -0,0 +1,5 @@
|
|||
import { MappingProperty } from '@elastic/elasticsearch/lib/api/types'
|
||||
|
||||
const dateProperty:MappingProperty = { type: 'date', format: 'epoch_millis' }
|
||||
|
||||
export default dateProperty
|
|
@ -1,15 +0,0 @@
|
|||
import { PrismaClient, Tag, Feed } from '@prisma/client'
|
||||
|
||||
export const createFeedTags = async (prisma:PrismaClient, feed:Feed, tags:Tag[]):Promise<number> => {
|
||||
const result = await prisma.feedToTag.createMany({
|
||||
data: tags.map(tag => {
|
||||
return {
|
||||
feedId: feed.id,
|
||||
tagId: tag.id
|
||||
}
|
||||
}),
|
||||
skipDuplicates: true
|
||||
})
|
||||
console.log('Added tags to feed', { count: result.count, feedName: feed.name, nodeId: feed.nodeId })
|
||||
return result.count
|
||||
}
|
|
@ -1,15 +0,0 @@
|
|||
import { PrismaClient } from '@prisma/client'
|
||||
|
||||
export const createMissingTags = async (prisma:PrismaClient, tagNames:string[]):Promise<number> => {
|
||||
const result = await prisma.tag.createMany({
|
||||
data: tagNames.map(tagName => {
|
||||
return {
|
||||
name: tagName
|
||||
}
|
||||
}
|
||||
),
|
||||
skipDuplicates: true
|
||||
})
|
||||
console.log('Created missing tags', { count: result.count })
|
||||
return result.count
|
||||
}
|
|
@ -1,11 +0,0 @@
|
|||
import { PrismaClient, Feed } from '@prisma/client'
|
||||
|
||||
export const deleteAllFeedTags = async (prisma:PrismaClient, feed:Feed):Promise<number> => {
|
||||
const result = await prisma.feedToTag.deleteMany({
|
||||
where: {
|
||||
feedId: feed.id
|
||||
}
|
||||
})
|
||||
console.log('Removed all tags from feed', { count: result.count, feedName: feed.name, nodeId: feed.nodeId })
|
||||
return result.count
|
||||
}
|
|
@ -1,11 +0,0 @@
|
|||
import { PrismaClient, Tag } from '@prisma/client'
|
||||
|
||||
export const fetchTags = async (prisma:PrismaClient, tagNames:string[]):Promise<Tag[]> => {
|
||||
return await prisma.tag.findMany({
|
||||
where: {
|
||||
name: {
|
||||
in: tagNames
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
|
@ -1,12 +1,16 @@
|
|||
import providerRegistry from './Fediverse/Providers'
|
||||
import prismaClient from './Storage/PrismaClient'
|
||||
import { addNodeSeed } from './Jobs/Seed/addNodeSeed'
|
||||
import { processNextNode } from './Jobs/processNextNode'
|
||||
import assertNodeIndex from './Storage/Nodes/assertNodeIndex'
|
||||
import assertFeedIndex from './Storage/Feeds/assertFeedIndex'
|
||||
import elasticClient from './Storage/ElasticClient'
|
||||
import deleteDomains from './Jobs/Seed/deleteBannedNodes'
|
||||
import getBannedDomains from './Jobs/Seed/getBannedDomains'
|
||||
|
||||
const loop = async (): Promise<void> => {
|
||||
while (true) {
|
||||
try {
|
||||
await processNextNode(prismaClient, providerRegistry)
|
||||
await processNextNode(elasticClient, providerRegistry)
|
||||
} catch (err) {
|
||||
console.warn(err)
|
||||
const waitForJobMilliseconds = parseInt(process.env.WAIT_FOR_JOB_MINUTES ?? '60') * 60 * 1000
|
||||
|
@ -18,8 +22,11 @@ const loop = async (): Promise<void> => {
|
|||
}
|
||||
|
||||
const app = async (): Promise<void> => {
|
||||
const seedDomain = process.env.SEED_NODE_DOMAIN ?? 'mastodon.social'
|
||||
await addNodeSeed(prismaClient, seedDomain)
|
||||
await assertNodeIndex(elasticClient)
|
||||
await assertFeedIndex(elasticClient)
|
||||
await deleteDomains(elasticClient, getBannedDomains())
|
||||
const seedDomains = (process.env.SEED_NODE_DOMAIN ?? 'mastodon.social').split(',')
|
||||
await addNodeSeed(elasticClient, seedDomains)
|
||||
setTimeout(loop)
|
||||
}
|
||||
|
||||
|
|
Ładowanie…
Reference in New Issue