From 1cbd5df5aea8f8755a2a0b75998abaaf0813b05d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0t=C4=9Bp=C3=A1n=20=C5=A0korpil?= Date: Tue, 29 Nov 2022 14:11:27 +0100 Subject: [PATCH] Added config for seed domain timeout --- Dockerfile | 1 + README.md | 29 ++++++++++--------- .../NodeInfo/retrieveDomainNodeInfo.ts | 2 +- .../Fediverse/NodeInfo/retrieveNodeInfo.ts | 6 ++-- .../Fediverse/NodeInfo/retrieveWellKnown.ts | 6 ++-- .../Mastodon/retrieveLocalPublicUsersPage.ts | 6 ++-- .../Providers/Mastodon/retrievePeers.ts | 6 ++-- .../Misskey/retrieveInstancesPage.ts | 6 ++-- .../Providers/Misskey/retrieveUsersPage.ts | 6 ++-- .../Providers/Peertube/retrieveAccounts.ts | 6 ++-- .../Providers/Peertube/retrieveFollowers.ts | 6 ++-- .../Peertube/retrieveVideoChannels.ts | 6 ++-- .../src/Fediverse/RobotsTxt/RobotsTxt.ts | 6 ++-- .../src/Fediverse/RobotsTxt/RobotsTxtError.ts | 6 ++-- .../src/Fediverse/RobotsTxt/fetchRobotsTxt.ts | 20 ++++++------- .../Fediverse/getSeedTimeoutMilliseconds.ts | 5 ++++ .../src/Fediverse/getTimeoutMilliseconds.ts | 9 ++++++ application/src/Jobs/Seed/getSeedDomains.ts | 5 ++++ application/src/Jobs/Seed/isSeedDomain.ts | 5 ++++ application/src/app.ts | 6 ++-- 20 files changed, 86 insertions(+), 62 deletions(-) create mode 100644 application/src/Fediverse/getSeedTimeoutMilliseconds.ts create mode 100644 application/src/Fediverse/getTimeoutMilliseconds.ts create mode 100644 application/src/Jobs/Seed/getSeedDomains.ts create mode 100644 application/src/Jobs/Seed/isSeedDomain.ts diff --git a/Dockerfile b/Dockerfile index 9f5e93a..4bd493f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,6 +7,7 @@ ENV ELASTIC_URL='http://elastic:9200' \ REFRESH_HOURS='120' \ WAIT_FOR_JOB_MINUTES='60' \ DEFAULT_TIMEOUT_MILLISECONDS='10000' \ + SEED_TIMEOUT_MILLISECONDS=${DEFAULT_TIMEOUT_MILLISECONDS} \ BANNED_DOMAINS='' \ MAX_CRAWLING_DEPTH='' \ CRAWLING_VERSION='0' \ diff --git a/README.md b/README.md index 51349ce..596ab5c 100644 --- a/README.md +++ b/README.md @@ -24,20 
+24,21 @@ Data providers for more apps will be probably added soon (Pull requests are welc Configuration is done using environmental variables: -| Variable | Description | Default value / Example value | -|--------------------------------|--------------------------------------------------------------------------------------------------|-------------------------------| - | `ELASTIC_URL` | Url address of ElasticSearch server | `http://elastic:9200` | -| `ELASTIC_USER` | Username for EalsticSearch server | `elastic` | -| `ELASTIC_PASSWORD` | Username for EalsticSearch server | empty | -| `SEED_NODE_DOMAIN` | Domain of the first node to search users and other nodes on | `mastodon.social` | -| `REATTEMPT_MINUTES` | _Optional_, How many minutes should be waited for next node refresh attempt if the refresh fails | `60 ` | -| `REFRESH_HOURS` | _Optional_, How often (in hours) should be node info refreshed | `120` | -| `WAIT_FOR_JOB_MINUTES` | _Optional_, How many minutes should the thread sleep if there are no nodes to refresh | `60` | -| `DEFAULT_TIMEOUT_MILLISECONDS` | _Optional_, How many milliseconds should http wait for node api response on refresh | `10000` | -| `BANNED_DOMAINS` | _Optional_, Domains not to index (even with subdomains) | _empty_ | -| `CRAWLING_VERSION` | _Optional_, Increasing this number can enforce recrawling of the whole index | 0 | -| `MAX_CRAWLING_DEPTH` | _Optional_, Limits how far is fediverse indexed from seed nodes | _empty_ | -| `TZ` | _Optional_, Timezone | `UTC` | +| Variable | Description | Default value / Example value | +|--------------------------------|-----------------------------------------------------------------------------------------------------|-------------------------------------------| + | `ELASTIC_URL` | Url address of ElasticSearch server | `http://elastic:9200` | +| `ELASTIC_USER` | Username for EalsticSearch server | `elastic` | +| `ELASTIC_PASSWORD` | Username for EalsticSearch server | empty | +| `SEED_NODE_DOMAIN` | 
Comma-separated domains of the first nodes to search users and other nodes on | `mastodon.social,mastodon.online` | +| `REATTEMPT_MINUTES` | _Optional_, How many minutes should be waited for next node refresh attempt if the refresh fails | `60 ` | +| `REFRESH_HOURS` | _Optional_, How often (in hours) should be node info refreshed | `120` | +| `WAIT_FOR_JOB_MINUTES` | _Optional_, How many minutes should the thread sleep if there are no nodes to refresh | `60` | +| `DEFAULT_TIMEOUT_MILLISECONDS` | _Optional_, How many milliseconds should http wait for node api response on refresh | `10000` | +| `SEED_TIMEOUT_MILLISECONDS` | _Optional_, How many milliseconds should http wait for node api response on refresh of seed domains | _value of `DEFAULT_TIMEOUT_MILLISECONDS`_ | +| `BANNED_DOMAINS` | _Optional_, Domains not to index (even with subdomains) | _empty_ | +| `CRAWLING_VERSION` | _Optional_, Increasing this number can enforce recrawling of the whole index | 0 | +| `MAX_CRAWLING_DEPTH` | _Optional_, Limits how far is fediverse indexed from seed nodes | _empty_ | +| `TZ` | _Optional_, Timezone | `UTC` | ## Deploy App is designed to be run in docker container and deployed using docker-compose.
More info can be found in [FediSearch example docker-compose](https://github.com/Stopka/fedisearch-compose) project diff --git a/application/src/Fediverse/NodeInfo/retrieveDomainNodeInfo.ts b/application/src/Fediverse/NodeInfo/retrieveDomainNodeInfo.ts index bf49dc7..0c0774a 100644 --- a/application/src/Fediverse/NodeInfo/retrieveDomainNodeInfo.ts +++ b/application/src/Fediverse/NodeInfo/retrieveDomainNodeInfo.ts @@ -14,5 +14,5 @@ export const retrieveDomainNodeInfo = async ( if (typeof link === 'undefined') { throw new NoSupportedLinkError(domain) } - return await retrieveNodeInfo(link.href, robotsTxt) + return await retrieveNodeInfo(new URL(link.href), robotsTxt) } diff --git a/application/src/Fediverse/NodeInfo/retrieveNodeInfo.ts b/application/src/Fediverse/NodeInfo/retrieveNodeInfo.ts index 8428d91..9b693ca 100644 --- a/application/src/Fediverse/NodeInfo/retrieveNodeInfo.ts +++ b/application/src/Fediverse/NodeInfo/retrieveNodeInfo.ts @@ -1,6 +1,6 @@ import { z } from 'zod' import { assertSuccessJsonResponse } from '../assertSuccessJsonResponse' -import { getDefaultTimeoutMilliseconds } from '../getDefaultTimeoutMilliseconds' +import getTimeoutMilliseconds from '../getTimeoutMilliseconds.js' import RobotsTxt from '../RobotsTxt/RobotsTxt.js' const schema = z.object({ @@ -27,10 +27,10 @@ const schema = z.object({ export type NodeInfo = z.infer -export const retrieveNodeInfo = async (url: string, robotsTxt: RobotsTxt): Promise => { +export const retrieveNodeInfo = async (url: URL, robotsTxt: RobotsTxt): Promise => { console.info('Retrieving node info', { url }) const nodeInfoResponse = await robotsTxt.getIfAllowed(url, { - timeout: getDefaultTimeoutMilliseconds() + timeout: getTimeoutMilliseconds(url.hostname) }) assertSuccessJsonResponse(nodeInfoResponse) return schema.parse(nodeInfoResponse.data) diff --git a/application/src/Fediverse/NodeInfo/retrieveWellKnown.ts b/application/src/Fediverse/NodeInfo/retrieveWellKnown.ts index d5ca66c..edc311f 100644 --- 
a/application/src/Fediverse/NodeInfo/retrieveWellKnown.ts +++ b/application/src/Fediverse/NodeInfo/retrieveWellKnown.ts @@ -1,6 +1,6 @@ import { assertSuccessJsonResponse } from '../assertSuccessJsonResponse' import { z } from 'zod' -import { getDefaultTimeoutMilliseconds } from '../getDefaultTimeoutMilliseconds' +import getTimeoutMilliseconds from '../getTimeoutMilliseconds.js' import RobotsTxt from '../RobotsTxt/RobotsTxt.js' const wellKnownSchema = z.object({ @@ -16,9 +16,9 @@ export type WellKnown = z.infer export const retrieveWellKnown = async (domain: string, robotsTxt: RobotsTxt): Promise => { console.info('Retrieving well known', { domain }) - const wellKnownUrl = `https://${domain}/.well-known/nodeinfo` + const wellKnownUrl = new URL(`https://${domain}/.well-known/nodeinfo`) const wellKnownResponse = await robotsTxt.getIfAllowed(wellKnownUrl, { - timeout: getDefaultTimeoutMilliseconds(), + timeout: getTimeoutMilliseconds(domain), maxContentLength: 5000 }) assertSuccessJsonResponse(wellKnownResponse) diff --git a/application/src/Fediverse/Providers/Mastodon/retrieveLocalPublicUsersPage.ts b/application/src/Fediverse/Providers/Mastodon/retrieveLocalPublicUsersPage.ts index 2df82ca..28d342a 100644 --- a/application/src/Fediverse/Providers/Mastodon/retrieveLocalPublicUsersPage.ts +++ b/application/src/Fediverse/Providers/Mastodon/retrieveLocalPublicUsersPage.ts @@ -1,6 +1,6 @@ import { assertSuccessJsonResponse } from '../../assertSuccessJsonResponse' import { z } from 'zod' -import { getDefaultTimeoutMilliseconds } from '../../getDefaultTimeoutMilliseconds' +import getTimeoutMilliseconds from '../../getTimeoutMilliseconds.js' import { FeedProviderMethod } from '../FeedProviderMethod' import { NoMoreFeedsError } from '../NoMoreFeedsError' import { FeedData } from '../FeedData' @@ -56,13 +56,13 @@ export const retrieveLocalPublicUsersPage: FeedProviderMethod = async ( page, robotsTxt ): Promise => { - const response = await 
robotsTxt.getIfAllowed(`https://${domain}/api/v1/directory`, { + const response = await robotsTxt.getIfAllowed(new URL(`https://${domain}/api/v1/directory`), { params: { limit, offset: page * limit, local: true }, - timeout: getDefaultTimeoutMilliseconds() + timeout: getTimeoutMilliseconds(domain) }) assertSuccessJsonResponse(response) const responseData = schema.parse(response.data) diff --git a/application/src/Fediverse/Providers/Mastodon/retrievePeers.ts b/application/src/Fediverse/Providers/Mastodon/retrievePeers.ts index 823702b..10ac0dc 100644 --- a/application/src/Fediverse/Providers/Mastodon/retrievePeers.ts +++ b/application/src/Fediverse/Providers/Mastodon/retrievePeers.ts @@ -1,6 +1,6 @@ import { assertSuccessJsonResponse } from '../../assertSuccessJsonResponse' import { z } from 'zod' -import { getDefaultTimeoutMilliseconds } from '../../getDefaultTimeoutMilliseconds' +import getTimeoutMilliseconds from '../../getTimeoutMilliseconds.js' import { NodeProviderMethod } from '../NodeProviderMethod' import { NoMoreNodesError } from '../NoMoreNodesError' @@ -11,9 +11,9 @@ export const retrievePeers: NodeProviderMethod = async (domain, page, robotsTxt) throw new NoMoreNodesError('peer') } const response = await robotsTxt.getIfAllowed( - `https://${domain}/api/v1/instance/peers`, + new URL(`https://${domain}/api/v1/instance/peers`), { - timeout: getDefaultTimeoutMilliseconds() + timeout: getTimeoutMilliseconds(domain) } ) assertSuccessJsonResponse(response) diff --git a/application/src/Fediverse/Providers/Misskey/retrieveInstancesPage.ts b/application/src/Fediverse/Providers/Misskey/retrieveInstancesPage.ts index 142c197..6f2d1e9 100644 --- a/application/src/Fediverse/Providers/Misskey/retrieveInstancesPage.ts +++ b/application/src/Fediverse/Providers/Misskey/retrieveInstancesPage.ts @@ -1,6 +1,6 @@ import { assertSuccessJsonResponse } from '../../assertSuccessJsonResponse' import { z } from 'zod' -import { getDefaultTimeoutMilliseconds } from 
'../../getDefaultTimeoutMilliseconds' +import getTimeoutMilliseconds from '../../getTimeoutMilliseconds.js' import { NodeProviderMethod } from '../NodeProviderMethod' import { NoMoreNodesError } from '../NoMoreNodesError' @@ -18,7 +18,7 @@ export const retrieveInstancesPage: NodeProviderMethod = async ( robotsTxt ) => { const response = await robotsTxt.postIfAllowed( - `https://${domain}/api/federation/instances`, + new URL(`https://${domain}/api/federation/instances`), { host: null, blocked: null, @@ -32,7 +32,7 @@ export const retrieveInstancesPage: NodeProviderMethod = async ( sort: '+id' }, { - timeout: getDefaultTimeoutMilliseconds() + timeout: getTimeoutMilliseconds(domain) } ) assertSuccessJsonResponse(response) diff --git a/application/src/Fediverse/Providers/Misskey/retrieveUsersPage.ts b/application/src/Fediverse/Providers/Misskey/retrieveUsersPage.ts index e360709..9df3363 100644 --- a/application/src/Fediverse/Providers/Misskey/retrieveUsersPage.ts +++ b/application/src/Fediverse/Providers/Misskey/retrieveUsersPage.ts @@ -1,6 +1,6 @@ import { assertSuccessJsonResponse } from '../../assertSuccessJsonResponse' import { z } from 'zod' -import { getDefaultTimeoutMilliseconds } from '../../getDefaultTimeoutMilliseconds' +import getTimeoutMilliseconds from '../../getTimeoutMilliseconds.js' import { NoMoreFeedsError } from '../NoMoreFeedsError' import { FeedProviderMethod } from '../FeedProviderMethod' import { FeedData } from '../FeedData' @@ -71,7 +71,7 @@ export const retrieveUsersPage: FeedProviderMethod = async ( robotsTxt ): Promise => { const response = await robotsTxt.postIfAllowed( - `https://${domain}/api/users`, + new URL(`https://${domain}/api/users`), { state: 'all', origin: 'local', @@ -80,7 +80,7 @@ export const retrieveUsersPage: FeedProviderMethod = async ( offset: limit * page }, { - timeout: getDefaultTimeoutMilliseconds() + timeout: getTimeoutMilliseconds(domain) } ) assertSuccessJsonResponse(response) diff --git 
a/application/src/Fediverse/Providers/Peertube/retrieveAccounts.ts b/application/src/Fediverse/Providers/Peertube/retrieveAccounts.ts index 4007dfd..fd7a0be 100644 --- a/application/src/Fediverse/Providers/Peertube/retrieveAccounts.ts +++ b/application/src/Fediverse/Providers/Peertube/retrieveAccounts.ts @@ -1,9 +1,9 @@ +import getTimeoutMilliseconds from '../../getTimeoutMilliseconds.js' import { FeedData } from '../FeedData' import { assertSuccessJsonResponse } from '../../assertSuccessJsonResponse' import { z } from 'zod' import { avatarSchema } from './Avatar' import { parseAvatarUrl } from './parseAvatarUrl' -import { getDefaultTimeoutMilliseconds } from '../../getDefaultTimeoutMilliseconds' import { parseDescription } from './parseDescription' import { NoMoreFeedsError } from '../NoMoreFeedsError' import { FeedProviderMethod } from '../FeedProviderMethod' @@ -29,13 +29,13 @@ const schema = z.object({ }) export const retrieveAccounts: FeedProviderMethod = async (domain, page, robotsTxt) => { - const response = await robotsTxt.getIfAllowed(`https://${domain}/api/v1/accounts`, { + const response = await robotsTxt.getIfAllowed(new URL(`https://${domain}/api/v1/accounts`), { params: { count: limit, sort: 'createdAt', start: page * limit }, - timeout: getDefaultTimeoutMilliseconds() + timeout: getTimeoutMilliseconds(domain) }) assertSuccessJsonResponse(response) const responseData = schema.parse(response.data) diff --git a/application/src/Fediverse/Providers/Peertube/retrieveFollowers.ts b/application/src/Fediverse/Providers/Peertube/retrieveFollowers.ts index 3093f41..82914b4 100644 --- a/application/src/Fediverse/Providers/Peertube/retrieveFollowers.ts +++ b/application/src/Fediverse/Providers/Peertube/retrieveFollowers.ts @@ -1,6 +1,6 @@ import { assertSuccessJsonResponse } from '../../assertSuccessJsonResponse' import { z } from 'zod' -import { getDefaultTimeoutMilliseconds } from '../../getDefaultTimeoutMilliseconds' +import getTimeoutMilliseconds from 
'../../getTimeoutMilliseconds.js' import { NodeProviderMethod } from '../NodeProviderMethod' import { NoMoreNodesError } from '../NoMoreNodesError' @@ -22,14 +22,14 @@ const schema = z.object({ export const retrieveFollowers: NodeProviderMethod = async (domain, page, robotsTxt) => { const response = await robotsTxt.getIfAllowed( - `https://${domain}/api/v1/server/followers`, + new URL(`https://${domain}/api/v1/server/followers`), { params: { count: limit, sort: 'createdAt', start: page * limit }, - timeout: getDefaultTimeoutMilliseconds() + timeout: getTimeoutMilliseconds(domain) } ) assertSuccessJsonResponse(response) diff --git a/application/src/Fediverse/Providers/Peertube/retrieveVideoChannels.ts b/application/src/Fediverse/Providers/Peertube/retrieveVideoChannels.ts index 21a007e..58d6ec2 100644 --- a/application/src/Fediverse/Providers/Peertube/retrieveVideoChannels.ts +++ b/application/src/Fediverse/Providers/Peertube/retrieveVideoChannels.ts @@ -1,10 +1,10 @@ +import getTimeoutMilliseconds from '../../getTimeoutMilliseconds.js' import { FeedData } from '../FeedData' import { assertSuccessJsonResponse } from '../../assertSuccessJsonResponse' import { z } from 'zod' import { FieldData } from '../FieldData' import { avatarSchema } from './Avatar' import { parseAvatarUrl } from './parseAvatarUrl' -import { getDefaultTimeoutMilliseconds } from '../../getDefaultTimeoutMilliseconds' import { parseDescription } from './parseDescription' import { FeedProviderMethod } from '../FeedProviderMethod' import { NoMoreFeedsError } from '../NoMoreFeedsError' @@ -40,13 +40,13 @@ export const retrieveVideoChannels: FeedProviderMethod = async ( page, robotsTxt ) => { - const response = await robotsTxt.getIfAllowed(`https://${domain}/api/v1/video-channels`, { + const response = await robotsTxt.getIfAllowed(new URL(`https://${domain}/api/v1/video-channels`), { params: { count: limit, sort: 'createdAt', start: page * limit }, - timeout: getDefaultTimeoutMilliseconds() + timeout: 
getTimeoutMilliseconds(domain) }) assertSuccessJsonResponse(response) const responseData = schema.parse(response.data) diff --git a/application/src/Fediverse/RobotsTxt/RobotsTxt.ts b/application/src/Fediverse/RobotsTxt/RobotsTxt.ts index 01fb4a7..1081fb8 100644 --- a/application/src/Fediverse/RobotsTxt/RobotsTxt.ts +++ b/application/src/Fediverse/RobotsTxt/RobotsTxt.ts @@ -1,7 +1,7 @@ import { AxiosRequestConfig, AxiosResponse } from 'axios' export default interface RobotsTxt { - isAllowed: (url: string) => boolean - getIfAllowed: , D = any>(url: string, config?: AxiosRequestConfig) => Promise - postIfAllowed: , D = any>(url: string, data?: D, config?: AxiosRequestConfig) => Promise + isAllowed: (url: URL) => boolean + getIfAllowed: , D = any>(url: URL, config?: AxiosRequestConfig) => Promise + postIfAllowed: , D = any>(url: URL, data?: D, config?: AxiosRequestConfig) => Promise } diff --git a/application/src/Fediverse/RobotsTxt/RobotsTxtError.ts b/application/src/Fediverse/RobotsTxt/RobotsTxtError.ts index ecd96a0..fc09bf5 100644 --- a/application/src/Fediverse/RobotsTxt/RobotsTxtError.ts +++ b/application/src/Fediverse/RobotsTxt/RobotsTxtError.ts @@ -1,7 +1,7 @@ export class RobotsTxtError extends Error { - public readonly url - public constructor (url: string) { + public readonly url: string + public constructor (url: URL) { super('Request was blocked by robots.txt') - this.url = url + this.url = url.toString() } } diff --git a/application/src/Fediverse/RobotsTxt/fetchRobotsTxt.ts b/application/src/Fediverse/RobotsTxt/fetchRobotsTxt.ts index 767fb58..1c931c5 100644 --- a/application/src/Fediverse/RobotsTxt/fetchRobotsTxt.ts +++ b/application/src/Fediverse/RobotsTxt/fetchRobotsTxt.ts @@ -1,6 +1,6 @@ import axios, { AxiosRequestConfig, AxiosResponse } from 'axios' import robotsParser from 'robots-parser' -import { getDefaultTimeoutMilliseconds } from '../getDefaultTimeoutMilliseconds.js' +import getTimeoutMilliseconds from '../getTimeoutMilliseconds.js' import 
RobotsTxt from './RobotsTxt.js' import { RobotsTxtError } from './RobotsTxtError.js' @@ -8,35 +8,35 @@ const userAgent = 'FediCrawl/1.0' export default async function fetchRobotsTxt (domain: string): Promise { console.info('Fetching robots.txt', { domain }) - const url = `https://${domain}/robots.txt` + const url = new URL(`https://${domain}/robots.txt`) let content = '' try { - const robotsTxt = await axios.get(url, { + const robotsTxt = await axios.get(url.toString(), { headers: { 'User-Agent': userAgent }, - timeout: getDefaultTimeoutMilliseconds() + timeout: getTimeoutMilliseconds(domain) }) content = String(robotsTxt.data) } catch (error) { console.info('Robots.txt not found', { error, url }) } - const robots = robotsParser(url, content) - const isAllowed = (url: string): boolean => robots.isAllowed(url, userAgent) ?? true + const robots = robotsParser(url.toString(), content) + const isAllowed = (url: URL): boolean => robots.isAllowed(url.toString(), userAgent) ?? true return { isAllowed, - getIfAllowed: async , D = any>(url: string, config?: AxiosRequestConfig): Promise => { + getIfAllowed: async , D = any>(url: URL, config?: AxiosRequestConfig): Promise => { if (!isAllowed(url)) { throw new RobotsTxtError(url) } - return await axios.get(url, { + return await axios.get(url.toString(), { headers: { 'User-Agent': userAgent }, ...config }) }, - postIfAllowed: async , D = any>(url: string, data?: D, config?: AxiosRequestConfig): Promise => { + postIfAllowed: async , D = any>(url: URL, data?: D, config?: AxiosRequestConfig): Promise => { if (!isAllowed(url)) { throw new RobotsTxtError(url) } - return await axios.post(url, data, { + return await axios.post(url.toString(), data, { headers: { 'User-Agent': userAgent }, ...config }) diff --git a/application/src/Fediverse/getSeedTimeoutMilliseconds.ts b/application/src/Fediverse/getSeedTimeoutMilliseconds.ts new file mode 100644 index 0000000..3105ef1 --- /dev/null +++ 
b/application/src/Fediverse/getSeedTimeoutMilliseconds.ts
@@ -0,0 +1,13 @@
+import { getDefaultTimeoutMilliseconds } from './getDefaultTimeoutMilliseconds.js'
+
+/**
+ * Timeout in milliseconds for HTTP requests to seed domains.
+ *
+ * Falls back to the default timeout when `SEED_TIMEOUT_MILLISECONDS` is unset,
+ * empty or not a number: `parseInt('')` is `NaN`, and `??` alone does not
+ * catch the empty string produced by an unresolved `${...}` in the Dockerfile.
+ */
+export const getSeedTimeoutMilliseconds = (): number => {
+  const timeout = parseInt(process.env.SEED_TIMEOUT_MILLISECONDS ?? '', 10)
+  return Number.isNaN(timeout) ? getDefaultTimeoutMilliseconds() : timeout
+}
diff --git a/application/src/Fediverse/getTimeoutMilliseconds.ts b/application/src/Fediverse/getTimeoutMilliseconds.ts
new file mode 100644
index 0000000..e6c657e
--- /dev/null
+++ b/application/src/Fediverse/getTimeoutMilliseconds.ts
@@ -0,0 +1,13 @@
+import isSeedDomain from '../Jobs/Seed/isSeedDomain.js'
+import { getDefaultTimeoutMilliseconds } from './getDefaultTimeoutMilliseconds.js'
+import { getSeedTimeoutMilliseconds } from './getSeedTimeoutMilliseconds.js'
+
+/**
+ * Picks the HTTP timeout for a domain: the seed timeout for seed domains,
+ * the default timeout for every other domain.
+ */
+export default function getTimeoutMilliseconds (domain: string): number {
+  return isSeedDomain(domain)
+    ? getSeedTimeoutMilliseconds()
+    : getDefaultTimeoutMilliseconds()
+}
diff --git a/application/src/Jobs/Seed/getSeedDomains.ts b/application/src/Jobs/Seed/getSeedDomains.ts
new file mode 100644
index 0000000..5260bbc
--- /dev/null
+++ b/application/src/Jobs/Seed/getSeedDomains.ts
@@ -0,0 +1,12 @@
+/**
+ * Seed domains from the comma-separated `SEED_NODE_DOMAIN` variable.
+ *
+ * Entries are trimmed and empty ones dropped, so `a.example, b.example`
+ * yields `['a.example', 'b.example']` rather than a name with a leading space.
+ */
+export default function getSeedDomains (): string[] {
+  return (process.env.SEED_NODE_DOMAIN ?? 'mastodon.social,mastodon.online')
+    .split(',')
+    .map(domain => domain.trim())
+    .filter(domain => domain !== '')
+}
diff --git a/application/src/Jobs/Seed/isSeedDomain.ts b/application/src/Jobs/Seed/isSeedDomain.ts
new file mode 100644
index 0000000..eb44f4b
--- /dev/null
+++ b/application/src/Jobs/Seed/isSeedDomain.ts
@@ -0,0 +1,12 @@
+import getSeedDomains from './getSeedDomains.js'
+
+/**
+ * Tells whether the domain is one of the configured seed domains.
+ *
+ * Compared case-insensitively: callers pass both raw domains and
+ * `URL.hostname`, which is always lower-cased.
+ */
+export default function isSeedDomain (domain: string): boolean {
+  const wanted = domain.toLowerCase()
+  return getSeedDomains().some(seed => seed.toLowerCase() === wanted)
+}
diff --git a/application/src/app.ts b/application/src/app.ts
index 5dde91f..5f639cf 100644
--- a/application/src/app.ts
+++ b/application/src/app.ts
@@ -1,6 +1,7 @@
 import providerRegistry from './Fediverse/Providers'
 import { addNodeSeed } from './Jobs/Seed/addNodeSeed'
 import { processNextNode } from './Jobs/processNextNode'
+import getSeedDomains from './Jobs/Seed/getSeedDomains.js'
 import assertNodeIndex from './Storage/Nodes/assertNodeIndex'
 import assertFeedIndex from './Storage/Feeds/assertFeedIndex'
 import elasticClient from './Storage/ElasticClient'
@@ -34,9 +35,6 @@ const app = async (): Promise => {
   await assertNodeIndex(elasticClient)
   await assertFeedIndex(elasticClient)
   await deleteDomains(elasticClient, getBannedDomains())
-  const seedDomains = (process.env.SEED_NODE_DOMAIN ?? 'mastodon.social').split(
-    ','
-  )
-  await addNodeSeed(elasticClient, seedDomains)
+  await addNodeSeed(elasticClient, getSeedDomains())
   await loop()
 }