kopia lustrzana https://github.com/Stopka/fedicrawl
Added promise batching
rodzic
8f42791ee2
commit
4064db521f
|
@ -1,6 +1,6 @@
|
|||
import { FeedData } from '../../Fediverse/Providers/FeedData'
|
||||
import { extractTags } from '../../StringTools/extractTags'
|
||||
import { extractEmails } from '../../StringTools/extractEmails'
|
||||
import { extractTags } from '../../Utils/extractTags'
|
||||
import { extractEmails } from '../../Utils/extractEmails'
|
||||
import { createFeed } from '../../Storage/Feeds/createFeed'
|
||||
import prepareFulltext from './prepareFulltext'
|
||||
import Feed from '../../Storage/Definitions/Feed'
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import { FeedData } from '../../Fediverse/Providers/FeedData'
|
||||
import { extractTags } from '../../StringTools/extractTags'
|
||||
import { extractEmails } from '../../StringTools/extractEmails'
|
||||
import { extractTags } from '../../Utils/extractTags'
|
||||
import { extractEmails } from '../../Utils/extractEmails'
|
||||
import { updateFeed } from '../../Storage/Feeds/updateFeed'
|
||||
import Feed from '../../Storage/Definitions/Feed'
|
||||
import Node from '../../Storage/Definitions/Node'
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
import RobotsTxt from '../../Fediverse/RobotsTxt/RobotsTxt.js'
|
||||
import batchPromises from '../../Utils/batchPromises.js'
|
||||
import { refreshOrAddFeed } from './refreshOrAddFeed'
|
||||
import { FeedProvider } from '../../Fediverse/Providers/FeedProvider'
|
||||
import Node from '../../Storage/Definitions/Node'
|
||||
|
@ -21,10 +22,12 @@ export const refreshFeedsOnPage = async (
|
|||
provider: provider.getKey(),
|
||||
page
|
||||
})
|
||||
return await Promise.all(
|
||||
return await batchPromises(
|
||||
indexableFeedData.map(
|
||||
async (feedDataItem) =>
|
||||
await refreshOrAddFeed(elastic, node, feedDataItem)
|
||||
)
|
||||
(feedDataItem) => {
|
||||
return async () => await refreshOrAddFeed(elastic, node, feedDataItem)
|
||||
}
|
||||
),
|
||||
Number(process.env.STORAGE_BATCH_SIZE ?? 5)
|
||||
)
|
||||
}
|
||||
|
|
|
@ -2,6 +2,7 @@ import fetchRobotsTxt from '../Fediverse/RobotsTxt/fetchRobotsTxt.js'
|
|||
import { fetchNodeToProcess } from '../Storage/Nodes/fetchNodeToProcess'
|
||||
import { ProviderRegistry } from '../Fediverse/Providers/ProviderRegistry'
|
||||
import { setNodeRefreshed } from '../Storage/Nodes/setNodeRefreshed'
|
||||
import batchPromises from '../Utils/batchPromises.js'
|
||||
import { refreshNodeInfo } from './NodeInfo/refreshNodeInfo'
|
||||
import { setNodeRefreshAttempted } from '../Storage/Nodes/setNodeRefreshAttempted'
|
||||
import { findNewNodes } from './Nodes/findNewNodes'
|
||||
|
@ -37,24 +38,30 @@ export const processNextNode = async (
|
|||
}
|
||||
const provider = providerRegistry.getProviderByKey(softwareName)
|
||||
|
||||
await Promise.all(
|
||||
provider.getNodeProviders().map(async (nodeProvider: NodeProvider) => {
|
||||
await batchPromises(
|
||||
provider.getNodeProviders().map((nodeProvider: NodeProvider) => {
|
||||
return async () => {
|
||||
console.info('Searching for nodes', {
|
||||
domain: node.domain,
|
||||
provider: nodeProvider.getKey()
|
||||
})
|
||||
return await findNewNodes(elastic, nodeProvider, node, robotsTxt)
|
||||
})
|
||||
}
|
||||
}),
|
||||
Number(process.env.NODE_PROVIDER_BATCH_SIZE ?? 5)
|
||||
)
|
||||
|
||||
await Promise.all(
|
||||
provider.getFeedProviders().map(async (feedProvider: FeedProvider) => {
|
||||
await batchPromises(
|
||||
provider.getFeedProviders().map((feedProvider: FeedProvider) => {
|
||||
return async () => {
|
||||
console.info('Searching for feeds', {
|
||||
domain: node.domain,
|
||||
provider: feedProvider.getKey()
|
||||
})
|
||||
return await refreshFeeds(elastic, feedProvider, node, robotsTxt)
|
||||
})
|
||||
}
|
||||
}),
|
||||
Number(process.env.FEED_PROVIDER_BATCH_SIZE ?? 5)
|
||||
)
|
||||
|
||||
await deleteOldFeeds(elastic, node)
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
/**
 * A zero-argument function that returns a promise of `TResult` when called.
 */
type PromiseFactory<TResult> = () => Promise<TResult>
|
||||
|
||||
export default PromiseFactory
|
|
@ -0,0 +1,18 @@
|
|||
import PromiseFactory from './PromiseFactory.js'
|
||||
|
||||
/**
 * Runs promise factories in consecutive batches so that at most `batchSize`
 * of them are in flight at the same time.
 *
 * Every factory of a batch is invoked together and the whole batch is awaited
 * with `Promise.all` before the next batch is started. If any factory rejects,
 * the returned promise rejects with that reason and later batches are not started.
 *
 * The `promiseFactories` array is only read, never modified.
 *
 * @param promiseFactories - functions that each start one unit of work when called
 * @param batchSize - maximum number of factories invoked per batch; fractional
 *   values are rounded down, `Infinity` runs everything as a single batch
 * @returns the resolved values of all factories, in the order of `promiseFactories`
 * @throws RangeError when `batchSize` is NaN or rounds down to less than 1,
 *   because such a batch size could never make progress
 */
export default async function batchPromises<TResult> (
  promiseFactories: Array<PromiseFactory<TResult>>,
  batchSize: number
): Promise<TResult[]> {
  const size = Math.floor(batchSize)
  // Guard against sizes (NaN, 0, negatives) that would otherwise loop forever.
  if (Number.isNaN(size) || size < 1) {
    throw new RangeError(
      `batchSize must be a number >= 1, got ${String(batchSize)}`
    )
  }

  const results: TResult[] = []

  // Walk the input by index with slice() so the caller's array stays intact.
  for (let start = 0; start < promiseFactories.length; start += size) {
    const batch = promiseFactories.slice(start, start + size)
    results.push(
      ...await Promise.all(
        batch.map(async promiseFactory => await promiseFactory())
      )
    )
  }

  return results
}
|
Ładowanie…
Reference in New Issue