Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions config/updateCodeGov.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

// these will always stay constant
const CONFIG = {
testAgencyDirectory: path.resolve(__dirname, "../test-agency-indexes"),
agencyDirectory: path.resolve(__dirname, "../agency-indexes"),
outputFile: path.resolve(__dirname, "../codegov.json"),
regex: /^(.*?)-.*\.json$/
Expand All @@ -14,17 +13,16 @@
but that prove to be a disadvantage down the road */

// updates the codegov.json file with new data found from ./agency-indexes
async function updateCodeGov(isTesting = false) {
async function updateCodeGov() {
try {
const updatedJSON = {}
directoryPath = isTesting === true ? CONFIG.testAgencyDirectory : CONFIG.agencyDirectory

// read all files in the directory
const filenames = await fs.readdir(directoryPath)
const filenames = await fs.readdir(CONFIG.agencyDirectory)

// we know that the directory will only contain JSON files, so we don't need to check for non-JSON files
for (const file of filenames) {
const filePath = path.join(directoryPath, file)
const filePath = path.join(CONFIG.agencyDirectory, file)

try {
const content = await fs.readFile(filePath, "utf-8")
Expand All @@ -35,9 +33,9 @@
const agencyName = matches[1]

updatedJSON[agencyName] = jsonData
console.log(`✅ Successfully processed: ${file}`)

Check warning on line 36 in config/updateCodeGov.js

View workflow job for this annotation

GitHub Actions / Run linter

Unexpected console statement
} catch (error) {
console.error(`❌ Error processing file: ${file}`, error)

Check warning on line 38 in config/updateCodeGov.js

View workflow job for this annotation

GitHub Actions / Run linter

Unexpected console statement
}
}

Expand All @@ -47,8 +45,8 @@

return updatedJSON
} catch (error) {
console.error("❌ Failed to update codegov.json:", error)

Check warning on line 48 in config/updateCodeGov.js

View workflow job for this annotation

GitHub Actions / Run linter

Unexpected console statement
}
}

updateCodeGov(isTesting = true)
updateCodeGov()
149 changes: 97 additions & 52 deletions config/updateIssuePool.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
issueFilePath: path.resolve(__dirname, "../issue-pool.json"),
regex: /https?:\/\/github\.com\/([^\/]+)\/([^\/]+)/,
githubToken: process.env.GITHUB_TOKEN,
requiredLabel: 'code-gov'
requiredLabel: 'code-gov',
concurrentRepos: 6, // processing 6 repos at once, but we still need to find the sweet spot: at this rate it takes 18 minutes for the entire script to run through codegov.json. What we have here is the "bathtub curve", which we need to experiment with and solve 👀
rateLimitRemaining: 5000,
rateLimitReset: Date.now
}

// #region - Helper Functions
Expand All @@ -19,11 +22,29 @@
return HEADERS
}

/**
 * Wrapper around fetch that records the rate-limit headers of every response
 * in CONFIG and pauses before sending a request when the recorded quota is
 * nearly exhausted and its reset time has not passed yet.
 *
 * @param {string} url - passed straight through to fetch
 * @param {object} [options={}] - passed straight through to fetch
 * @returns {Promise<Response>} the fetch response, returned untouched
 */
async function fetchWithRateLimit(url, options = {}) {
    // CONFIG.rateLimitReset is only a numeric timestamp (ms) once a response
    // carrying X-RateLimit-Reset has been seen; anything else coerces to a
    // non-finite number here and simply means "nothing to wait for".
    const resetAt = Number(CONFIG.rateLimitReset)
    const quotaLow = CONFIG.rateLimitRemaining <= 10

    if (quotaLow && Number.isFinite(resetAt) && Date.now() < resetAt) {
        const waitTime = resetAt - Date.now() + 1000 // add 1 second buffer
        console.log(`Rate limit low (${CONFIG.rateLimitRemaining} remaining). Waiting ${Math.round(waitTime/1000)}s...`)
        await new Promise(resolve => setTimeout(resolve, waitTime))
    }

    const response = await fetch(url, options)

    // A missing header yields null and a malformed one yields garbage; both
    // parse to NaN. Storing NaN would make the `<= 10` check above always
    // false and silently switch throttling off, so keep the last good value.
    const remaining = Number.parseInt(response.headers.get('X-RateLimit-Remaining'), 10)
    const resetSeconds = Number.parseInt(response.headers.get('X-RateLimit-Reset'), 10)

    if (!Number.isNaN(remaining)) CONFIG.rateLimitRemaining = remaining
    if (!Number.isNaN(resetSeconds)) CONFIG.rateLimitReset = resetSeconds * 1000 // header is in seconds, Date.now() in ms

    return response
}

async function getRepoInfo() { // dont know how i feel about this double loop setup...
let repoInfo = []

try {
const content = await fs.readFile(CONFIG.repoFilePath, "utf-8") // filter by tier 3 maturity to get the projects that truly want outside help
const content = await fs.readFile(CONFIG.repoFilePath, "utf-8")
const jsonData = JSON.parse(content)

for (const agencyKey in jsonData) {
Expand All @@ -34,12 +55,17 @@

if (organization.repositoryURL) {
const match = organization.repositoryURL.match(CONFIG.regex)
const [url, owner, repo] = match

repoInfo.push({
ownerName: owner,
repoName: repo
})
if (match) {
const [url, owner, repo] = match

repoInfo.push({
ownerName: owner,
repoName: repo
})
} else {
console.warn(`No match found for URL: ${organization.repositoryURL}`)

Check warning on line 67 in config/updateIssuePool.js

View workflow job for this annotation

GitHub Actions / Run linter

Unexpected console statement
}
}
}
}
Expand Down Expand Up @@ -107,64 +133,83 @@
}
}

// #region - Main Function
async function updateIssuePool() {
const issuePool = {}
const repoInfo = await getRepoInfo()
const headers = getHeaders()

for (let i = 0; i < repoInfo.length; i++) { // switch to a forOf loop here?
const repo = repoInfo[i]
async function processSingleRepository(repo, headers) {
const repoIssues = {}

try {
const repoUrl = `https://api.github.com/repos/${repo.ownerName}/${repo.repoName}`
const repoResponse = await fetchWithRateLimit(repoUrl, { headers })

try {
const repoUrl = `https://api.github.com/repos/${repo.ownerName}/${repo.repoName}`
const repoResponse = await fetch(repoUrl, { headers })
if (!repoResponse.ok) {
console.error(`Failed to fetch repo info for ${repo.ownerName}/${repo.repoName}: ${repoResponse.status}`)
return repoIssues
}

if (!repoResponse.ok) {
console.error(`Failed to fetch repo info for ${repo.ownerName}/${repo.repoName}: ${repoResponse.status}`)
continue
}
const repoData = await repoResponse.json()
const repoLanguage = repoData.language || ""

const repoData = await repoResponse.json()
const repoLanguage = repoData.language || ""
let page = 1
let hasMore = true

let page = 1
let hasMore = true
while (hasMore) {
const issuesUrl = `https://api.github.com/repos/${repo.ownerName}/${repo.repoName}/issues?page=${page}&per_page=100&state=open&labels=${CONFIG.requiredLabel}`
const issuesResponse = await fetchWithRateLimit(issuesUrl, { headers })

while (hasMore) {
const issuesUrl = `https://api.github.com/repos/${repo.ownerName}/${repo.repoName}/issues?page=${page}&per_page=100&state=open&labels=${CONFIG.requiredLabel}`
const issuesResponse = await fetch(issuesUrl, { headers })
if (!issuesResponse.ok) {
console.error(`Failed to fetch issues for ${repo.ownerName}/${repo.repoName}: ${issuesResponse.status}`)
break
}

if (!issuesResponse.ok) {
console.error(`Failed to fetch issues for ${repo.ownerName}/${repo.repoName}: ${issuesResponse.status}`)
break
const issues = await issuesResponse.json()

// endpoint always returns both issues and pull requests so we ignore the PRs
for (const [index, issue] of issues.entries()) {
if (issue.pull_request) {
continue
}

const issues = await issuesResponse.json()

// endpoint always returns both issues and pull requests so we ignore the PRs
for (const issue of issues) {
if (issue.pull_request) {
continue
}

const transformedIssue = transformIssue(issue, repo, repoLanguage)
repoIssues[transformedIssue.id] = transformedIssue // is using the ID the best key name?
console.log(`✅ Processed ${index + 1}/${issues.length}: ${repo.ownerName}/${repo.repoName}`)
}

const transformedIssue = transformIssue(issue, repo, repoLanguage)
issuePool[transformedIssue.id] = transformedIssue // is having the ID is the best key name?
}
if (issues.length < 100) {
hasMore = false
}

if (issues.length < 100) {
hasMore = false
}
page++
}
} catch (error) {
console.error(`❌ Error processing ${repo.ownerName}/${repo.repoName}:`, error)
}

page++
}
return repoIssues
}

// #region - Main Function
async function updateIssuePool() {
const issuePool = {}
const repoInfo = await getRepoInfo()
const headers = getHeaders()

console.log(`✅ Processed ${i + 1}/${repoInfo.length}: ${repo.ownerName}/${repo.repoName}`)
// process repositories in chunks of CONFIG.concurrentRepos for parallel processing
for (let i = 0; i < repoInfo.length; i += CONFIG.concurrentRepos) {
const chunk = repoInfo.slice(i, i + CONFIG.concurrentRepos)
console.log(`Processing chunk ${Math.floor(i/CONFIG.concurrentRepos) + 1}/${Math.ceil(repoInfo.length/CONFIG.concurrentRepos)} (${chunk.length} repos)`)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔥


const chunkPromises = chunk.map(repo => processSingleRepository(repo, headers))
const chunkResults = await Promise.allSettled(chunkPromises)

chunkResults.forEach((result, index) => {
if (result.status === 'fulfilled') {
Object.assign(issuePool, result.value)
} else {
console.error(`Failed ${chunk[index].ownerName}/${chunk[index].repoName}:`, result.reason)
}
})

} catch (error) {
console.error(`❌ Error processing ${repo.ownerName}/${repo.repoName}:`, error)
continue
if (i + CONFIG.concurrentRepos < repoInfo.length) {
await new Promise(resolve => setTimeout(resolve, 1000))
}
}

Expand Down
Loading
Loading