diff --git a/deno.json b/deno.json index 19b15ad..356097f 100644 --- a/deno.json +++ b/deno.json @@ -1,6 +1,6 @@ { "tasks": { - "scrape": "deno run --watch scraper.ts" + "scrape": "deno run -A scraper.ts" }, "imports": { "@std/assert": "jsr:@std/assert@1" diff --git a/scraper.ts b/scraper.ts index aca0210..7f77385 100644 --- a/scraper.ts +++ b/scraper.ts @@ -1,7 +1,7 @@ -#!/usr/bin/env -S deno run --allow-net +#!/usr/bin/env -S deno run --allow-net --allow-write // ------------------------------------------------------------ -// Utility: simple linear regression +// Linear Regression // ------------------------------------------------------------ function linearRegression(xs: number[], ys: number[]) { const n = xs.length; @@ -16,24 +16,31 @@ function linearRegression(xs: number[], ys: number[]) { den += (xs[i] - meanX) ** 2; } + if (den === 0) return null; // flat line → no regression possible + const slope = num / den; const intercept = meanY - slope * meanX; - // Compute R² let ssTot = 0; let ssRes = 0; + for (let i = 0; i < n; i++) { const yPred = slope * xs[i] + intercept; ssTot += (ys[i] - meanY) ** 2; ssRes += (ys[i] - yPred) ** 2; } - const r2 = 1 - ssRes / ssTot; - return { slope, intercept, r2 }; + const r2 = ssTot === 0 ? 0 : 1 - ssRes / ssTot; + + const start = intercept; + const end = start + slope*xs.length; + const growth = (end/start) - 1; + + return { slope, intercept, r2, growth}; } // ------------------------------------------------------------ -// Fetch S&P 500 constituents +// Fetch S&P 500 tickers // ------------------------------------------------------------ async function fetchSP500Tickers(): Promise { const url = @@ -43,69 +50,119 @@ async function fetchSP500Tickers(): Promise { if (!res.ok) throw new Error("Failed to fetch S&P 500 CSV"); const csv = await res.text(); - - // Simple CSV parsing const lines = csv.trim().split("\n"); - const header = lines.shift(); // remove header row + lines.shift(); // header - const tickers: string[] = []; - - for (const line of lines) { - const [symbol] = line.split(","); - tickers.push(symbol); - } - - return tickers; + return lines.map((line) => line.split(",")[0]); } // ------------------------------------------------------------ -// Fetch last 30 days of closes for a ticker +// Yahoo Finance fetch // ------------------------------------------------------------ +const now = Math.floor(Date.now() / 1000); +const thirtyDaysAgo = now - 30 * 24 * 60 * 60; +const args = `?period1=${thirtyDaysAgo}&period2=${now}&interval=1d`; +const closingEndpoint =(ticker:string)=>`https://query1.finance.yahoo.com/v8/finance/chart/${ticker}${args}`; + async function fetchLast30Closes(ticker: string): Promise { - const now = Math.floor(Date.now() / 1000); - const thirtyDaysAgo = now - 30 * 24 * 60 * 60; - const url = - `https://query1.finance.yahoo.com/v8/finance/chart/${ticker}?period1=${thirtyDaysAgo}&period2=${now}&interval=1d`; - - const res = await fetch(url); - if (!res.ok) throw new Error(`Failed to fetch data for ${ticker}`); + const res = await fetch(closingEndpoint(ticker)); + if (!res.ok) return []; const json = await res.json(); - const closes = json.chart.result?.[0]?.indicators?.quote?.[0]?.close; + const result = json.chart?.result?.[0]; + if (!result) return []; - if (!closes) return []; - return closes.filter((x: number | null) => x != null); + const closes = result.indicators?.quote?.[0]?.close; + return closes?.filter((x: number | null) => x != null) ?? []; } // ------------------------------------------------------------ -// Main +// Compute regression for a ticker // ------------------------------------------------------------ -const tickers = await fetchSP500Tickers(); +async function ComputeTicker(ticker: string) { + const closes = await fetchLast30Closes(ticker); + if (closes.length < 5) return null; -console.log(`Loaded ${tickers.length} S&P 500 tickers\n`); + const xs = closes.map( (_, index) => index ); + const ys = closes; -for (const ticker of tickers) { - try { - const closes = await fetchLast30Closes(ticker); + return linearRegression(xs, ys); +} - if (closes.length < 5) { - console.log(`${ticker}: insufficient data`); - continue; +// ------------------------------------------------------------ +// Concurrency Throttler +// ------------------------------------------------------------ +async function throttle( + items: T[], + limit: number, + fn: (item: T) => Promise, +) { + const queue: Promise[] = []; + + for (const item of items) { + const p = fn(item); + queue.push(p); + + if (queue.length >= limit) { + await Promise.race(queue); + // Remove settled promises + for (let i = queue.length - 1; i >= 0; i--) { + if (queue[i].catch(() => {}) && true) queue.splice(i, 1); + } } - - // xs = 0..n-1 - const xs = closes.map((_, i) => i); - const ys = closes; - - const { slope, intercept, r2 } = linearRegression(xs, ys); - - console.log( - `${ticker}: slope=${slope.toFixed(4)}, intercept=${intercept.toFixed( - 2, - )}, r2=${r2.toFixed(3)}`, - ); - } catch (err) { - console.log(`${ticker}: error (${err.message})`); } + + await Promise.all(queue); } + +// ------------------------------------------------------------ +// Main Dump +// ------------------------------------------------------------ +async function Dump() { + const spx = await ComputeTicker("^GSPC"); + + if (!spx) { + console.error("Could not get S&P Index data"); + return; + } + + const rows: string[] = ["ticker,slope,intercept,r2,growth"]; + const addRow = (ticker: string, model: ReturnType) => { + rows.push( + `${ticker},${model.slope.toFixed(6)},${model.intercept.toFixed(6)},${model.r2.toFixed(6)},${model?.growth.toFixed(2)}`, + ); + }; + + addRow("SPX", spx); + + const tickers = await fetchSP500Tickers(); + console.log(`${tickers.length} S&P 500 stocks found...`); + console.log( + `Finding stocks with slope better than the S&P Index slope (${spx.slope.toFixed(6)})...`, + ); + + // Throttle to avoid Yahoo soft throttling + const limit = 5; // adjust as needed + + await throttle(tickers, limit, async (ticker) => { + try { + const model = await ComputeTicker(ticker); + if (model && model.growth > spx.growth) { + addRow(ticker, model); + console.log(`${ticker}`); + } + else{ + //console.log("bad: ", ticker, model?.slope) + } + } catch (e) { + console.log(`Skipping "${ticker}" because: ${e}`); + } + }); + + await Deno.writeTextFile("sp500_regression.csv", rows.join("\n")); + console.log("Dumped output to sp500_regression.csv"); +} + +Dump(); +//console.log(closingEndpoint("^GSPC"))