stock-scraper/scraper.ts
2026-01-17 17:17:00 -05:00

345 lines
9.5 KiB
TypeScript

#!/usr/bin/env -S deno run --allow-net --allow-write
//?apikey=1zIRD8Xy8WLOOcTEjT94JnyHBeXDOTcS
// ------------------------------------------------------------
// Linear Regression
// ------------------------------------------------------------
/**
 * Fits an ordinary least-squares line `y = slope * x + intercept` to paired
 * samples and reports how well the line explains the data.
 *
 * @param xs Independent-variable samples.
 * @param ys Dependent-variable samples, paired with `xs` by index.
 * @returns `null` when the x values have zero variance (this includes empty
 *   input); otherwise an object with `slope`, `intercept`, the coefficient of
 *   determination `r2` (reported as 0 when every y equals the mean) and
 *   `growth`, the relative change of the fitted line between x = 0 and
 *   x = xs.length.
 */
function linearRegression(xs: number[], ys: number[]) {
  const n = xs.length;
  const total = (values: number[]) => values.reduce((acc, v) => acc + v, 0);
  const meanX = total(xs) / n;
  const meanY = total(ys) / n;

  // Accumulate the covariance numerator and the x-variance denominator.
  let covariance = 0;
  let varianceX = 0;
  for (const [i, x] of xs.entries()) {
    covariance += (x - meanX) * (ys[i] - meanY);
    varianceX += (x - meanX) ** 2;
  }

  // All x values identical (or no samples at all): the slope is undefined.
  if (varianceX === 0) return null;

  const slope = covariance / varianceX;
  const intercept = meanY - slope * meanX;

  // Total and residual sums of squares for the coefficient of determination.
  let ssTot = 0;
  let ssRes = 0;
  for (const [i, x] of xs.entries()) {
    const predicted = slope * x + intercept;
    ssTot += (ys[i] - meanY) ** 2;
    ssRes += (ys[i] - predicted) ** 2;
  }
  const r2 = ssTot === 0 ? 0 : 1 - ssRes / ssTot;

  // Relative change of the fitted line from x = 0 to x = n.
  const projected = intercept + slope * n;
  const growth = (projected / intercept) - 1;

  return { slope, intercept, r2, growth };
}
// ------------------------------------------------------------
// Fetch S&P 500 tickers
// ------------------------------------------------------------
/**
 * Downloads the S&P 500 constituents CSV and returns the value of the first
 * column of every data row (the row after the header onwards).
 *
 * Handles both LF and CRLF line endings, trims surrounding whitespace from each
 * symbol and drops blank rows so that no empty or control-character "tickers"
 * are handed to callers.
 *
 * NOTE(review): symbols are returned exactly as the CSV spells them (e.g. with
 * a dot for share classes); confirm the downstream quote API accepts that form.
 *
 * @returns The ticker symbols in file order.
 * @throws Error when the HTTP response status is not OK.
 */
async function fetchSP500Tickers(): Promise<string[]> {
  const url =
    "https://datahub.io/core/s-and-p-500-companies/_r/-/data/constituents.csv";
  const res = await fetch(url);
  if (!res.ok) {
    // Release the unread body before bailing out.
    try {
      await res.body?.cancel();
    } catch {
      // Nothing useful to do if the stream cannot be cancelled.
    }
    throw new Error("Failed to fetch S&P 500 CSV");
  }

  const csv = await res.text();
  return csv
    .trim()
    .split(/\r?\n/)
    .slice(1) // skip the header row
    .map((line) => (line.split(",")[0] ?? "").trim())
    .filter((symbol) => symbol.length > 0);
}
// ------------------------------------------------------------
// Date-range selection and Yahoo Finance fetch
// ------------------------------------------------------------
/**
 * Converts a date string into whole seconds since the Unix epoch.
 *
 * @param dateStr Text handed to the `Date` constructor.
 * @returns The timestamp in seconds, rounded down.
 * @throws Error when `Date` cannot parse the string.
 */
function parseDate(dateStr: string): number {
  const millis = new Date(dateStr).getTime();
  if (Number.isNaN(millis)) {
    throw new Error(`Invalid date format: ${dateStr}. Use YYYY-MM-DD`);
  }
  return Math.floor(millis / 1000);
}
/**
 * Determines the query window as a pair of epoch-second timestamps.
 *
 * Resolution order:
 *  - one command-line argument: a positive day count ending now;
 *  - two or more command-line arguments: the first two are start and end dates;
 *  - no arguments: an interactive menu offering 30 days, N days or a date pair.
 *
 * @returns `period1` (start) and `period2` (end) in seconds since the epoch.
 * @throws Error on an invalid day count, an unparsable date, a start date
 *   after the end date, missing interactive input or an unknown menu choice.
 */
function getDateRange(): { period1: number; period2: number } {
  type DateRange = { period1: number; period2: number };
  const SECONDS_PER_DAY = 24 * 60 * 60;

  // Window covering the `raw` most recent days, ending at the current time.
  const lastDays = (raw: string): DateRange => {
    const days = parseInt(raw, 10);
    if (isNaN(days) || days <= 0) {
      throw new Error(`Invalid number of days: ${raw}`);
    }
    const now = Math.floor(Date.now() / 1000);
    console.log(`Using last ${days} days.`);
    return { period1: now - days * SECONDS_PER_DAY, period2: now };
  };

  // Window bounded by two date strings; the start may not be after the end.
  const between = (startStr: string, endStr: string): DateRange => {
    const period1 = parseDate(startStr);
    const period2 = parseDate(endStr);
    if (period1 > period2) {
      throw new Error("Start date must be before end date");
    }
    console.log(`Using date range from ${startStr} to ${endStr}.`);
    return { period1, period2 };
  };

  // Command-line arguments take precedence over the interactive menu.
  const cliArgs = Deno.args;
  if (cliArgs.length === 1) return lastDays(cliArgs[0]);
  if (cliArgs.length >= 2) return between(cliArgs[0], cliArgs[1]);

  // Interactive mode
  console.log("\n=== Date Range Selection ===");
  console.log("1. Last 30 days (default)");
  console.log("2. Last N days (custom)");
  console.log("3. Custom date range (YYYY-MM-DD)");
  const choice = prompt("Choose an option (1-3):", "1");

  switch (choice) {
    case null:
    case "1":
      // Default: last 30 days
      return lastDays("30");

    case "2": {
      const daysStr = prompt("Enter number of days:", "30");
      if (!daysStr) {
        throw new Error("No days specified");
      }
      return lastDays(daysStr);
    }

    case "3": {
      const dateRangeStr = prompt("Enter date range (YYYY-MM-DD YYYY-MM-DD):", "2025-12-18 2026-01-17");
      if (!dateRangeStr) {
        throw new Error("No date range specified");
      }
      const parts = dateRangeStr.trim().split(/\s+/);
      if (parts.length !== 2) {
        throw new Error("Please provide two dates in format: YYYY-MM-DD YYYY-MM-DD");
      }
      return between(parts[0], parts[1]);
    }

    default:
      throw new Error("Invalid option selected");
  }
}
// Module-level request state. Dump() assigns all three before the first quote
// request is issued.

// Epoch-second bounds of the price window, as returned by getDateRange().
let dateRange: { period1: number; period2: number };
// Query-string suffix (period1, period2, interval) appended to every chart URL.
let args: string;
// Builds the Yahoo Finance chart URL for a ticker; read by fetchLast30Closes().
let closingEndpoint: (ticker: string) => string;
/**
 * Fetches the closing prices for `ticker` from the URL produced by the
 * module-level `closingEndpoint` builder.
 *
 * Despite the name, the number of closes depends on the date window baked into
 * that URL, not on a fixed 30-day span.
 *
 * @param ticker Symbol to request.
 * @returns The finite numeric closes in response order. Returns an empty array
 *   when the HTTP status is not OK or the payload lacks a
 *   `chart.result[0].indicators.quote[0].close` array.
 */
async function fetchLast30Closes(ticker: string): Promise<number[]> {
  const res = await fetch(closingEndpoint(ticker));
  if (!res.ok) {
    // Release the unread body so the underlying connection is not left held
    // open while hundreds of further requests are made.
    try {
      await res.body?.cancel();
    } catch {
      // Best effort only: a failed cancel must not turn "no data" into a crash.
    }
    return [];
  }

  const json = await res.json();
  const closes: unknown = json?.chart?.result?.[0]?.indicators?.quote?.[0]?.close;
  if (!Array.isArray(closes)) return [];

  // Entries may be null (or otherwise unusable); keep only real, finite numbers
  // so the regression never sees NaN or non-numeric values.
  return closes.filter(
    (x): x is number => typeof x === "number" && Number.isFinite(x),
  );
}
// ------------------------------------------------------------
// Compute regression for a ticker
// ------------------------------------------------------------
/**
 * Fetches the closing prices for `ticker` and fits a regression line through
 * them, using the sample index (0, 1, 2, ...) as the x axis.
 *
 * @param ticker Symbol to analyse.
 * @returns `null` when fewer than five closes are available; otherwise
 *   whatever `linearRegression` returns for the series.
 */
async function ComputeTicker(ticker: string) {
  const ys = await fetchLast30Closes(ticker);
  if (ys.length < 5) return null;

  const xs = Array.from(ys.keys());
  return linearRegression(xs, ys);
}
// ------------------------------------------------------------
// Concurrency Throttler
// ------------------------------------------------------------
/**
 * Runs `fn` over every item with at most `limit` invocations in flight, and
 * resolves only after every started invocation has finished.
 *
 * The previous implementation emptied its tracking queue after the first task
 * settled (a promise object is always truthy), so it neither enforced the limit
 * nor waited for still-running tasks before resolving. This version uses a
 * fixed set of workers that pull from a shared iterator instead.
 *
 * @param items Work items, started in array order.
 * @param limit Maximum number of concurrent invocations; values below 1 are
 *   treated as 1, fractional values are rounded down.
 * @param fn Asynchronous handler for a single item.
 * @returns A promise that resolves once all items are processed. It rejects
 *   with the first error thrown by `fn`; no further items are started after a
 *   failure.
 */
async function throttle<T>(
  items: T[],
  limit: number,
  fn: (item: T) => Promise<void>,
) {
  const width = limit >= 1
    ? Math.min(Math.floor(limit), items.length)
    : Math.min(1, items.length);

  // One iterator shared by every worker: each item is handed out exactly once.
  const pending = items[Symbol.iterator]();
  let failed = false;

  const worker = async (): Promise<void> => {
    for (const item of pending) {
      if (failed) return; // another worker already failed; stop starting work
      try {
        await fn(item);
      } catch (err) {
        failed = true;
        throw err;
      }
    }
  };

  await Promise.all(Array.from({ length: width }, () => worker()));
}
/**
 * Renders the regression results as a standalone HTML document containing one
 * table row per ticker.
 *
 * Tickers come from a downloaded CSV and the label may come from command-line
 * input, so both are HTML-escaped before being placed in the markup, and the
 * ticker is percent-encoded inside the link URL. Entries whose `model` is
 * `null` (allowed by the declared type) are skipped instead of crashing.
 *
 * NOTE(review): every link uses the `NASDAQ-` symbol prefix regardless of the
 * exchange a stock is listed on; confirm the target site resolves these.
 *
 * @param results Ticker/model pairs, rendered in the given order.
 * @param dateRangeLabel Human-readable description of the analysed window.
 * @returns The complete HTML document as a string.
 */
function buildHtmlTable(
  results: { ticker: string; model: ReturnType<typeof linearRegression> }[],
  dateRangeLabel: string,
): string {
  // Minimal HTML entity encoding for text and double-quoted attribute values.
  const escapeHtml = (text: string): string =>
    text
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&#39;");

  const rows = results.flatMap(({ ticker, model }) => {
    if (model === null) return [];
    const href = escapeHtml(
      `https://www.tradingview.com/symbols/NASDAQ-${encodeURIComponent(ticker)}/?timeframe=6M`,
    );
    const link = `<a href="${href}" target="_blank">${escapeHtml(ticker)}</a>`;
    return [`
<tr>
<td>${link}</td>
<td class="extra">${model.slope.toFixed(6)}</td>
<td class="extra">${model.intercept.toFixed(6)}</td>
<td class="extra">${model.r2.toFixed(4)}</td>
<td>${(model.growth * 100).toFixed(2)}%</td>
</tr>
`];
  }).join("");

  return `
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>S&P 500 Regression Results</title>
<style>
body { font-family: Arial, sans-serif; padding: 20px; }
table { border-collapse: collapse; width: 100%; }
th, td { border: 1px solid #ccc; padding: 8px; text-align: left; }
th { background: #f4f4f4; }
tr:nth-child(even) { background: #fafafa; }
.extra{display:none;}
</style>
</head>
<body>
<h1>S&P 500 Regression Results</h1>
<p><em>Date Range: ${escapeHtml(dateRangeLabel)}</em></p>
<table>
<thead>
<tr>
<th style="width:75px;">Ticker</th>
<th class="extra">Slope</th>
<th class="extra">Intercept</th>
<th class="extra">R²</th>
<th>Growth</th>
</tr>
</thead>
<tbody>
${rows}
</tbody>
</table>
</body>
</html>
`;
}
// ------------------------------------------------------------
// Main Dump
// ------------------------------------------------------------
/**
 * Produces a human-readable description of the date range, derived solely
 * from the command-line arguments.
 *
 * @returns "Last 30 days" with no arguments, "Last N days" with one argument
 *   (N being its integer parse), or "START to END" built from the first two
 *   arguments when two or more are given.
 */
function getDateRangeLabel(): string {
  const cliArgs = Deno.args;
  const count = cliArgs.length;

  if (count >= 2) {
    return `${cliArgs[0]} to ${cliArgs[1]}`;
  }
  if (count === 1) {
    return `Last ${parseInt(cliArgs[0], 10)} days`;
  }
  if (count === 0) {
    return "Last 30 days";
  }
  return "Unknown range";
}
/**
 * Script entry point.
 *
 * 1. Resolves the date window and configures the module-level quote URL
 *    builder.
 * 2. Fits a regression to the "^GSPC" index as the benchmark; aborts with an
 *    error message when that data is unavailable.
 * 3. Fits a regression to every S&P 500 constituent (five requests at a time)
 *    and keeps those whose `growth` exceeds the benchmark's.
 * 4. Sorts the kept rows by growth, descending, and writes them to
 *    `sp500_regression.html`.
 */
async function Dump() {
  type Model = NonNullable<ReturnType<typeof linearRegression>>;

  // getDateRangeLabel() only inspects the command line, so in interactive mode
  // it would always report "Last 30 days" even when the user picked another
  // option. Describe the range that was actually selected instead.
  const describeRange = (range: { period1: number; period2: number }): string => {
    const secondsPerDay = 24 * 60 * 60;
    const span = range.period2 - range.period1;
    const nowSeconds = Math.floor(Date.now() / 1000);
    // "Last N days" windows end at the moment they were created, which is only
    // an instant ago when this runs; explicit date pairs end at a parsed date.
    const endsNow = Math.abs(nowSeconds - range.period2) <= 60;
    if (endsNow && span > 0 && span % secondsPerDay === 0) {
      return `Last ${span / secondsPerDay} days`;
    }
    const isoDay = (seconds: number) =>
      new Date(seconds * 1000).toISOString().slice(0, 10);
    return `${isoDay(range.period1)} to ${isoDay(range.period2)}`;
  };

  // Initialize date range (from the command line, or interactively)
  dateRange = getDateRange();
  // Computed right away so the "ends now" check above is reliable.
  const dateRangeLabel = Deno.args.length === 0
    ? describeRange(dateRange)
    : getDateRangeLabel();

  args = `?period1=${dateRange.period1}&period2=${dateRange.period2}&interval=1d`;
  // Percent-encode the symbol: index tickers such as "^GSPC" contain characters
  // that are not valid verbatim in a URL path.
  closingEndpoint = (ticker: string) =>
    `https://query1.finance.yahoo.com/v8/finance/chart/${encodeURIComponent(ticker)}${args}`;

  const spx = await ComputeTicker("^GSPC");
  if (!spx) {
    console.error("Could not get S&P Index data");
    return;
  }

  // Only non-null models are ever stored, so sorting can read `growth` safely.
  const results: { ticker: string; model: Model }[] = [];
  const addRow = (ticker: string, model: Model) => {
    results.push({ ticker, model });
  };
  addRow("SPX", spx);

  const tickers = await fetchSP500Tickers();
  console.log(`${tickers.length} S&P 500 stocks found...`);
  console.log(
    `Finding stocks with growth better than the S&P Index growth (${spx.growth.toFixed(4)})...`,
  );

  const limit = 5;
  await throttle(tickers, limit, async (ticker) => {
    try {
      const model = await ComputeTicker(ticker);
      if (model && model.growth > spx.growth) {
        addRow(ticker, model);
        console.log(`${ticker}`);
      }
    } catch (e) {
      // One bad ticker must not abort the whole run.
      console.log(`Skipping "${ticker}" because: ${e}`);
    }
  });

  // Sort by growth descending
  results.sort((a, b) => b.model.growth - a.model.growth);

  // Build HTML
  const html = buildHtmlTable(results, dateRangeLabel);
  await Deno.writeTextFile("sp500_regression.html", html);
  console.log("Dumped output to sp500_regression.html");
}

// Run the script.
Dump();