#!/usr/bin/env python3 """ Load the "Upload" tab of a forecast workbook into fanalysis.GS.TBFC (SQL Server). Usage: python load_tbfc.py "" - Reads the saved workbook from disk (Excel need not be open). - Infers a SQL type per column from the Excel cell types: date cells -> DATE numeric cells -> DECIMAL(18,2) (rounded half-up) else (text) -> NVARCHAR(100) - Creates fanalysis.GS.TBFC on first run if it doesn't exist. - Replace-by-Source: deletes existing rows whose [Source] matches the Source value(s) in this upload, then inserts the new rows. Other scenarios already in the table are left untouched. - Integrated (Windows) auth to USMIDSQL01; the whole load is one transaction. """ import sys import datetime from decimal import Decimal, ROUND_HALF_UP import openpyxl import pyodbc SERVER, DATABASE, SCHEMA, TABLE = "USMIDSQL01", "fanalysis", "GS", "TBFC" SHEET = "Upload" HEADER_ROW = 8 # data starts on HEADER_ROW + 1 FIRST_COL = 2 # column B LAST_COL = 11 # column K NVARCHAR_LEN = 100 AMOUNT_DP = 2 # decimal places for numeric columns FQ_TABLE = f"[{DATABASE}].[{SCHEMA}].[{TABLE}]" def pick_driver(): prefer = ["ODBC Driver 18 for SQL Server", "ODBC Driver 17 for SQL Server", "SQL Server Native Client 11.0", "SQL Server"] avail = pyodbc.drivers() for d in prefer: if d in avail: return d if avail: return avail[-1] raise RuntimeError("No ODBC driver for SQL Server found.") def connect(): driver = pick_driver() cs = f"Driver={{{driver}}};Server={SERVER};Database={DATABASE};Trusted_Connection=yes;" if "18" in driver: # driver 18 encrypts by default cs += "Encrypt=no;TrustServerCertificate=yes;" return pyodbc.connect(cs, autocommit=False) def read_upload(path): wb = openpyxl.load_workbook(path, read_only=True, data_only=True) if SHEET not in wb.sheetnames: raise RuntimeError(f'Sheet "{SHEET}" not found. Tabs: {wb.sheetnames}') ws = wb[SHEET] headers = [ws.cell(HEADER_ROW, c).value for c in range(FIRST_COL, LAST_COL + 1)] rows = [] for r in range(HEADER_ROW + 1, ws.max_row + 1): vals = [ws.cell(r, c).value for c in range(FIRST_COL, LAST_COL + 1)] if all(v is None or (isinstance(v, str) and not v.strip()) for v in vals): continue # skip blank rows rows.append(vals) wb.close() return headers, rows def infer_types(headers, rows): """Return [(sql_type, kind)] per column; kind in {date, num, text}.""" out = [] for j in range(len(headers)): vals = [row[j] for row in rows if row[j] is not None] if vals and all(isinstance(v, (datetime.datetime, datetime.date)) for v in vals): out.append(("DATE", "date")) elif vals and all(isinstance(v, (int, float)) and not isinstance(v, bool) for v in vals): out.append((f"DECIMAL(18,{AMOUNT_DP})", "num")) else: out.append((f"NVARCHAR({NVARCHAR_LEN})", "text")) return out def coerce(value, kind): if value is None: return None if kind == "date": return value.date() if isinstance(value, datetime.datetime) else value if kind == "num": return Decimal(str(value)).quantize(Decimal(10) ** -AMOUNT_DP, rounding=ROUND_HALF_UP) return str(value).strip()[:NVARCHAR_LEN] def main(): if len(sys.argv) < 2: print("usage: python load_tbfc.py ", file=sys.stderr) return 2 path = sys.argv[1] headers, rows = read_upload(path) if not rows: print("No data rows found on the Upload tab.", file=sys.stderr) return 1 types = infer_types(headers, rows) if "Source" not in headers: raise RuntimeError(f'No "Source" column found. Headers: {headers}') src_idx = headers.index("Source") data = [[coerce(v, types[j][1]) for j, v in enumerate(row)] for row in rows] sources = sorted({row[src_idx] for row in data if row[src_idx] is not None}) cols_ddl = ",\n ".join(f"[{h}] {t}" for h, (t, _) in zip(headers, types)) create_sql = (f"IF OBJECT_ID('{DATABASE}.{SCHEMA}.{TABLE}','U') IS NULL\n" f"CREATE TABLE {FQ_TABLE} (\n {cols_ddl}\n);") col_list = ", ".join(f"[{h}]" for h in headers) insert_sql = f"INSERT INTO {FQ_TABLE} ({col_list}) VALUES ({', '.join('?' for _ in headers)})" cn = connect() cur = cn.cursor() try: cur.execute(create_sql) deleted = 0 if sources: marks = ", ".join("?" for _ in sources) cur.execute(f"DELETE FROM {FQ_TABLE} WHERE [Source] IN ({marks})", *sources) deleted = cur.rowcount cur.fast_executemany = True cur.executemany(insert_sql, data) cn.commit() except Exception: cn.rollback() raise finally: cur.close() cn.close() print("TBFC load complete.") print(f" Source(s): {', '.join(sources)}") print(f" Deleted: {deleted} existing row(s)") print(f" Inserted: {len(data)} row(s)") print(" Schema: " + ", ".join(f"{h} {t}" for h, (t, _) in zip(headers, types))) return 0 if __name__ == "__main__": sys.exit(main())