From 97121465ddf772013604ffdb5d7378885bc6ee26 Mon Sep 17 00:00:00 2001 From: Jiwen liu <61498169+liujiwen-up@users.noreply.github.com> Date: Wed, 22 Nov 2023 03:42:10 +0800 Subject: [PATCH] feat: Add Apache Doris support (#24714) Co-authored-by: Evan Rusackas --- README.md | 1 + docs/docs/databases/doris.mdx | 26 ++ .../databases/installing-database-drivers.mdx | 1 + docs/src/resources/data.js | 5 + docs/static/img/databases/doris.png | Bin 0 -> 11539 bytes setup.py | 1 + superset-frontend/src/assets/images/doris.png | Bin 0 -> 11539 bytes superset/db_engine_specs/doris.py | 278 ++++++++++++++++++ .../unit_tests/db_engine_specs/test_doris.py | 147 +++++++++ 9 files changed, 459 insertions(+) create mode 100644 docs/docs/databases/doris.mdx create mode 100644 docs/static/img/databases/doris.png create mode 100644 superset-frontend/src/assets/images/doris.png create mode 100644 superset/db_engine_specs/doris.py create mode 100644 tests/unit_tests/db_engine_specs/test_doris.py diff --git a/README.md b/README.md index 757c0fb503..3588d99419 100644 --- a/README.md +++ b/README.md @@ -130,6 +130,7 @@ Here are some of the major database solutions that are supported: yugabyte databend starrocks + doris

**A more comprehensive list of supported databases** along with the configuration instructions can be found [here](https://superset.apache.org/docs/databases/installing-database-drivers). diff --git a/docs/docs/databases/doris.mdx b/docs/docs/databases/doris.mdx new file mode 100644 index 0000000000..62c16afeb3 --- /dev/null +++ b/docs/docs/databases/doris.mdx @@ -0,0 +1,26 @@ +--- +title: Apache Doris +hide_title: true +sidebar_position: 5 +version: 1 +--- + +## Doris + +The [pydoris](https://pypi.org/project/pydoris/) library is the recommended way to connect to Apache Doris through SQLAlchemy. + +You'll need the following setting values to form the connection string: + +- **User**: User Name +- **Password**: Password +- **Host**: Doris FE Host +- **Port**: Doris FE Port +- **Catalog**: Catalog Name +- **Database**: Database Name + + +Here's what the connection string looks like: + +``` +doris://<User>:<Password>@<Host>:<Port>/<Catalog>.<Database> +``` diff --git a/docs/docs/databases/installing-database-drivers.mdx b/docs/docs/databases/installing-database-drivers.mdx index f698b7ab8e..f11b4ec5eb 100644 --- a/docs/docs/databases/installing-database-drivers.mdx +++ b/docs/docs/databases/installing-database-drivers.mdx @@ -25,6 +25,7 @@ Some of the recommended packages are shown below. Please refer to [setup.py](htt | Database | PyPI package | Connection String | | --------------------------------------------------------- | ---------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ | | [Amazon Athena](/docs/databases/athena) | `pip install pyathena[pandas]` , `pip install PyAthenaJDBC` | `awsathena+rest://{aws_access_key_id}:{aws_secret_access_key}@athena.{region_name}.amazonaws.com/{schema_name}?s3_staging_dir={s3_staging_dir}&... 
` | +| [Apache Doris](/docs/databases/doris) | `pip install pydoris` | `doris://:@:/.` | | [Amazon DynamoDB](/docs/databases/dynamodb) | `pip install pydynamodb` | `dynamodb://{access_key_id}:{secret_access_key}@dynamodb.{region_name}.amazonaws.com?connector=superset` | | [Amazon Redshift](/docs/databases/redshift) | `pip install sqlalchemy-redshift` | ` redshift+psycopg2://:@:5439/` | | [Apache Drill](/docs/databases/drill) | `pip install sqlalchemy-drill` | `drill+sadrill:// For JDBC drill+jdbc://` | diff --git a/docs/src/resources/data.js b/docs/src/resources/data.js index a07be55267..42cf835a49 100644 --- a/docs/src/resources/data.js +++ b/docs/src/resources/data.js @@ -117,4 +117,9 @@ export const Databases = [ href: 'https://www.microsoft.com/en-us/sql-server', imgName: 'msql.png', }, + { + title: 'Apache Doris', + href: 'https://doris.apache.org/', + imgName: 'doris.png', + }, ]; diff --git a/docs/static/img/databases/doris.png b/docs/static/img/databases/doris.png new file mode 100644 index 0000000000000000000000000000000000000000..4d88f2a36cf721a1a8817f2bfc8c94eedf4f61c1 GIT binary patch literal 11539 zcma*N1yCG8*EYHY4FpR91h)`^2e;tv?(Tub-DMLzxCeKFEbgw024@$C1ZQyz{%_vz z{&nkCyAGt;S-X3gxmp3lES=1)sN@{XY^>C+%q)FeMyx&q0L+bYl46?POYn3N<^E4^ zhe!vImO}6&Ly#L@A7teKkdWU<@JRC^ZKHkvo{;{otVK*fNA?mR`|A6vBkWN(E1gs; z`pM^ksS|@Ak|+FUY;3u?%ZFlkD2VeVLDx%}>gX6xf!j`s$q@F}l}K8)O3vt_#25=m z1kU6FvRul!J=MXSLNFG?jZ1)^&;3G#$=#fwLt#HFU_n*Rnb)voIjz=v(XR)!(A+ED zdQwNqz>4t{6K?O{?~4y9wP`w|2 zk;PbxZI7FlUi%AVN0xk?eLk5nEmr~DAD8+RJc_a!B}%9h3@xiO8``fAY{tw}U;lmo zA;SVWEbUqfhnG!?(m?J@m1^@}zpOYV1{ z`U{cdoz*FZn-FwsdVfLqQXF&iOveZ3MtJrEK_1tBwn(!$uL!3 z&Jfg<@1{sPXxEsA$RS!m0i@@diPRLLJfd0~wXG>SpU$)}4ll%7wFvrmv{~o+0)^ho zI5RsyuZ7+2>7|>e-AchZz+|KO_JFntqwqXf$<-b$naImrp{T~I09>7N1+DGwjU^!N z-L);x%@BRJk7O&lR>*E&?Xp`_TG=_K0V{m>La7wqA>2kQ94Bu{Ia_sBa|zftzK$aXVahMV5+jzafo-|mQ-8yMtsCEar2+4`Zr;D-$v_Vd~YAX zCo{}}j@7ke7*|-jNM%5Bes|^IHdLNZ4rm+VHmJ-!p;=r9i)X_WqUdMdy>p`c7P_&d 
zyG)x7rRg*g*t!}NU$FIo!etezdM^q{+=vUBXAX6_UaTa!-gg~U_!qrC0y9&Nt<*tI zs^g;e+hZe_OS*OZW%g|=s(J^LYFb~`mNH#8e{Uw};ca?6NC_{Fx@^%v@|Q9bs-46D zvt)M53CnR(FDkNL2OQm58r^DhZ1AaMzaYhM{AuU)Mb+TOjlZY3XGytE9=O(~9N|=r zI}hAv83^oWXB(LPF}+4j@Vyg8S4NaLoezpqx6Z}cBO(lhGQ8Z2bdHvTgh?+ADzi(MPgnn(Lp9jF7 zR--436}9^V4l(R&g-r8rL|rAZwfR$(PDz8x8_6x(c>Abc`k9?MlSRk*Wj;2=(+!H3 zb}Y%bUj&+ zPoo$Is{IFV3a|0C!x?QW(&s-Dn+%E;9n`@&o-R_+A{Nb8UC>mMyfp(x@M}^ts8X1Y zP<;$|9*vgr)b888Q2HW&-l+xHE4^8lm)pAch;Z)R+&8Lz7q*&EaMDwP#%)Wl4Hihk z+@V`ET~FX+gLOGY{i8GXK;$y8Pz4fW@?<|r zC#yxIhNd{km8}iHcMFR0iA5yTcNO(9f( zC20yOg8gF3VI{kJbWg*g;`ZceEO-YMnuWQHP2K1*@b0W-O^ zvujxjPGk%;=6%_O{_VA>-!Bje?f*SQok1pXcXyl1Qg^$~SbuM;y_^W`J@>L}JZ`>j zA1_UjnmPE9QL(tQRstn(icpCJocxOBT+r~E68xKGvDl?xq%K5MOzOwA?o8F591-j59pw?>^Ia%np9rph(fab7wW zF><hV%t7{7~*X+)MBKED6ndbUU?o+HSjcP*?Lw>)J+rmJxH@v!?cZ(=r~=1n>4wcK|y z*A-L$m`Pr(6%?Dn$lvkmFX(MaM-LvB@McQX=<#W$hrZ`g{E7Pv^{V)LrbBm*BM+0e z4X|YCuvt9!^aGF7(&ykiuFvZgNk6%nf%r%YYP1s;X!Sp^obaVZv^sq1S}Gb$|67%e zyC8y|4BUQCXofrEc;{^QOo?Y#u`7>$Xr;F;S>dlOO_YsCKfDy1$Nfft(l$5tsj2Zf z`RP%$iK5ouMRnxQ>O6a0llI_n3$B*AJS00U<>3G)@8dV}s7iG!Q6K+YxvjPMgSM)# z#YNhk-_>C~b+HDXo_g#rrz1E9kw_Ix|jCPAG*0 zX!i=82XH< zm!pZcgn;3hs>iXg}KtgrmHuwwdqttM%0AL8rTb| zbp6Aj|Iaby7KqieM!d!2D1oGjvjey;7<+*;g*+h2zFey^W!bh@{-xs(Jo3^!muep@ zSh`mpt?c^MrTOcNGbGFb(KMq0-eLa_?wcfpa&kqoawkY(*5(EUl4tve(M3rASH^$4 zlE_=^wmtm5QL)hTXFg52?OyN9R@(*yo8l^5NfP2xXg9uIj4cZUh}KlQ2H?~z#YCj$ zY6F5DF`FbA0n;x4pqO{9H2Q$OCQ~V}O5L6ejH>`OcmeC0-xVK^1wf@Mi3RxSlI(q} zzArzoQa_RNEMyCdmN_$Mu9xIWRPR$4pJ!y?d4JY-fEWQ8>oJf&|rk zgeOa)$ghOMP-gH|5IW^rVE5)*WYHQ1Odh%EA2Y}_Fp2v)jOoDJd8i%VIc)Y|u79LH z=;;YaZfoMNh}n>QuJR&f((9%_CD}t$=GIcMGwM({WiUJ3d`(u572n>Y_XF+HYgdQw zUxcu(@#I@>kjAP>j%<6*Rf-0GNy=j1bslx06y zo`_M;%cLvU|Db9Fgw3FzsXO16ut&s3Yfnd5ViaQxY1#`!RltCXi(U(I~vNPDK zyE$288*5A8cT)_}hivgL+)nx}DgoJn!%E~WzrGivW6v#kCtGUB5u=D-QL=K+>54Ej zyy?9q9Ljf?}FkJJK3!o)p_3Y|d-_k~!Xbd?sIy7gCBa1;&0+GRG#X{n6C< z#pP`~kF2@%t*>!|n_lTB#$=IX!+9THwM%gRkOj}|w(|(``u?!zjmPSi+mq5lps^&= 
zbmf(0g`oMwrlIldD?`##B^oIoBGDZAtViT!VuP*&kByL0Uy>s@R6KW=)o|}m4IK=C zhhmUpi#*U4y4g7Rp6i+$kVg}QrfW03cf^s86)V>?yUO$PzEjxJ5x09tkpJFx{4EA3 z1v7s5s)^csm1CqWQt4G@7FtFQ8R^(+^f@EDtQR+SkJpfzWP3}gQ z+HMES)4-TbKh0N1uUZCOxrOZAS5G=V-UwXw$H%|PYmu&B++@`$WxyJ`c;bz1D#G}Y zjtMJn)F3^0`0*3CFz2zr!tIyXrv8RX6i z4HB^aP94o)WH72_H|MbLFfycQga)ra)nH;u%J-#3hyhA=ea;F?J zmfUs%z!Eb|Q{x4K*ig1JQ(N6tsuD2up_fJ*vF3*8+TcSdX|(}m;%_Ki=hyrI|HIj? z(uG_Er3JqhhmQ`B$BADVYp(TUgMI1%p9$cuQzKZwtsMAW7YWci6!T@5W`S4>7l6@} z?hWw^Qtx(aEcx-jq=c2f%m6=|%4O+TG*h31z+#&17aY#GR7mK}^de&Rr7*^bxnhI+ zm*yfN*{s=bi{$YIZ%`H*hmaPK$xO+?E8@OinV*eFruYd*h_P(~UAu7czI|MX;FJiw zzzu(R`QOsd|Gzemy0kl-jt5sLw^QlP)o2IW^`By&iHXDhlWq!Tz6nIOijMTKTN?@q zV(lX*Sp@U{OJL2>S_SiCi6$l_Bgbw_2wyyXgrZ+8RGVrD73cvXCRARIcxWXUhNdj8 z1<&FFs)#BRHmiINwL2F55J{=(nC8>#7*k3br(9V=z!E)-WH8(k zDbE8R?hw-*in!AP9W6ZgA^P-bVIM^60P)SHq^V?ysKJ%AFvtGxI7_5p))xq-Bx*v_ zJ;O)si3RcP06>H(ZS8M703wAFDbeZyhUIHGrL6WzMq!7l(v0tw;dH5k%IAzxvu)@Pr61WL8SMSe-7iSjr#L^$%RzT*FD{@ zu#o}ya9mdcvsgILN|=(C_ z`jM{`d&{e_AeHUW`>Z3tI+q$e?^E~PWsULy8LEF@wpriDAL9T7J+)dWd z3`4s%tfrCqbHlF`4GrxZ>LfpXs>Umt2KH&RnZjLAGc%V0-qAUCP>z-Jc-2Z)Uawp- z!3?`W?EsVt#kw~#(BKL&T>TaL`(1Av(#8S%j#bRTyX0lqc6D7gY|kjuuvl@HhbK6B zhJc4*v2k}Zwfg*(zHnO}B~xErfKSDb(RHN_-8cu4>L0`0GoiS>J2ZOrt2wm3*j3{k zZFxuhRwoW$fgBt^oI0ZFb>_1I4wyV?Zxq_F)P09db7~|;W0GZh;lUDAq!nv z8f)1bnsYSb^Qu)h%F+*?qGntocLrT}ASv3XP(d8kU58AV_s;T9z0;0IQ*9?L6Wo#U zh<=^``yk3P0O*nuDcjz9H4?=<0bC#Ddmnx>hm+5^?2N9x$C}DRgXz1(Tu8xFKwoW1 zD!i7|gJm*H<7$NBD;0j7(VdoWE7X1x{dP%uRAFc!wNe8txOQxFzcDuI$+ki0pC)X( zUYj)hrQ7m$x9OoP;zq2u_F7K+@N02}(Fs8PT|r8lsfGLkJ@g*aQ3K5rZl4NUUX7Gd z52fctiKx;UEJBY={;Z~MGE(zV$7?oWrp;x+_9Z{FuWqBI#FoBRKS}0M3`Z(_oOV(C z-qFeDo}v}Z#QZ#kh-Fkr`t}ySJx3z;K4F=F-H2~C8`EN_#1E+gZ=FklNNQ3hy|hbi zL#k@^@;5)|C8=QEiP#Ciahfm80E9H0Ygz0qQ; zYVfvTOUgw>uSJ=fQMzcu`5NY1A%Ymi5Ide1HLTj24-Rn4T1jGz2u(4=&cRSB`mk!+$_ zWw9%sz0Xs$AvOCbj>Um(&Z-r3ZmGV2g>&0b-I&$?jp!at8UCA2V`?1&wZ({Ybd5{G zAcy*JI(83j$@T=|iao^!u|eQ4HH>_#E!=QkRa^C9V9Z2Xv~DuLxi6fDUnz0enU!hD zI3(b>h-$GC6=BL*xg~OzAcXR`0tEI681P}G3U~E 
zhI`d7%cnJ!oV5;;XEr%*h1zwCFL^c{Lsl+)*91w_5VUgr*zisn5Idb49CiawDj9Ya zCck-iu-=d=#MJv?ZMQ8Ki2PDEnS5s0@O6$fYd0|Gp`)a~<%n8aXO?3X-zBD8HPeKL zwD$umwBqn9FTYGp1xLa@=Q!Ex?{(6t2aj#9H5JsjBqrYpJ)}MMu}$&(?R}5M zQ3Bi!a-Qq?prI8<`gvfPm18wa{ZXiw8|_vcn4d(z?5f4EkU&szA%nEelEM=7 zr*FgWOT!nA`qW(-H@%B3Ea;iPB0nF@jMQlTJByOuubf27I-)K8YA*6;mgk!|zWNAWlTCz)m9lep`j+HntNmPM(;}qX~w%sSUbx)a-73E=HRk zPTAO6)oJx0rq(G#7BoC`u<;W{Nti{S=nvo!q~tLFlLA3aa)D zzsIHn*_&gx?n2&-Q|8@JOeCslj3xkr4W9kCJ&$jO|InwHMIGiACScJWpGpUTUJ#vB zNWF}QK~i9VC<8Dnn2>ip5wNRvj?%(TYyG8vkQ2Dm?mBl8{fo1w zjVyOXMSE8z82ta;&Hb+`{4b5c#|COBV`nGN7iP)EJhRtsOAB`Xb5={rGuOu^K|byU zmPi1k)DOsqV`TcfEaFEM4dg|tem^h&wcdvD7U$R9QK<0kPe`9s( z8H;KH6H3vL0lET0HKFQx_gp>IRWgJKWduo#AzIJjxEOT+Q6^$>(U%g4@3=@!7OVsz zR9FD!aB7rLb<=y@#-I_HR5UdzdxK~XI$%c{*S-}RwZSxvMcP?ANq`FY#O7-Xj4;@p zzjAQz@;tBZ{=B6A3LrXr*|SiekKx#uk+z76khV;tLQ6NW=9nc8R~Q5UxJK|mXhrcn z@@$kT16A4@u-uPmfV`K!rlfeA%noN38=AKMvz$^0!tL<0I32%mXGB7D{I%KY$ z=$w4-HYfBvI+qrQmSse;D`Ekn@0RdD81a8dD9yfzWn|b;%WisPjoBW2M%U|H1W@&0 z)_~xKa_&AH6{f^$Iwl8`X&nq{)yzCE$+5r z<<#W*!F#}C(+Vq2j%)n>=R@4w{!S-`l8sHfG6IBK=4{Sm~ZP-=lpSq8UtJ zYCvp?x=7;IwSZ9#svLSMNkT~WEL*vENB;nVV|lcx$^2CGLV+ox>mX?dA4vw2+r_Uc zZC>S3wG4UP!%n?ADiyoA=MV$4dDeH7x5@jFb8Z?0vUi8>)UzGOwHYVhAYI=)k`$-a z$3C9EF-3(bqrmb}7AwNaoril`?)=*-Z<9Nz1Sf|AgNB};L@Gplmt?dajcj8pJn=M7a`Qe$l0z%fM8r0Bd@Z^ z?%lMdk%>I+Ld*%l`fg2r9I)uD)54^X0?zF{VL0k=3KAf%H?|+QHKEy)tiHES8?AFd z;(0!8`{;k)7o@tahnKVUr;82|h(@n!0$$d6d_8RW;Z%L-b+h_z(GwkJaD61@>$4!5 z9hWn5g_?FaUmmFSxVBRq!#1(q4egPsVsjM$^Ps|<9z@IB$PHmep@e9#nS9;~hupgF z_xDLOx>5VD8;NY0+N_8a=1PvtxX}3V*ccy#6ob4vom+bJmsnHZ8A~MJNEv?&!HL{R zZ3=HSTdCGSFA)3HX%_wEp%A)VAnn~LaPr%a?P9ac@Ja0&_d+;DU<}D}@9LH-ZC7kl z=2hz}g}%eKQ^p>S4k`oQ7GYU~OW{V_nn&xUQWe`}ZEQhO>%y~YH_|JMW7JF=@0cDR z!`+VPwck7wWq9|Ael>?Ei@U8hnnq8MbuX--c%gK!$D@Q!GP-m< zipn76N_myou;Ph1=(cz-(umv1o}RQ>&Muusa^f!ey1p%h3YR?n%Aapv!n>2$T!a5X zcS86ZO)T^m-l2UCzQuy^-bKhHJxMl)hk#N+pmeo;O8;rafp4G@^dWW%d3l1$V1;&B z)A>j(?$GG7>C7gcmQi&XSM&th(`7yA27X!dpwKmYjdyqu!w<4)qqv 
z*u|(l5$ct-gv#cL7!P4~nIP|(!;$@ERPHK9V=ejapD=*J@>e(j;XaWH>ueAG(;SzW zZk_WruhD1DuX{&P;S1*n+H`-4p)68&S`nT6yl2x{&xQ}f0uMBEUA*)szlo8TtEt*A zuB(F_;af+y@1RmBB&o|~**j+rRckWa&lwOQ>`Hv!-_%t{0wRb_&;bYNS9=ek_crvn z6A@-e(dnBsOOai*l=Jl@Oc|A&F=VB+ZKVsU`aOTSEQ|&^S;)ox=*UWz2FVsQVl6YJxigY_quCH z>jF2ofi5fUbNrP7I%vu+htt5Vs}ugBa)Ysj=ji;XFc;^o#Ld?)ep680y?nXxwDv zt+mc$L`Jr4LmQEyj(q=NPia3RqZ7hjy4g3}XBNelWOpvPsSV@n+btVa)dD1r=GMV_ zX_Ko8+n3YoCE3^w><-ZAhN=PW`5YujhRKHJUdU)@s;dJ(VZ%~5N8(?y79X$aN5x*P%>RBS9sdI#jc z6CgEKAOWqG4CQZIY1%wisu{PPi-nIpiRGBvX`WvHVvGLaw@owf>#mOX?G8ts}hcRR#>O& z`&)^HQ@L%?y{~xua25v~DLVa3!qj_zfv0m1I#k=w6w1;X^T7lkM&&2Foc^V3ZuHh?G<|xnYl1wR%R{pcIkm{RB<+8YJh+4-q9NPjGa`- zME=GOwF<;oM>HI*y7+N+)okN6()u@f7(zWO>E4f=xwf-RUpPLNfJ1IY~jncst!c|b2 zR_a_Xce0(@gQ+}UqwWg{>-HZ3Ilt43H)Mxz$LCjr{e_hV`SEra+oe`wCvlC)d7P`x zAEZK3r~?)eKv&EZl|9!0m8px2`Xa_8Y8gt}k}$hYPWUuN{rJT5Gf zadD5GErXM1K)peYmj{iDdr@APx2fVVTzI(!lyM;$Okjv_=y!ZM#Qxa*&^@_I^4M7$ z>*}4*%*eY^xbj8C#y%a&PYC(lm{S-^ewfb5vy`lthLp=H)jkhNnK=GfU3frxQM$u$ zSb}RCZcF--xAd~TJIw=MAZT06>5k5EzW&92Y$|P zxsmGmzR{2gI<&2td^)vWXnnv#Yz!cIzRdsIpNTODkvF%NVQA1#`BzgCkr~srI5NKM zMQkCMw&NLR{xHyXMg_E>*W=A$RYUqJHZTGIiiqmv6f@-iNcSAhq4^KLrZ+i(-fw0t zW0Jr957C(@J|YJN0~C0ViZ*wij`o?Z5&3jPe^#|rt+0P;qW z8pNCW0#Id&A%?9Y(&3X~oPz~M{6E+}O&qT0tM>+>HVTY26c(o?vJ={=xGaqc-i>8M zxZql3pi70#0-?2N@R52%On6%?LI9%v1d8#9+h^K*{EvpI)#CQdyrg2D%m^U}E3oR9 z_vuPjd~1q~_>`8lF~MoEXb5H;!1UE|GtY+LskVyde}1qPm_neCZ%dH*E9b9zUyt59 t;Hy8UHYL5)rI#o8UyA+z+Pt*?jMPW=0.5.9, < 0.6"], "netezza": ["nzalchemy>=11.0.2"], "starrocks": ["starrocks>=1.0.0"], + "doris": ["pydoris>=1.0.0, <2.0.0"], }, python_requires="~=3.9", author="Apache Software Foundation", diff --git a/superset-frontend/src/assets/images/doris.png b/superset-frontend/src/assets/images/doris.png new file mode 100644 index 0000000000000000000000000000000000000000..4d88f2a36cf721a1a8817f2bfc8c94eedf4f61c1 GIT binary patch literal 11539 zcma*N1yCG8*EYHY4FpR91h)`^2e;tv?(Tub-DMLzxCeKFEbgw024@$C1ZQyz{%_vz 
z{&nkCyAGt;S-X3gxmp3lES=1)sN@{XY^>C+%q)FeMyx&q0L+bYl46?POYn3N<^E4^ zhe!vImO}6&Ly#L@A7teKkdWU<@JRC^ZKHkvo{;{otVK*fNA?mR`|A6vBkWN(E1gs; z`pM^ksS|@Ak|+FUY;3u?%ZFlkD2VeVLDx%}>gX6xf!j`s$q@F}l}K8)O3vt_#25=m z1kU6FvRul!J=MXSLNFG?jZ1)^&;3G#$=#fwLt#HFU_n*Rnb)voIjz=v(XR)!(A+ED zdQwNqz>4t{6K?O{?~4y9wP`w|2 zk;PbxZI7FlUi%AVN0xk?eLk5nEmr~DAD8+RJc_a!B}%9h3@xiO8``fAY{tw}U;lmo zA;SVWEbUqfhnG!?(m?J@m1^@}zpOYV1{ z`U{cdoz*FZn-FwsdVfLqQXF&iOveZ3MtJrEK_1tBwn(!$uL!3 z&Jfg<@1{sPXxEsA$RS!m0i@@diPRLLJfd0~wXG>SpU$)}4ll%7wFvrmv{~o+0)^ho zI5RsyuZ7+2>7|>e-AchZz+|KO_JFntqwqXf$<-b$naImrp{T~I09>7N1+DGwjU^!N z-L);x%@BRJk7O&lR>*E&?Xp`_TG=_K0V{m>La7wqA>2kQ94Bu{Ia_sBa|zftzK$aXVahMV5+jzafo-|mQ-8yMtsCEar2+4`Zr;D-$v_Vd~YAX zCo{}}j@7ke7*|-jNM%5Bes|^IHdLNZ4rm+VHmJ-!p;=r9i)X_WqUdMdy>p`c7P_&d zyG)x7rRg*g*t!}NU$FIo!etezdM^q{+=vUBXAX6_UaTa!-gg~U_!qrC0y9&Nt<*tI zs^g;e+hZe_OS*OZW%g|=s(J^LYFb~`mNH#8e{Uw};ca?6NC_{Fx@^%v@|Q9bs-46D zvt)M53CnR(FDkNL2OQm58r^DhZ1AaMzaYhM{AuU)Mb+TOjlZY3XGytE9=O(~9N|=r zI}hAv83^oWXB(LPF}+4j@Vyg8S4NaLoezpqx6Z}cBO(lhGQ8Z2bdHvTgh?+ADzi(MPgnn(Lp9jF7 zR--436}9^V4l(R&g-r8rL|rAZwfR$(PDz8x8_6x(c>Abc`k9?MlSRk*Wj;2=(+!H3 zb}Y%bUj&+ zPoo$Is{IFV3a|0C!x?QW(&s-Dn+%E;9n`@&o-R_+A{Nb8UC>mMyfp(x@M}^ts8X1Y zP<;$|9*vgr)b888Q2HW&-l+xHE4^8lm)pAch;Z)R+&8Lz7q*&EaMDwP#%)Wl4Hihk z+@V`ET~FX+gLOGY{i8GXK;$y8Pz4fW@?<|r zC#yxIhNd{km8}iHcMFR0iA5yTcNO(9f( zC20yOg8gF3VI{kJbWg*g;`ZceEO-YMnuWQHP2K1*@b0W-O^ zvujxjPGk%;=6%_O{_VA>-!Bje?f*SQok1pXcXyl1Qg^$~SbuM;y_^W`J@>L}JZ`>j zA1_UjnmPE9QL(tQRstn(icpCJocxOBT+r~E68xKGvDl?xq%K5MOzOwA?o8F591-j59pw?>^Ia%np9rph(fab7wW zF><hV%t7{7~*X+)MBKED6ndbUU?o+HSjcP*?Lw>)J+rmJxH@v!?cZ(=r~=1n>4wcK|y z*A-L$m`Pr(6%?Dn$lvkmFX(MaM-LvB@McQX=<#W$hrZ`g{E7Pv^{V)LrbBm*BM+0e z4X|YCuvt9!^aGF7(&ykiuFvZgNk6%nf%r%YYP1s;X!Sp^obaVZv^sq1S}Gb$|67%e zyC8y|4BUQCXofrEc;{^QOo?Y#u`7>$Xr;F;S>dlOO_YsCKfDy1$Nfft(l$5tsj2Zf z`RP%$iK5ouMRnxQ>O6a0llI_n3$B*AJS00U<>3G)@8dV}s7iG!Q6K+YxvjPMgSM)# z#YNhk-_>C~b+HDXo_g#rrz1E9kw_Ix|jCPAG*0 zX!i=82XH< zm!pZcgn;3hs>iXg}KtgrmHuwwdqttM%0AL8rTb| 
zbp6Aj|Iaby7KqieM!d!2D1oGjvjey;7<+*;g*+h2zFey^W!bh@{-xs(Jo3^!muep@ zSh`mpt?c^MrTOcNGbGFb(KMq0-eLa_?wcfpa&kqoawkY(*5(EUl4tve(M3rASH^$4 zlE_=^wmtm5QL)hTXFg52?OyN9R@(*yo8l^5NfP2xXg9uIj4cZUh}KlQ2H?~z#YCj$ zY6F5DF`FbA0n;x4pqO{9H2Q$OCQ~V}O5L6ejH>`OcmeC0-xVK^1wf@Mi3RxSlI(q} zzArzoQa_RNEMyCdmN_$Mu9xIWRPR$4pJ!y?d4JY-fEWQ8>oJf&|rk zgeOa)$ghOMP-gH|5IW^rVE5)*WYHQ1Odh%EA2Y}_Fp2v)jOoDJd8i%VIc)Y|u79LH z=;;YaZfoMNh}n>QuJR&f((9%_CD}t$=GIcMGwM({WiUJ3d`(u572n>Y_XF+HYgdQw zUxcu(@#I@>kjAP>j%<6*Rf-0GNy=j1bslx06y zo`_M;%cLvU|Db9Fgw3FzsXO16ut&s3Yfnd5ViaQxY1#`!RltCXi(U(I~vNPDK zyE$288*5A8cT)_}hivgL+)nx}DgoJn!%E~WzrGivW6v#kCtGUB5u=D-QL=K+>54Ej zyy?9q9Ljf?}FkJJK3!o)p_3Y|d-_k~!Xbd?sIy7gCBa1;&0+GRG#X{n6C< z#pP`~kF2@%t*>!|n_lTB#$=IX!+9THwM%gRkOj}|w(|(``u?!zjmPSi+mq5lps^&= zbmf(0g`oMwrlIldD?`##B^oIoBGDZAtViT!VuP*&kByL0Uy>s@R6KW=)o|}m4IK=C zhhmUpi#*U4y4g7Rp6i+$kVg}QrfW03cf^s86)V>?yUO$PzEjxJ5x09tkpJFx{4EA3 z1v7s5s)^csm1CqWQt4G@7FtFQ8R^(+^f@EDtQR+SkJpfzWP3}gQ z+HMES)4-TbKh0N1uUZCOxrOZAS5G=V-UwXw$H%|PYmu&B++@`$WxyJ`c;bz1D#G}Y zjtMJn)F3^0`0*3CFz2zr!tIyXrv8RX6i z4HB^aP94o)WH72_H|MbLFfycQga)ra)nH;u%J-#3hyhA=ea;F?J zmfUs%z!Eb|Q{x4K*ig1JQ(N6tsuD2up_fJ*vF3*8+TcSdX|(}m;%_Ki=hyrI|HIj? 
z(uG_Er3JqhhmQ`B$BADVYp(TUgMI1%p9$cuQzKZwtsMAW7YWci6!T@5W`S4>7l6@} z?hWw^Qtx(aEcx-jq=c2f%m6=|%4O+TG*h31z+#&17aY#GR7mK}^de&Rr7*^bxnhI+ zm*yfN*{s=bi{$YIZ%`H*hmaPK$xO+?E8@OinV*eFruYd*h_P(~UAu7czI|MX;FJiw zzzu(R`QOsd|Gzemy0kl-jt5sLw^QlP)o2IW^`By&iHXDhlWq!Tz6nIOijMTKTN?@q zV(lX*Sp@U{OJL2>S_SiCi6$l_Bgbw_2wyyXgrZ+8RGVrD73cvXCRARIcxWXUhNdj8 z1<&FFs)#BRHmiINwL2F55J{=(nC8>#7*k3br(9V=z!E)-WH8(k zDbE8R?hw-*in!AP9W6ZgA^P-bVIM^60P)SHq^V?ysKJ%AFvtGxI7_5p))xq-Bx*v_ zJ;O)si3RcP06>H(ZS8M703wAFDbeZyhUIHGrL6WzMq!7l(v0tw;dH5k%IAzxvu)@Pr61WL8SMSe-7iSjr#L^$%RzT*FD{@ zu#o}ya9mdcvsgILN|=(C_ z`jM{`d&{e_AeHUW`>Z3tI+q$e?^E~PWsULy8LEF@wpriDAL9T7J+)dWd z3`4s%tfrCqbHlF`4GrxZ>LfpXs>Umt2KH&RnZjLAGc%V0-qAUCP>z-Jc-2Z)Uawp- z!3?`W?EsVt#kw~#(BKL&T>TaL`(1Av(#8S%j#bRTyX0lqc6D7gY|kjuuvl@HhbK6B zhJc4*v2k}Zwfg*(zHnO}B~xErfKSDb(RHN_-8cu4>L0`0GoiS>J2ZOrt2wm3*j3{k zZFxuhRwoW$fgBt^oI0ZFb>_1I4wyV?Zxq_F)P09db7~|;W0GZh;lUDAq!nv z8f)1bnsYSb^Qu)h%F+*?qGntocLrT}ASv3XP(d8kU58AV_s;T9z0;0IQ*9?L6Wo#U zh<=^``yk3P0O*nuDcjz9H4?=<0bC#Ddmnx>hm+5^?2N9x$C}DRgXz1(Tu8xFKwoW1 zD!i7|gJm*H<7$NBD;0j7(VdoWE7X1x{dP%uRAFc!wNe8txOQxFzcDuI$+ki0pC)X( zUYj)hrQ7m$x9OoP;zq2u_F7K+@N02}(Fs8PT|r8lsfGLkJ@g*aQ3K5rZl4NUUX7Gd z52fctiKx;UEJBY={;Z~MGE(zV$7?oWrp;x+_9Z{FuWqBI#FoBRKS}0M3`Z(_oOV(C z-qFeDo}v}Z#QZ#kh-Fkr`t}ySJx3z;K4F=F-H2~C8`EN_#1E+gZ=FklNNQ3hy|hbi zL#k@^@;5)|C8=QEiP#Ciahfm80E9H0Ygz0qQ; zYVfvTOUgw>uSJ=fQMzcu`5NY1A%Ymi5Ide1HLTj24-Rn4T1jGz2u(4=&cRSB`mk!+$_ zWw9%sz0Xs$AvOCbj>Um(&Z-r3ZmGV2g>&0b-I&$?jp!at8UCA2V`?1&wZ({Ybd5{G zAcy*JI(83j$@T=|iao^!u|eQ4HH>_#E!=QkRa^C9V9Z2Xv~DuLxi6fDUnz0enU!hD zI3(b>h-$GC6=BL*xg~OzAcXR`0tEI681P}G3U~E zhI`d7%cnJ!oV5;;XEr%*h1zwCFL^c{Lsl+)*91w_5VUgr*zisn5Idb49CiawDj9Ya zCck-iu-=d=#MJv?ZMQ8Ki2PDEnS5s0@O6$fYd0|Gp`)a~<%n8aXO?3X-zBD8HPeKL zwD$umwBqn9FTYGp1xLa@=Q!Ex?{(6t2aj#9H5JsjBqrYpJ)}MMu}$&(?R}5M zQ3Bi!a-Qq?prI8<`gvfPm18wa{ZXiw8|_vcn4d(z?5f4EkU&szA%nEelEM=7 zr*FgWOT!nA`qW(-H@%B3Ea;iPB0nF@jMQlTJByOuubf27I-)K8YA*6;mgk!|zWNAWlTCz)m9lep`j+HntNmPM(;}qX~w%sSUbx)a-73E=HRk zPTAO6)oJx0rq(G#7BoC`u<;W{Nti{S=nvo!q~tLFlLA3aa)D 
zzsIHn*_&gx?n2&-Q|8@JOeCslj3xkr4W9kCJ&$jO|InwHMIGiACScJWpGpUTUJ#vB zNWF}QK~i9VC<8Dnn2>ip5wNRvj?%(TYyG8vkQ2Dm?mBl8{fo1w zjVyOXMSE8z82ta;&Hb+`{4b5c#|COBV`nGN7iP)EJhRtsOAB`Xb5={rGuOu^K|byU zmPi1k)DOsqV`TcfEaFEM4dg|tem^h&wcdvD7U$R9QK<0kPe`9s( z8H;KH6H3vL0lET0HKFQx_gp>IRWgJKWduo#AzIJjxEOT+Q6^$>(U%g4@3=@!7OVsz zR9FD!aB7rLb<=y@#-I_HR5UdzdxK~XI$%c{*S-}RwZSxvMcP?ANq`FY#O7-Xj4;@p zzjAQz@;tBZ{=B6A3LrXr*|SiekKx#uk+z76khV;tLQ6NW=9nc8R~Q5UxJK|mXhrcn z@@$kT16A4@u-uPmfV`K!rlfeA%noN38=AKMvz$^0!tL<0I32%mXGB7D{I%KY$ z=$w4-HYfBvI+qrQmSse;D`Ekn@0RdD81a8dD9yfzWn|b;%WisPjoBW2M%U|H1W@&0 z)_~xKa_&AH6{f^$Iwl8`X&nq{)yzCE$+5r z<<#W*!F#}C(+Vq2j%)n>=R@4w{!S-`l8sHfG6IBK=4{Sm~ZP-=lpSq8UtJ zYCvp?x=7;IwSZ9#svLSMNkT~WEL*vENB;nVV|lcx$^2CGLV+ox>mX?dA4vw2+r_Uc zZC>S3wG4UP!%n?ADiyoA=MV$4dDeH7x5@jFb8Z?0vUi8>)UzGOwHYVhAYI=)k`$-a z$3C9EF-3(bqrmb}7AwNaoril`?)=*-Z<9Nz1Sf|AgNB};L@Gplmt?dajcj8pJn=M7a`Qe$l0z%fM8r0Bd@Z^ z?%lMdk%>I+Ld*%l`fg2r9I)uD)54^X0?zF{VL0k=3KAf%H?|+QHKEy)tiHES8?AFd z;(0!8`{;k)7o@tahnKVUr;82|h(@n!0$$d6d_8RW;Z%L-b+h_z(GwkJaD61@>$4!5 z9hWn5g_?FaUmmFSxVBRq!#1(q4egPsVsjM$^Ps|<9z@IB$PHmep@e9#nS9;~hupgF z_xDLOx>5VD8;NY0+N_8a=1PvtxX}3V*ccy#6ob4vom+bJmsnHZ8A~MJNEv?&!HL{R zZ3=HSTdCGSFA)3HX%_wEp%A)VAnn~LaPr%a?P9ac@Ja0&_d+;DU<}D}@9LH-ZC7kl z=2hz}g}%eKQ^p>S4k`oQ7GYU~OW{V_nn&xUQWe`}ZEQhO>%y~YH_|JMW7JF=@0cDR z!`+VPwck7wWq9|Ael>?Ei@U8hnnq8MbuX--c%gK!$D@Q!GP-m< zipn76N_myou;Ph1=(cz-(umv1o}RQ>&Muusa^f!ey1p%h3YR?n%Aapv!n>2$T!a5X zcS86ZO)T^m-l2UCzQuy^-bKhHJxMl)hk#N+pmeo;O8;rafp4G@^dWW%d3l1$V1;&B z)A>j(?$GG7>C7gcmQi&XSM&th(`7yA27X!dpwKmYjdyqu!w<4)qqv z*u|(l5$ct-gv#cL7!P4~nIP|(!;$@ERPHK9V=ejapD=*J@>e(j;XaWH>ueAG(;SzW zZk_WruhD1DuX{&P;S1*n+H`-4p)68&S`nT6yl2x{&xQ}f0uMBEUA*)szlo8TtEt*A zuB(F_;af+y@1RmBB&o|~**j+rRckWa&lwOQ>`Hv!-_%t{0wRb_&;bYNS9=ek_crvn z6A@-e(dnBsOOai*l=Jl@Oc|A&F=VB+ZKVsU`aOTSEQ|&^S;)ox=*UWz2FVsQVl6YJxigY_quCH z>jF2ofi5fUbNrP7I%vu+htt5Vs}ugBa)Ysj=ji;XFc;^o#Ld?)ep680y?nXxwDv zt+mc$L`Jr4LmQEyj(q=NPia3RqZ7hjy4g3}XBNelWOpvPsSV@n+btVa)dD1r=GMV_ 
zX_Ko8+n3YoCE3^w><-ZAhN=PW`5YujhRKHJUdU)@s;dJ(VZ%~5N8(?y79X$aN5x*P%>RBS9sdI#jc z6CgEKAOWqG4CQZIY1%wisu{PPi-nIpiRGBvX`WvHVvGLaw@owf>#mOX?G8ts}hcRR#>O& z`&)^HQ@L%?y{~xua25v~DLVa3!qj_zfv0m1I#k=w6w1;X^T7lkM&&2Foc^V3ZuHh?G<|xnYl1wR%R{pcIkm{RB<+8YJh+4-q9NPjGa`- zME=GOwF<;oM>HI*y7+N+)okN6()u@f7(zWO>E4f=xwf-RUpPLNfJ1IY~jncst!c|b2 zR_a_Xce0(@gQ+}UqwWg{>-HZ3Ilt43H)Mxz$LCjr{e_hV`SEra+oe`wCvlC)d7P`x zAEZK3r~?)eKv&EZl|9!0m8px2`Xa_8Y8gt}k}$hYPWUuN{rJT5Gf zadD5GErXM1K)peYmj{iDdr@APx2fVVTzI(!lyM;$Okjv_=y!ZM#Qxa*&^@_I^4M7$ z>*}4*%*eY^xbj8C#y%a&PYC(lm{S-^ewfb5vy`lthLp=H)jkhNnK=GfU3frxQM$u$ zSb}RCZcF--xAd~TJIw=MAZT06>5k5EzW&92Y$|P zxsmGmzR{2gI<&2td^)vWXnnv#Yz!cIzRdsIpNTODkvF%NVQA1#`BzgCkr~srI5NKM zMQkCMw&NLR{xHyXMg_E>*W=A$RYUqJHZTGIiiqmv6f@-iNcSAhq4^KLrZ+i(-fw0t zW0Jr957C(@J|YJN0~C0ViZ*wij`o?Z5&3jPe^#|rt+0P;qW z8pNCW0#Id&A%?9Y(&3X~oPz~M{6E+}O&qT0tM>+>HVTY26c(o?vJ={=xGaqc-i>8M zxZql3pi70#0-?2N@R52%On6%?LI9%v1d8#9+h^K*{EvpI)#CQdyrg2D%m^U}E3oR9 z_vuPjd~1q~_>`8lF~MoEXb5H;!1UE|GtY+LskVyde}1qPm_neCZ%dH*E9b9zUyt59 t;Hy8UHYL5)rI#o8UyA+z+Pt*?jMPW.*?)'" +) +CONNECTION_INVALID_HOSTNAME_REGEX = re.compile( + "Unknown Doris server host '(?P.*?)'" +) +CONNECTION_UNKNOWN_DATABASE_REGEX = re.compile("Unknown database '(?P.*?)'") +CONNECTION_HOST_DOWN_REGEX = re.compile( + "Can't connect to Doris server on '(?P.*?)'" +) +SYNTAX_ERROR_REGEX = re.compile( + "check the manual that corresponds to your MySQL server " + "version for the right syntax to use near '(?P.*)" +) + +logger = logging.getLogger(__name__) + + +class TINYINT(Integer): + __visit_name__ = "TINYINT" + + +class LARGEINT(Integer): + __visit_name__ = "LARGEINT" + + +class DOUBLE(Float): + __visit_name__ = "DOUBLE" + + +class HLL(Numeric): + __visit_name__ = "HLL" + + +class BITMAP(Numeric): + __visit_name__ = "BITMAP" + + +class QuantileState(Numeric): + __visit_name__ = "QUANTILE_STATE" + + +class AggState(Numeric): + __visit_name__ = "AGG_STATE" + + +class ARRAY(TypeEngine): + __visit_name__ = "ARRAY" + + @property + def 
python_type(self) -> Optional[type[list[Any]]]: + return list + + +class MAP(TypeEngine): + __visit_name__ = "MAP" + + @property + def python_type(self) -> Optional[type[dict[Any, Any]]]: + return dict + + +class STRUCT(TypeEngine): + __visit_name__ = "STRUCT" + + @property + def python_type(self) -> Optional[type[Any]]: + return None + + +class DorisEngineSpec(MySQLEngineSpec): + engine = "pydoris" + engine_aliases = {"doris"} + engine_name = "Apache Doris" + max_column_name_length = 64 + default_driver = "pydoris" + sqlalchemy_uri_placeholder = ( + "doris://user:password@host:port/catalog.db[?key=value&key=value...]" + ) + encryption_parameters = {"ssl": "0"} + supports_dynamic_schema = True + + column_type_mappings = ( # type: ignore + ( + re.compile(r"^tinyint", re.IGNORECASE), + TINYINT(), + GenericDataType.NUMERIC, + ), + ( + re.compile(r"^largeint", re.IGNORECASE), + LARGEINT(), + GenericDataType.NUMERIC, + ), + ( + re.compile(r"^decimal.*", re.IGNORECASE), + types.DECIMAL(), + GenericDataType.NUMERIC, + ), + ( + re.compile(r"^double", re.IGNORECASE), + DOUBLE(), + GenericDataType.NUMERIC, + ), + ( + re.compile(r"^varchar(\((\d+)\))*$", re.IGNORECASE), + types.VARCHAR(), + GenericDataType.STRING, + ), + ( + re.compile(r"^char(\((\d+)\))*$", re.IGNORECASE), + types.CHAR(), + GenericDataType.STRING, + ), + ( + re.compile(r"^json.*", re.IGNORECASE), + types.JSON(), + GenericDataType.STRING, + ), + ( + re.compile(r"^binary.*", re.IGNORECASE), + types.BINARY(), + GenericDataType.STRING, + ), + ( + re.compile(r"^quantile_state", re.IGNORECASE), + QuantileState(), + GenericDataType.STRING, + ), + ( + re.compile(r"^agg_state.*", re.IGNORECASE), + AggState(), + GenericDataType.STRING, + ), + (re.compile(r"^hll", re.IGNORECASE), HLL(), GenericDataType.STRING), + ( + re.compile(r"^bitmap", re.IGNORECASE), + BITMAP(), + GenericDataType.STRING, + ), + ( + re.compile(r"^array.*", re.IGNORECASE), + ARRAY(), + GenericDataType.STRING, + ), + ( + re.compile(r"^map.*", 
re.IGNORECASE), + MAP(), + GenericDataType.STRING, + ), + ( + re.compile(r"^struct.*", re.IGNORECASE), + STRUCT(), + GenericDataType.STRING, + ), + ( + re.compile(r"^datetime.*", re.IGNORECASE), + types.DATETIME(), + GenericDataType.STRING, + ), + ( + re.compile(r"^date.*", re.IGNORECASE), + types.DATE(), + GenericDataType.STRING, + ), + ( + re.compile(r"^text.*", re.IGNORECASE), + TEXT(), + GenericDataType.STRING, + ), + ( + re.compile(r"^string.*", re.IGNORECASE), + String(), + GenericDataType.STRING, + ), + ) + + custom_errors: dict[Pattern[str], tuple[str, SupersetErrorType, dict[str, Any]]] = { + CONNECTION_ACCESS_DENIED_REGEX: ( + __('Either the username "%(username)s" or the password is incorrect.'), + SupersetErrorType.CONNECTION_ACCESS_DENIED_ERROR, + {"invalid": ["username", "password"]}, + ), + CONNECTION_INVALID_HOSTNAME_REGEX: ( + __('Unknown Doris server host "%(hostname)s".'), + SupersetErrorType.CONNECTION_INVALID_HOSTNAME_ERROR, + {"invalid": ["host"]}, + ), + CONNECTION_HOST_DOWN_REGEX: ( + __('The host "%(hostname)s" might be down and can\'t be reached.'), + SupersetErrorType.CONNECTION_HOST_DOWN_ERROR, + {"invalid": ["host", "port"]}, + ), + CONNECTION_UNKNOWN_DATABASE_REGEX: ( + __('Unable to connect to database "%(database)s".'), + SupersetErrorType.CONNECTION_UNKNOWN_DATABASE_ERROR, + {"invalid": ["database"]}, + ), + SYNTAX_ERROR_REGEX: ( + __( + 'Please check your query for syntax errors near "%(server_error)s". ' + "Then, try running your query again." + ), + SupersetErrorType.SYNTAX_ERROR, + {}, + ), + } + + @classmethod + def adjust_engine_params( + cls, + uri: URL, + connect_args: dict[str, Any], + catalog: Optional[str] = None, + schema: Optional[str] = None, + ) -> tuple[URL, dict[str, Any]]: + database = uri.database + if schema and database: + schema = parse.quote(schema, safe="") + if "." in database: + database = database.split(".")[0] + "." + schema + else: + database = "internal." 
+ schema + uri = uri.set(database=database) + + return uri, connect_args + + @classmethod + def get_schema_from_engine_params( + cls, + sqlalchemy_uri: URL, + connect_args: dict[str, Any], + ) -> Optional[str]: + """ + Return the configured schema. + + For doris the SQLAlchemy URI looks like this: + + doris://localhost:9030/catalog.database + + """ + database = sqlalchemy_uri.database.strip("/") + + if "." not in database: + return None + + return parse.unquote(database.split(".")[1]) diff --git a/tests/unit_tests/db_engine_specs/test_doris.py b/tests/unit_tests/db_engine_specs/test_doris.py new file mode 100644 index 0000000000..d7444f8d2d --- /dev/null +++ b/tests/unit_tests/db_engine_specs/test_doris.py @@ -0,0 +1,147 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import Any, Optional + +import pytest +from sqlalchemy import JSON, types +from sqlalchemy.engine.url import make_url + +from superset.db_engine_specs.doris import ( + AggState, + ARRAY, + BITMAP, + DOUBLE, + HLL, + LARGEINT, + MAP, + QuantileState, + STRUCT, + TINYINT, +) +from superset.utils.core import GenericDataType +from tests.unit_tests.db_engine_specs.utils import assert_column_spec + + +@pytest.mark.parametrize( + "native_type,sqla_type,attrs,generic_type,is_dttm", + [ + # Numeric + ("tinyint", TINYINT, None, GenericDataType.NUMERIC, False), + ("largeint", LARGEINT, None, GenericDataType.NUMERIC, False), + ("decimal(38,18)", types.DECIMAL, None, GenericDataType.NUMERIC, False), + ("decimalv3(38,18)", types.DECIMAL, None, GenericDataType.NUMERIC, False), + ("double", DOUBLE, None, GenericDataType.NUMERIC, False), + # String + ("char(10)", types.CHAR, None, GenericDataType.STRING, False), + ("varchar(65533)", types.VARCHAR, None, GenericDataType.STRING, False), + ("binary", types.BINARY, None, GenericDataType.STRING, False), + ("text", types.TEXT, None, GenericDataType.STRING, False), + ("string", types.String, None, GenericDataType.STRING, False), + # Date + ("datetimev2", types.DateTime, None, GenericDataType.STRING, False), + ("datev2", types.Date, None, GenericDataType.STRING, False), + # Complex type + ("array", ARRAY, None, GenericDataType.STRING, False), + ("map", MAP, None, GenericDataType.STRING, False), + ("struct", STRUCT, None, GenericDataType.STRING, False), + ("json", JSON, None, GenericDataType.STRING, False), + ("jsonb", JSON, None, GenericDataType.STRING, False), + ("bitmap", BITMAP, None, GenericDataType.STRING, False), + ("hll", HLL, None, GenericDataType.STRING, False), + ("quantile_state", QuantileState, None, GenericDataType.STRING, False), + ("agg_state", AggState, None, GenericDataType.STRING, False), + ], +) +def test_get_column_spec( + native_type: str, + sqla_type: type[types.TypeEngine], + attrs: Optional[dict[str, 
Any]], + generic_type: GenericDataType, + is_dttm: bool, +) -> None: + from superset.db_engine_specs.doris import DorisEngineSpec as spec + + assert_column_spec(spec, native_type, sqla_type, attrs, generic_type, is_dttm) + + +@pytest.mark.parametrize( + "sqlalchemy_uri,connect_args,return_schema,return_connect_args", + [ + ( + "doris://user:password@host/db1", + {"param1": "some_value"}, + "db1", + {"param1": "some_value"}, + ), + ( + "pydoris://user:password@host/db1", + {"param1": "some_value"}, + "db1", + {"param1": "some_value"}, + ), + ( + "doris://user:password@host/catalog1.db1", + {"param1": "some_value"}, + "catalog1.db1", + {"param1": "some_value"}, + ), + ( + "pydoris://user:password@host/catalog1.db1", + {"param1": "some_value"}, + "catalog1.db1", + {"param1": "some_value"}, + ), + ], +) +def test_adjust_engine_params( + sqlalchemy_uri: str, + connect_args: dict[str, Any], + return_schema: str, + return_connect_args: dict[str, Any], +) -> None: + from superset.db_engine_specs.doris import DorisEngineSpec + + url = make_url(sqlalchemy_uri) + returned_url, returned_connect_args = DorisEngineSpec.adjust_engine_params( + url, connect_args + ) + assert returned_url.database == return_schema + assert returned_connect_args == return_connect_args + + +def test_get_schema_from_engine_params() -> None: + """ + Test the ``get_schema_from_engine_params`` method. + """ + from superset.db_engine_specs.doris import DorisEngineSpec + + assert ( + DorisEngineSpec.get_schema_from_engine_params( + make_url("doris://localhost:9030/hive.test"), + {}, + ) + == "test" + ) + + assert ( + DorisEngineSpec.get_schema_from_engine_params( + make_url("doris://localhost:9030/hive"), + {}, + ) + is None + )