{"id":465487,"date":"2025-07-01T15:19:48","date_gmt":"2025-07-01T15:19:48","guid":{"rendered":"http:\/\/savepearlharbor.com\/?p=465487"},"modified":"-0001-11-30T00:00:00","modified_gmt":"-0001-11-29T21:00:00","slug":"","status":"publish","type":"post","link":"https:\/\/savepearlharbor.com\/?p=465487","title":{"rendered":"<span>\u0427\u0442\u043e \u043d\u043e\u0432\u043e\u0433\u043e \u0432 Apache Spark 4.0<\/span>"},"content":{"rendered":"<div><!--[--><!--]--><\/div>\n<div id=\"post-content-body\">\n<div>\n<div class=\"article-formatted-body article-formatted-body article-formatted-body_version-2\">\n<div xmlns=\"http:\/\/www.w3.org\/1999\/xhtml\">\n<p>\u041f\u0440\u0438\u0432\u0435\u0442, \u0425\u0430\u0431\u0440! \u042f \u0410\u043b\u0435\u043a\u0441\u0435\u0439 \u0421\u043a\u0430\u0445\u0438\u043d, \u0438\u043d\u0436\u0435\u043d\u0435\u0440 \u0434\u0430\u043d\u043d\u044b\u0445 \u0432 \u00ab\u0414\u0410\u0420\u00bb (\u0413\u041a \u00ab\u041a\u041e\u0420\u0423\u0421 \u041a\u043e\u043d\u0441\u0430\u043b\u0442\u0438\u043d\u0433\u00bb). Apache Spark \u2014 \u044d\u0442\u043e \u043c\u043e\u0449\u043d\u044b\u0439 \u0444\u0440\u0435\u0439\u043c\u0432\u043e\u0440\u043a \u0434\u043b\u044f \u0440\u0430\u0441\u043f\u0440\u0435\u0434\u0435\u043b\u0451\u043d\u043d\u043e\u0439 \u043e\u0431\u0440\u0430\u0431\u043e\u0442\u043a\u0438 \u0431\u043e\u043b\u044c\u0448\u0438\u0445 \u043e\u0431\u044a\u0451\u043c\u043e\u0432 \u0434\u0430\u043d\u043d\u044b\u0445, \u043f\u043e\u0437\u0432\u043e\u043b\u044f\u044e\u0449\u0438\u0439 \u0432\u044b\u043f\u043e\u043b\u043d\u044f\u0442\u044c \u0441\u043b\u043e\u0436\u043d\u044b\u0435 \u0432\u044b\u0447\u0438\u0441\u043b\u0435\u043d\u0438\u044f \u043d\u0430 \u043a\u043b\u0430\u0441\u0442\u0435\u0440\u0430\u0445 \u043a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440\u043e\u0432 \u0441 \u0432\u044b\u0441\u043e\u043a\u043e\u0439 \u043f\u0440\u043e\u0438\u0437\u0432\u043e\u0434\u0438\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u044c\u044e \u0438 \u0433\u0438\u0431\u043a\u043e\u0441\u0442\u044c\u044e.  <\/p>\n<figure class=\"full-width\"><img decoding=\"async\" src=\"https:\/\/habrastorage.org\/r\/w1560\/getpro\/habr\/upload_files\/c49\/086\/bcc\/c49086bcc5a1f6240ec0ac4e6a9125a1.jpg\" alt=\"Apache Spark 4 released\" title=\"Apache Spark 4 released\" width=\"1280\" height=\"853\" sizes=\"auto, (max-width: 780px) 100vw, 50vw\" srcset=\"https:\/\/habrastorage.org\/r\/w780\/getpro\/habr\/upload_files\/c49\/086\/bcc\/c49086bcc5a1f6240ec0ac4e6a9125a1.jpg 780w,&#10;       https:\/\/habrastorage.org\/r\/w1560\/getpro\/habr\/upload_files\/c49\/086\/bcc\/c49086bcc5a1f6240ec0ac4e6a9125a1.jpg 781w\" loading=\"lazy\" decode=\"async\"\/><\/p>\n<div><figcaption>Apache Spark 4 released<\/figcaption><\/div>\n<\/figure>\n<p>\u0418 \u0432\u043e\u0442 23 \u043c\u0430\u044f 2025 \u0433\u043e\u0434\u0430 \u043a\u043e\u043c\u043f\u0430\u043d\u0438\u044f Apache \u0432\u044b\u043f\u0443\u0441\u0442\u0438\u043b\u0430\u00a0<strong>\u043d\u043e\u0432\u0443\u044e \u0432\u0435\u0440\u0441\u0438\u044e<\/strong>\u00a0Spark 4.<\/p>\n<p>\u0421\u0442\u043e\u0438\u0442 \u043e\u0442\u043c\u0435\u0442\u0438\u0442\u044c, \u0447\u0442\u043e Apache Spark \u2014 \u043c\u0430\u0441\u0448\u0442\u0430\u0431\u043d\u044b\u0439 \u0444\u0440\u0435\u0439\u043c\u0432\u043e\u0440\u043a \u0441 \u0448\u0438\u0440\u043e\u043a\u0438\u043c \u0444\u0443\u043d\u043a\u0446\u0438\u043e\u043d\u0430\u043b\u043e\u043c. \u0412 \u0434\u0430\u043d\u043d\u043e\u0439 \u0441\u0442\u0430\u0442\u044c\u0435 \u044f \u0441\u043e\u0441\u0440\u0435\u0434\u043e\u0442\u043e\u0447\u0443\u0441\u044c \u043d\u0430 \u043d\u043e\u0432\u043e\u0432\u0432\u0435\u0434\u0435\u043d\u0438\u044f\u0445, \u043a\u043e\u0442\u043e\u0440\u044b\u0435 \u0432 \u043f\u0435\u0440\u0432\u0443\u044e \u043e\u0447\u0435\u0440\u0435\u0434\u044c \u0437\u0430\u0442\u0440\u043e\u043d\u0443\u0442 \u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u0435\u0439 Spark SQL \u0438 PySpark.<\/p>\n<p>\u041f\u043e\u043b\u043d\u044b\u0439 \u0441\u043f\u0438\u0441\u043e\u043a 5100 \u0438\u0437\u043c\u0435\u043d\u0435\u043d\u0438\u0439 \u043e\u0442 390 \u043a\u043e\u043d\u0442\u0440\u0438\u0431\u044c\u044e\u0442\u043e\u0440\u043e\u0432 \u043f\u0440\u0435\u0434\u0441\u0442\u0430\u0432\u043b\u0435\u043d <a href=\"https:\/\/spark.apache.org\/releases\/spark-release-4-0-0.html\" rel=\"noopener noreferrer nofollow\">\u043d\u0430 \u043e\u0444\u0438\u0446\u0438\u0430\u043b\u044c\u043d\u043e\u043c \u0441\u0430\u0439\u0442\u0435<\/a>.<\/p>\n<h2>\u0418\u0437\u043c\u0435\u043d\u0435\u043d\u0438\u044f \u0432 Spark SQL<\/h2>\n<p>\u0421 \u043c\u043e\u0435\u0439 \u0442\u043e\u0447\u043a\u0438 \u0437\u0440\u0435\u043d\u0438\u044f, \u043e\u0441\u043d\u043e\u0432\u043d\u044b\u0435 \u0438\u0437\u043c\u0435\u043d\u0435\u043d\u0438\u044f \u043f\u0440\u043e\u0438\u0437\u043e\u0448\u043b\u0438 \u0432 Spark SQL. \u041f\u043e\u044f\u0432\u043b\u044f\u044e\u0442\u0441\u044f \u0437\u0430\u0447\u0430\u0442\u043a\u0438 \u043f\u0440\u043e\u0446\u0435\u0434\u0443\u0440\u043d\u043e\u0433\u043e \u0440\u0430\u0441\u0448\u0438\u0440\u0435\u043d\u0438\u044f SQL.<\/p>\n<h2>[SPARK-42849] Session Variables<\/h2>\n<p> SQL \u043f\u0435\u0440\u0435\u043c\u0435\u043d\u043d\u044b\u0435, \u043a\u043e\u0442\u043e\u0440\u044b\u0439 \u043c\u043e\u0436\u043d\u043e \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u0432 \u0442\u0435\u0447\u0435\u043d\u0438\u0435 \u0432\u0441\u0435\u0439 spark \u0441\u0435\u0441\u0441\u0438\u0438.<\/p>\n<p>\u0414\u0430\u043d\u043d\u044b\u0439 \u0444\u0443\u043d\u043a\u0446\u0438\u043e\u043d\u0430\u043b \u0431\u0443\u0434\u0435\u0442 \u043f\u043e\u043b\u0435\u0437\u0435\u043d \u0434\u043b\u044f \u043f\u0440\u043e\u0435\u043a\u0442\u043e\u0432, \u0433\u0434\u0435 \u0432 \u043f\u0435\u0440\u0432\u0443\u044e \u043e\u0447\u0435\u0440\u0435\u0434\u044c \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u0442\u0441\u044f Spark SQL \u0434\u043b\u044f \u0440\u0430\u0441\u0447\u0435\u0442\u0430 \u0432\u0438\u0442\u0440\u0438\u043d.<\/p>\n<p>\u041a \u043f\u0440\u0438\u043c\u0435\u0440\u0443 \u043e\u0442\u0447\u0435\u0442, \u043a\u043e\u0442\u043e\u0440\u044b\u0439 \u0445\u0440\u0430\u043d\u0438\u0442\u0441\u044f \u0432 \u0432\u0438\u0434\u0435 Spark SQL \u0444\u0430\u0439\u043b\u0430. \u0422\u0435\u043f\u0435\u0440\u044c \u043f\u0430\u0440\u0430\u043c\u0435\u0442\u0440 \u0440\u0430\u0441\u0447\u0435\u0442\u0430 \u043c\u043e\u0436\u043d\u043e \u043f\u043e\u043b\u0443\u0447\u0438\u0442\u044c 1 \u0440\u0430\u0437 \u0438 \u043f\u0435\u0440\u0435\u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u0435\u0433\u043e \u0432 \u043d\u0435\u0441\u043a\u043e\u043b\u044c\u043a\u0438\u0445 \u043c\u0435\u0441\u0442\u0430\u0445:<\/p>\n<pre><code class=\"sql\">--\u043f\u0435\u0440\u0435\u043c\u0435\u043d\u043d\u0430\u044f \u043c\u043e\u0436\u0435\u0442 \u0431\u044b\u0442\u044c \u043a\u0430\u043a \u043a\u043e\u043d\u0441\u0442\u0430\u043d\u0442\u043e\u0439, \u0442\u0430\u043a \u0438 \u0440\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442\u043e\u043c \u0437\u0430\u043f\u0440\u043e\u0441\u0430: SET start_date = (select value from settings where name = 'last_copy') ; --\u0434\u0430\u043b\u0435\u0435 \u0444\u0438\u043b\u044c\u0442\u0440\u0443\u0435\u043c \u043d\u0435\u0441\u043a\u043e\u043b\u044c\u043a\u043e \u0437\u0430\u043f\u0440\u043e\u0441\u043e\u0432 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u044f \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435 \u043f\u0435\u0440\u0435\u043c\u0435\u043d\u043d\u043e\u0439 select ${start_date} ; --\u043e\u0431\u0440\u0430\u0431\u043e\u0442\u043a\u0430 \u0437\u0430\u043a\u0430\u0437\u043e\u0432 SELECT *  FROM orders  WHERE order_date &gt; ${start_date} ; --\u043e\u0431\u0440\u0430\u0431\u043e\u0442\u043a\u0430 \u043f\u0440\u043e\u0434\u0430\u0436 SELECT *  FROM sales  WHERE sale_date &gt; ${start_date}<\/code><\/pre>\n<p>\u0412\u044b\u0437\u043e\u0432 \u0434\u0430\u043d\u043d\u043e\u0433\u043e SQL:<\/p>\n<pre><code class=\"python\">[spark.sql(s).show() for s in sql.split(\";\")]  +----------+--------------------+ |       key|               value| +----------+--------------------+ |start_date|(select value fro...| +----------+--------------------+  +----------------+ |scalarsubquery()| +----------------+ |      2023-06-02| +----------------+  +---+----------+ | id|order_date| +---+----------+ |  3|2023-06-03| |  4|2023-06-04| |  5|2023-06-05| +---+----------+  +---+----------+ | id| sale_date| +---+----------+ | 30|2023-06-03| | 40|2023-06-04| | 50|2023-06-05| |  3|2025-06-03| +---+----------+<\/code><\/pre>\n<p>\u0421\u0442\u043e\u0438\u0442 \u0437\u0430\u043c\u0435\u0442\u0438\u0442\u044c, \u0447\u0442\u043e \u043f\u0430\u0440\u0430\u043c\u0435\u0442\u0440 \u043d\u0435 \u0440\u0430\u0441\u0441\u0447\u0438\u0442\u044b\u0432\u0430\u0435\u0442\u0441\u044f, \u0430 \u043f\u0435\u0440\u0435\u0434\u0430\u0435\u0442\u0441\u044f \u043a\u0430\u0436\u0434\u044b\u0439 \u0440\u0430\u0437 \u0432\u044b\u0440\u0430\u0436\u0435\u043d\u0438\u0435\u043c \u0432 \u0437\u0430\u043f\u0440\u043e\u0441, \u0447\u0442\u043e \u0441\u043d\u0438\u0436\u0430\u0435\u0442 \u0432\u043e\u0437\u043c\u043e\u0436\u043d\u043e\u0441\u0442\u0438 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u043d\u0438\u044f, \u0435\u0441\u043b\u0438 \u0432 \u0440\u0430\u0441\u0447\u0435\u0442\u0435 \u043f\u0435\u0440\u0435\u043c\u0435\u043d\u043d\u043e\u0439 \u0441\u043b\u043e\u0436\u043d\u043e\u0435 \u0432\u044b\u0440\u0430\u0436\u0435\u043d\u0438\u0435.<br \/>\u042d\u0442\u043e \u0432\u0438\u0434\u043d\u043e \u043f\u043e \u043f\u043b\u0430\u043d\u0443 \u0437\u0430\u043f\u0440\u043e\u0441\u0435, \u0433\u0434\u0435 \u0432\u043c\u0435\u0441\u0442\u043e \u043a\u043e\u043d\u043a\u0440\u0435\u0442\u043d\u043e\u0433\u043e \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f \u043f\u043e\u044f\u0432\u043b\u044f\u0435\u0442\u0441\u044f subquery<\/p>\n<pre><code class=\"sql\">explain SELECT *  FROM sales  WHERE sale_date &gt; ${start_date} ;  == Physical Plan == AdaptiveSparkPlan isFinalPlan=false +- Filter (isnotnull(sale_date#3) AND (sale_date#3 &gt; Subquery subquery#201, [id=#465]))    :  +- Subquery subquery#201, [id=#465]    :     +- AdaptiveSparkPlan isFinalPlan=false    :        +- Project [value#5]    :           +- Filter (isnotnull(name#4) AND (name#4 = last_copy))    :              +- Scan ExistingRDD[name#4,value#5]    +- Scan ExistingRDD[id#2L,sale_date#3]<\/code><\/pre>\n<h2>[SPARK-46246] EXECUTE IMMEDIATESQL support<\/h2>\n<p>\u0412\u044b\u0437\u043e\u0432 SQL \u0441 \u043f\u0435\u0440\u0435\u0434\u0430\u0447\u0435\u0439 \u043f\u043e\u0434\u0433\u043e\u0442\u043e\u0432\u043b\u0435\u043d\u043d\u044b\u0445 <a href=\"https:\/\/spark.apache.org\/docs\/4.0.0\/sql-ref-syntax-aux-exec-imm.html\" rel=\"noopener noreferrer nofollow\">\u043f\u0430\u0440\u0430\u043c\u0435\u0442\u0440\u043e\u0432<\/a>:<\/p>\n<pre><code class=\"pgsql\">DECLARE sqlStr1 = 'SELECT SUM(col1) FROM VALUES(?), (?)'; DECLARE arg10 = 5; DECLARE arg20 = 6; EXECUTE IMMEDIATE sqlStr1 USING arg10, arg20;  +---------+ |sum(col1)| +---------+ |       11| +---------+<\/code><\/pre>\n<p>Declare \u0438 set \u043c\u043e\u0436\u043d\u043e \u043a\u043e\u043c\u0431\u0438\u043d\u0438\u0440\u043e\u0432\u0430\u0442\u044c<\/p>\n<pre><code class=\"pgsql\">DECLARE sqlStr1 = 'SELECT SUM(col1) FROM VALUES(?), (?)'; SET test_arg = 123; DECLARE arg1 = ${test_arg}; EXECUTE IMMEDIATE sqlStr1 USING ${test_arg}, arg1;  +---------+ |sum(col1)| +---------+ |      246| +---------+<\/code><\/pre>\n<p>\u0412 \u0431\u0438\u043d\u0434 \u043f\u0435\u0440\u0435\u043c\u0435\u043d\u043d\u0443\u044e \u043d\u0435\u043b\u044c\u0437\u044f \u043f\u0435\u0440\u0435\u0434\u0430\u0442\u044c \u0441\u0435\u0441\u0441\u0438\u043e\u043d\u0443\u044e \u043f\u0435\u0440\u0435\u043c\u0435\u043d\u043d\u0443\u044e \u0441 \u0432\u044b\u0440\u0430\u0436\u0435\u043d\u0438\u0435\u043c:<\/p>\n<pre><code class=\"pgsql\">SET test_arg = (select 1);  EXECUTE IMMEDIATE sqlStr1 USING ${test_arg}, arg1;  # A query parameter contains unsupported expression. # Parameters can either be variables or literals<\/code><\/pre>\n<p>\u0418\u0437-\u0437\u0430 \u0442\u043e\u0433\u043e \u0447\u0442\u043e SQL \u0438 \u043f\u0435\u0440\u0435\u043c\u0435\u043d\u043d\u044b\u0435 \u0432 EXECUTE IMMEDIATE \u0434\u043e\u043b\u0436\u043d\u044b \u0431\u044b\u0442\u044c \u0441\u0442\u0430\u0442\u0438\u0447\u0435\u0441\u043a\u0438\u043c\u0438 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f\u043c\u0438, \u043f\u043e\u043a\u0430 \u0447\u0442\u043e \u044d\u0442\u043e \u0432\u044b\u0440\u0430\u0436\u0435\u043d\u0438\u0435 \u0437\u0430\u0442\u0440\u0443\u0434\u043d\u0438\u0442\u0435\u043b\u044c\u043d\u043e \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u0434\u043b\u044f \u0434\u0438\u043d\u0430\u043c\u0438\u0447\u0435\u0441\u043a\u043e\u0433\u043e \u0444\u043e\u0440\u043c\u0438\u0440\u043e\u0432\u0430\u043d\u0438\u044f \u0437\u0430\u043f\u0440\u043e\u0441\u043e\u0432.<\/p>\n<p>\u0414\u043e\u043f\u043e\u043b\u043d\u0438\u0442\u0435\u043b\u044c\u043d\u043e \u043d\u0430\u043f\u043e\u043c\u043d\u044e, \u0447\u0442\u043e \u0438\u043c\u0435\u043d\u043e\u0432\u0430\u043d\u043d\u044b\u0435 \u0431\u0438\u043d\u0434\u044b \u043f\u043e\u044f\u0432\u0438\u043b\u0438\u0441\u044c \u0432 PySpark 3.4 (SPARK-41271, SPARK-42702, SPARK-40281)<\/p>\n<pre><code class=\"python\">spark.sql(     sqlText = \"SELECT * FROM tbl WHERE date &gt; :startDate LIMIT :maxRows\",     args = Map(         \"startDate\" -&gt; \"DATE'2022-12-01'\",         \"maxRows\" -&gt; \"100\"))<\/code><\/pre>\n<p>\u0430 \u043f\u043e\u0437\u0438\u0446\u0438\u043e\u043d\u043d\u044b\u0435 \u0432 3.5<\/p>\n<pre><code class=\"python\">spark.sql(\"SELECT * FROM {df} WHERE {df[B]} &gt; ? and ? &lt; {df[A]}\"           , args=[5, 2], df=mydf).show()<\/code><\/pre>\n<h2>[SPARK-46057] Support SQL user-defined functions<\/h2>\n<p>\u0414\u043e\u0431\u0430\u0432\u043b\u0435\u043d\u0430 \u0432\u043e\u0437\u043c\u043e\u0436\u043d\u043e\u0441\u0442\u044c \u0441\u043e\u0437\u0434\u0430\u043d\u0438\u044f \u0438 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u043d\u0438\u044f \u0444\u0443\u043d\u0446\u0438\u0439 \u0432 Spark SQL.<\/p>\n<p>\u042d\u0442\u043e \u043c\u043e\u0433\u0443\u0442 \u0431\u044b\u0442\u044c \u043a\u0430\u043a \u043f\u0440\u043e\u0441\u0442\u044b\u0435 \u043c\u0430\u0442\u0435\u043c\u0430\u0442\u0438\u0447\u0435\u0441\u043a\u0438\u0435 \u0432\u044b\u0440\u0430\u0436\u0435\u043d\u0438\u044f<\/p>\n<pre><code class=\"pgsql\">CREATE FUNCTION to_hex(x INT COMMENT 'Any number between 0 - 255')   RETURNS STRING   COMMENT 'Converts a decimal to a hexadecimal'   CONTAINS SQL DETERMINISTIC   RETURN lpad(hex(least(greatest(0, x), 255)), 2, 0) ; SELECT to_hex(id) FROM range(2) ; +--------------------------------+ |spark_catalog.default.to_hex(id)| +--------------------------------+ |                              00| |                              01| +--------------------------------+  EXPLAIN SELECT to_hex(id) FROM range(2); == Physical Plan == *(1) Project [lpad(hex(cast(least(greatest(0, cast(id#225L as int)), 255) as bigint)), 2, 0) AS spark_catalog.default.to_hex(id)#226] +- *(1) Range (0, 2, step=1, splits=20)<\/code><\/pre>\n<p>\u0422\u0430\u043a \u0438 \u043e\u0431\u0440\u0430\u0449\u0435\u043d\u0438\u044f \u043a \u0434\u0440\u0443\u0433\u0438\u043c \u0442\u0430\u0431\u043b\u0438\u0446\u0430\u043c \u0437\u0430 \u0440\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442\u043e\u043c.<\/p>\n<p>\u0420\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442\u043e\u043c \u043c\u043e\u0436\u0435\u0442 \u0431\u044b\u0442\u044c \u0442\u0430\u0431\u043b\u0438\u0446\u0430:<\/p>\n<pre><code class=\"pgsql\">CREATE OR REPLACE TEMPORARY FUNCTION       get_order_name(order_id INT COMMENT 'Order ID')     RETURNS TABLE(name STRING COMMENT 'order name')    READS SQL DATA SQL SECURITY DEFINER    COMMENT 'Get order name by ID'    RETURN SELECT order_name FROM orders WHERE id = get_order_name.order_id ;  SELECT id, get_order_name.name      FROM VALUES(1),                (3) AS orders(id),          LATERAL get_order_name(orders.id) ;  +---+------+ | id|  name| +---+------+ |  1|name 1| |  3|name 3| +---+------+<\/code><\/pre>\n<p>\u041d\u043e \u0441\u0442\u043e\u0438\u0442 \u0437\u0430\u043c\u0435\u0442\u0438\u0442\u044c, \u0447\u0442\u043e \u043a\u0430\u043a \u0438 \u0441\u0435\u0441\u0441\u0438\u043e\u043d\u043d\u044b\u0435 \u043f\u0435\u0440\u0435\u043c\u0435\u043d\u043d\u044b\u0435 \u043f\u043e \u0441\u0443\u0442\u0438 \u044d\u0442\u043e \u043f\u043e\u0434\u0441\u0442\u0430\u0432\u043b\u044f\u0435\u043c\u044b\u0435 \u0448\u0430\u0431\u043b\u043e\u043d\u044b \u043f\u043e\u0434\u0433\u043e\u0442\u043e\u0432\u043b\u0435\u043d\u043d\u044b\u0445 SQL:<\/p>\n<pre><code class=\"sql\">explain SELECT id, get_order_name.name      FROM VALUES(1),                (3) AS orders(id),          LATERAL get_order_name(orders.id) ;  == Physical Plan == AdaptiveSparkPlan isFinalPlan=false +- Project [id#212, name#218]    +- BroadcastHashJoin [cast(id#212 as bigint)], [id#86L], Inner, BuildLeft, false       :- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=532]       :  +- LocalTableScan [id#212]       +- Project [order_name#88 AS name#218, id#86L]          +- Filter isnotnull(id#86L)             +- Scan ExistingRDD[id#86L,order_date#87,order_name#88]<\/code><\/pre>\n<p>\u041d\u0430 \u0442\u0435\u043a\u0443\u0449\u0438\u0439 \u043c\u043e\u043c\u0435\u043d\u0442 \u043d\u0435\u0442 \u0446\u0438\u043a\u043b\u043e\u0432, \u0443\u0441\u043b\u043e\u0432\u0438\u0439 \u0438\u043b\u0438 \u0434\u0438\u043d\u0430\u043c\u0438\u0447\u0435\u0441\u043a\u043e\u0433\u043e SQL.<\/p>\n<p>\u0414\u0440\u0443\u0433\u0438\u0435 \u0432\u0430\u0440\u0438\u0430\u0446\u0438\u0438 \u0444\u0443\u043d\u043a\u0446\u0438\u0439 \u043c\u043e\u0436\u043d\u043e \u0432\u0438\u0434\u0435\u0442\u044c \u0432 \u0442\u0435\u0441\u0442\u0430\u0445: <a href=\"https:\/\/github.com\/apache\/spark\/blob\/master\/sql\/core\/src\/test\/resources\/sql-tests\/inputs\/sql-udf.sql\" rel=\"noopener noreferrer nofollow\">https:\/\/github.com\/apache\/spark\/blob\/master\/sql\/core\/src\/test\/resources\/sql-tests\/inputs\/sql-udf.sql<\/a><\/p>\n<h2>[SPARK-44444] Use ANSI SQL mode by default<\/h2>\n<p>\u0422\u0435\u043f\u0435\u0440\u044c Spark SQL \u043f\u043e \u0443\u043c\u043e\u043b\u0447\u0430\u043d\u0438\u044e \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u0442 <a href=\"https:\/\/spark.apache.org\/docs\/latest\/sql-ref-ansi-compliance.html\" rel=\"noopener noreferrer nofollow\">ANSI \u0441\u0438\u043d\u0442\u0430\u043a\u0441\u0438\u0441 SQL<\/a><\/p>\n<p>\u041c\u043d\u043e\u0436\u0435\u0441\u0442\u0432\u043e \u0437\u0430\u043f\u0440\u043e\u0441\u043e\u0432, \u043a\u043e\u0442\u043e\u0440\u044b\u0435 \u0440\u0430\u043d\u044c\u0448\u0435 \u0434\u0430\u0432\u0430\u043b\u0438 NULL \u043d\u0430 \u0447\u0430\u0441\u0442\u0438 \u0437\u0430\u043f\u0440\u043e\u0441\u043e\u0432, \u0442\u0435\u043f\u0435\u0440\u044c \u0432\u043e\u0437\u0432\u0440\u0430\u0449\u0430\u044e\u0442 exception:<\/p>\n<pre><code class=\"python\">spark.conf.set(\"spark.sql.ansi.enabled\", \"true\") #default  spark.sql(\"select 1\/0\").show() # ArithmeticException: [DIVIDE_BY_ZERO] Division by zero.  # Use try_divide to tolerate divisor being 0 and return NULL instead.  # If necessary set \"spark.sql.ansi.enabled\" to \"false\" to bypass this error.  # SQLSTATE: 22012  spark.sql(\"select cast('test' as int)\").show() # NumberFormatException: [CAST_INVALID_INPUT] The value 'test' of the type \"STRING\" cannot be cast to \"INT\" because it is malformed.  # Correct the value as per the syntax, or change its target type.  # Use try_cast to tolerate malformed input and return NULL instead.  # SQLSTATE: 22018<\/code><\/pre>\n<p>\u041a\u0430\u043a \u0431\u044b\u043b\u043e \u0440\u0430\u043d\u044c\u0448\u0435:<\/p>\n<pre><code class=\"python\">spark.conf.set(\"spark.sql.ansi.enabled\", \"false\")  spark.sql(\"select 1\/0\").show() +-------+ |(1 \/ 0)| +-------+ |   NULL| +-------+  spark.sql(\"select cast('test' as int)\").show() +-----------------+ |CAST(test AS INT)| +-----------------+ |             NULL| +-----------------+<\/code><\/pre>\n<h2>[SPARK-49555] SQL Pipe syntax<\/h2>\n<p>\u0420\u0430\u0441\u0448\u0438\u0440\u0435\u043d\u0438\u0435 \u0432\u043e\u0437\u043c\u043e\u0436\u043d\u043e\u0441\u0442\u0435\u0439 <a href=\"https:\/\/spark.apache.org\/docs\/latest\/sql-pipe-syntax.html\" rel=\"noopener noreferrer nofollow\">\u0441\u0438\u043d\u0442\u0430\u043a\u0441\u0438\u0441\u0430 SQL<\/a>. \u042d\u0442\u043e \u043d\u0435 \u043d\u043e\u0432\u044b\u0439 \u044f\u0437\u044b\u043a, \u0430 \u043f\u0435\u0440\u0435\u043e\u0441\u043c\u044b\u0441\u043b\u0435\u043d\u0438\u0435 \u0441\u0443\u0449\u0435\u0441\u0442\u0432\u0443\u044e\u0449\u0435\u0433\u043e GoogleSQL<\/p>\n<p>\u041e\u0441\u043d\u043e\u0432\u043d\u044b\u0435 \u043f\u0440\u0435\u0438\u043c\u0443\u0449\u0435\u0441\u0442\u0432\u0430:<\/p>\n<ul>\n<li>\n<p>multi-level aggregations without subqueries<\/p>\n<\/li>\n<li>\n<p>Filtering anywhere<\/p>\n<\/li>\n<li>\n<p>starting from &#8212; \u201cinside-out\u201d structure <\/p>\n<\/li>\n<\/ul>\n<p>\u041f\u0440\u0438\u043c\u0435\u0440 \u043e\u0431\u044b\u0447\u043d\u043e\u0433\u043e SQL \u0441\u043e\u0434\u0435\u0440\u0436\u0438\u0442 \u0432\u043b\u043e\u0436\u0435\u043d\u043d\u044b\u0439 \u043f\u043e\u0434\u0437\u0430\u043f\u0440\u043e\u0441\u044b \u0438 \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u0442\u0441\u044f \u0441 \u0441\u0435\u043a\u0446\u0438\u0438 select<\/p>\n<pre><code class=\"sql\">SELECT c_count, COUNT(*) AS custdist FROM ( SELECT c_custkey, COUNT(o_orderkey) c_count FROM customer LEFT OUTER JOIN orders ON c_custkey = o_custkey AND o_comment NOT LIKE '%unusual%packages%' GROUP BY c_custkey ) AS c_orders GROUP BY c_count ORDER BY custdist DESC, c_count DESC;<\/code><\/pre>\n<p>SQL Pipe syntax \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u0442\u0441\u044f \u0441 \u0432\u044b\u0431\u043e\u0440\u043a\u0438 \u0434\u0430\u043d\u043d\u044b\u0445 \u0438 \u043f\u0440\u0438\u043c\u0435\u043d\u044f\u0435\u0442 \u0438\u043d\u0441\u0442\u0440\u0443\u043a\u0446\u0438\u0438 \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e \u0431\u0435\u0437 \u043f\u043e\u0434\u0437\u0430\u043f\u0440\u043e\u0441\u043e\u0432:<\/p>\n<pre><code class=\"sql\">FROM customer |&gt; LEFT OUTER JOIN orders ON c_custkey = o_custkey AND o_comment NOT LIKE '%unusual%packages%' |&gt; AGGREGATE COUNT(o_orderkey) c_count GROUP BY c_custkey |&gt; AGGREGATE COUNT(*) AS custdist GROUP BY c_count |&gt; ORDER BY custdist DESC, c_count DESC;<\/code><\/pre>\n<p>\u041f\u043e\u0434\u0440\u043e\u0431\u043d\u0435\u0439 \u043e \u043c\u043e\u0442\u0438\u0432\u0430\u0446\u0438\u0438, \u043f\u0440\u0435\u0438\u043c\u0443\u0449\u0435\u0441\u0442\u0432\u0430\u0445 \u0438 \u0432\u043e\u0437\u043c\u043e\u0436\u043d\u043e\u0441\u0442\u044f\u0445 \u0432 paper \u043e\u0442 Google: <a href=\"https:\/\/research.google\/pubs\/sql-has-problems-we-can-fix-them-pipe-syntax-in-sql\/\" rel=\"noopener noreferrer nofollow\">https:\/\/research.google\/pubs\/sql-has-problems-we-can-fix-them-pipe-syntax-in-sql\/<\/a><\/p>\n<h2>[SPARK-46908] Support star clause in WHERE clause<\/h2>\n<p>\u0440\u0430\u0441\u0448\u0438\u0440\u0435\u043d\u0438\u0435 \u0444\u0443\u043d\u043a\u0446\u0438\u043e\u043d\u0430\u043b\u0430 \u0432\u044b\u0431\u043e\u0440\u043a\u0438 \u0432\u0441\u0435\u0445 \u043a\u043e\u043b\u043e\u043d\u043e\u043a *<\/p>\n<pre><code class=\"sql\">-- \u0432\u043e\u0437\u0432\u0440\u0430\u0442 \u0432\u0441\u0435\u0445 \u043a\u043e\u043b\u043e\u043d\u043e\u043a, \u043a\u0440\u043e\u043c\u0435 TA.c1 and TB.cb SELECT * EXCEPT (c1, cb)  FROM VALUES(1, 2) AS TA(c1, c2), VALUES('a', 'b') AS TB(ca, cb); +---+---+ | c2| ca| +---+---+ |  2|  a| +---+---+  -- \u043f\u043e\u043b\u0443\u0447\u0438\u0442\u044c \u043f\u0435\u0440\u0432\u0443\u044e \u043d\u0435 \u043f\u0443\u0441\u0442\u0443\u044e \u043a\u043e\u043b\u043e\u043d\u043a\u0443 \u0432  TA SELECT coalesce(TA.*)  FROM VALUES(1, 2) AS TA(c1, c2), VALUES('a', 'b') AS TB(ca, cb); +----------------+ |coalesce(c1, c2)| +----------------+ |               1| +----------------+   -- \u0440\u0430\u0437\u0432\u0435\u0440\u043d\u0443\u0442\u044c \u0441\u0442\u0440\u0443\u043a\u0442\u0443\u0440\u0443 \u0432 \u043a\u043e\u043b\u043e\u043d\u043a\u0438 SELECT c1.* FROM VALUES(named_struct('x', 1, 'y', 2)) AS TA(c1); +---+---+ |  x|  y| +---+---+ |  1|  2| +---+---+  -- \u043f\u0440\u043e\u0432\u0435\u0440\u043a\u0430 \u0432\u0441\u0435\u0445 \u043a\u043e\u043b\u043e\u043d\u043e\u043a \u0432 \u0441\u0442\u0440\u043e\u043a\u0435 \u043d\u0430 \u0441\u043e\u043e\u0442\u0432\u0435\u0442\u0441\u0442\u0432\u0438\u0435 SELECT * FROM VALUES(1, 2, 3, 4, 5),(1, 2, NULL, 4, 5) AS TA(c1, c2, c3, c4, c)  WHERE array(*) = array(1, 2, NULL, 4, 5); +---+---+----+---+---+ | c1| c2|  c3| c4|  c| +---+---+----+---+---+ |  1|  2|NULL|  4|  5| +---+---+----+---+---+  -- \u043d\u0430\u043b\u0438\u0447\u0438\u0435 \u043a\u043e\u043d\u043a\u0440\u0435\u0442\u043d\u043e\u0433\u043e \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f \u0432 \u043b\u044e\u0431\u043e\u0439 \u043a\u043e\u043b\u043e\u043d\u043a\u0435 \u0441\u0442\u0440\u043e\u043a\u0438 SELECT * FROM VALUES(1, 2, 3, 4, 5),(1, 2, NULL, -4, 5) AS TA(c1, c2, c3, c4, c)  WHERE 4 IN (*); +---+---+---+---+---+ | c1| c2| c3| c4|  c| +---+---+---+---+---+ |  1|  2|  3|  4|  5| +---+---+---+---+---+<\/code><\/pre>\n<h2>[SPARK-36680] Support dynamic table options via WITH OPTIONS syntax<\/h2>\n<p>\u043a\u0430\u043a \u044d\u0442\u043e \u0432\u044b\u0433\u043b\u044f\u0434\u0438\u0442 \u0432 PySpark<\/p>\n<pre><code class=\"python\">spark.read.format(\"jdbc\").option(\"fetchSize\", 0).load()<\/code><\/pre>\n<p>\u041a\u0430\u043a \u044d\u0442\u043e \u043c\u043e\u0436\u043d\u043e \u0442\u0435\u043f\u0435\u0440\u044c \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u0432 SQL<\/p>\n<pre><code class=\"pgsql\">SELECT * FROM jdbcTable WITH OPTIONS(fetchSize = 0)<\/code><\/pre>\n<h2>\u0418\u0437\u043c\u0435\u043d\u0435\u043d\u0438\u044f \u0432 PySpark<\/h2>\n<h2>[SPARK-46858] Upgrade Pandas to 2<\/h2>\n<p>\u0423\u0432\u0435\u043b\u0438\u0447\u0435\u043d\u0430 \u0432\u0435\u0440\u0441\u0438\u044f Pandas \u0434\u043e 2<\/p>\n<pre><code class=\"python\">pdf = pd.DataFrame({\"id\": [1, 2, 3], \"value\": [\"a\", \"b\", \"c\"]}) sdf = spark.createDataFrame(pdf) result_pdf = sdf.toPandas() print(result_pdf)<\/code><\/pre>\n<h2>[SPARK-49530] Introducing PySpark Plotting API<\/h2>\n<p>\u041e\u0442\u0440\u0438\u0441\u043e\u0432\u043a\u0430 \u0433\u0440\u0430\u0444\u0438\u043a\u043e\u0432 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u044f pyspark.pandas<\/p>\n<pre><code class=\"python\">import pandas as pd import numpy as np import pyspark.pandas as ps  spark.conf.set(\"spark.sql.ansi.enabled\", \"false\") pser = pd.Series(np.random.randn(1000),                  index=pd.date_range('1\/1\/2000', periods=1000))  psser = ps.Series(pser) psser = psser.cummax() psser.plot()<\/code><\/pre>\n<figure class=\"full-width\"><img decoding=\"async\" src=\"https:\/\/habrastorage.org\/r\/w1560\/getpro\/habr\/upload_files\/bd9\/a0c\/bed\/bd9a0cbedec1c0c777e089682e5fa069.png\" width=\"1166\" height=\"332\" sizes=\"auto, (max-width: 780px) 100vw, 50vw\" srcset=\"https:\/\/habrastorage.org\/r\/w780\/getpro\/habr\/upload_files\/bd9\/a0c\/bed\/bd9a0cbedec1c0c777e089682e5fa069.png 780w,&#10;       https:\/\/habrastorage.org\/r\/w1560\/getpro\/habr\/upload_files\/bd9\/a0c\/bed\/bd9a0cbedec1c0c777e089682e5fa069.png 781w\" loading=\"lazy\" decode=\"async\"\/><\/figure>\n<h2>[SPARK-43797] Python User-defined Table Functions<\/h2>\n<p>\u0420\u0430\u0437\u0432\u0438\u0442\u0438\u0435 \u043d\u0430\u0447\u0430\u0442\u043e\u0433\u043e \u0432 3.5 SPARK-43798 &#8212; <a href=\"https:\/\/spark.apache.org\/docs\/3.5.3\/api\/python\/user_guide\/sql\/python_udtf.html\" rel=\"noopener noreferrer nofollow\">\u0432\u043e\u0437\u043c\u043e\u0436\u043d\u043e\u0441\u0442\u044c \u0441\u043e\u0437\u0434\u0430\u043d\u0438\u044f UDTF<\/a>  (\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u0441\u043a\u0430\u044f \u0444\u0443\u043d\u043a\u0446\u0438\u044f, \u0432\u043e\u0437\u0432\u0440\u0430\u0449\u0430\u044e\u0449\u0430\u044f \u0442\u0430\u0431\u043b\u0438\u0446\u0443)<\/p>\n<pre><code class=\"python\">from pyspark.sql.functions import udtf  @udtf(returnType=\"word: string\") class WordSplitter:     def eval(self, text: str):         for word in text.split(\" \"):             yield (word.strip(),)  spark.udtf.register(\"split_words\", WordSplitter)  # Example: Using the UDTF in SQL. spark.sql(\"SELECT * FROM split_words('hello world')\").show()  +-----+ | word| +-----+ |hello| |world| +-----+  from pyspark.sql.functions import lit WordSplitter(lit('hello world')).show()  +-----+ | word| +-----+ |hello| |world| +-----+<\/code><\/pre>\n<h2>[SPARK-50075] DataFrame APIs for table-valued functions<\/h2>\n<p>\u0412\u043e\u0437\u043c\u043e\u0436\u043d\u043e\u0441\u0442\u044c <a href=\"https:\/\/spark.apache.org\/docs\/latest\/sql-ref-syntax-qry-select-tvf.html\" rel=\"noopener noreferrer nofollow\">\u0433\u0435\u043d\u0435\u0440\u0430\u0446\u0438\u0438 \u0434\u0430\u043d\u043d\u044b\u0445 \u0435\u0441\u0442\u044c \u0432 SQL<\/a><\/p>\n<pre><code class=\"sql\">SELECT * FROM range(5, 10);<\/code><\/pre>\n<p>\u0422\u0435\u043f\u0435\u0440\u044c \u0435\u0441\u0442\u044c \u0430\u043d\u0430\u043b\u043e\u0433\u0438\u0447\u043d\u044b\u0439 \u0444\u0443\u043d\u043a\u0446\u0438\u043e\u043d\u0430\u043b tvf \u0432 Spark Dataframe <\/p>\n<pre><code class=\"python\">spark.tvf.range(10).show()<\/code><\/pre>\n<h2>[SPARK-44076] SPIP: Python Data Source API<\/h2>\n<p>\u041f\u043e\u044f\u0432\u0438\u043b\u0430\u0441\u044c \u0432\u043e\u0437\u043c\u043e\u0436\u043d\u043e\u0441\u0442\u044c \u0441\u043e\u0437\u0434\u0430\u043d\u0438\u044f \u0438\u0441\u0442\u043e\u0447\u043d\u0438\u043a\u043e\u0432 \u0434\u0430\u043d\u043d\u044b\u0445 \u0432 PySpark \u0431\u0435\u0437 Scala<\/p>\n<pre><code class=\"python\">from pyspark.sql.datasource import DataSource, DataSourceReader from pyspark.sql.types import StructType  # \u043e\u043f\u0438\u0441\u0430\u043d\u0438\u0435 \u0438\u0441\u0442\u043e\u0447\u043d\u0438\u043a\u0430: \u0435\u0433\u043e \u0441\u0445\u0435\u043c\u044b \u0438 \u043a\u043b\u0430\u0441\u0441\u0430 \u0447\u0438\u0442\u0430\u0442\u0435\u043b\u044f class FakeDataSource(DataSource):       @classmethod     def name(cls):         return \"fake\"      def schema(self):         return \"id int, name string\"      def reader(self, schema: StructType):         return FakeDataSourceReader(schema, self.options)  # \u0447\u0438\u0442\u0430\u0442\u0435\u043b\u044c \u0434\u0430\u043d\u043d\u044b\u0445 \u0441 \u0433\u0435\u043d\u0435\u0440\u0430\u0446\u0438\u0435\u0439 \u0444\u0435\u0439\u043a\u043e\u0432\u044b\u0445 3 \u0441\u0442\u0440\u043e\u043a class FakeDataSourceReader(DataSourceReader):     def init(self, schema, options):         self.schema: StructType = schema         self.options = options      def read(self, partition):         for r in range(3):             yield tuple((r, f\"row_{r}\"))  # \u0440\u0435\u0433\u0438\u0441\u0442\u0440\u0438\u0440\u0443\u0435\u043c \u0434\u0430\u0442\u0430 \u0441\u043e\u0440\u0441 spark.dataSource.register(FakeDataSource)  # \u0447\u0438\u0442\u0430\u0435\u043c \u0434\u0430\u043d\u043d\u044b\u0435 \u0438\u0437 \u0434\u0430\u0442\u0430\u0441\u043e\u0440\u0441\u0430 spark.read.format(\"fake\").load().show()  +---+-----+ | id| name| +---+-----+ |  0|row_0| |  1|row_1| |  2|row_2| +---+-----+<\/code><\/pre>\n<h2>[SPARK-51079] Support large variable types in pandas UDF, createDataFrame and toPandas with Arrow<\/h2>\n<p>\u0420\u0430\u0437\u0432\u0438\u0442\u0438\u0435 SPARK-40307 \u0432 Spark 3.5 \u043f\u043e\u0434\u0434\u0435\u0440\u0436\u043a\u0430 \u0431\u043e\u043b\u044c\u0448\u0435\u0433\u043e \u0447\u0438\u0441\u043b\u0430 Arrow \u0442\u0438\u043f\u043e\u0432 \u043f\u0440\u0438 \u0440\u0430\u0431\u043e\u0442\u0435 PySpark \u0438 Pandas<\/p>\n<pre><code class=\"python\">import numpy as np import pandas as pd  # Enable Arrow-based columnar data transfers spark.conf.set(\"spark.sql.execution.arrow.pyspark.enabled\", \"true\")  # Generate a Pandas DataFrame pdf = pd.DataFrame(np.random.rand(100, 3))  # Create a Spark DataFrame from a Pandas DataFrame using Arrow df = spark.createDataFrame(pdf)  # Convert the Spark DataFrame back to a Pandas DataFrame using Arrow result_pdf = df.select(\"*\").toPandas()<\/code><\/pre>\n<h2>SPARK-50130 Add scalar and exists DataFrame APIs <\/h2>\n<p>\u0420\u0430\u0437\u0432\u0438\u0442\u0438\u0435 PySpark \u0434\u0438\u0430\u043b\u0435\u043a\u0442\u0430 DF \u043f\u043e\u0434\u0434\u0435\u0440\u0436\u043a\u043e\u0439 \u043a\u043e\u0440\u0440\u0435\u043b\u044f\u0446\u0438\u043e\u043d\u043d\u044b\u0445 \u043f\u043e\u0434\u0437\u0430\u043f\u0440\u043e\u0441\u043e\u0432.<\/p>\n<p>\u041a\u0430\u043a \u044d\u0442\u043e \u0432\u044b\u0433\u043b\u044f\u0434\u0438\u0442 \u0432 SQL<\/p>\n<pre><code class=\"sql\">select  from orders where exists (select  from sales where orders.id = sales.id)<\/code><\/pre>\n<p>\u0438 \u043a\u0430\u043a \u044d\u0442\u043e \u0442\u0435\u043f\u0435\u0440\u044c \u043c\u043e\u0436\u043d\u043e \u043d\u0430\u043f\u0438\u0441\u0430\u0442\u044c \u0432 PySpark<\/p>\n<pre><code class=\"python\">from pyspark.sql import functions as sf  df_result = df_orders.where(     df_sales.where(sf.col(\"o.id\").outer() == sf.col(\"s.id\")).exists() )  df_result.show()  +---+----------+ | id|order_date| +---+----------+ |  3|2023-06-03| +---+----------+<\/code><\/pre>\n<h2>\u0414\u043e\u0431\u0430\u0432\u043b\u0435\u043d\u0438\u0435 \u043d\u043e\u0432\u044b\u0445 \u0442\u0438\u043f\u043e\u0432 \u0434\u0430\u043d\u043d\u044b\u0445<\/h2>\n<h2>[SPARK-45827] Add VARIANT data type<\/h2>\n<p>\u0421\u043f\u0435\u0446\u0438\u0430\u043b\u0438\u0437\u0438\u0440\u043e\u0432\u0430\u043d\u043d\u044b\u0439 \u0442\u0438\u043f \u0434\u0430\u043d\u043d\u044b\u0445 \u0434\u043b\u044f <a href=\"https:\/\/spark.apache.org\/docs\/4.0.0\/api\/python\/reference\/pyspark.sql\/api\/pyspark.sql.types.VariantType.html\" rel=\"noopener noreferrer nofollow\">\u0445\u0440\u0430\u043d\u0435\u043d\u0438\u044f \u0441\u043b\u043e\u0436\u043d\u044b\u0445 \u0441\u0442\u0440\u0443\u043a\u0442\u0443\u0440<\/a>.<\/p>\n<p>\u0412 \u043f\u0435\u0440\u0432\u0443\u044e \u043e\u0447\u0435\u0440\u0435\u0434\u044c \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u0442\u0441\u044f \u0434\u043b\u044f \u0445\u0440\u0430\u043d\u0435\u043d\u0438\u044f JSON \u0432 \u0440\u0430\u0441\u043f\u043e\u0437\u043d\u0430\u043d\u043d\u043e\u043c \u0432\u0438\u0434\u0435 \u0438 \u0431\u044b\u0441\u0442\u0440\u043e\u0439 \u043e\u0431\u0440\u0430\u0442\u043d\u043e\u0439 \u043a\u043e\u043d\u0432\u0435\u0440\u0442\u0430\u0446\u0438\u0438 \u0432 \u043e\u0431\u044a\u0435\u043a\u0442 python \u0438\u043b\u0438 JSON:<\/p>\n<pre><code class=\"python\">spark.sql(\"\"\"SELECT PARSE_JSON('{\"a\": 1}') variant_col\"\"\").first().variant_col.toPython() {'a': 1}  spark.sql(\"\"\"SELECT PARSE_JSON('{\"a\": 1}') variant_col\"\"\").first().variant_col.toJson() '{\"a\":1}'<\/code><\/pre>\n<p>\u0420\u0430\u0431\u043e\u0442\u0430\u0435\u0442 \u0432 8 \u0440\u0430\u0437 \u0431\u044b\u0441\u0442\u0440\u0435\u0439, \u0447\u0435\u043c \u0442\u0435\u043a\u0441\u0442\u043e\u0432\u043e\u0435 \u043f\u0440\u0435\u0434\u0441\u0442\u0430\u0432\u043b\u0435\u043d\u0438\u0435 JSON.<\/p>\n<p>\u041d\u0435 \u0432\u0441\u0435 \u043e\u043f\u0435\u0440\u0430\u0446\u0438\u0438 \u043f\u043e\u0434\u0434\u0435\u0440\u0436\u0438\u0432\u0430\u044e\u0442\u0441\u044f, \u0432\u044b\u0431\u043e\u0440\u043a\u0430 \u043a\u043b\u044e\u0447\u0435\u0439 \u043f\u043e \u043f\u0443\u0442\u0438 \u043f\u043e\u043a\u0430 \u0432 <a href=\"https:\/\/github.com\/apache\/spark\/pull\/51190\" rel=\"noopener noreferrer nofollow\">\u0440\u0430\u0437\u0440\u0430\u0431\u043e\u0442\u043a\u0435<\/a>.<\/p>\n<p>\u041d\u0430 \u0442\u0435\u043a\u0443\u0449\u0438\u0439 \u043c\u043e\u043c\u0435\u043d\u0442 \u0431\u0443\u0434\u0435\u0442 \u043e\u0448\u0438\u0431\u043a\u0430:<\/p>\n<pre><code class=\"python\">spark.sql(\"\"\"select variant_col:a from (SELECT PARSE_JSON('{\"a\": 1}') variant_col)\"\"\").show() # ParseException:  # [PARSE_SYNTAX_ERROR] Syntax error at or near ':'. SQLSTATE: 42601 (line 1, pos 18)<\/code><\/pre>\n<h2>[SPARK-44265] Built-in XML data source support<\/h2>\n<p>XML \u0442\u0435\u043f\u0435\u0440\u044c <a href=\"https:\/\/spark.apache.org\/docs\/latest\/sql-ref-functions-builtin.html#xml-functions\" rel=\"noopener noreferrer nofollow\">\u043f\u043e\u0434\u0434\u0435\u0440\u0436\u0438\u0432\u0430\u0435\u0442\u0441\u044f <\/a>\u043d\u0430 \u0442\u043e\u043c \u0436\u0435 \u0443\u0440\u043e\u0432\u043d\u0435 \u043a\u0430\u043a JSON:<\/p>\n<p>\u043f\u0430\u0440\u0441\u0438\u043d\u0433 XML:<\/p>\n<pre><code class=\"python\">spark.sql(\"SELECT from_xml('&lt;p&gt;&lt;a&gt;1&lt;\/a&gt;&lt;b&gt;0.8&lt;\/b&gt;&lt;\/p&gt;', 'a INT, b DOUBLE');\").show()  +-----------------------------------+ |from_xml(&lt;p&gt;&lt;a&gt;1&lt;\/a&gt;&lt;b&gt;0.8&lt;\/b&gt;&lt;\/p&gt;)| +-----------------------------------+ |                           {1, 0.8}| +-----------------------------------+<\/code><\/pre>\n<p>\u041e\u0431\u0440\u0430\u0442\u043d\u0430\u044f \u043e\u043f\u0435\u0440\u0430\u0446\u0438\u044f \u043f\u043e\u043b\u0443\u0447\u0435\u043d\u0438\u044f XML \u0438\u0437 \u043e\u0431\u044a\u0435\u043a\u0442\u0430:<\/p>\n<pre><code class=\"python\">print(spark.sql(\"\"\"SELECT to_xml(named_struct('a', 1, 'b', 2)) as xml\"\"\").first().xml)  &lt;ROW&gt;     &lt;a&gt;1&lt;\/a&gt;     &lt;b&gt;2&lt;\/b&gt; &lt;\/ROW&gt;<\/code><\/pre>\n<p>\u041f\u043e\u0438\u0441\u043a \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0439 \u0432 \u0434\u0435\u0440\u0435\u0432\u0435 XML:<\/p>\n<pre><code class=\"python\">spark.sql(\"\"\"SELECT xpath('&lt;a&gt;&lt;b&gt;b1&lt;\/b&gt;&lt;b&gt;b2&lt;\/b&gt;&lt;b&gt;b3&lt;\/b&gt;&lt;c&gt;c1&lt;\/c&gt;&lt;c&gt;c2&lt;\/c&gt;&lt;\/a&gt;','a\/b\/text()')\"\"\").show()  +-----------------------------------------------------------------------+ |xpath(&lt;a&gt;&lt;b&gt;b1&lt;\/b&gt;&lt;b&gt;b2&lt;\/b&gt;&lt;b&gt;b3&lt;\/b&gt;&lt;c&gt;c1&lt;\/c&gt;&lt;c&gt;c2&lt;\/c&gt;&lt;\/a&gt;, a\/b\/text())| +-----------------------------------------------------------------------+ |                                                           [b1, b2, b3]| +-----------------------------------------------------------------------+<\/code><\/pre>\n<p>XML \u043c\u043e\u0436\u043d\u043e \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u043a\u0430\u043a \u0444\u043e\u0440\u043c\u0430\u0442 \u0434\u043b\u044f \u0437\u0430\u043f\u0438\u0441\u0438, \u0442\u0430\u043a \u0438 \u0434\u043b\u044f \u0447\u0442\u0435\u043d\u0438\u044f:<\/p>\n<pre><code class=\"python\">df = spark.read.format(\"xml\").option(\"rowTag\", \"row\").load(\"\/path\/to\/data.xml\") df.write.format(\"xml\").option(\"rootTag\", \"root\").option(\"rowTag\", \"row\").save(\"\/path\/to\/output.xml\")<\/code><\/pre>\n<h2>[SPARK-46830] String Collation support<\/h2>\n<p>\u0432 Spark \u0441\u0442\u0440\u043e\u043a\u0430\u0445 \u0432\u0441\u0435\u0433\u0434\u0430 \u0442\u0435\u043f\u0435\u0440\u044c \u043f\u0440\u0438\u0441\u0443\u0442\u0441\u0442\u0432\u0443\u0435\u0442 \u043d\u0435\u044f\u0432\u043d\u043e \u043a\u043e\u0434\u0438\u0440\u043e\u0432\u043a\u0430 \u0438 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u0442\u0441\u044f \u0432 \u0441\u043e\u0440\u0442\u0438\u0440\u043e\u0432\u043a\u0430\u0445 \u0438 \u0445\u044d\u0448\u0438\u0440\u043e\u0432\u0430\u043d\u0438\u0438.<\/p>\n<p>\u041f\u0440\u0438\u043c\u0435\u0440 \u0441\u043e\u0440\u0442\u0438\u0440\u043e\u0432\u043a\u0438 \u0441\u0442\u0440\u043e\u043a \u0441 \u0443\u0447\u0435\u0442\u043e\u043c \u043f\u043e\u043b\u043e\u0436\u0435\u043d\u0438\u044f \u0431\u0443\u043a\u0432 \u0432 \u0430\u043b\u0444\u0430\u0432\u0438\u0442\u0435<\/p>\n<pre><code class=\"python\"># \u043a\u0430\u043a \u0431\u044b\u043b\u043e - \u0431\u0438\u043d\u0430\u0440\u043d\u043e\u0435 \u043f\u0440\u0435\u0434\u0441\u0442\u0430\u0432\u043b\u0435\u043d\u0438\u0435 \u0441\u0442\u0440\u043e\u043a spark.sql(\"\"\"select * FROM VALUES('\u0430'),                ('\u042f') AS strcol(binary_str) order by binary_str\"\"\").show()  +----------+ |binary_str| +----------+ |         \u042f| |         \u0430| +----------+  # \u0441\u0442\u0440\u043e\u043a\u0430 \u0441 \u0437\u0430\u0434\u0430\u043d\u043d\u043e\u0439 \u0440\u0443\u0441\u0441\u043a\u043e\u0439 \u043a\u043e\u0434\u0438\u0440\u043e\u0432\u043a\u043e\u0439 spark.sql(\"\"\"select * FROM VALUES('\u0430' COLLATE RU),                ('\u042f' COLLATE RU) AS strcol(ru_str) order by ru_str\"\"\").show()  +------+ |ru_str| +------+ |     \u0430| |     \u042f| +------+<\/code><\/pre>\n<p>\u0427\u0430\u0441\u0442\u044c \u043e\u043f\u0435\u0440\u0430\u0446\u0438\u0439 \u0441\u0440\u0430\u0432\u043d\u0435\u043d\u0438\u044f \u0442\u0435\u043f\u0435\u0440\u044c \u0440\u0430\u0431\u043e\u0442\u0430\u044e\u0442 \u043d\u0430 \u043f\u043e\u0440\u044f\u0434\u043a\u0438 \u0431\u044b\u0441\u0442\u0440\u0435\u0439, \u0435\u0441\u043b\u0438 \u0443\u043a\u0430\u0437\u0430\u0442\u044c \u043a\u043e\u0434\u0438\u0440\u043e\u0432\u043a\u0443.<\/p>\n<p>\u041f\u0440\u0438\u043c\u0435\u0440 \u043a\u043e\u0434\u0438\u0440\u043e\u0432\u043a\u0438 UTF8_LCASE &#8212; \u0434\u043b\u044f \u0445\u0440\u0430\u043d\u0435\u043d\u0438\u044f \u0434\u0430\u043d\u043d\u044b\u0445 \u0432 \u043d\u0438\u0436\u043d\u0435\u043c \u0440\u0435\u0433\u0438\u0441\u0442\u0440\u0435, \u0434\u0430\u0435\u0442 \u0432\u043e\u0437\u043c\u043e\u0436\u043d\u043e\u0441\u0442\u044c \u0444\u0438\u043b\u044c\u0442\u0440\u043e\u0432\u0430\u0442\u044c \u0434\u0430\u043d\u043d\u044b\u0435 \u0431\u0435\u0437 \u0434\u043e\u043f\u043e\u043b\u043d\u0438\u0442\u0435\u043b\u044c\u043d\u043e\u0439 \u0444\u0443\u043d\u043a\u0446\u0438\u0438 \u043f\u0440\u0438\u0432\u0435\u0434\u0435\u043d\u0438\u044f \u0440\u0435\u0433\u0438\u0441\u0442\u0440\u0430:<\/p>\n<pre><code class=\"python\">spark.sql(\"\"\"select * FROM VALUES('\u0430' COLLATE UTF8_LCASE),                ('\u042f' COLLATE UTF8_LCASE) AS strcol(ru_str) where ru_str='\u044f'\"\"\").show()  +------+ |ru_str| +------+ |     \u042f| +------+<\/code><\/pre>\n<p>\u0427\u0442\u043e \u0443\u0441\u043a\u043e\u0440\u044f\u0435\u0442 \u0442\u0430\u043a\u0438\u0435 \u0432\u044b\u0431\u043e\u0440\u043a\u0438 \u0432 22 \u0440\u0430\u0437\u0430.<\/p>\n<h3>\u0417\u0430\u043a\u043b\u044e\u0447\u0435\u043d\u0438\u0435<\/h3>\n<p>\u0420\u0435\u043b\u0438\u0437 Apache Spark 4 \u043f\u0440\u0438\u043d\u043e\u0441\u0438\u0442 \u0437\u043d\u0430\u0447\u0438\u0442\u0435\u043b\u044c\u043d\u044b\u0435 \u0443\u043b\u0443\u0447\u0448\u0435\u043d\u0438\u044f \u0432 \u0447\u0430\u0441\u0442\u0438 Spark SQL, \u0447\u0442\u043e \u0434\u0435\u043b\u0430\u0435\u0442 \u043e\u0431\u0440\u0430\u0431\u043e\u0442\u043a\u0443 \u0434\u0430\u043d\u043d\u044b\u0445 \u0431\u043e\u043b\u0435\u0435 \u044d\u0444\u0444\u0435\u043a\u0442\u0438\u0432\u043d\u043e\u0439 \u0438 \u0443\u0434\u043e\u0431\u043d\u043e\u0439. \u0414\u043e\u0431\u0430\u0432\u043b\u0435\u043d\u044b \u043d\u043e\u0432\u044b\u0435  \u0432\u043e\u0437\u043c\u043e\u0436\u043d\u043e\u0441\u0442\u0438 \u043f\u0440\u043e\u0446\u0435\u0434\u0443\u0440\u043d\u043e\u0433\u043e \u044f\u0437\u044b\u043a\u0430, \u0443\u043b\u0443\u0447\u0448\u0435\u043d\u043d\u0430\u044f \u043f\u043e\u0434\u0434\u0435\u0440\u0436\u043a\u0430 ANSI SQL, \u0432\u0432\u0435\u0434\u0435\u043d \u043d\u043e\u0432\u044b\u0439 \u0441\u0438\u043d\u0442\u0430\u043a\u0438\u0441 Pipe SQL. \u041d\u0435\u0441\u043c\u043e\u0442\u0440\u044f \u043d\u0430 \u044d\u0442\u043e, \u043d\u0430 \u043c\u043e\u0439 \u0432\u0437\u0433\u043b\u044f\u0434, \u0432\u0441\u0442\u0440\u043e\u0435\u043d\u043d\u044b\u0439 \u043f\u0440\u043e\u0446\u0435\u0434\u0443\u0440\u043d\u044b\u0439 \u0434\u0438\u0430\u043b\u0435\u043a\u0442 SQL \u043f\u043e\u043a\u0430 \u0438\u043c\u0435\u0435\u0442 \u0441\u043b\u0438\u0448\u043a\u043e\u043c \u043c\u0430\u043b\u043e \u0444\u0443\u043d\u043a\u0446\u0438\u0439, \u0447\u0442\u043e\u0431\u044b \u043f\u043e\u043b\u043d\u043e\u0441\u0442\u044c\u044e \u0437\u0430\u043c\u0435\u043d\u0438\u0442\u044c PySpark. <\/p>\n<p>PySpark \u0432 \u0441\u0432\u043e\u044e \u043e\u0447\u0435\u0440\u0435\u0434\u044c \u043f\u0440\u043e\u0434\u043e\u043b\u0436\u0438\u043b \u0443\u043a\u0440\u0435\u043f\u043b\u044f\u0442\u044c \u0432\u0437\u0430\u0438\u043c\u043e\u0441\u0432\u044f\u0437\u0438 \u0441 Pandas \u0438 Arrow. \u041f\u043e\u044f\u0432\u0438\u043b\u0430\u0441\u044c \u043f\u043e\u0434\u0434\u0435\u0440\u0436\u043a\u0430 Variant \u0438 XML \u0442\u0438\u043f\u043e\u0432 \u0434\u0430\u043d\u043d\u044b\u0445.<\/p>\n<p>\u0420\u0435\u0430\u043b\u044c\u043d\u044b\u0439 \u043f\u043e\u0442\u0435\u043d\u0446\u0438\u0430\u043b \u044d\u0442\u0438\u0445 \u0438 \u0434\u0440\u0443\u0433\u0438\u0445 \u043d\u043e\u0432\u043e\u0432\u0432\u0435\u0434\u0435\u043d\u0438\u0439 Spark 4 \u0432 \u0441\u043a\u043e\u0440\u043e\u043c \u0432\u0440\u0435\u043c\u0435\u043d\u0438 \u0443\u0432\u0438\u0434\u0438\u043c \u043d\u0430 \u043d\u043e\u0432\u044b\u0445 \u043f\u0440\u043e\u0435\u043a\u0442\u0430\u0445.<\/p>\n<\/div>\n<\/div>\n<\/div>\n<p><!----><!----><\/div>\n<p><!----><!----><br \/> \u0441\u0441\u044b\u043b\u043a\u0430 \u043d\u0430 \u043e\u0440\u0438\u0433\u0438\u043d\u0430\u043b \u0441\u0442\u0430\u0442\u044c\u0438 <a href=\"https:\/\/habr.com\/ru\/articles\/920766\/\"> https:\/\/habr.com\/ru\/articles\/920766\/<\/a><\/p>\n","protected":false},"excerpt":{"rendered":"<div><!--[--><!--]--><\/div>\n<div id=\"post-content-body\">\n<div>\n<div class=\"article-formatted-body article-formatted-body article-formatted-body_version-2\">\n<div xmlns=\"http:\/\/www.w3.org\/1999\/xhtml\">\n<p>\u041f\u0440\u0438\u0432\u0435\u0442, \u0425\u0430\u0431\u0440! \u042f \u0410\u043b\u0435\u043a\u0441\u0435\u0439 \u0421\u043a\u0430\u0445\u0438\u043d, \u0438\u043d\u0436\u0435\u043d\u0435\u0440 \u0434\u0430\u043d\u043d\u044b\u0445 \u0432 \u00ab\u0414\u0410\u0420\u00bb (\u0413\u041a \u00ab\u041a\u041e\u0420\u0423\u0421 \u041a\u043e\u043d\u0441\u0430\u043b\u0442\u0438\u043d\u0433\u00bb). Apache Spark \u2014 \u044d\u0442\u043e \u043c\u043e\u0449\u043d\u044b\u0439 \u0444\u0440\u0435\u0439\u043c\u0432\u043e\u0440\u043a \u0434\u043b\u044f \u0440\u0430\u0441\u043f\u0440\u0435\u0434\u0435\u043b\u0451\u043d\u043d\u043e\u0439 \u043e\u0431\u0440\u0430\u0431\u043e\u0442\u043a\u0438 \u0431\u043e\u043b\u044c\u0448\u0438\u0445 \u043e\u0431\u044a\u0451\u043c\u043e\u0432 \u0434\u0430\u043d\u043d\u044b\u0445, \u043f\u043e\u0437\u0432\u043e\u043b\u044f\u044e\u0449\u0438\u0439 \u0432\u044b\u043f\u043e\u043b\u043d\u044f\u0442\u044c \u0441\u043b\u043e\u0436\u043d\u044b\u0435 \u0432\u044b\u0447\u0438\u0441\u043b\u0435\u043d\u0438\u044f \u043d\u0430 \u043a\u043b\u0430\u0441\u0442\u0435\u0440\u0430\u0445 \u043a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440\u043e\u0432 \u0441 \u0432\u044b\u0441\u043e\u043a\u043e\u0439 \u043f\u0440\u043e\u0438\u0437\u0432\u043e\u0434\u0438\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u044c\u044e \u0438 \u0433\u0438\u0431\u043a\u043e\u0441\u0442\u044c\u044e.  <\/p>\n<figure class=\"full-width\">\n<div><figcaption>Apache Spark 4 released<\/figcaption><\/div>\n<\/figure>\n<p>\u0418 \u0432\u043e\u0442 23 \u043c\u0430\u044f 2025 \u0433\u043e\u0434\u0430 \u043a\u043e\u043c\u043f\u0430\u043d\u0438\u044f Apache \u0432\u044b\u043f\u0443\u0441\u0442\u0438\u043b\u0430\u00a0<strong>\u043d\u043e\u0432\u0443\u044e \u0432\u0435\u0440\u0441\u0438\u044e<\/strong>\u00a0Spark 4.<\/p>\n<p>\u0421\u0442\u043e\u0438\u0442 \u043e\u0442\u043c\u0435\u0442\u0438\u0442\u044c, \u0447\u0442\u043e Apache Spark \u2014 \u043c\u0430\u0441\u0448\u0442\u0430\u0431\u043d\u044b\u0439 \u0444\u0440\u0435\u0439\u043c\u0432\u043e\u0440\u043a \u0441 \u0448\u0438\u0440\u043e\u043a\u0438\u043c \u0444\u0443\u043d\u043a\u0446\u0438\u043e\u043d\u0430\u043b\u043e\u043c. \u0412 \u0434\u0430\u043d\u043d\u043e\u0439 \u0441\u0442\u0430\u0442\u044c\u0435 \u044f \u0441\u043e\u0441\u0440\u0435\u0434\u043e\u0442\u043e\u0447\u0443\u0441\u044c \u043d\u0430 \u043d\u043e\u0432\u043e\u0432\u0432\u0435\u0434\u0435\u043d\u0438\u044f\u0445, \u043a\u043e\u0442\u043e\u0440\u044b\u0435 \u0432 \u043f\u0435\u0440\u0432\u0443\u044e \u043e\u0447\u0435\u0440\u0435\u0434\u044c \u0437\u0430\u0442\u0440\u043e\u043d\u0443\u0442 \u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u0435\u043b\u0435\u0439 Spark SQL \u0438 PySpark.<\/p>\n<p>\u041f\u043e\u043b\u043d\u044b\u0439 \u0441\u043f\u0438\u0441\u043e\u043a 5100 \u0438\u0437\u043c\u0435\u043d\u0435\u043d\u0438\u0439 \u043e\u0442 390 \u043a\u043e\u043d\u0442\u0440\u0438\u0431\u044c\u044e\u0442\u043e\u0440\u043e\u0432 \u043f\u0440\u0435\u0434\u0441\u0442\u0430\u0432\u043b\u0435\u043d <a href=\"https:\/\/spark.apache.org\/releases\/spark-release-4-0-0.html\" rel=\"noopener noreferrer nofollow\">\u043d\u0430 \u043e\u0444\u0438\u0446\u0438\u0430\u043b\u044c\u043d\u043e\u043c \u0441\u0430\u0439\u0442\u0435<\/a>.<\/p>\n<h2>\u0418\u0437\u043c\u0435\u043d\u0435\u043d\u0438\u044f \u0432 Spark SQL<\/h2>\n<p>\u0421 \u043c\u043e\u0435\u0439 \u0442\u043e\u0447\u043a\u0438 \u0437\u0440\u0435\u043d\u0438\u044f, \u043e\u0441\u043d\u043e\u0432\u043d\u044b\u0435 \u0438\u0437\u043c\u0435\u043d\u0435\u043d\u0438\u044f \u043f\u0440\u043e\u0438\u0437\u043e\u0448\u043b\u0438 \u0432 Spark SQL. \u041f\u043e\u044f\u0432\u043b\u044f\u044e\u0442\u0441\u044f \u0437\u0430\u0447\u0430\u0442\u043a\u0438 \u043f\u0440\u043e\u0446\u0435\u0434\u0443\u0440\u043d\u043e\u0433\u043e \u0440\u0430\u0441\u0448\u0438\u0440\u0435\u043d\u0438\u044f SQL.<\/p>\n<h2>[SPARK-42849] Session Variables<\/h2>\n<p> SQL \u043f\u0435\u0440\u0435\u043c\u0435\u043d\u043d\u044b\u0435, \u043a\u043e\u0442\u043e\u0440\u044b\u0439 \u043c\u043e\u0436\u043d\u043e \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u0432 \u0442\u0435\u0447\u0435\u043d\u0438\u0435 \u0432\u0441\u0435\u0439 spark \u0441\u0435\u0441\u0441\u0438\u0438.<\/p>\n<p>\u0414\u0430\u043d\u043d\u044b\u0439 \u0444\u0443\u043d\u043a\u0446\u0438\u043e\u043d\u0430\u043b \u0431\u0443\u0434\u0435\u0442 \u043f\u043e\u043b\u0435\u0437\u0435\u043d \u0434\u043b\u044f \u043f\u0440\u043e\u0435\u043a\u0442\u043e\u0432, \u0433\u0434\u0435 \u0432 \u043f\u0435\u0440\u0432\u0443\u044e \u043e\u0447\u0435\u0440\u0435\u0434\u044c \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u0442\u0441\u044f Spark SQL \u0434\u043b\u044f \u0440\u0430\u0441\u0447\u0435\u0442\u0430 \u0432\u0438\u0442\u0440\u0438\u043d.<\/p>\n<p>\u041a \u043f\u0440\u0438\u043c\u0435\u0440\u0443 \u043e\u0442\u0447\u0435\u0442, \u043a\u043e\u0442\u043e\u0440\u044b\u0439 \u0445\u0440\u0430\u043d\u0438\u0442\u0441\u044f \u0432 \u0432\u0438\u0434\u0435 Spark SQL \u0444\u0430\u0439\u043b\u0430. \u0422\u0435\u043f\u0435\u0440\u044c \u043f\u0430\u0440\u0430\u043c\u0435\u0442\u0440 \u0440\u0430\u0441\u0447\u0435\u0442\u0430 \u043c\u043e\u0436\u043d\u043e \u043f\u043e\u043b\u0443\u0447\u0438\u0442\u044c 1 \u0440\u0430\u0437 \u0438 \u043f\u0435\u0440\u0435\u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u0435\u0433\u043e \u0432 \u043d\u0435\u0441\u043a\u043e\u043b\u044c\u043a\u0438\u0445 \u043c\u0435\u0441\u0442\u0430\u0445:<\/p>\n<pre><code class=\"sql\">--\u043f\u0435\u0440\u0435\u043c\u0435\u043d\u043d\u0430\u044f \u043c\u043e\u0436\u0435\u0442 \u0431\u044b\u0442\u044c \u043a\u0430\u043a \u043a\u043e\u043d\u0441\u0442\u0430\u043d\u0442\u043e\u0439, \u0442\u0430\u043a \u0438 \u0440\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442\u043e\u043c \u0437\u0430\u043f\u0440\u043e\u0441\u0430: SET start_date = (select value from settings where name = 'last_copy') ; --\u0434\u0430\u043b\u0435\u0435 \u0444\u0438\u043b\u044c\u0442\u0440\u0443\u0435\u043c \u043d\u0435\u0441\u043a\u043e\u043b\u044c\u043a\u043e \u0437\u0430\u043f\u0440\u043e\u0441\u043e\u0432 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u044f \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435 \u043f\u0435\u0440\u0435\u043c\u0435\u043d\u043d\u043e\u0439 select ${start_date} ; --\u043e\u0431\u0440\u0430\u0431\u043e\u0442\u043a\u0430 \u0437\u0430\u043a\u0430\u0437\u043e\u0432 SELECT *  FROM orders  WHERE order_date &gt; ${start_date} ; --\u043e\u0431\u0440\u0430\u0431\u043e\u0442\u043a\u0430 \u043f\u0440\u043e\u0434\u0430\u0436 SELECT *  FROM sales  WHERE sale_date &gt; ${start_date}<\/code><\/pre>\n<p>\u0412\u044b\u0437\u043e\u0432 \u0434\u0430\u043d\u043d\u043e\u0433\u043e SQL:<\/p>\n<pre><code class=\"python\">[spark.sql(s).show() for s in sql.split(\";\")]  +----------+--------------------+ |       key|               value| +----------+--------------------+ |start_date|(select value fro...| +----------+--------------------+  +----------------+ |scalarsubquery()| +----------------+ |      2023-06-02| +----------------+  +---+----------+ | id|order_date| +---+----------+ |  3|2023-06-03| |  4|2023-06-04| |  5|2023-06-05| +---+----------+  +---+----------+ | id| sale_date| +---+----------+ | 30|2023-06-03| | 40|2023-06-04| | 50|2023-06-05| |  3|2025-06-03| +---+----------+<\/code><\/pre>\n<p>\u0421\u0442\u043e\u0438\u0442 \u0437\u0430\u043c\u0435\u0442\u0438\u0442\u044c, \u0447\u0442\u043e \u043f\u0430\u0440\u0430\u043c\u0435\u0442\u0440 \u043d\u0435 \u0440\u0430\u0441\u0441\u0447\u0438\u0442\u044b\u0432\u0430\u0435\u0442\u0441\u044f, \u0430 \u043f\u0435\u0440\u0435\u0434\u0430\u0435\u0442\u0441\u044f \u043a\u0430\u0436\u0434\u044b\u0439 \u0440\u0430\u0437 \u0432\u044b\u0440\u0430\u0436\u0435\u043d\u0438\u0435\u043c \u0432 \u0437\u0430\u043f\u0440\u043e\u0441, \u0447\u0442\u043e \u0441\u043d\u0438\u0436\u0430\u0435\u0442 \u0432\u043e\u0437\u043c\u043e\u0436\u043d\u043e\u0441\u0442\u0438 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u043d\u0438\u044f, \u0435\u0441\u043b\u0438 \u0432 \u0440\u0430\u0441\u0447\u0435\u0442\u0435 \u043f\u0435\u0440\u0435\u043c\u0435\u043d\u043d\u043e\u0439 \u0441\u043b\u043e\u0436\u043d\u043e\u0435 \u0432\u044b\u0440\u0430\u0436\u0435\u043d\u0438\u0435.<br \/>\u042d\u0442\u043e \u0432\u0438\u0434\u043d\u043e \u043f\u043e \u043f\u043b\u0430\u043d\u0443 \u0437\u0430\u043f\u0440\u043e\u0441\u0435, \u0433\u0434\u0435 \u0432\u043c\u0435\u0441\u0442\u043e \u043a\u043e\u043d\u043a\u0440\u0435\u0442\u043d\u043e\u0433\u043e \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f \u043f\u043e\u044f\u0432\u043b\u044f\u0435\u0442\u0441\u044f subquery<\/p>\n<pre><code class=\"sql\">explain SELECT *  FROM sales  WHERE sale_date &gt; ${start_date} ;  == Physical Plan == AdaptiveSparkPlan isFinalPlan=false +- Filter (isnotnull(sale_date#3) AND (sale_date#3 &gt; Subquery subquery#201, [id=#465]))    :  +- Subquery subquery#201, [id=#465]    :     +- AdaptiveSparkPlan isFinalPlan=false    :        +- Project [value#5]    :           +- Filter (isnotnull(name#4) AND (name#4 = last_copy))    :              +- Scan ExistingRDD[name#4,value#5]    +- Scan ExistingRDD[id#2L,sale_date#3]<\/code><\/pre>\n<h2>[SPARK-46246] EXECUTE IMMEDIATESQL support<\/h2>\n<p>\u0412\u044b\u0437\u043e\u0432 SQL \u0441 \u043f\u0435\u0440\u0435\u0434\u0430\u0447\u0435\u0439 \u043f\u043e\u0434\u0433\u043e\u0442\u043e\u0432\u043b\u0435\u043d\u043d\u044b\u0445 <a href=\"https:\/\/spark.apache.org\/docs\/4.0.0\/sql-ref-syntax-aux-exec-imm.html\" rel=\"noopener noreferrer nofollow\">\u043f\u0430\u0440\u0430\u043c\u0435\u0442\u0440\u043e\u0432<\/a>:<\/p>\n<pre><code class=\"pgsql\">DECLARE sqlStr1 = 'SELECT SUM(col1) FROM VALUES(?), (?)'; DECLARE arg10 = 5; DECLARE arg20 = 6; EXECUTE IMMEDIATE sqlStr1 USING arg10, arg20;  +---------+ |sum(col1)| +---------+ |       11| +---------+<\/code><\/pre>\n<p>Declare \u0438 set \u043c\u043e\u0436\u043d\u043e \u043a\u043e\u043c\u0431\u0438\u043d\u0438\u0440\u043e\u0432\u0430\u0442\u044c<\/p>\n<pre><code class=\"pgsql\">DECLARE sqlStr1 = 'SELECT SUM(col1) FROM VALUES(?), (?)'; SET test_arg = 123; DECLARE arg1 = ${test_arg}; EXECUTE IMMEDIATE sqlStr1 USING ${test_arg}, arg1;  +---------+ |sum(col1)| +---------+ |      246| +---------+<\/code><\/pre>\n<p>\u0412 \u0431\u0438\u043d\u0434 \u043f\u0435\u0440\u0435\u043c\u0435\u043d\u043d\u0443\u044e \u043d\u0435\u043b\u044c\u0437\u044f \u043f\u0435\u0440\u0435\u0434\u0430\u0442\u044c \u0441\u0435\u0441\u0441\u0438\u043e\u043d\u0443\u044e \u043f\u0435\u0440\u0435\u043c\u0435\u043d\u043d\u0443\u044e \u0441 \u0432\u044b\u0440\u0430\u0436\u0435\u043d\u0438\u0435\u043c:<\/p>\n<pre><code class=\"pgsql\">SET test_arg = (select 1);  EXECUTE IMMEDIATE sqlStr1 USING ${test_arg}, arg1;  # A query parameter contains unsupported expression. # Parameters can either be variables or literals<\/code><\/pre>\n<p>\u0418\u0437-\u0437\u0430 \u0442\u043e\u0433\u043e \u0447\u0442\u043e SQL \u0438 \u043f\u0435\u0440\u0435\u043c\u0435\u043d\u043d\u044b\u0435 \u0432 EXECUTE IMMEDIATE \u0434\u043e\u043b\u0436\u043d\u044b \u0431\u044b\u0442\u044c \u0441\u0442\u0430\u0442\u0438\u0447\u0435\u0441\u043a\u0438\u043c\u0438 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f\u043c\u0438, \u043f\u043e\u043a\u0430 \u0447\u0442\u043e \u044d\u0442\u043e \u0432\u044b\u0440\u0430\u0436\u0435\u043d\u0438\u0435 \u0437\u0430\u0442\u0440\u0443\u0434\u043d\u0438\u0442\u0435\u043b\u044c\u043d\u043e \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u0434\u043b\u044f \u0434\u0438\u043d\u0430\u043c\u0438\u0447\u0435\u0441\u043a\u043e\u0433\u043e \u0444\u043e\u0440\u043c\u0438\u0440\u043e\u0432\u0430\u043d\u0438\u044f \u0437\u0430\u043f\u0440\u043e\u0441\u043e\u0432.<\/p>\n<p>\u0414\u043e\u043f\u043e\u043b\u043d\u0438\u0442\u0435\u043b\u044c\u043d\u043e \u043d\u0430\u043f\u043e\u043c\u043d\u044e, \u0447\u0442\u043e \u0438\u043c\u0435\u043d\u043e\u0432\u0430\u043d\u043d\u044b\u0435 \u0431\u0438\u043d\u0434\u044b \u043f\u043e\u044f\u0432\u0438\u043b\u0438\u0441\u044c \u0432 PySpark 3.4 (SPARK-41271, SPARK-42702, SPARK-40281)<\/p>\n<pre><code class=\"python\">spark.sql(     sqlText = \"SELECT * FROM tbl WHERE date &gt; :startDate LIMIT :maxRows\",     args = Map(         \"startDate\" -&gt; \"DATE'2022-12-01'\",         \"maxRows\" -&gt; \"100\"))<\/code><\/pre>\n<p>\u0430 \u043f\u043e\u0437\u0438\u0446\u0438\u043e\u043d\u043d\u044b\u0435 \u0432 3.5<\/p>\n<pre><code class=\"python\">spark.sql(\"SELECT * FROM {df} WHERE {df[B]} &gt; ? and ? &lt; {df[A]}\"           , args=[5, 2], df=mydf).show()<\/code><\/pre>\n<h2>[SPARK-46057] Support SQL user-defined functions<\/h2>\n<p>\u0414\u043e\u0431\u0430\u0432\u043b\u0435\u043d\u0430 \u0432\u043e\u0437\u043c\u043e\u0436\u043d\u043e\u0441\u0442\u044c \u0441\u043e\u0437\u0434\u0430\u043d\u0438\u044f \u0438 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u043d\u0438\u044f \u0444\u0443\u043d\u0446\u0438\u0439 \u0432 Spark SQL.<\/p>\n<p>\u042d\u0442\u043e \u043c\u043e\u0433\u0443\u0442 \u0431\u044b\u0442\u044c \u043a\u0430\u043a \u043f\u0440\u043e\u0441\u0442\u044b\u0435 \u043c\u0430\u0442\u0435\u043c\u0430\u0442\u0438\u0447\u0435\u0441\u043a\u0438\u0435 \u0432\u044b\u0440\u0430\u0436\u0435\u043d\u0438\u044f<\/p>\n<pre><code class=\"pgsql\">CREATE FUNCTION to_hex(x INT COMMENT 'Any number between 0 - 255')   RETURNS STRING   COMMENT 'Converts a decimal to a hexadecimal'   CONTAINS SQL DETERMINISTIC   RETURN lpad(hex(least(greatest(0, x), 255)), 2, 0) ; SELECT to_hex(id) FROM range(2) ; +--------------------------------+ |spark_catalog.default.to_hex(id)| +--------------------------------+ |                              00| |                              01| +--------------------------------+  EXPLAIN SELECT to_hex(id) FROM range(2); == Physical Plan == *(1) Project [lpad(hex(cast(least(greatest(0, cast(id#225L as int)), 255) as bigint)), 2, 0) AS spark_catalog.default.to_hex(id)#226] +- *(1) Range (0, 2, step=1, splits=20)<\/code><\/pre>\n<p>\u0422\u0430\u043a \u0438 \u043e\u0431\u0440\u0430\u0449\u0435\u043d\u0438\u044f \u043a \u0434\u0440\u0443\u0433\u0438\u043c \u0442\u0430\u0431\u043b\u0438\u0446\u0430\u043c \u0437\u0430 \u0440\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442\u043e\u043c.<\/p>\n<p>\u0420\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442\u043e\u043c \u043c\u043e\u0436\u0435\u0442 \u0431\u044b\u0442\u044c \u0442\u0430\u0431\u043b\u0438\u0446\u0430:<\/p>\n<pre><code class=\"pgsql\">CREATE OR REPLACE TEMPORARY FUNCTION       get_order_name(order_id INT COMMENT 'Order ID')     RETURNS TABLE(name STRING COMMENT 'order name')    READS SQL DATA SQL SECURITY DEFINER    COMMENT 'Get order name by ID'    RETURN SELECT order_name FROM orders WHERE id = get_order_name.order_id ;  SELECT id, get_order_name.name      FROM VALUES(1),                (3) AS orders(id),          LATERAL get_order_name(orders.id) ;  +---+------+ | id|  name| +---+------+ |  1|name 1| |  3|name 3| +---+------+<\/code><\/pre>\n<p>\u041d\u043e \u0441\u0442\u043e\u0438\u0442 \u0437\u0430\u043c\u0435\u0442\u0438\u0442\u044c, \u0447\u0442\u043e \u043a\u0430\u043a \u0438 \u0441\u0435\u0441\u0441\u0438\u043e\u043d\u043d\u044b\u0435 \u043f\u0435\u0440\u0435\u043c\u0435\u043d\u043d\u044b\u0435 \u043f\u043e \u0441\u0443\u0442\u0438 \u044d\u0442\u043e \u043f\u043e\u0434\u0441\u0442\u0430\u0432\u043b\u044f\u0435\u043c\u044b\u0435 \u0448\u0430\u0431\u043b\u043e\u043d\u044b \u043f\u043e\u0434\u0433\u043e\u0442\u043e\u0432\u043b\u0435\u043d\u043d\u044b\u0445 SQL:<\/p>\n<pre><code class=\"sql\">explain SELECT id, get_order_name.name      FROM VALUES(1),                (3) AS orders(id),          LATERAL get_order_name(orders.id) ;  == Physical Plan == AdaptiveSparkPlan isFinalPlan=false +- Project [id#212, name#218]    +- BroadcastHashJoin [cast(id#212 as bigint)], [id#86L], Inner, BuildLeft, false       :- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=532]       :  +- LocalTableScan [id#212]       +- Project [order_name#88 AS name#218, id#86L]          +- Filter isnotnull(id#86L)             +- Scan ExistingRDD[id#86L,order_date#87,order_name#88]<\/code><\/pre>\n<p>\u041d\u0430 \u0442\u0435\u043a\u0443\u0449\u0438\u0439 \u043c\u043e\u043c\u0435\u043d\u0442 \u043d\u0435\u0442 \u0446\u0438\u043a\u043b\u043e\u0432, \u0443\u0441\u043b\u043e\u0432\u0438\u0439 \u0438\u043b\u0438 \u0434\u0438\u043d\u0430\u043c\u0438\u0447\u0435\u0441\u043a\u043e\u0433\u043e SQL.<\/p>\n<p>\u0414\u0440\u0443\u0433\u0438\u0435 \u0432\u0430\u0440\u0438\u0430\u0446\u0438\u0438 \u0444\u0443\u043d\u043a\u0446\u0438\u0439 \u043c\u043e\u0436\u043d\u043e \u0432\u0438\u0434\u0435\u0442\u044c \u0432 \u0442\u0435\u0441\u0442\u0430\u0445: <a href=\"https:\/\/github.com\/apache\/spark\/blob\/master\/sql\/core\/src\/test\/resources\/sql-tests\/inputs\/sql-udf.sql\" rel=\"noopener noreferrer nofollow\">https:\/\/github.com\/apache\/spark\/blob\/master\/sql\/core\/src\/test\/resources\/sql-tests\/inputs\/sql-udf.sql<\/a><\/p>\n<h2>[SPARK-44444] Use ANSI SQL mode by default<\/h2>\n<p>\u0422\u0435\u043f\u0435\u0440\u044c Spark SQL \u043f\u043e \u0443\u043c\u043e\u043b\u0447\u0430\u043d\u0438\u044e \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u0442 <a href=\"https:\/\/spark.apache.org\/docs\/latest\/sql-ref-ansi-compliance.html\" rel=\"noopener noreferrer nofollow\">ANSI \u0441\u0438\u043d\u0442\u0430\u043a\u0441\u0438\u0441 SQL<\/a><\/p>\n<p>\u041c\u043d\u043e\u0436\u0435\u0441\u0442\u0432\u043e \u0437\u0430\u043f\u0440\u043e\u0441\u043e\u0432, \u043a\u043e\u0442\u043e\u0440\u044b\u0435 \u0440\u0430\u043d\u044c\u0448\u0435 \u0434\u0430\u0432\u0430\u043b\u0438 NULL \u043d\u0430 \u0447\u0430\u0441\u0442\u0438 \u0437\u0430\u043f\u0440\u043e\u0441\u043e\u0432, \u0442\u0435\u043f\u0435\u0440\u044c \u0432\u043e\u0437\u0432\u0440\u0430\u0449\u0430\u044e\u0442 exception:<\/p>\n<pre><code class=\"python\">spark.conf.set(\"spark.sql.ansi.enabled\", \"true\") #default  spark.sql(\"select 1\/0\").show() # ArithmeticException: [DIVIDE_BY_ZERO] Division by zero.  # Use try_divide to tolerate divisor being 0 and return NULL instead.  # If necessary set \"spark.sql.ansi.enabled\" to \"false\" to bypass this error.  # SQLSTATE: 22012  spark.sql(\"select cast('test' as int)\").show() # NumberFormatException: [CAST_INVALID_INPUT] The value 'test' of the type \"STRING\" cannot be cast to \"INT\" because it is malformed.  # Correct the value as per the syntax, or change its target type.  # Use try_cast to tolerate malformed input and return NULL instead.  # SQLSTATE: 22018<\/code><\/pre>\n<p>\u041a\u0430\u043a \u0431\u044b\u043b\u043e \u0440\u0430\u043d\u044c\u0448\u0435:<\/p>\n<pre><code class=\"python\">spark.conf.set(\"spark.sql.ansi.enabled\", \"false\")  spark.sql(\"select 1\/0\").show() +-------+ |(1 \/ 0)| +-------+ |   NULL| +-------+  spark.sql(\"select cast('test' as int)\").show() +-----------------+ |CAST(test AS INT)| +-----------------+ |             NULL| +-----------------+<\/code><\/pre>\n<h2>[SPARK-49555] SQL Pipe syntax<\/h2>\n<p>\u0420\u0430\u0441\u0448\u0438\u0440\u0435\u043d\u0438\u0435 \u0432\u043e\u0437\u043c\u043e\u0436\u043d\u043e\u0441\u0442\u0435\u0439 <a href=\"https:\/\/spark.apache.org\/docs\/latest\/sql-pipe-syntax.html\" rel=\"noopener noreferrer nofollow\">\u0441\u0438\u043d\u0442\u0430\u043a\u0441\u0438\u0441\u0430 SQL<\/a>. \u042d\u0442\u043e \u043d\u0435 \u043d\u043e\u0432\u044b\u0439 \u044f\u0437\u044b\u043a, \u0430 \u043f\u0435\u0440\u0435\u043e\u0441\u043c\u044b\u0441\u043b\u0435\u043d\u0438\u0435 \u0441\u0443\u0449\u0435\u0441\u0442\u0432\u0443\u044e\u0449\u0435\u0433\u043e GoogleSQL<\/p>\n<p>\u041e\u0441\u043d\u043e\u0432\u043d\u044b\u0435 \u043f\u0440\u0435\u0438\u043c\u0443\u0449\u0435\u0441\u0442\u0432\u0430:<\/p>\n<ul>\n<li>\n<p>multi-level aggregations without subqueries<\/p>\n<\/li>\n<li>\n<p>Filtering anywhere<\/p>\n<\/li>\n<li>\n<p>starting from &#8212; \u201cinside-out\u201d structure <\/p>\n<\/li>\n<\/ul>\n<p>\u041f\u0440\u0438\u043c\u0435\u0440 \u043e\u0431\u044b\u0447\u043d\u043e\u0433\u043e SQL \u0441\u043e\u0434\u0435\u0440\u0436\u0438\u0442 \u0432\u043b\u043e\u0436\u0435\u043d\u043d\u044b\u0439 \u043f\u043e\u0434\u0437\u0430\u043f\u0440\u043e\u0441\u044b \u0438 \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u0442\u0441\u044f \u0441 \u0441\u0435\u043a\u0446\u0438\u0438 select<\/p>\n<pre><code class=\"sql\">SELECT c_count, COUNT(*) AS custdist FROM ( SELECT c_custkey, COUNT(o_orderkey) c_count FROM customer LEFT OUTER JOIN orders ON c_custkey = o_custkey AND o_comment NOT LIKE '%unusual%packages%' GROUP BY c_custkey ) AS c_orders GROUP BY c_count ORDER BY custdist DESC, c_count DESC;<\/code><\/pre>\n<p>SQL Pipe syntax \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u0442\u0441\u044f \u0441 \u0432\u044b\u0431\u043e\u0440\u043a\u0438 \u0434\u0430\u043d\u043d\u044b\u0445 \u0438 \u043f\u0440\u0438\u043c\u0435\u043d\u044f\u0435\u0442 \u0438\u043d\u0441\u0442\u0440\u0443\u043a\u0446\u0438\u0438 \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e \u0431\u0435\u0437 \u043f\u043e\u0434\u0437\u0430\u043f\u0440\u043e\u0441\u043e\u0432:<\/p>\n<pre><code class=\"sql\">FROM customer |&gt; LEFT OUTER JOIN orders ON c_custkey = o_custkey AND o_comment NOT LIKE '%unusual%packages%' |&gt; AGGREGATE COUNT(o_orderkey) c_count GROUP BY c_custkey |&gt; AGGREGATE COUNT(*) AS custdist GROUP BY c_count |&gt; ORDER BY custdist DESC, c_count DESC;<\/code><\/pre>\n<p>\u041f\u043e\u0434\u0440\u043e\u0431\u043d\u0435\u0439 \u043e \u043c\u043e\u0442\u0438\u0432\u0430\u0446\u0438\u0438, \u043f\u0440\u0435\u0438\u043c\u0443\u0449\u0435\u0441\u0442\u0432\u0430\u0445 \u0438 \u0432\u043e\u0437\u043c\u043e\u0436\u043d\u043e\u0441\u0442\u044f\u0445 \u0432 paper \u043e\u0442 Google: <a href=\"https:\/\/research.google\/pubs\/sql-has-problems-we-can-fix-them-pipe-syntax-in-sql\/\" rel=\"noopener noreferrer nofollow\">https:\/\/research.google\/pubs\/sql-has-problems-we-can-fix-them-pipe-syntax-in-sql\/<\/a><\/p>\n<h2>[SPARK-46908] Support star clause in WHERE clause<\/h2>\n<p>\u0440\u0430\u0441\u0448\u0438\u0440\u0435\u043d\u0438\u0435 \u0444\u0443\u043d\u043a\u0446\u0438\u043e\u043d\u0430\u043b\u0430 \u0432\u044b\u0431\u043e\u0440\u043a\u0438 \u0432\u0441\u0435\u0445 \u043a\u043e\u043b\u043e\u043d\u043e\u043a *<\/p>\n<pre><code class=\"sql\">-- \u0432\u043e\u0437\u0432\u0440\u0430\u0442 \u0432\u0441\u0435\u0445 \u043a\u043e\u043b\u043e\u043d\u043e\u043a, \u043a\u0440\u043e\u043c\u0435 TA.c1 and TB.cb SELECT * EXCEPT (c1, cb)  FROM VALUES(1, 2) AS TA(c1, c2), VALUES('a', 'b') AS TB(ca, cb); +---+---+ | c2| ca| +---+---+ |  2|  a| +---+---+  -- \u043f\u043e\u043b\u0443\u0447\u0438\u0442\u044c \u043f\u0435\u0440\u0432\u0443\u044e \u043d\u0435 \u043f\u0443\u0441\u0442\u0443\u044e \u043a\u043e\u043b\u043e\u043d\u043a\u0443 \u0432  TA SELECT coalesce(TA.*)  FROM VALUES(1, 2) AS TA(c1, c2), VALUES('a', 'b') AS TB(ca, cb); +----------------+ |coalesce(c1, c2)| +----------------+ |               1| +----------------+   -- \u0440\u0430\u0437\u0432\u0435\u0440\u043d\u0443\u0442\u044c \u0441\u0442\u0440\u0443\u043a\u0442\u0443\u0440\u0443 \u0432 \u043a\u043e\u043b\u043e\u043d\u043a\u0438 SELECT c1.* FROM VALUES(named_struct('x', 1, 'y', 2)) AS TA(c1); +---+---+ |  x|  y| +---+---+ |  1|  2| +---+---+  -- \u043f\u0440\u043e\u0432\u0435\u0440\u043a\u0430 \u0432\u0441\u0435\u0445 \u043a\u043e\u043b\u043e\u043d\u043e\u043a \u0432 \u0441\u0442\u0440\u043e\u043a\u0435 \u043d\u0430 \u0441\u043e\u043e\u0442\u0432\u0435\u0442\u0441\u0442\u0432\u0438\u0435 SELECT * FROM VALUES(1, 2, 3, 4, 5),(1, 2, NULL, 4, 5) AS TA(c1, c2, c3, c4, c)  WHERE array(*) = array(1, 2, NULL, 4, 5); +---+---+----+---+---+ | c1| c2|  c3| c4|  c| +---+---+----+---+---+ |  1|  2|NULL|  4|  5| +---+---+----+---+---+  -- \u043d\u0430\u043b\u0438\u0447\u0438\u0435 \u043a\u043e\u043d\u043a\u0440\u0435\u0442\u043d\u043e\u0433\u043e <\/code><\/pre>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[],"tags":[],"class_list":["post-465487","post","type-post","status-publish","format-standard","hentry"],"_links":{"self":[{"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=\/wp\/v2\/posts\/465487","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=465487"}],"version-history":[{"count":0,"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=\/wp\/v2\/posts\/465487\/revisions"}],"wp:attachment":[{"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=465487"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=465487"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=465487"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}