Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion sqlglot/dialects/athena.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,13 @@ def _partitioned_by_property_sql(self: Athena.Generator, e: exp.PartitionedByPro
return f"{prop_name}={self.sql(e, 'this')}"


def _file_format_property_sql(self: Athena.Generator, e: exp.FileFormatProperty) -> str:
    """Render a file-format property as ``format='<name>'``.

    The format name is always emitted as a string literal, regardless of the
    node type that carries it in the expression (when transpiling it can be a
    ``Var`` rather than a string literal).

    Args:
        self: The generator performing the rendering.
        e: The file-format property to render.

    Returns:
        ``format=''`` when the property has no value; otherwise ``format=``
        followed by the value's name rendered as a string literal.
    """
    this = e.args.get("this")
    if not this:
        return "format=''"

    # Render through the active generator instead of relying on str(), so the
    # literal is produced with this generator's settings, consistent with the
    # other property helpers in this module that call self.sql(...).
    return f"format={self.sql(exp.Literal.string(this.name))}"
Comment on lines +74 to +78
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems a bit more verbose than necessary, let's inline it in TRANSFORMS:

            exp.FileFormatProperty: lambda self, e: f"format={self.sql(exp.Literal.string(e.name))}",



class Athena(Trino):
"""
Over the years, it looks like AWS has taken various execution engines, bolted on AWS-specific modifications and then
Expand Down Expand Up @@ -148,7 +155,7 @@ class Generator(Trino.Generator):

TRANSFORMS = {
**Trino.Generator.TRANSFORMS,
exp.FileFormatProperty: lambda self, e: f"format={self.sql(e, 'this')}",
exp.FileFormatProperty: _file_format_property_sql,
exp.PartitionedByProperty: _partitioned_by_property_sql,
exp.LocationProperty: _location_property_sql,
}
Expand Down
19 changes: 19 additions & 0 deletions tests/dialects/test_athena.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,3 +337,22 @@ def test_parse_partitioned_by_returns_iceberg_transforms(self):
assert isinstance(parsed.this, exp.Schema)
assert next(n for n in parsed.this.expressions if isinstance(n, exp.PartitionedByBucket))
assert next(n for n in parsed.this.expressions if isinstance(n, exp.PartitionByTruncate))

def test_ctas_uses_string_for_format(self):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add a validate_all test as well to demonstrate that Spark -> Athena is now covered?

# When transpiling, sometimes the FileFormatProperty is a Var, not a
# string literal and we need to ensure the athena dialect uses a string
# literal for the format
ctas_hive = exp.Create(
this=exp.to_table("foo.bar"),
kind="TABLE",
properties=exp.Properties(
expressions=[exp.FileFormatProperty(this=exp.Var(this="parquet"))]
),
expression=exp.select("1"),
)

# Even if identify=True, the format value should be rendered as a single-quoted string literal, not as a quoted identifier
self.assertEqual(
ctas_hive.sql(dialect=self.dialect, identify=True),
'CREATE TABLE "foo"."bar" WITH (format=\'parquet\') AS SELECT 1',
)
3 changes: 2 additions & 1 deletion tests/dialects/test_dialect.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import unittest
from typing import Optional

from sqlglot import (
Dialect,
Expand All @@ -16,7 +17,7 @@


class Validator(unittest.TestCase):
dialect = None
dialect: Optional[str] = None
Comment on lines -19 to +20
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did this change? The rest of the file is not typed.


def parse_one(self, sql, **kwargs):
return parse_one(sql, read=self.dialect, **kwargs)
Expand Down