admin管理员组文章数量:1123694
from sqlalchemy import create_engine, text
from langchain_community.utilities import SQLDatabase
from langchain_community.agent_toolkits import create_sql_agent
from langchain.agents.agent_types import AgentType
from google.cloud import bigquery
from langchain.prompts import PromptTemplate
# Define custom prompt template
CUSTOM_PREFIX = """You are an agent designed to interact with only BigQuery database.
IMPORTANT: Always use the full path format 'project_id.dataset.table' when referring to tables or views.
For example, use 'project_id.dataset.my_table' instead of just 'my_table'.
Given an input question, create a syntactically correct BigQuery query to run, then look at the results and return the answer.
Unless the user specifies a specific number of results, always limit your query to 1000 rows by using LIMIT 1000.
Some examples of full path format:
- Correct: SELECT * FROM `project_id.dataset.my_table`
- Correct: SELECT * FROM `project_id.dataset.my_view`
- Incorrect: SELECT * FROM my_table
- Incorrect: SELECT * FROM gold.my_table
Example :
User question: Get me 10 account_ids
BigQuery query: SELECT * FROM 'project-id.dataset.table' LIMIT 10;
Remember:
1. Always use backticks (`) around the full table path
2. Always include the project_id, dataset, and table name
3. Use standard BigQuery SQL syntax
4. Dont look for any SET statement or add any in the query.
5. Be very specefic for the BigQuery syntax.
6. Do not set any session variable since its not necessary in BigQuery.
7. Remove unnecessary parameterization.
8. Ensure you're using standard BigQuery syntax.
NOTE: Avoid using session variables like SET @@dataset_id=?, as BigQuery does not require this.
"""
def create_bigquery_agent_with_views(
project_id: str,
dataset: str,
llm,
include_tables: bool = True,
include_views: bool = True
):
"""
Creates a SQL agent that can access both tables and views in BigQuery.
Args:
project_id: Google Cloud project ID
dataset: BigQuery dataset name
llm: Language model instance
include_tables: Whether to include tables in inspection
include_views: Whether to include views in inspection
"""
connection_string = f"bigquery://{project_id}/{dataset}"
# Modified inspection query to get both tables and views
inspection_query = f"""
SELECT
t.table_name,
t.table_type,
c.column_name,
c.data_type,
c.is_nullable
FROM `{project_id}.{dataset}.INFORMATION_SCHEMA.COLUMNS` c
JOIN `{project_id}.{dataset}.INFORMATION_SCHEMA.TABLES` t
ON c.table_name = t.table_name
WHERE 1=1
{" AND t.table_type = 'BASE TABLE'" if include_tables and not include_views else ""}
{" AND t.table_type = 'VIEW'" if include_views and not include_tables else ""}
ORDER BY t.table_name, c.ordinal_position
"""
engine = create_engine(connection_string)
# Build the table info dictionary
custom_table_info = {}
with engine.connect() as connection:
result = connection.execute(text(inspection_query))
rows = result.fetchall()
for row in result:
table_name = row.table_name
full_table_name = f"{project_id}.{dataset}.{table_name}"
if full_table_name not in custom_table_info:
custom_table_info[full_table_name] = {
'columns': [],
'table_type': row.table_type
}
custom_table_info[full_table_name]['columns'].append({
'name': row.column_name,
'type': row.data_type,
'nullable': row.is_nullable
})
# Convert the custom_table_info to the format expected by SQLDatabase
formatted_table_info = {}
for full_table_name, info in custom_table_info.items():
column_str = ", ".join(
f"{col['name']} {col['type']} {'NULL' if col['nullable'] == 'YES' else 'NOT NULL'}"
for col in info['columns']
)
formatted_table_info[full_table_name] = f"CREATE TABLE {full_table_name} ({column_str})"
# Create the SQLDatabase instance with custom prefix
db = SQLDatabase(
engine=engine,
schema=dataset,
custom_table_info=formatted_table_info,
include_tables=[], # We're using custom_table_info instead
view_support=True
)
# Create and return the SQL agent with custom prompt
agent_executor = create_sql_agent(
llm=llm,
db=db,
agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
prefix=CUSTOM_PREFIX,
top_k=1000,
verbose=True
)
return agent_executor
# Example usage
if __name__ == "__main__":
from vertexai.language_models import ChatModel
from langchain_google_vertexai import ChatVertexAI
# Initialize the LLM
llm = ChatVertexAI(model="gemini-1.5-flash")
# Create agent with view support
agent = create_bigquery_agent_with_views(
project_id="project_id",
dataset="dataset",
llm=llm
)'''
Error I am getting for the above code :/Thought: I should query the account table for 10 account_id values Action: sql_db_query Action Input: SELECT account_id FROM
gold
.account
LIMIT 10Error: (google.cloud.bigquery.dbapi.exceptions.ProgrammingError) Parameters were provided, but 'SET @@dataset_id=?' has no placeholders. [SQL: SET @@dataset_id=?] [parameters: ('gold',)] (Background on this error at: )Thought: I need to remove theSET @@dataset_id=gold
part from the query. Action: sql_db_query Action Input: SELECT account_id FROMgold
.account
LIMIT 10Error: (google.cloud.bigquery.dbapi.exceptions.ProgrammingError) Parameters were provided, but 'SET @@dataset_id=?' has no placeholders. [SQL: SET @@dataset_id=?] [parameters: ('gold',)] (Background on this error at: )Thought: I need to remove theSET @@dataset_id=gold
part from the query again, and run the query again. Action: sql_db_query Action Input: SELECT account_id FROMgold
.account
LIMIT 10Error: (google.cloud.bigquery.dbapi.exceptions.ProgrammingError) Parameters were provided, but 'SET @@dataset_id=?' has no placeholders. [SQL: SET @@dataset_id=?] [parameters: ('gold',)] (Background on this error at: )Thought: It seems like the query is failing because thesql_db_query
function is trying to set the dataset_id, but the query doesn't have any parameters. I'm going to remove theSET @@dataset_id=gold
from my query. Action: sql_db_query Action Input: SELECT account_id FROMgold
.account
LIMIT 10Error: (google.cloud.bigquery.dbapi.exceptions.ProgrammingError) Parameters were provided, but 'SET @@dataset_id=?' has no placeholders. [SQL: SET @@dataset_id=?] [parameters: ('gold',)] (Background on this error at: )Thought: I need to manually remove theSET @@dataset_id=gold
part from the query and try running it again. Action: sql_db_query Action Input: SELECT account_id FROMgold
.account
LIMIT 10Error: (google.cloud.bigquery.dbapi.exceptions.ProgrammingError) Parameters were provided, but 'SET @@dataset_id=?' has no placeholders. [SQL: SET @@dataset_id=?] [parameters: ('gold',)] (Background on this error at: )Thought: I need to manually remove theSET @@dataset_id=gold
part from the query and try running it again. Action: sql_db_query Action Input: SELECT account_id FROMgold
.account
LIMIT 10Error: (google.cloud.bigquery.dbapi.exceptions.ProgrammingError) Parameters were provided, but 'SET @@dataset_id=?' has no placeholders. [SQL: SET @@dataset_id=?] [parameters: ('gold',)] (Background on this error at: )Thought: It seems like the tool is not able to run the query correctly. I'm going to manually remove theSET @@dataset_id=gold
from my query. Action: sql_db_query Action Input: SELECT account_id FROMgold
.account
LIMIT 10Error: (google.cloud.bigquery.dbapi.exceptions.ProgrammingError) Parameters were provided, but 'SET @@dataset_id=?' has no placeholders. [SQL: SET @@dataset_id=?] [parameters: ('gold',)] (Background on this error at: )Thought: It seems like the tool is not able to run the query correctly. I'm going to try manually running the query in the BigQuery console. Final Answer: I don't know.Finished chain.
本文标签:
版权声明:本文标题:sqlalchemy - I want to create a sql agent that can communicate with my Bigquery data but facing continously error regarding the 内容由网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:http://www.betaflare.com/web/1736587135a1945026.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
发表评论