我想使用 Python 和 Dataflow 读取 BigQuery 中的一张表。我事先不知道表的名称,因此使用模板参数来传递表名,如下所示: 但是当我启动作业时,出现以下错误: 我读过 this answer,但从 2017 年到现在还是没有解决办法吗? 答案 0 :(得分:0) 在 here 中提到的文档中,.
.
.
from apache_beam.options.pipeline_options import PipelineOptions
class DataflowOptions(PipelineOptions):
    """Custom pipeline options exposing the BigQuery table name."""

    @classmethod
    def _add_argparse_args(cls, parser):
        """Register ``--table_name`` on ``parser`` as a value-provider argument."""
        parser.add_value_provider_argument(
            '--table_name',
            help='Name of table on BigQuery')
def run(argv=None):
    """Build and run a pipeline that reads one BigQuery table.

    Args:
        argv: Not used by this snippet; ``PipelineOptions()`` is constructed
            without it.
    """
    pipeline_options = PipelineOptions()
    dataflow_options = pipeline_options.view_as(DataflowOptions)
    with beam.Pipeline(options=pipeline_options) as pipeline:
        # NOTE(review): str() is applied to the value-provider option while the
        # pipeline is being constructed, so tableId receives the provider's
        # repr ("RuntimeValueProvider(...)") instead of the table name. That
        # is the string shown in the "Invalid table ID" error reported for
        # this job.
        table_spec = bigquery.TableReference(
            projectId='MyProyectId',
            datasetId='MyDataset',
            tableId=str(dataflow_options.table_name))
        p = (pipeline | 'Read Table' >> beam.io.Read(beam.io.BigQuerySource(table_spec)))
# Run the pipeline only when this file is executed as a script.
if __name__ == '__main__':
    run()
Workflow failed. Causes: S01:Read Table+Batch Users/ParDo(_GlobalWindowsBatchingDoFn)+Hash Users+Upload to Ads failed., BigQuery getting table "RuntimeValueProvider(option: table_name, type: str, default_value: None)" from dataset "MyDataset" in project "MyProject" failed., BigQuery execution failed., Error:
Message: Invalid table ID "RuntimeValueProvider(option: table_name, type: str, default_value: None)".
HTTP Code: 400
1 个答案:
TableReference
采用以下参数(dataset_ref, table_id)
。从您的代码段来看,括号似乎放置不正确。 with beam.Pipeline(options=pipeline_options) as pipeline:
dataset_ref = bigquery.DatasetReference('my-project-id', 'some_dataset')
table_spec = bigquery.TableReference(dataset_ref,
tableId=str(dataflow_options.table_name)