@@ -35,90 +35,10 @@ def test_parse_args(self):
3535 "--gcs.bigtable.input.location=gs://test" ,
3636 "--spark.bigtable.project.id=GCP_PROJECT" ,
3737 "--spark.bigtable.instance.id=BIGTABLE_INSTANCE_ID" ,
38- "--gcs.bigtable.catalog.json={key:value} " ])
38+ "--gcs.bigtable.catalog.json=gs://dataproc-templates/conf/employeecatalog.json " ])
 
         assert parsed_args["gcs.bigtable.input.format"] == "parquet"
         assert parsed_args["gcs.bigtable.input.location"] == "gs://test"
         assert parsed_args["spark.bigtable.project.id"] == "GCP_PROJECT"
         assert parsed_args["spark.bigtable.instance.id"] == "BIGTABLE_INSTANCE_ID"
-        assert parsed_args["gcs.bigtable.catalog.json"] == '{key:value}'
-
-    @mock.patch.object(pyspark.sql, 'SparkSession')
-    def test_run(self, mock_spark_session):
-        """Tests GCSToBigTableTemplate runs"""
-
-        gcs_to_bigtable_template = GCSToBigTableTemplate()
-        mock_parsed_args = gcs_to_bigtable_template.parse_args(
-            ["--gcs.bigtable.input.format=parquet",
-             "--gcs.bigtable.input.location=gs://test",
-             "--spark.bigtable.project.id=GCP_PROJECT",
-             "--spark.bigtable.instance.id=BIGTABLE_INSTANCE_ID",
-             "--gcs.bigtable.catalog.json={key:value}"])
-        mock_spark_session.read.parquet.return_value = mock_spark_session.dataframe.DataFrame
-        gcs_to_bigtable_template.run(mock_spark_session, mock_parsed_args)
-
-        mock_spark_session.read.parquet.assert_called_once_with("gs://test")
-        mock_spark_session.dataframe.DataFrame.write.format. \
-            assert_called_once_with(constants.FORMAT_BIGTABLE)
-        mock_spark_session.dataframe.DataFrame.write.format().options. \
-            assert_called_with(catalog='{key:value}')
-
-    @mock.patch.object(pyspark.sql, 'SparkSession')
-    def test_run_csv1(self, mock_spark_session):
-        """Tests GCSToBigTableTemplate runs with csv format"""
-
-        gcs_to_bigtable_template = GCSToBigTableTemplate()
-        mock_parsed_args = gcs_to_bigtable_template.parse_args(
-            ["--gcs.bigtable.input.format=csv",
-             "--gcs.bigtable.input.location=gs://test",
-             "--gcs.bigtable.input.header=false",
-             "--spark.bigtable.project.id=GCP_PROJECT",
-             "--spark.bigtable.instance.id=BIGTABLE_INSTANCE_ID",
-             "--gcs.bigtable.catalog.json={key:value}"])
-        mock_spark_session.read.format().options().load.return_value = mock_spark_session.dataframe.DataFrame
-        gcs_to_bigtable_template.run(mock_spark_session, mock_parsed_args)
-
-        mock_spark_session.read.format.assert_called_with(
-            constants.FORMAT_CSV)
-        mock_spark_session.read.format().options.assert_called_with(**{
-            constants.CSV_HEADER: 'false',
-            constants.CSV_INFER_SCHEMA: 'true',
-        })
-        mock_spark_session.read.format().options().load.assert_called_once_with("gs://test")
-        mock_spark_session.dataframe.DataFrame.write.format. \
-            assert_called_once_with(constants.FORMAT_BIGTABLE)
-        mock_spark_session.dataframe.DataFrame.write.format().options. \
-            assert_called_with(catalog='{key:value}')
-
-    @mock.patch.object(pyspark.sql, 'SparkSession')
-    def test_run_csv2(self, mock_spark_session):
-        """Tests GCSToBigTableTemplate runs with csv format and some optional csv options"""
-
-        gcs_to_bigtable_template = GCSToBigTableTemplate()
-        mock_parsed_args = gcs_to_bigtable_template.parse_args(
-            ["--gcs.bigtable.input.format=csv",
-             "--gcs.bigtable.input.location=gs://test",
-             "--gcs.bigtable.input.inferschema=false",
-             "--gcs.bigtable.input.sep=|",
-             "--gcs.bigtable.input.comment=#",
-             "--gcs.bigtable.input.timestampntzformat=yyyy-MM-dd'T'HH:mm:ss",
-             "--spark.bigtable.project.id=GCP_PROJECT",
-             "--spark.bigtable.instance.id=BIGTABLE_INSTANCE_ID",
-             "--gcs.bigtable.catalog.json={key:value}"])
-        mock_spark_session.read.format().options().load.return_value = mock_spark_session.dataframe.DataFrame
-        gcs_to_bigtable_template.run(mock_spark_session, mock_parsed_args)
-
-        mock_spark_session.read.format.assert_called_with(
-            constants.FORMAT_CSV)
-        mock_spark_session.read.format().options.assert_called_with(**{
-            constants.CSV_HEADER: 'true',
-            constants.CSV_INFER_SCHEMA: 'false',
-            constants.CSV_SEP: "|",
-            constants.CSV_COMMENT: "#",
-            constants.CSV_TIMESTAMPNTZFORMAT: "yyyy-MM-dd'T'HH:mm:ss",
-        })
-        mock_spark_session.read.format().options().load.assert_called_once_with("gs://test")
-        mock_spark_session.dataframe.DataFrame.write.format. \
-            assert_called_once_with(constants.FORMAT_BIGTABLE)
-        mock_spark_session.dataframe.DataFrame.write.format().options. \
-            assert_called_with(catalog='{key:value}')
+        assert parsed_args["gcs.bigtable.catalog.json"] == 'gs://dataproc-templates/conf/employeecatalog.json'