Merge pull request #110 from DFE-Digital/update-table-suffix-to-latest
Allow BQ tables to be suffixed with _latest
ericaporter authored Jul 5, 2024
2 parents e03d542 + 3c8f4e8 commit b478efd
Showing 5 changed files with 18 additions and 21 deletions.
9 changes: 2 additions & 7 deletions Rakefile
@@ -82,13 +82,8 @@ task :update_bigquery_tables do
config.version = `bundle exec ruby -e 'puts DfE::ReferenceData::VERSION'`.chomp
config.commit = `git rev-parse HEAD`.chomp

# Suffix table names with the major version number, so multiple release
# branches can coexist peacefully
version_parts = config.version.split('.')
major_version = version_parts[0]
minor_version = version_parts[1] || '0'

config.table_name_suffix = "_v#{major_version}_#{minor_version}"
# Suffix table names with '_latest' to push to the same table
config.table_name_suffix = '_latest'

puts "Updating #{config.project}.#{config.dataset} with version #{config.version}:"
end
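The naming change in the Rakefile hunk above can be sketched as standalone Ruby. This is a hedged illustration only: `versioned_suffix` and the `qualifications` base name are hypothetical, not part of the repository.

```ruby
# Hypothetical helper illustrating the old scheme: releases wrote to tables
# suffixed with the major and minor version, so release branches could coexist.
def versioned_suffix(version)
  major, minor = version.split('.')
  "_v#{major}_#{minor || '0'}"
end

base = 'qualifications'

# Old scheme: one table per major/minor release.
old_table = "#{base}#{versioned_suffix('1.2.3')}"
# New scheme: every release writes to the same `_latest` table.
new_table = "#{base}_latest"

puts old_table # => "qualifications_v1_2"
puts new_table # => "qualifications_latest"
```

The `|| '0'` fallback keeps single-segment versions like `'2'` valid, producing `_v2_0`.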
6 changes: 2 additions & 4 deletions docs/bigquery.md
@@ -13,9 +13,7 @@ The following lists are, on each release, automatically written into BigQuery un
Edit `BIGQUERY_TABLES` in the Rakefile to change this list.

The table name is made by taking the base table name from the table above, and
appending a version based on the major version of the release, for instance
`_v1`. This is to allow existing users of older releases to continue operating,
rather than changing the data structure out underneath them.
appending `_latest`. This ensures that the data is always pushed to the same table, providing a consistent and up-to-date reference.

### Internals

@@ -38,5 +36,5 @@ If you have a `dfe-reference-data_bigquery_api_key.json` file or the
test of the BigQuery importer will be run. This will create a randomly-named
table in the `cross-teacher-services.dfe_reference_data_dev` dataset and,
hopefully, delete it afterwards. The project and dataset used for the test can
be overriden with the `BIGQUERY_QA_PROJECT` and `BIGQUERY_QA_DATASET`
be overridden with the `BIGQUERY_QA_PROJECT` and `BIGQUERY_QA_DATASET`
environment variables, respectively.
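The override behaviour described above is a plain environment-variable fallback. A sketch, with the `qa_target` helper being hypothetical (the real spec reads `ENV` directly into constants):

```ruby
# Sketch of the env-var override: fall back to the shared QA project/dataset
# when BIGQUERY_QA_PROJECT / BIGQUERY_QA_DATASET are not set.
def qa_target(env)
  {
    project: env['BIGQUERY_QA_PROJECT'] || 'cross-teacher-services',
    dataset: env['BIGQUERY_QA_DATASET'] || 'dfe_reference_data_dev'
  }
end

defaults = qa_target({})
custom   = qa_target('BIGQUERY_QA_DATASET' => 'my_sandbox')

puts defaults[:dataset] # => "dfe_reference_data_dev"
puts custom[:dataset]   # => "my_sandbox"
```

Passing the environment as a parameter rather than reading `ENV` inline makes the fallback easy to exercise in isolation.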
1 change: 0 additions & 1 deletion docs/lists_qualifications.md
@@ -28,4 +28,3 @@ This list is [autocomplete compatible](autocomplete_compatability.md).
| `match_synonyms` | string array | A list of common alternative names that are equivalent to this type. An answer matching a match synonym can be safely matched to this type.|
| `level` | enumerated string | The level of the qualification. `entry`, `1`, `2`, `3`, `4`, `5`, `6`, or `7`. |
| `degree` | optional enumerated symbol | If the qualification type is a degree, the level of degree - `foundation`, `bachelor`, `master` or `doctor` |
4 changes: 2 additions & 2 deletions lib/dfe/reference_data/bigquery/importer.rb
@@ -21,7 +21,7 @@ class Config
self.max_retry_sleep = 60 # seconds
self.bigquery_retries = 10
self.bigquery_timeout = 10
self.table_name_suffix = ''
self.table_name_suffix = '_latest' # Default suffix

def self.configure
yield(self)
@@ -236,7 +236,7 @@ def convert_list_to_bigquery_format(list)
end

def update_reference_list_into_bigquery_table(dataset, table_name, list)
table = dataset.table table_name
table = dataset.table(table_name)

# NOTE: TO FUTURE SELF: If we start to deal with very large reference
# lists we might start to hit BigQuery API limits documented here:
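The `Config` hunk above relies on class-level accessors with defaults plus a `configure` block. A minimal self-contained sketch of that pattern (an assumed shape for illustration, not the actual importer code):

```ruby
# Minimal sketch of the importer's Config pattern: class-level accessors with
# defaults, overridable through a yielded configure block.
class Config
  class << self
    attr_accessor :table_name_suffix, :bigquery_retries, :bigquery_timeout
  end

  # Defaults mirroring the diff: '_latest' is now the default suffix.
  self.table_name_suffix = '_latest'
  self.bigquery_retries = 10
  self.bigquery_timeout = 10

  def self.configure
    yield(self)
  end
end

Config.configure do |config|
  config.bigquery_retries = 5
end

puts Config.table_name_suffix # => "_latest"
puts Config.bigquery_retries  # => 5
```

Yielding the class itself keeps call sites identical to instance-based configuration blocks while needing no instantiation.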
19 changes: 12 additions & 7 deletions spec/lib/dfe/reference_data/bigquery/importer_spec.rb
@@ -5,7 +5,8 @@
BIGQUERY_PROJECT = (ENV['BIGQUERY_QA_PROJECT'] || 'cross-teacher-services').freeze
BIGQUERY_DATASET = (ENV['BIGQUERY_QA_DATASET'] || 'dfe_reference_data_dev').freeze

TEST_TABLE_NAME = "test_#{SecureRandom.uuid}".freeze
TEST_TABLE_NAME_PREFIX = "test_#{SecureRandom.uuid}".freeze
TEST_TABLE_NAME = "#{TEST_TABLE_NAME_PREFIX}_latest".freeze

FAKE_VERSION = '1.2.3'.freeze
FAKE_COMMIT = '22596363b3de40b06f981fb85d82312e8c0ed511'.freeze
@@ -122,18 +123,19 @@

it 'imported OK' do
DfE::ReferenceData::BigQuery::Config.configure do |config|
config.tables = [[TEST_TABLE_NAME, test_data]]
config.tables = [[TEST_TABLE_NAME_PREFIX, test_data]]
end

# Just need to check it doesn't throw an error
DfE::ReferenceData::BigQuery.update_tables
end

# This is probably not very idiomatic RSpec, suggestions welcome!

it 'reads back OK' do
results = []
dataset.table(TEST_TABLE_NAME).data.all do |row|
table = dataset.table(TEST_TABLE_NAME)
expect(table).not_to be_nil, "Table #{TEST_TABLE_NAME} should exist but it does not."

table.data.all do |row|
results.append(row)
end

@@ -203,8 +205,11 @@
end

it 'metadata reads back OK' do
expect(dataset.table(TEST_TABLE_NAME).description).to eq 'A list of dummy data (see https://github.com/DFE-Digital/dfe-reference-data/blob/main/spec/lib/dfe/reference_data/bigquery/importer_spec.rb for more details)'
expect(dataset.table(TEST_TABLE_NAME).fields.map do |x|
table = dataset.table(TEST_TABLE_NAME)
expect(table).not_to be_nil, "Table #{TEST_TABLE_NAME} should exist but it does not."

expect(table.description).to eq 'A list of dummy data (see https://github.com/DFE-Digital/dfe-reference-data/blob/main/spec/lib/dfe/reference_data/bigquery/importer_spec.rb for more details)'
expect(table.fields.map do |x|
[x.name, x.description]
end).to contain_exactly(['array_boolean', nil],
['array_integer', nil],
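The renamed spec constants above separate the randomly-generated base name from the suffixed name the importer actually creates: the spec configures tables with the prefix, then reads back the `_latest`-suffixed name. A sketch of that composition:

```ruby
require 'securerandom'

# The spec configures the importer with a random prefix, then expects the
# importer to have appended '_latest' to the table it creates.
test_table_name_prefix = "test_#{SecureRandom.uuid}".freeze
test_table_name = "#{test_table_name_prefix}_latest".freeze

puts test_table_name.start_with?('test_') # => true
puts test_table_name.end_with?('_latest') # => true
```

Keeping both constants avoids the mismatch that would otherwise occur between the name passed to `config.tables` and the name looked up via `dataset.table`.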
