diff --git a/data_labeling_examples/.gitignore b/data_labeling_examples/.gitignore
index 508f1c9d..cc578b3c 100644
--- a/data_labeling_examples/.gitignore
+++ b/data_labeling_examples/.gitignore
@@ -2,4 +2,3 @@
.classpath
/target/
.settings/
-src/main/java/com/oracle/.DS_Store
diff --git a/data_labeling_examples/README.md b/data_labeling_examples/README.md
index c2e1dd47..261920c8 100644
--- a/data_labeling_examples/README.md
+++ b/data_labeling_examples/README.md
@@ -56,9 +56,44 @@ Result of CUSTOM_LABELS_MATCH algorithm:
For more information [SDK for Java](https://docs.oracle.com/en-us/iaas/Content/API/SDKDocs/javasdk.htm)
+### Running the Utility
+1. Open a terminal on your system.
+2. Verify that Java 8 or higher is installed on your system. If Java is not installed, download it from https://www.oracle.com/java/technologies/downloads/
+
+```
+java -version
+```
+3. Clone the repository.
+
+```
+git clone https://github.com/oracle-samples/oci-data-science-ai-samples.git
+```
+4. Go to the data_labeling_examples directory inside the cloned repository.
+
+```
+cd oci-data-science-ai-samples/data_labeling_examples
+```
+5. Run the following command to bulk label using the "FIRST_LETTER_MATCH" labeling algorithm.
+
+```
+java -DCONFIG_FILE_PATH='~/.oci/config' -DCONFIG_PROFILE=DEFAULT -DDLS_DP_URL=https://dlsprod-dp.us-ashburn-1.oci.oraclecloud.com -DTHREAD_COUNT=20 -DDATASET_ID=ocid1.compartment.oc1..aaaaaaaawob4faujxaqxqzrb555b44wxxrfkcpapjxwp4s4hwjthu46idr5a -DLABELING_ALGORITHM=FIRST_LETTER_MATCH -DLABELS=cat,dog -cp libs/bulklabelutility-v1.jar com.oracle.datalabelingservicesamples.scripts.SingleLabelDatasetBulkLabelingScript
+```
+6. Run the following command to bulk label using the "FIRST_REGEX_MATCH" labeling algorithm.
+
+```
+java -DCONFIG_FILE_PATH='~/.oci/config' -DCONFIG_PROFILE=DEFAULT -DDLS_DP_URL=https://dlsprod-dp.us-ashburn-1.oci.oraclecloud.com -DTHREAD_COUNT=20 -DDATASET_ID=ocid1.compartment.oc1..aaaaaaaawob4faujxaqxqzrb555b44wxxrfkcpapjxwp4s4hwjthu46idr5a -DLABELING_ALGORITHM=FIRST_REGEX_MATCH -DFIRST_MATCH_REGEX_PATTERN=^abc* -DLABELS=cat,dog -cp libs/bulklabelutility-v1.jar com.oracle.datalabelingservicesamples.scripts.SingleLabelDatasetBulkLabelingScript
+```
+7. Run the following command to bulk label using the "CUSTOM_LABELS_MATCH" labeling algorithm.
+
+```
+java -DCONFIG_FILE_PATH='~/.oci/config' -DCONFIG_PROFILE=DEFAULT -DDLS_DP_URL=https://dlsprod-dp.us-ashburn-1.oci.oraclecloud.com -DTHREAD_COUNT=20 -DDATASET_ID=ocid1.compartment.oc1..aaaaaaaawob4faujxaqxqzrb555b44wxxrfkcpapjxwp4s4hwjthu46idr5a -DLABELING_ALGORITHM=CUSTOM_LABELS_MATCH -DCUSTOM_LABELS='{"dog/": ["dog"], "cat/": ["cat"] }' -cp libs/bulklabelutility-v1.jar com.oracle.datalabelingservicesamples.scripts.CustomBulkLabelingScript
+```
+
+Note: You can override any configuration by passing -D followed by the configuration name and its value (for example, -DTHREAD_COUNT=20). All supported configurations are listed in the following section.
+
### Configurations
-Add the following configurations in config.properties file in the project to run the scripts:
+The following is the list of all configurations (defined in the src/main/resources/config.properties file) supported by the bulk labeling scripts:
```
#Path of Config File
@@ -68,10 +103,7 @@ CONFIG_FILE_PATH=~/.oci/config
CONFIG_PROFILE=DEFAULT
#DLS DP URL
-DLS_DP_URL=https://dlstest-dp.${REGION}.oci.oraclecloud.com
-
-#Region where dataset is created
-REGION=uk-london-1
+DLS_DP_URL=https://dlsprod-dp.uk-london-1.oci.oraclecloud.com
#Dataset Id whose record you want to bulk label
DATASET_ID=ocid1.compartment.oc1..aaaaaaaawob4faujxaqxqzrb555b44wxxrfkcpapjxwp4s4hwjthu46idr5a
diff --git a/data_labeling_examples/libs/bulklabelutility-v1.jar b/data_labeling_examples/libs/bulklabelutility-v1.jar
new file mode 100644
index 00000000..77645ca5
Binary files /dev/null and b/data_labeling_examples/libs/bulklabelutility-v1.jar differ
diff --git a/data_labeling_examples/pom.xml b/data_labeling_examples/pom.xml
index 7d30cdae..d1b99e61 100644
--- a/data_labeling_examples/pom.xml
+++ b/data_labeling_examples/pom.xml
@@ -7,7 +7,8 @@
0.0.1-SNAPSHOT
OCI Data Labeling Service Examples
This repository contains code samples for OCI Data
- Labeling Service
+ Labeling Service
+
com.oracle.oci.sdk
@@ -19,12 +20,6 @@
oci-java-sdk-datalabelingservicedataplane
2.19.0
-
- org.projectlombok
- lombok
- 1.18.22
- provided
-
org.apache.logging.log4j
log4j-api
@@ -40,5 +35,43 @@
slf4j-api
1.7.32
+
+ org.projectlombok
+ lombok
+ 1.18.22
+ provided
+
+
+ 8
+ 8
+
+
+
+
+ org.apache.maven.plugins
+ maven-assembly-plugin
+
+
+ package
+
+ single
+
+
+
+
+
+ com.oracle.datalabelingservicesamples.scripts.SingleLabelDatasetBulkLabelingScript
+
+
+
+
+ jar-with-dependencies
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/data_labeling_examples/src/main/java/com/oracle/datalabelingservicesamples/constants/DataLabelingConstants.java b/data_labeling_examples/src/main/java/com/oracle/datalabelingservicesamples/constants/DataLabelingConstants.java
index 8973fb15..40508fac 100644
--- a/data_labeling_examples/src/main/java/com/oracle/datalabelingservicesamples/constants/DataLabelingConstants.java
+++ b/data_labeling_examples/src/main/java/com/oracle/datalabelingservicesamples/constants/DataLabelingConstants.java
@@ -2,5 +2,17 @@
public class DataLabelingConstants {
- public static final int MAX_LIST_RECORDS_LIMITS = 1000;
+ public static final int MAX_LIST_RECORDS_LIMITS = 1000;
+ public static final int DEFAULT_THREAD_COUNT = 30;
+
+ public static final String CONFIG_FILE_PATH = "CONFIG_FILE_PATH";
+ public static final String CONFIG_PROFILE = "CONFIG_PROFILE";
+ public static final String DLS_DP_URL = "DLS_DP_URL";
+ public static final String DATASET_ID = "DATASET_ID";
+ public static final String REGION = "REGION";
+ public static final String LABELING_ALGORITHM = "LABELING_ALGORITHM";
+ public static final String THREAD_COUNT = "THREAD_COUNT";
+ public static final String LABELS = "LABELS";
+ public static final String CUSTOM_LABELS="CUSTOM_LABELS";
+ public static final String FIRST_MATCH_REGEX_PATTERN = "FIRST_MATCH_REGEX_PATTERN";
}
diff --git a/data_labeling_examples/src/main/java/com/oracle/datalabelingservicesamples/labelingstrategies/FirstRegexMatch.java b/data_labeling_examples/src/main/java/com/oracle/datalabelingservicesamples/labelingstrategies/FirstRegexMatch.java
index 9badb682..a3ccd583 100644
--- a/data_labeling_examples/src/main/java/com/oracle/datalabelingservicesamples/labelingstrategies/FirstRegexMatch.java
+++ b/data_labeling_examples/src/main/java/com/oracle/datalabelingservicesamples/labelingstrategies/FirstRegexMatch.java
@@ -3,18 +3,15 @@
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
-import java.util.regex.Pattern;
import com.oracle.bmc.datalabelingservicedataplane.model.RecordSummary;
import com.oracle.datalabelingservicesamples.requests.Config;
public class FirstRegexMatch implements LabelingStrategy {
- private static final Pattern pattern = Pattern.compile(Config.INSTANCE.getRegexPattern());
-
@Override
public List getLabel(RecordSummary record) {
- Matcher m = pattern.matcher(record.getName());
+ Matcher m = Config.INSTANCE.getPattern().matcher(record.getName());
if (m.find()) {
String firstGroup = m.group(0);
for (String label : Config.INSTANCE.getLabels()) {
diff --git a/data_labeling_examples/src/main/java/com/oracle/datalabelingservicesamples/requests/Config.java b/data_labeling_examples/src/main/java/com/oracle/datalabelingservicesamples/requests/Config.java
index ecec5046..76d06660 100644
--- a/data_labeling_examples/src/main/java/com/oracle/datalabelingservicesamples/requests/Config.java
+++ b/data_labeling_examples/src/main/java/com/oracle/datalabelingservicesamples/requests/Config.java
@@ -5,7 +5,9 @@
import java.util.List;
import java.util.Map;
import java.util.Properties;
+import java.util.regex.Pattern;
+import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import com.fasterxml.jackson.core.JsonProcessingException;
@@ -14,6 +16,7 @@
import com.oracle.bmc.auth.AuthenticationDetailsProvider;
import com.oracle.bmc.auth.ConfigFileAuthenticationDetailsProvider;
import com.oracle.bmc.datalabelingservicedataplane.DataLabelingClient;
+import com.oracle.datalabelingservicesamples.constants.DataLabelingConstants;
import com.oracle.datalabelingservicesamples.labelingstrategies.CustomLabelMatch;
import com.oracle.datalabelingservicesamples.labelingstrategies.FirstLetterMatch;
import com.oracle.datalabelingservicesamples.labelingstrategies.FirstRegexMatch;
@@ -33,77 +36,95 @@ public enum Config {
private String configProfile;
private String dpEndpoint;
private String datasetId;
- private String region;
private List labels;
private Map> customLabels;
private String labelingAlgorithm;
private LabelingStrategy labelingStrategy;
private String regexPattern;
+ private Pattern pattern;
private int threadCount;
private Config() {
try {
Properties config = new Properties();
config.load(getClass().getClassLoader().getResourceAsStream("config.properties"));
- configFilePath = config.getProperty("CONFIG_FILE_PATH");
- configProfile = config.getProperty("CONFIG_PROFILE");
- dpEndpoint = config.getProperty("DLS_DP_URL");
- datasetId = config.getProperty("DATASET_ID");
- region = config.getProperty("REGION");
- labelingAlgorithm = config.getProperty("LABELING_ALGORITHM");
- String threadConfig = config.getProperty("THREAD_COUNT");
- if (!threadConfig.isEmpty()) {
- threadCount = Integer.parseInt(threadConfig);
- } else {
- threadCount = 20;
- }
+ configFilePath = StringUtils.isEmpty(System.getProperty(DataLabelingConstants.CONFIG_FILE_PATH))
+ ? config.getProperty(DataLabelingConstants.CONFIG_FILE_PATH)
+ : System.getProperty(DataLabelingConstants.CONFIG_FILE_PATH);
+ configProfile = StringUtils.isEmpty(System.getProperty(DataLabelingConstants.CONFIG_PROFILE))
+ ? config.getProperty(DataLabelingConstants.CONFIG_PROFILE)
+ : System.getProperty(DataLabelingConstants.CONFIG_PROFILE);
+ dpEndpoint = StringUtils.isEmpty(System.getProperty(DataLabelingConstants.DLS_DP_URL))
+ ? config.getProperty(DataLabelingConstants.DLS_DP_URL)
+ : System.getProperty(DataLabelingConstants.DLS_DP_URL);
+ datasetId = StringUtils.isEmpty(System.getProperty(DataLabelingConstants.DATASET_ID))
+ ? config.getProperty(DataLabelingConstants.DATASET_ID)
+ : System.getProperty(DataLabelingConstants.DATASET_ID);
+ labelingAlgorithm = StringUtils.isEmpty(System.getProperty(DataLabelingConstants.LABELING_ALGORITHM))
+ ? config.getProperty(DataLabelingConstants.LABELING_ALGORITHM)
+ : System.getProperty(DataLabelingConstants.LABELING_ALGORITHM);
+ String threadConfig = StringUtils.isEmpty(System.getProperty(DataLabelingConstants.THREAD_COUNT))
+ ? config.getProperty(DataLabelingConstants.THREAD_COUNT)
+ : System.getProperty(DataLabelingConstants.THREAD_COUNT);
+ threadCount = (!threadConfig.isEmpty()) ? Integer.parseInt(threadConfig)
+ : DataLabelingConstants.DEFAULT_THREAD_COUNT;
performAssertionOninput();
initializeLabelingStrategy();
validateAndInitializeLabels(config);
- dpEndpoint = dpEndpoint.replace("${REGION}", region);
dlsDpClient = initializeDpClient();
} catch (IOException ex) {
ExceptionUtils.wrapAndThrow(ex);
}
}
+ private void initializeLabelingStrategy() {
+ switch (labelingAlgorithm) {
+ case "FIRST_LETTER_MATCH":
+ labelingStrategy = new FirstLetterMatch();
+ break;
+
+ case "FIRST_REGEX_MATCH":
+ labelingStrategy = new FirstRegexMatch();
+ break;
+
+ case "CUSTOM_LABELS_MATCH":
+ labelingStrategy = new CustomLabelMatch();
+ break;
+ }
+ }
+
@SuppressWarnings("unchecked")
private void validateAndInitializeLabels(Properties config) {
switch (labelingAlgorithm) {
case "FIRST_LETTER_MATCH":
case "FIRST_REGEX_MATCH":
- labels = Arrays.asList(config.getProperty("LABELS").split(","));
+ String inputlLabels = StringUtils.isEmpty(System.getProperty(DataLabelingConstants.LABELS))
+ ? config.getProperty(DataLabelingConstants.LABELS)
+ : System.getProperty(DataLabelingConstants.LABELS);
+ labels = Arrays.asList(inputlLabels.split(","));
assert null != labels && labels.isEmpty() == false : "Labels Cannot be empty";
break;
+
case "CUSTOM_LABELS_MATCH":
try {
+ String customLabel = StringUtils.isEmpty(System.getProperty(DataLabelingConstants.CUSTOM_LABELS))
+ ? config.getProperty(DataLabelingConstants.CUSTOM_LABELS)
+ : System.getProperty(DataLabelingConstants.CUSTOM_LABELS);
ObjectMapper mapper = new ObjectMapper();
- customLabels = mapper.readValue(config.getProperty("CUSTOM_LABELS"), Map.class);
+ customLabels = mapper.readValue(customLabel, Map.class);
} catch (JsonProcessingException e) {
log.error("Invalid Custom Labels Provided as Input");
ExceptionUtils.wrapAndThrow(e);
}
-
- }
- if (labelingAlgorithm.equals("FIRST_REGEX_MATCH")) {
- regexPattern = config.getProperty("FIRST_MATCH_REGEX_PATTERN");
- }
- }
-
- private void initializeLabelingStrategy() {
- switch (labelingAlgorithm) {
- case "FIRST_LETTER_MATCH":
- labelingStrategy = new FirstLetterMatch();
break;
+ }
- case "FIRST_REGEX_MATCH":
- labelingStrategy = new FirstRegexMatch();
- break;
-
- case "CUSTOM_LABELS_MATCH":
- labelingStrategy = new CustomLabelMatch();
- break;
+ if (labelingAlgorithm.equals("FIRST_REGEX_MATCH")) {
+ regexPattern = StringUtils.isEmpty(System.getProperty(DataLabelingConstants.FIRST_MATCH_REGEX_PATTERN))
+ ? config.getProperty(DataLabelingConstants.FIRST_MATCH_REGEX_PATTERN)
+ : System.getProperty(DataLabelingConstants.FIRST_MATCH_REGEX_PATTERN);
+ pattern = Pattern.compile(regexPattern);
}
}
@@ -118,7 +139,6 @@ private DataLabelingClient initializeDpClient() {
final AuthenticationDetailsProvider configFileProvider = new ConfigFileAuthenticationDetailsProvider(
configFile);
dlsDpClient = new DataLabelingClient(configFileProvider);
- dlsDpClient.setRegion(region);
dlsDpClient.setEndpoint(dpEndpoint);
return dlsDpClient;
}
@@ -128,7 +148,6 @@ private void performAssertionOninput() {
assert configProfile != null : "Config Profile cannot be empty";
assert dpEndpoint != null : "DLS DP URL cannot be empty";
assert datasetId != null : "Dataset Id cannot be empty";
- assert region != null : "Region Cannot be empty";
assert labelingAlgorithm != null : "Labeling Strategy cannot be empty";
assert threadCount >= 1 : "Invalid Thread Count Passed";
}
diff --git a/data_labeling_examples/src/main/java/com/oracle/datalabelingservicesamples/scripts/CustomBulkLabelingScript.java b/data_labeling_examples/src/main/java/com/oracle/datalabelingservicesamples/scripts/CustomBulkLabelingScript.java
index 85486f0c..f1c927e1 100644
--- a/data_labeling_examples/src/main/java/com/oracle/datalabelingservicesamples/scripts/CustomBulkLabelingScript.java
+++ b/data_labeling_examples/src/main/java/com/oracle/datalabelingservicesamples/scripts/CustomBulkLabelingScript.java
@@ -88,6 +88,7 @@ public static void main(String[] args) throws InterruptedException, ExecutionExc
.runAsync(() -> processAnnotationForRecord(record, label), executorService);
completableFutures.add(future);
} else {
+ log.error("Label is null for record {}",record);
failedRecordIds.add(record.getId());
}
}
diff --git a/data_labeling_examples/src/main/java/com/oracle/datalabelingservicesamples/scripts/SingleLabelDatasetBulkLabelingScript.java b/data_labeling_examples/src/main/java/com/oracle/datalabelingservicesamples/scripts/SingleLabelDatasetBulkLabelingScript.java
index 7a95eb7e..3ad550f9 100644
--- a/data_labeling_examples/src/main/java/com/oracle/datalabelingservicesamples/scripts/SingleLabelDatasetBulkLabelingScript.java
+++ b/data_labeling_examples/src/main/java/com/oracle/datalabelingservicesamples/scripts/SingleLabelDatasetBulkLabelingScript.java
@@ -78,6 +78,7 @@ public static void main(String[] args) throws InterruptedException, ExecutionExc
.runAsync(() -> processAnnotationForRecord(record, label), executorService);
completableFutures.add(future);
} else {
+ log.error("Label is null for record {}",record);
failedRecordIds.add(record.getId());
}
}
diff --git a/data_labeling_examples/src/main/resources/config.properties b/data_labeling_examples/src/main/resources/config.properties
index bfdb4a23..86c65eb7 100644
--- a/data_labeling_examples/src/main/resources/config.properties
+++ b/data_labeling_examples/src/main/resources/config.properties
@@ -1,7 +1,6 @@
CONFIG_FILE_PATH=~/.oci/config
CONFIG_PROFILE=DEFAULT
-DLS_DP_URL=https://dlstest-dp.${REGION}.oci.oraclecloud.com
-REGION=uk-london-1
+DLS_DP_URL=https://dlsprod-dp.uk-london-1.oci.oraclecloud.com
THREAD_COUNT=30
DATASET_ID=ocid1.datalabelingdatasetint.oc1.uk-london-1.amaaaaaaniob46iarr2zttq7c5th3jfqwab7d3vrq4daa52tcnnwhkgrowca
@@ -16,9 +15,4 @@ LABELS=cat,dog
FIRST_MATCH_REGEX_PATTERN=^abc*
#Used for CUSTOM_LABELS_MATCH labeling algorithm
-CUSTOM_LABELS={ "dog/": ["dog","pup"], "cat/": ["cat","kitten"] }
-
-
-
-
-
+CUSTOM_LABELS={ "dog/": ["dog","pup"], "cat/": ["cat","kitten"] }
\ No newline at end of file