Skip to content

Commit

Permalink
support preflight api
Browse files Browse the repository at this point in the history
  • Loading branch information
emanueldima committed Apr 20, 2022
1 parent f3fcdc1 commit f54946c
Show file tree
Hide file tree
Showing 8 changed files with 206 additions and 10 deletions.
6 changes: 6 additions & 0 deletions backend/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ switchboard:
cleanupPeriod: ${DATASTORE_CLEANPERIOD:-13}
cleanupPeriodUnit: ${DATASTORE_CLEANPERIOD_UNIT:-minutes}
urlResolver:
# preflight = used for the preflight API, see https://github.com/clarin-eric/switchboard-doc/blob/master/documentation/IntegrationProvider.md#preflight-api
preflightConnectRequestTimeout: ${URLRESOLVER_PREFLIGHT_CONNECTREQUESTTIMEOUT:-1}
preflightConnectTimeout: ${URLRESOLVER_PREFLIGHT_CONNECTTIMEOUT:-1}
preflightReadTimeout: ${URLRESOLVER_PREFLIGHT_READTIMEOUT:-1}
# connectRequestTimeout = until the http client library can provide a thread for the request; 0 is infinite
connectRequestTimeout: ${URLRESOLVER_CONNECTREQUESTTIMEOUT:-1}
connectTimeout: ${URLRESOLVER_CONNECTTIMEOUT:-3}
readTimeout: ${URLRESOLVER_READTIMEOUT:-3}
unit: ${URLRESOLVER_TIMEOUT_UNIT:-seconds}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@
import eu.clarin.switchboard.core.xc.SwitchboardExceptionMapper;
import eu.clarin.switchboard.health.AppHealthCheck;
import eu.clarin.switchboard.profiler.DefaultProfiler;
import eu.clarin.switchboard.profiler.api.Profiler;
import eu.clarin.switchboard.resources.DataResource;
import eu.clarin.switchboard.resources.InfoResource;
import eu.clarin.switchboard.resources.MainResource;
import eu.clarin.switchboard.resources.ToolsResource;
import eu.clarin.switchboard.resources.UrlMatchResource;
import io.dropwizard.Application;
import io.dropwizard.assets.AssetsBundle;
import io.dropwizard.configuration.EnvironmentVariableSubstitutor;
Expand Down Expand Up @@ -98,12 +98,14 @@ public void run(RootConfig configuration, Environment environment) throws IOExce
InfoResource infoResource = new InfoResource(toolRegistry, gitProperties, switchboardConfig);
DataResource dataResource = new DataResource(mediaLibrary);
ToolsResource toolsResource = new ToolsResource(toolRegistry, switchboardConfig.getTools());
UrlMatchResource urlMatchResource = new UrlMatchResource(mediaLibrary, toolRegistry, switchboardConfig.getTools());

environment.jersey().register(SwitchboardExceptionMapper.class);
environment.jersey().register(MultiPartFeature.class);
environment.jersey().register(infoResource);
environment.jersey().register(dataResource);
environment.jersey().register(toolsResource);
environment.jersey().register(urlMatchResource);
environment.jersey().register(new MainResource(mediaLibrary));

environment.healthChecks().register("switchboard", new AppHealthCheck());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,24 @@
import java.time.temporal.ChronoUnit;

public class UrlResolverConfig {
@NotNull
private int connectRequestTimeout;

@NotNull
private int connectTimeout;

@NotNull
private int readTimeout;

@NotNull
private int preflightConnectRequestTimeout;

@NotNull
private int preflightConnectTimeout;

@NotNull
private int preflightReadTimeout;

@Valid
@NotNull
@JsonProperty
Expand All @@ -33,6 +45,11 @@ public UrlResolverConfig(int connectTimeout, int readTimeout, String unit, int m
this.maxHttpCacheEntries = maxHttpCacheEntries;
}

public int getConnectRequestTimeout() {
ChronoUnit u = ChronoUnit.valueOf(unit.trim().toUpperCase());
return (int) Duration.of(connectRequestTimeout, u).getNano() / 1000 / 1000;
}

public int getConnectTimeout() {
ChronoUnit u = ChronoUnit.valueOf(unit.trim().toUpperCase());
return (int) Duration.of(connectTimeout, u).getNano() / 1000 / 1000;
Expand All @@ -43,6 +60,21 @@ public int getReadTimeout() {
return (int) Duration.of(readTimeout, u).getNano() / 1000 / 1000;
}

public int getPreflightConnectRequestTimeout() {
ChronoUnit u = ChronoUnit.valueOf(unit.trim().toUpperCase());
return (int) Duration.of(preflightConnectRequestTimeout, u).getNano() / 1000 / 1000;
}

public int getPreflightConnectTimeout() {
ChronoUnit u = ChronoUnit.valueOf(unit.trim().toUpperCase());
return (int) Duration.of(preflightConnectTimeout, u).getNano() / 1000 / 1000;
}

public int getPreflightReadTimeout() {
ChronoUnit u = ChronoUnit.valueOf(unit.trim().toUpperCase());
return (int) Duration.of(preflightReadTimeout, u).getNano() / 1000 / 1000;
}

public int getMaxHttpCacheEntries() {
return maxHttpCacheEntries;
}
Expand Down
33 changes: 28 additions & 5 deletions backend/src/main/java/eu/clarin/switchboard/core/MediaLibrary.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package eu.clarin.switchboard.core;

import com.google.common.io.ByteStreams;
import eu.clarin.switchboard.app.config.DataStoreConfig;
import eu.clarin.switchboard.app.config.UrlResolverConfig;
import eu.clarin.switchboard.core.xc.CommonException;
Expand Down Expand Up @@ -41,6 +42,7 @@ public class MediaLibrary {
private final TextExtractor textExtractor;
private final StoragePolicy storagePolicy;
private final CloseableHttpClient cachingClient;
private final CloseableHttpClient preflightCachingClient;
private final ExecutorService executorService;

Map<UUID, FileInfoFuture> fileInfoFutureMap = Collections.synchronizedMap(new HashMap<>());
Expand All @@ -61,6 +63,7 @@ public MediaLibrary(DataStore dataStore,
.setMaxObjectSize(dataStoreConfig.getMaxSize())
.build();
RequestConfig requestConfig = RequestConfig.custom()
.setConnectionRequestTimeout(urlResolverConfig.getConnectRequestTimeout())
.setConnectTimeout(urlResolverConfig.getConnectTimeout())
.setSocketTimeout(urlResolverConfig.getReadTimeout())
.setMaxRedirects(MAX_ALLOWED_REDIRECTS)
Expand All @@ -73,6 +76,18 @@ public MediaLibrary(DataStore dataStore,
.addInterceptorFirst(Quirks.QUIRKS_REQUEST_INTERCEPTOR)
.build();

RequestConfig preflightRequestConfig = RequestConfig.custom()
.setConnectionRequestTimeout(urlResolverConfig.getPreflightConnectRequestTimeout())
.setConnectTimeout(urlResolverConfig.getPreflightConnectTimeout())
.setSocketTimeout(urlResolverConfig.getPreflightReadTimeout())
.setMaxRedirects(MAX_ALLOWED_REDIRECTS)
.setRedirectsEnabled(true)
.build();
preflightCachingClient = CachingHttpClients.custom()
.setCacheConfig(cacheConfig)
.setDefaultRequestConfig(preflightRequestConfig)
.build();

executorService = Executors.newCachedThreadPool();

ScheduledExecutorService executor = Executors.newSingleThreadScheduledExecutor();
Expand All @@ -90,7 +105,14 @@ private void addFileInfoFuture(FileInfoFuture fif) throws StoragePolicyException

public FileInfo addByUrl(String originalUrlOrDoiOrHandle, Profile profile) throws CommonException, ProfilingException {
UUID id = UUID.randomUUID();
FileInfo fileInfo = addByUrl(cachingClient, dataStore, profiler, storagePolicy, id, originalUrlOrDoiOrHandle, profile);
FileInfo fileInfo = addByUrl(cachingClient, dataStore, profiler, storagePolicy, id, originalUrlOrDoiOrHandle, profile, null);
addFileInfoFuture(new FileInfoFuture(id, wrap(fileInfo)));
return fileInfo;
}

public FileInfo addByUrlPreflight(String originalUrlOrDoiOrHandle, Profile profile) throws CommonException, ProfilingException {
UUID id = UUID.randomUUID();
FileInfo fileInfo = addByUrl(preflightCachingClient, dataStore, profiler, storagePolicy, id, originalUrlOrDoiOrHandle, profile, 4096L);
addFileInfoFuture(new FileInfoFuture(id, wrap(fileInfo)));
return fileInfo;
}
Expand All @@ -106,7 +128,7 @@ public FileInfo addFile(String filename, InputStream inputStream, Profile profil
public UUID addByUrlAsync(String originalUrlOrDoiOrHandle, Profile profile) throws StoragePolicyException {
UUID id = UUID.randomUUID();
Future<FileInfo> future = executorService.submit(() ->
addByUrl(cachingClient, dataStore, profiler, storagePolicy, id, originalUrlOrDoiOrHandle, profile));
addByUrl(cachingClient, dataStore, profiler, storagePolicy, id, originalUrlOrDoiOrHandle, profile, null));
addFileInfoFuture(new FileInfoFuture(id, future));
return id;
}
Expand Down Expand Up @@ -202,13 +224,14 @@ private static Future<FileInfo> wrap(FileInfo fileInfo) {

private static FileInfo addByUrl(CloseableHttpClient cachingClient,
DataStore dataStore, Profiler profiler, StoragePolicy storagePolicy,
UUID id, String originalUrlOrDoiOrHandle, Profile profile)
UUID id, String originalUrlOrDoiOrHandle, Profile profile, Long dataLimit)
throws CommonException, ProfilingException {
LinkMetadata.LinkInfo linkInfo = LinkMetadata.getLinkData(cachingClient, originalUrlOrDoiOrHandle);
try {
storagePolicy.acceptSize(linkInfo.response.getEntity().getContentLength());
FileInfo fileInfo = addFile(dataStore, profiler, storagePolicy,
id, linkInfo.filename, linkInfo.response.getEntity().getContent(), null);
InputStream dataStream = dataLimit == null ? linkInfo.response.getEntity().getContent() :
ByteStreams.limit(linkInfo.response.getEntity().getContent(), dataLimit);
FileInfo fileInfo = addFile(dataStore, profiler, storagePolicy, id, linkInfo.filename, dataStream, null);
fileInfo.setLinksInfo(originalUrlOrDoiOrHandle, linkInfo.downloadLink, linkInfo.redirects);
Quirks.specializeProfile(fileInfo, profile);
return fileInfo;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ public String toString() {
}

public List<ToolMatches> filterTools(List<Profile> profiles, Predicate<Tool> filter) {
long startTime = System.nanoTime();
List<ToolMatches> results = new ArrayList<>();

for (Tool tool : tools.get()) {
Expand Down Expand Up @@ -194,6 +195,8 @@ public List<ToolMatches> filterTools(List<Profile> profiles, Predicate<Tool> fil
return tm1.getTool().getName().compareToIgnoreCase(tm2.getTool().getName());
});

LOGGER.debug("matched " + profiles.size() + " profile(s) against " + tools.get().size() + " tools in " +
(System.nanoTime() - startTime)/1000000 + "ms");
return results;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ public ToolsResource(ToolRegistry toolRegistry, ToolConfig toolConfig) {
this.toolConfig = toolConfig;
}

/**
* PUBLIC API
*/
@GET
@Path("tools")
@Produces(MediaType.APPLICATION_JSON + ";charset=utf-8")
Expand All @@ -44,30 +47,38 @@ public Response getTools(@QueryParam("onlyProductionTools") String onlyProductio
return Response.ok(tools).build();
}

/**
* PUBLIC API
*/
@GET
@Path("tools/{id}")
@Produces(MediaType.APPLICATION_JSON + ";charset=utf-8")
public Response getToolByID(@PathParam("id") Integer id) {
Predicate<Tool> filter = tool-> tool.getId() != null && tool.getId().equals(id);
Predicate<Tool> filter = tool -> tool.getId() != null && tool.getId().equals(id);
List<Tool> tools = toolRegistry.filterTools(filter);
if (tools == null || tools.isEmpty()) {
return Response.status(Response.Status.NOT_FOUND).build();
}
return Response.ok(tools.get(0)).build();
}

/**
* PUBLIC API
*/
@POST
@Path("tools/match")
@Consumes(MediaType.APPLICATION_JSON)
@Produces(MediaType.APPLICATION_JSON + ";charset=utf-8")
public Response getToolsByProfile(@QueryParam("onlyProductionTools") String onlyProductionTools,
List<Profile.Flat> flat) {
long startTime = System.nanoTime();
Predicate<Tool> filter = toolConfig.getShowOnlyProductionTools() ? ToolRegistry.ONLY_PRODUCTION_TOOLS : ToolRegistry.ALL_TOOLS;
if (onlyProductionTools != null && !onlyProductionTools.isEmpty()) {
filter = Boolean.parseBoolean(onlyProductionTools) ? ToolRegistry.ONLY_PRODUCTION_TOOLS : ToolRegistry.ALL_TOOLS;
}
List<Profile> profiles = flat.stream().map(Profile.Flat::toProfile).collect(Collectors.toList());
List<ToolRegistry.ToolMatches> toolMatches = toolRegistry.filterTools(profiles, filter);
LOGGER.debug("getToolsByProfile finished in {}ms", ((System.nanoTime() - startTime) / 1000));

return Response.ok(toolMatches).build();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package eu.clarin.switchboard.resources;

import com.fasterxml.jackson.databind.ObjectMapper;
import eu.clarin.switchboard.app.FileAsset;
import eu.clarin.switchboard.app.config.ToolConfig;
import eu.clarin.switchboard.core.MediaLibrary;
import eu.clarin.switchboard.core.ToolRegistry;
import eu.clarin.switchboard.core.xc.CommonException;
import eu.clarin.switchboard.profiler.api.Profile;
import eu.clarin.switchboard.profiler.api.ProfilingException;
import eu.clarin.switchboard.tool.Tool;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.ws.rs.*;
import javax.ws.rs.core.Context;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Request;
import javax.ws.rs.core.Response;
import java.nio.file.Paths;
import java.util.List;
import java.util.Objects;
import java.util.function.Predicate;
import java.util.stream.Collectors;

@Path("/api/urlmatch")
public class UrlMatchResource {
private static final Logger LOGGER = LoggerFactory.getLogger(UrlMatchResource.class);

MediaLibrary mediaLibrary;
ToolRegistry toolRegistry;
ToolConfig toolConfig;

public UrlMatchResource(MediaLibrary mediaLibrary, ToolRegistry toolRegistry, ToolConfig toolConfig) {
this.mediaLibrary = mediaLibrary;
this.toolRegistry = toolRegistry;
this.toolConfig = toolConfig;
}

/**
* PUBLIC API
*/
@POST
@Consumes(MediaType.APPLICATION_JSON)
@Produces(MediaType.APPLICATION_JSON + ";charset=utf-8")
public Response doUrlsMatch(@QueryParam("onlyProductionTools") String onlyProductionTools, List<String> urlList) throws CommonException, ProfilingException {
long startTime = System.nanoTime();
List<Profile> profiles = urlList.stream().map(url -> {
try {
return mediaLibrary.addByUrlPreflight(url, null).getProfile().toProfile();
} catch (Exception e) {
LOGGER.info("Exception in urlmatch addByUrlPreflight: "+e.getMessage());
return null;
}
}).filter(Objects::nonNull).collect(Collectors.toList());

Predicate<Tool> filter = toolConfig.getShowOnlyProductionTools() ? ToolRegistry.ONLY_PRODUCTION_TOOLS : ToolRegistry.ALL_TOOLS;
if (onlyProductionTools != null && !onlyProductionTools.isEmpty()) {
filter = Boolean.parseBoolean(onlyProductionTools) ? ToolRegistry.ONLY_PRODUCTION_TOOLS : ToolRegistry.ALL_TOOLS;
}

boolean timeout = profiles.isEmpty();
int matches = timeout ? 0 : toolRegistry.filterTools(profiles, filter).size();
LOGGER.debug("urlmatch finished in " + ((System.nanoTime() - startTime) / 1000000) + "ms");

return Response.ok(new JsonResponse(timeout, matches, profiles)).build();
}

public static class JsonResponse {
public final boolean timeout;
public final int matches;
public final List<Profile.Flat> detectedProfiles;

public JsonResponse(boolean timeout, int matches, List<Profile> profiles) {
this.timeout = timeout;
this.matches = matches;
this.detectedProfiles = profiles.stream().map(Profile::flat).collect(Collectors.toList());
}
}
}
Loading

0 comments on commit f54946c

Please sign in to comment.