Skip to content

Commit

Permalink
f
Browse files Browse the repository at this point in the history
  • Loading branch information
ttizze committed Jul 27, 2024
1 parent 278c3eb commit 79ea683
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,7 @@ export async function fetchLatestPageVersionWithTranslations(
targetLanguage: string,
): Promise<LatestPageVersionWithTranslations | null> {
const pageVersion = await prisma.pageVersion.findFirst({
where: {
url: {
contains: url,
},
},
where: { url },
orderBy: { createdAt: "desc" },
select: {
title: true,
Expand Down
6 changes: 3 additions & 3 deletions web/app/routes/reader.$encodedUrl/route.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import type { ActionFunctionArgs, LoaderFunctionArgs } from "@remix-run/node";
import { useParams } from "@remix-run/react";
import { typedjson, useTypedLoaderData } from "remix-typedjson";
import { Header } from "~/components/Header";
import { prepareUrlForSearchFromRawInput } from "~/utils/normalize-and-sanitize-url.server";
import { normalizeAndSanitizeUrl } from "~/utils/normalize-and-sanitize-url.server";
import { getTargetLanguage } from "~/utils/target-language.server";
import { authenticator } from "../../utils/auth.server";
import { ContentWithTranslations } from "./components/ContentWithTranslations";
Expand All @@ -24,9 +24,9 @@ export const loader = async ({ params, request }: LoaderFunctionArgs) => {

const safeUser = await authenticator.isAuthenticated(request);
const safeUserId = safeUser?.id;
const searchUrl = prepareUrlForSearchFromRawInput(encodedUrl);
const normalizedUrl = normalizeAndSanitizeUrl(encodedUrl);
const pageData = await fetchLatestPageVersionWithTranslations(
searchUrl,
normalizedUrl,
safeUserId ?? 0,
targetLanguage,
);
Expand Down
14 changes: 0 additions & 14 deletions web/app/utils/normalize-and-sanitize-url.server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,3 @@ export function normalizeAndSanitizeUrl(inputUrl: string): string {
removeQueryParameters: true,
});
}

/**
 * Reduces a URL to a lowercase, scheme-less key suitable for substring search.
 *
 * Steps, in order: lowercase the input, strip a leading `http://` or
 * `https://`, strip a leading `www.`, drop everything from the first `?` or
 * `#` onward, and remove a single trailing slash.
 *
 * @param inputUrl - URL (or URL-like string) to reduce.
 * @returns The reduced, lowercased URL; an empty string for empty input.
 */
export function prepareUrlForSearch(inputUrl: string): string {
  // Lowercase first so the prefix patterns below also match inputs such as
  // "HTTPS://WWW.Example.com"; lowercasing last left those prefixes in place.
  const lowered = inputUrl.toLowerCase();
  const withoutPrefix = lowered
    .replace(/^https?:\/\//, "")
    .replace(/^www\./, "");
  // split() always yields at least one element; the default only keeps the
  // type `string` when indexed access is checked strictly.
  const [beforeQueryOrFragment = ""] = withoutPrefix.split(/[?#]/);
  return beforeQueryOrFragment.replace(/\/$/, "");
}

/**
 * Builds a search key from raw input by passing it through
 * `normalizeAndSanitizeUrl` and then `prepareUrlForSearch`.
 *
 * @param inputUrl - Raw URL string to convert.
 * @returns The result of `prepareUrlForSearch` applied to the normalized URL.
 */
export function prepareUrlForSearchFromRawInput(inputUrl: string): string {
  return prepareUrlForSearch(normalizeAndSanitizeUrl(inputUrl));
}
70 changes: 70 additions & 0 deletions web/scripts/normalize-page-urls.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import { PrismaClient } from "@prisma/client";
import { normalizeAndSanitizeUrl } from "../app/utils/normalize-and-sanitize-url.server";

// Prisma client used by this script; normalizeUrls() disconnects it in its `finally` block.
const prisma = new PrismaClient();

/**
 * Rewrites every stored `Page.url` and `PageVersion.url` to the value returned
 * by `normalizeAndSanitizeUrl`, working through each table in batches.
 *
 * Rows whose URL is already normalized are left untouched. Any error stops the
 * run, is logged, and marks the process as failed via `process.exitCode`; the
 * Prisma client is disconnected in every case.
 *
 * NOTE(review): if either `url` column carries a unique constraint, two rows
 * normalizing to the same URL would make an update throw and abort the run —
 * confirm against the schema before executing.
 */
async function normalizeUrls() {
  const batchSize = 100;
  let processedPageCount = 0;
  let processedPageVersionCount = 0;

  try {
    // Normalize Page URLs
    while (true) {
      const pages = await prisma.page.findMany({
        take: batchSize,
        skip: processedPageCount,
        // Offset pagination needs a stable order: without it the database may
        // return rows in a different order after each update, so rows could be
        // skipped or visited twice. `id` never changes during the run.
        orderBy: { id: "asc" },
        select: { id: true, url: true },
      });

      if (pages.length === 0) break;

      for (const page of pages) {
        const normalizedUrl = normalizeAndSanitizeUrl(page.url);
        if (normalizedUrl !== page.url) {
          await prisma.page.update({
            where: { id: page.id },
            data: { url: normalizedUrl },
          });
          console.log(`Updated Page URL: ${page.url} -> ${normalizedUrl}`);
        }
      }

      processedPageCount += pages.length;
      console.log(`Processed ${processedPageCount} pages`);
    }

    // Normalize PageVersion URLs
    while (true) {
      const pageVersions = await prisma.pageVersion.findMany({
        take: batchSize,
        skip: processedPageVersionCount,
        // Same stable-order requirement as for pages above.
        orderBy: { id: "asc" },
        select: { id: true, url: true },
      });

      if (pageVersions.length === 0) break;

      for (const pageVersion of pageVersions) {
        const normalizedUrl = normalizeAndSanitizeUrl(pageVersion.url);
        if (normalizedUrl !== pageVersion.url) {
          await prisma.pageVersion.update({
            where: { id: pageVersion.id },
            data: { url: normalizedUrl },
          });
          console.log(`Updated PageVersion URL: ${pageVersion.url} -> ${normalizedUrl}`);
        }
      }

      processedPageVersionCount += pageVersions.length;
      console.log(`Processed ${processedPageVersionCount} page versions`);
    }

    console.log("URL normalization complete");
  } catch (error) {
    console.error("Error during URL normalization:", error);
    // Report failure to the shell/CI instead of exiting 0 after a partial run.
    process.exitCode = 1;
  } finally {
    await prisma.$disconnect();
  }
}

// Script entry point. The promise is intentionally not awaited: normalizeUrls()
// handles its own errors and disconnects the client itself.
void normalizeUrls();

0 comments on commit 79ea683

Please sign in to comment.