Skip to content

Commit

Permalink
algorithm change
Browse files Browse the repository at this point in the history
  • Loading branch information
4kimov committed Sep 10, 2023
1 parent 60d9f76 commit f8a03b4
Show file tree
Hide file tree
Showing 9 changed files with 232 additions and 226 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# CHANGELOG

**v0.3.0:** **⚠️ BREAKING CHANGE**
- **Breaking change**: generated IDs have changed. The algorithm has been fine-tuned for better performance [[Issue #11](https://github.com/sqids/sqids-spec/issues/11)]
- `alphabet` cannot contain multibyte characters
- `min_length` was changed from `usize` to `u8`
- The maximum number of blocklist re-encoding attempts has been capped at the length of the alphabet minus 1
- Minimum alphabet length has changed from 5 to 3
- `min_value()` and `max_value()` functions have been removed

**v0.2.1:**
- Bug fix: spec update (PR #7): blocklist filtering in uppercase-only alphabet [[PR #7](https://github.com/sqids/sqids-spec/pull/7)]
- Updated GitHub Actions to use stable toolchain instead of nightly
Expand Down
6 changes: 3 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@ description = "Generate YouTube-like ids from numbers."
repository = "https://github.com/sqids/sqids-rust"
documentation = "https://docs.rs/sqids"
homepage = "https://sqids.org/rust"
version = "0.2.1"
version = "0.3.0"
license = "MIT"
edition = "2021"
readme = "README.md"
keywords = ["ids", "encode", "sqids", "hashids"]
keywords = ["ids", "encode", "short", "sqids", "hashids"]

[dependencies]
derive_more = "0.99.17"
serde = "1.0.188"
serde_json = "1.0.105"
serde_json = "1.0.106"
18 changes: 9 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,34 +44,34 @@ Simple encode & decode:

```rust
let sqids = Sqids::default();
let id = sqids.encode(&[1, 2, 3])?; // "8QRLaD"
let id = sqids.encode(&[1, 2, 3])?; // "86Rf07"
let numbers = sqids.decode(&id); // [1, 2, 3]
```

> **Note**
> 🚧 Because of the algorithm's design, **multiple IDs can decode back into the same sequence of numbers**. If it's important to your design that IDs are canonical, you have to manually re-encode decoded numbers and check that the generated ID matches.
Randomize IDs by providing a custom alphabet:
Enforce a *minimum* length for IDs:

```rust
let sqids = Sqids::new(Some(Options::new(
Some("FxnXM1kBN6cuhsAvjW3Co7l2RePyY8DwaU04Tzt9fHQrqSVKdpimLGIJOgb5ZE".to_string()),
None,
Some(10),
None,
)))?;
let id = sqids.encode(&[1, 2, 3])?; // "B5aMa3"
let id = sqids.encode(&[1, 2, 3])?; // "86Rf07xd4z"
let numbers = sqids.decode(&id); // [1, 2, 3]
```

Enforce a *minimum* length for IDs:
Randomize IDs by providing a custom alphabet:

```rust
let sqids = Sqids::new(Some(Options::new(
Some("FxnXM1kBN6cuhsAvjW3Co7l2RePyY8DwaU04Tzt9fHQrqSVKdpimLGIJOgb5ZE".to_string()),
None,
Some(10),
None,
)))?;
let id = sqids.encode(&[1, 2, 3])?; // "75JT1cd0dL"
let id = sqids.encode(&[1, 2, 3])?; // "B4aajs"
let numbers = sqids.decode(&id); // [1, 2, 3]
```

Expand All @@ -81,9 +81,9 @@ Prevent specific words from appearing anywhere in the auto-generated IDs:
let sqids = Sqids::new(Some(Options::new(
None,
None,
Some(HashSet::from(["word1".to_string(), "word2".to_string()])),
Some(HashSet::from(["86Rf07".to_string()])),
)))?;
let id = sqids.encode(&[1, 2, 3])?; // "8QRLaD"
let id = sqids.encode(&[1, 2, 3])?; // "se8ojk"
let numbers = sqids.decode(&id); // [1, 2, 3]
```

Expand Down
164 changes: 54 additions & 110 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
use derive_more::Display;
use std::{collections::HashSet, result};
use std::{cmp::min, collections::HashSet, result};

#[derive(Display, Debug, Eq, PartialEq)]
pub enum Error {
#[display(fmt = "Alphabet length must be at least 5")]
#[display(fmt = "Alphabet cannot contain multibyte characters")]
AlphabetMultibyteCharacters,
#[display(fmt = "Alphabet length must be at least 3")]
AlphabetLength,
#[display(fmt = "Alphabet must contain unique characters")]
AlphabetUniqueCharacters,
#[display(fmt = "Minimum length has to be between {min} and {max}")]
MinLength { min: usize, max: usize },
#[display(fmt = "Encoding supports numbers between {min} and {max}")]
EncodingRange { min: u64, max: u64 },
#[display(fmt = "Ran out of range checking against the blocklist")]
BlocklistOutOfRange,
#[display(fmt = "Reached max attempts to re-generate the ID")]
BlocklistMaxAttempts,
}

pub type Result<T> = result::Result<T, Error>;
Expand All @@ -24,14 +22,14 @@ pub fn default_blocklist() -> HashSet<String> {
#[derive(Debug)]
pub struct Options {
pub alphabet: String,
pub min_length: usize,
pub min_length: u8,
pub blocklist: HashSet<String>,
}

impl Options {
pub fn new(
alphabet: Option<String>,
min_length: Option<usize>,
min_length: Option<u8>,
blocklist: Option<HashSet<String>>,
) -> Self {
let mut options = Options::default();
Expand Down Expand Up @@ -63,7 +61,7 @@ impl Default for Options {
#[derive(Debug)]
pub struct Sqids {
alphabet: Vec<char>,
min_length: usize,
min_length: u8,
blocklist: HashSet<String>,
}

Expand All @@ -78,7 +76,13 @@ impl Sqids {
let options = options.unwrap_or_default();
let alphabet: Vec<char> = options.alphabet.chars().collect();

if alphabet.len() < 5 {
for c in alphabet.iter() {
if c.len_utf8() > 1 {
return Err(Error::AlphabetMultibyteCharacters);
}
}

if alphabet.len() < 3 {
return Err(Error::AlphabetLength);
}

Expand All @@ -87,15 +91,6 @@ impl Sqids {
return Err(Error::AlphabetUniqueCharacters);
}

if options.min_length < Self::min_value() as usize
|| options.min_length > options.alphabet.len()
{
return Err(Error::MinLength {
min: Self::min_value() as usize,
max: options.alphabet.len(),
});
}

let lowercase_alphabet: Vec<char> =
alphabet.iter().map(|c| c.to_ascii_lowercase()).collect();
let filtered_blocklist: HashSet<String> = options
Expand All @@ -111,29 +106,19 @@ impl Sqids {
})
.collect();

let mut sqids =
Sqids { alphabet, min_length: options.min_length, blocklist: filtered_blocklist };

sqids.alphabet = sqids.shuffle(&sqids.alphabet);
Ok(sqids)
Ok(Sqids {
alphabet: Self::shuffle(&alphabet),
min_length: options.min_length,
blocklist: filtered_blocklist,
})
}

pub fn encode(&self, numbers: &[u64]) -> Result<String> {
if numbers.is_empty() {
return Ok(String::new());
}

let in_range_numbers: Vec<u64> = numbers
.iter()
.copied()
.filter(|&n| n >= Self::min_value() && n <= Self::max_value())
.collect();

if in_range_numbers.len() != numbers.len() {
return Err(Error::EncodingRange { min: Self::min_value(), max: Self::max_value() });
}

self.encode_numbers(&in_range_numbers, false)
self.encode_numbers(numbers, 0)
}

pub fn decode(&self, id: &str) -> Vec<u64> {
Expand All @@ -153,38 +138,25 @@ impl Sqids {
let mut alphabet: Vec<char> =
self.alphabet.iter().cycle().skip(offset).take(self.alphabet.len()).copied().collect();

let partition = alphabet[1];

alphabet.remove(1);
alphabet.remove(0);
alphabet = alphabet.into_iter().rev().collect();

let mut id = id[1..].to_string();

let partition_index = id.find(partition);
if let Some(idx) = partition_index {
if idx > 0 && idx < id.len() - 1 {
id = id.split_off(idx + 1);
alphabet = self.shuffle(&alphabet);
}
}

while !id.is_empty() {
let separator = alphabet[alphabet.len() - 1];
let chunks: Vec<&str> = id.split(separator).collect();
let separator = alphabet[0];

let chunks: Vec<&str> = id.split(separator).collect();
if !chunks.is_empty() {
let alphabet_without_separator: Vec<char> =
alphabet.iter().copied().take(alphabet.len() - 1).collect();
for c in chunks[0].chars() {
if !alphabet_without_separator.contains(&c) {
return vec![];
}
if chunks[0].is_empty() {
return ret;
}
let num = self.to_number(chunks[0], &alphabet_without_separator);
ret.push(num);

let alphabet_without_separator: Vec<char> =
alphabet.iter().copied().skip(1).collect();
ret.push(self.to_number(chunks[0], &alphabet_without_separator));

if chunks.len() > 1 {
alphabet = self.shuffle(&alphabet);
alphabet = Self::shuffle(&alphabet);
}
}

Expand All @@ -194,80 +166,52 @@ impl Sqids {
ret
}

pub fn min_value() -> u64 {
0
}

pub fn max_value() -> u64 {
u64::MAX
}
fn encode_numbers(&self, numbers: &[u64], increment: usize) -> Result<String> {
if increment > self.alphabet.len() {
return Err(Error::BlocklistMaxAttempts);
}

fn encode_numbers(&self, numbers: &[u64], partitioned: bool) -> Result<String> {
let offset = numbers.iter().enumerate().fold(numbers.len(), |a, (i, &v)| {
let mut offset = numbers.iter().enumerate().fold(numbers.len(), |a, (i, &v)| {
self.alphabet[v as usize % self.alphabet.len()] as usize + i + a
}) % self.alphabet.len();

offset = (offset + increment) % self.alphabet.len();

let mut alphabet: Vec<char> =
self.alphabet.iter().cycle().skip(offset).take(self.alphabet.len()).copied().collect();

let prefix = alphabet[0];
let partition = alphabet[1];

alphabet.remove(1);
alphabet.remove(0);
alphabet = alphabet.into_iter().rev().collect();

let mut ret: Vec<String> = vec![prefix.to_string()];

for (i, &num) in numbers.iter().enumerate() {
let alphabet_without_separator: Vec<char> =
alphabet.iter().copied().take(alphabet.len() - 1).collect();
ret.push(self.to_id(num, &alphabet_without_separator));
ret.push(self.to_id(num, &alphabet[1..]));

if i < numbers.len() - 1 {
let separator = alphabet[alphabet.len() - 1];

if partitioned && i == 0 {
ret.push(partition.to_string());
} else {
ret.push(separator.to_string());
}

alphabet = self.shuffle(&alphabet);
ret.push(alphabet[0].to_string());
alphabet = Self::shuffle(&alphabet);
}
}

let mut id = ret.join("");

if self.min_length > id.len() {
if !partitioned {
let mut new_numbers = vec![0];
new_numbers.extend_from_slice(numbers);
id = self.encode_numbers(&new_numbers, true)?;
}
if self.min_length as usize > id.len() {
id += &alphabet[0].to_string();

if self.min_length > id.len() {
id = id[..1].to_string()
+ &alphabet[..(self.min_length - id.len())].iter().collect::<String>()
+ &id[1..]
}
}
while self.min_length as usize - id.len() > 0 {
alphabet = Self::shuffle(&alphabet);

if self.is_blocked_id(&id) {
let mut new_numbers;
let slice_len = min(self.min_length as usize - id.len(), alphabet.len());
let slice: Vec<char> = alphabet.iter().take(slice_len).cloned().collect();

if partitioned {
if numbers[0] + 1 > Self::max_value() {
return Err(Error::BlocklistOutOfRange);
} else {
new_numbers = numbers.to_vec();
new_numbers[0] += 1;
}
} else {
new_numbers = vec![0];
new_numbers.extend_from_slice(numbers);
id += &slice.iter().collect::<String>();
}
}

id = self.encode_numbers(&new_numbers, true)?;
if self.is_blocked_id(&id) {
id = self.encode_numbers(numbers, increment + 1)?;
}

Ok(id)
Expand Down Expand Up @@ -301,7 +245,7 @@ impl Sqids {
result
}

fn shuffle(&self, alphabet: &[char]) -> Vec<char> {
fn shuffle(alphabet: &[char]) -> Vec<char> {
let mut chars: Vec<char> = alphabet.to_vec();

for i in 0..(chars.len() - 1) {
Expand Down
Loading

0 comments on commit f8a03b4

Please sign in to comment.