diff --git a/CHANGES.rst b/CHANGES.rst index 40dec15..15f514f 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,9 +1,27 @@ 0.0.5 (Unreleased) ================== +Features +-------- + +- ROMSearch now has two modes: the first is ``filter_then_download`` (default), which will use the dat file to filter, + then only download relevant files. The second is ``download_then_filter``, which will download everything and then + filter. For data hoarders! + Fixes ----- +GameFinder +~~~~~~~~~~ + +- Ensure includes/excludes work the same as they do for ROMDownloader +- Includes/excludes will now search dupes as well, for consistency + +ROMDownloader +~~~~~~~~~~~~~ + +- Ensure output directory exists before downloading files + General ~~~~~~~ diff --git a/docs/configs/config.rst b/docs/configs/config.rst index d255012..da9f892 100644 --- a/docs/configs/config.rst +++ b/docs/configs/config.rst @@ -4,6 +4,9 @@ config The ``config.yml`` file defines how ROMSearch will do the run. As such, it has quite a number of options. +As a note on includes, this will match something from the start of the string. So "Game Title VII" would include +"Game Title VII", "Game Title VIII", but not "Game Title Anthology - Game Title VII", for example. + Syntax: :: dirs: @@ -31,6 +34,8 @@ Syntax: :: - [game] romsearch: # ROMSearch specific options + method: 'filter_then_download' # OPTIONAL. Method to use, options are 'filter_then_download', or + # 'download_then_filter'. Defaults to 'filter_then_download' run_romdownloader: true # OPTIONAL. Whether to run ROMDownloader. Defaults to true run_datparser: true # OPTIONAL. Whether to run DATParser. Defaults to true run_dupeparser: true # OPTIONAL. Whether to run DupeParsed. Defaults to true @@ -58,15 +63,15 @@ Syntax: :: romchooser: # ROMChooser specific options dry_run: false # OPTIONAL. Set to true to not make any changes to filesystem. Defaults to false use_best_version: true # OPTIONAL. Whether to choose only what ROMChooser decides is the best version. 
- Defaults to true + # Defaults to true allow_multiple_regions: false # OPTIONAL. If true, will allow files from multiple regions, else will choose the - highest region in the list. Defaults to false + # highest region in the list. Defaults to false filter_regions: true # OPTIONAL. Whether to filter by region or not. Defaults to true filter_languages: true # OPTIONAL. Whether to filter by language or not. Defaults to true bool_filters: "all_but_games" # OPTIONAL. Can filter out non-games by various dat categories. If you want to - include e.g. just games and applications, set to - ['games', 'applications']. Defaults to 'all_but_games', which will - remove everything except games + # include e.g. just games and applications, set to + # ['games', 'applications']. Defaults to 'all_but_games', which will + # remove everything except games discord: # OPTIONAL. If defined, supply a webhook URL so that ROMSearch can post Discord webhook_url: [webhook_url] # notifications diff --git a/docs/intro.rst b/docs/intro.rst index d151270..ffb8f85 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -27,6 +27,11 @@ ROMSearch offers the ability to: To get started, see the :doc:`installation ` and :doc:`configuration ` pages. For the philosophy behind how ROMSearch chooses a ROM, see :doc:`1G1R <1g1r>`. +ROMSearch offers two modes: the default is "filter, then download" which will use the .dat file to find the best ROMs +and only download those. For data hoarders, we also offer a "download, then filter" option, which will download +everything and then filter from the downloaded files. For more details, see the +:doc:`ROMSearch module docs `. + Currently, ROMSearch is in early development, and so many features may be added over time. 
At the moment, ROMSearch has the capability for: diff --git a/docs/known_issues.rst b/docs/known_issues.rst index dbd8e5f..1c4a6c4 100644 --- a/docs/known_issues.rst +++ b/docs/known_issues.rst @@ -2,6 +2,10 @@ Known Issues ############ +* In GameFinder, includes/excludes can cause some unexpected behavior since matching also searches through duplicate files. + For example, having an include of "Crash Bandicoot" for the PS1 will also grab "Crash Bash" and + "CTR - Crash Team Racing" since at least one of their duplicates starts with "Crash Bandicoot". + * Currently, the code is not aware of ``retool``'s supersets or compilations array. * Occasionally, multiple ROMs will be found with the same priority. diff --git a/docs/modules/romsearch.rst b/docs/modules/romsearch.rst index ec340f8..52679b0 100644 --- a/docs/modules/romsearch.rst +++ b/docs/modules/romsearch.rst @@ -5,6 +5,10 @@ ROMSearch This is the main part that controls the various other modules. It essentially calls everything (given user preferences), and so while it doesn't really do all that much on its own, is the interface to everything else. +ROMSearch has 2 modes: the default parses the .dat file and then downloads only the relevant files, to minimize disk space +used (``filter_then_download``). For completionists/data hoarders, there's also a ``download_then_filter`` option, which +will download everything and then filter from the downloaded files. + For more details on the ROMSearch arguments, see the :doc:`config file documentation <../configs/config>`. 
API diff --git a/romsearch/configs/sample_config.yml b/romsearch/configs/sample_config.yml index 308db66..7b91595 100644 --- a/romsearch/configs/sample_config.yml +++ b/romsearch/configs/sample_config.yml @@ -21,6 +21,7 @@ include_games: - "Chrono Trigger" romsearch: + method: "filter_then_download" run_romdownloader: true run_datparser: true run_dupeparser: true diff --git a/romsearch/modules/gamefinder.py b/romsearch/modules/gamefinder.py index 84a5241..c75f50f 100644 --- a/romsearch/modules/gamefinder.py +++ b/romsearch/modules/gamefinder.py @@ -133,24 +133,6 @@ def get_game_dict(self, regex_config=self.regex_config, ) - # Remove any excluded files - if self.exclude_games is not None: - games_to_remove = self.get_game_matches(games, - self.exclude_games, - ) - - if games_to_remove is not None: - for i in sorted(games_to_remove, reverse=True): - games.pop(i) - - # Include only included files - if self.include_games is not None: - games_to_include = self.get_game_matches(games, - self.include_games, - ) - if games_to_include is not None: - games = np.asarray(games)[games_to_include] - # We need to trim down dupes here. 
Otherwise, the # dict is just the list we already have game_dict = None @@ -166,10 +148,41 @@ def get_game_dict(self, game_dict[game] = {"priority": 1, } + # Remove any excluded files + if self.exclude_games is not None: + games_to_remove = self.get_game_matches(game_dict, + self.exclude_games, + ) + if games_to_remove is not None: + for g in games_to_remove: + del game_dict[g] + + # Include only included files + if self.include_games is not None: + + games_to_include = self.get_game_matches(game_dict, + self.include_games, + ) + if games_to_include is not None: + + filtered_game_dict = {} + for g in games_to_include: + filtered_game_dict[g] = game_dict[g] + + game_dict = copy.deepcopy(filtered_game_dict) + return game_dict - def get_game_matches(self, files, games_to_match): - """Get files that match an input list (games_to_match)""" + def get_game_matches(self, + game_dict, + games_to_match, + ): + """Get files that match an input dictionary (so as to properly handle dupes + + Args: + - game_dict (dict): Dictionary of games to match against + - games_to_match (list): List of values to match against + """ games_matched = [] if isinstance(games_to_match, dict): @@ -182,22 +195,41 @@ def get_game_matches(self, files, games_to_match): games_matched.extend(games_to_match) - idx = [] - for i, f in enumerate(files): + game_dict_keys = [] + for g in game_dict: found_f = False - # Search within each item since the matches might not be exact + for game_matched in games_matched: if found_f: continue - re_find = re.findall(f"{game_matched}*", f) + # Look in the group name + re_find = re.findall(f"^({re.escape(game_matched)}).*", g) if len(re_find) > 0: - idx.append(i) + game_dict_keys.append(g) found_f = True - return idx + # If not found, look in the dupe names + if not found_f: + for g_d in game_dict[g]: + + if found_f: + continue + + re_find = re.findall(f"^({re.escape(game_matched)}).*", g_d) + + if len(re_find) > 0: + game_dict_keys.append(g) + found_f = True + + 
game_dict_keys = np.unique(game_dict_keys) + + if len(game_dict_keys) == 0: + game_dict_keys = None + + return game_dict_keys def get_filter_dupes(self, games): """Parse down a list of files based on an input dupe list""" diff --git a/romsearch/modules/romdownloader.py b/romsearch/modules/romdownloader.py index 7716f2b..be52c9d 100644 --- a/romsearch/modules/romdownloader.py +++ b/romsearch/modules/romdownloader.py @@ -13,8 +13,8 @@ def add_rclone_filter(pattern=None, - # bracketed_pattern=None, filter_type="include", + include_wildcard=True, ): if filter_type == "include": filter_str = "+" @@ -26,14 +26,11 @@ def add_rclone_filter(pattern=None, # rclone wants double curly braces which we need to escape in python strings (yum) filter_pattern = "" - # # Add in non-bracketed stuff (i.e. game names) at the start - # if non_bracketed_pattern is not None: - # filter_pattern += f"{{{{{non_bracketed_pattern}}}}}" - # - # filter_pattern += "*" - if pattern is not None: - filter_pattern += f"{{{{{pattern}}}}}*" + filter_pattern += f"{{{{{pattern}}}}}" + + if include_wildcard: + filter_pattern += "*" cmd = f' --filter "{filter_str} {filter_pattern}"' @@ -63,6 +60,9 @@ def __init__(self, config=None, platform_config=None, logger=None, + override_includes=None, + override_excludes=None, + include_filter_wildcard=True, ): """Downloader tool via rclone @@ -74,6 +74,12 @@ def __init__(self, config (dict, optional): Configuration dictionary. Defaults to None platform_config (dict, optional): Platform configuration dictionary. Defaults to None logger (logging.Logger, optional): Logger instance. Defaults to None + override_includes (list, optional): If set, will override the config includes with custom + ones. Defaults to None. + override_excludes (list, optional): If set, will override the config excludes with custom + ones. Defaults to None. + include_filter_wildcard (bool, optional): If set, will include wildcards in rclone filters. Defaults to + True. 
""" if platform is None: @@ -107,6 +113,10 @@ def __init__(self, include_games = include_games.get(platform, None) else: include_games = copy.deepcopy(include_games) + + if override_includes is not None: + include_games = copy.deepcopy(override_includes) + self.include_games = include_games exclude_games = self.config.get("exclude_games", None) @@ -114,6 +124,10 @@ def __init__(self, exclude_games = exclude_games.get(platform, None) else: exclude_games = copy.deepcopy(exclude_games) + + if override_excludes is not None: + exclude_games = copy.deepcopy(override_excludes) + self.exclude_games = exclude_games remote_name = self.config.get("romdownloader", {}).get("remote_name", None) @@ -122,8 +136,15 @@ def __init__(self, self.remote_name = remote_name sync_all = self.config.get("romdownloader", {}).get("sync_all", True) + + # If we have includes or excludes, force sync all False + if self.include_games is not None or self.exclude_games is not None: + sync_all = False + self.sync_all = sync_all + self.include_filter_wildcard = include_filter_wildcard + # Read in the specific platform configuration mod_dir = os.path.dirname(romsearch.__file__) @@ -213,6 +234,7 @@ def rclone_sync(self, if pattern: cmd += add_rclone_filter(pattern=pattern, filter_type="exclude", + include_wildcard=self.include_filter_wildcard, ) # Now onto positive filters @@ -230,6 +252,7 @@ def rclone_sync(self, if pattern: cmd += add_rclone_filter(pattern=pattern, filter_type="include", + include_wildcard=self.include_filter_wildcard, ) cmd += ' --filter "- *"' @@ -238,7 +261,10 @@ def rclone_sync(self, self.logger.info(f"Dry run, would rclone_sync with:") self.logger.info(cmd) else: - # os.system(cmd) + + if not os.path.exists(self.out_dir): + os.makedirs(self.out_dir) + # Execute the command and capture the output with subprocess.Popen(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) as process: for line in process.stdout: @@ -267,6 +293,8 @@ def post_to_discord(self, if len(items_added) 
> 0: + items_added.sort() + for items_split in split(items_added, chunk_size=max_per_message): fields = [] @@ -284,6 +312,8 @@ def post_to_discord(self, if len(items_deleted) > 0: + items_deleted.sort() + for items_split in split(items_deleted, chunk_size=max_per_message): fields = [] diff --git a/romsearch/modules/romparser.py b/romsearch/modules/romparser.py index e1fb3fd..4a8e8a3 100644 --- a/romsearch/modules/romparser.py +++ b/romsearch/modules/romparser.py @@ -1,12 +1,10 @@ import copy import os import re -import time -from datetime import datetime import romsearch from ..util import (setup_logger, - + get_file_time, load_yml, load_json, get_game_name, @@ -70,21 +68,6 @@ def get_pattern_val(regex, return pattern_val -def get_file_time(f, - datetime_format, - ): - """Get created file time from the file itself""" - - if os.path.exists(f): - ti_m = os.path.getmtime(f) - date_ti_m = datetime.strptime(time.ctime(ti_m), "%a %b %d %H:%M:%S %Y") - else: - date_ti_m = datetime(year=1900, month=1, day=1, hour=0, minute=0, second=0) - date_ti_m_str = date_ti_m.strftime(format=datetime_format) - - return date_ti_m_str - - class ROMParser: def __init__(self, diff --git a/romsearch/modules/romsearch.py b/romsearch/modules/romsearch.py index 63dcf4e..1b8f29e 100644 --- a/romsearch/modules/romsearch.py +++ b/romsearch/modules/romsearch.py @@ -1,5 +1,9 @@ +import copy import glob import os +import re + +import numpy as np import romsearch from .datparser import DATParser @@ -10,12 +14,19 @@ from .rommover import ROMMover from .romparser import ROMParser from ..util import (load_yml, + load_json, setup_logger, discord_push, split, get_short_name, + get_file_time, ) +ALLOWED_ROMSEARCH_METHODS = [ + "filter_then_download", + "download_then_filter", +] + class ROMSearch: @@ -80,6 +91,8 @@ def __init__(self, raise ValueError(f"Platforms should be any of {self.default_config['platforms']}, not {platform}") self.platforms = platforms + self.romsearch_method = 
self.config.get("romsearch", {}).get("method", "filter_then_download") + # Which modules to run self.run_romdownloader = self.config.get("romsearch", {}).get("run_romdownloader", True) self.run_datparser = self.config.get("romsearch", {}).get("run_datparser", True) @@ -96,8 +109,6 @@ def run(self): self.logger.info(f"Looping over platforms: {', '.join(self.platforms)}") - all_roms_per_platform = {} - for platform in self.platforms: self.logger.info(f"Running ROMSearch for {platform}") @@ -108,37 +119,6 @@ def run(self): raw_dir = os.path.join(self.raw_dir, platform) - # Run the rclone sync - if self.run_romdownloader: - downloader = ROMDownloader(platform=platform, - config=self.config, - platform_config=platform_config, - logger=self.logger, - ) - downloader.run() - - # Get the original directory, so we can safely move back after - orig_dir = os.getcwd() - os.chdir(raw_dir) - - all_files = glob.glob("*.zip") - all_files.sort() - - # Parse this into a dictionary with some useful info for each file - all_file_dict = {} - for f in all_files: - short_name = get_short_name(f, - regex_config=self.regex_config, - default_config=self.default_config, - ) - - all_file_dict[f] = { - "short_name": short_name, - "matched": False, - } - - os.chdir(orig_dir) - # Parse DAT files here, if we're doing that if self.run_datparser: dat_parser = DATParser(platform=platform, @@ -158,6 +138,58 @@ def run(self): ) dupe_parser.run() + if self.romsearch_method == "download_then_filter": + # Run the rclone sync + if self.run_romdownloader: + downloader = ROMDownloader(platform=platform, + config=self.config, + platform_config=platform_config, + logger=self.logger, + ) + downloader.run() + + # Get the original directory, so we can safely move back after + orig_dir = os.getcwd() + os.chdir(raw_dir) + + all_files = glob.glob("*.zip") + all_files.sort() + + os.chdir(orig_dir) + + elif self.romsearch_method == "filter_then_download": + + if not self.run_datparser: + raise ValueError("If using 
filter, then download method, you must run DATParser") + + parsed_dat_dir = self.config.get("dirs", {}).get("parsed_dat_dir", None) + if parsed_dat_dir is None: + raise ValueError("parsed_dat_dir needs to be defined in config") + + parsed_dat_file = os.path.join(parsed_dat_dir, f"{platform} (dat parsed).json") + parsed_dat = load_json(parsed_dat_file) + + all_files = [f"{f}.zip" for f in parsed_dat] + all_files = np.unique(all_files) + all_files.sort() + + else: + + raise ValueError(f"ROMSearch method should be one of {ALLOWED_ROMSEARCH_METHODS}") + + # Parse this into a dictionary with some useful info for each file + all_file_dict = {} + for f in all_files: + short_name = get_short_name(f, + regex_config=self.regex_config, + default_config=self.default_config, + ) + + all_file_dict[f] = { + "short_name": short_name, + "matched": False, + } + # Find files finder = GameFinder(platform=platform, config=self.config, @@ -170,6 +202,7 @@ def run(self): self.logger.info(f"Searching through {len(all_games)} game(s)") all_roms_moved = [] + all_roms_dict = {} for i, game in enumerate(all_games): @@ -222,13 +255,56 @@ def run(self): rom_files = [f for f in rom_dict] self.logger.info(f"Found ROM file(s): {', '.join(rom_files)}") - if self.dry_run: - self.logger.info("Dry run, will not move any files") - continue + # Save to a big dictionary, since we'll move all at once + all_roms_dict[game] = rom_dict - if not self.run_rommover: - self.logger.debug("ROMMover is not running, will not move anything") - continue + for f in all_file_dict: + if not all_file_dict[f]["matched"]: + self.logger.debug(f"{f} not matched to anything") + + if self.dry_run: + self.logger.info("Dry run, will not move any files") + continue + + if not self.run_rommover: + self.logger.debug("ROMMover is not running, will not move anything") + continue + + # If we filter then download, this is where we download + if self.romsearch_method == "filter_then_download": + all_files = [] + for game in 
all_roms_dict: + + # Make sure we're using the regex escaped version for all the strings + fs = [re.escape(f) for f in all_roms_dict[game]] + + all_files.extend(fs) + + if self.run_romdownloader: + downloader = ROMDownloader(platform=platform, + config=self.config, + platform_config=platform_config, + logger=self.logger, + override_includes=all_files, + override_excludes=[], + include_filter_wildcard=False, + ) + downloader.run() + + # Replace the file time with the correct one on disk + for game in all_roms_dict: + + for f in all_roms_dict[game]: + full_filename = os.path.join(self.raw_dir, platform, f) + + file_mod_time = get_file_time(full_filename, + datetime_format=self.default_config["datetime_format"], + ) + all_roms_dict[game][f]["file_mod_time"] = file_mod_time + + for game in all_roms_dict: + + rom_dict = all_roms_dict[game] mover = ROMMover(platform=platform, game=game, @@ -239,9 +315,6 @@ def run(self): roms_moved = mover.run(rom_dict) all_roms_moved.extend(roms_moved) - if len(all_roms_moved) > 0: - all_roms_per_platform[platform] = all_roms_moved - # Post these to Discord in chunks of 10 if self.discord_url is not None and len(all_roms_moved) > 0: @@ -260,8 +333,4 @@ def run(self): fields=fields, ) - for f in all_file_dict: - if not all_file_dict[f]["matched"]: - self.logger.warning(f"{f} not matched to anything") - return True diff --git a/romsearch/util/__init__.py b/romsearch/util/__init__.py index e84a27e..0eecc83 100644 --- a/romsearch/util/__init__.py +++ b/romsearch/util/__init__.py @@ -1,5 +1,5 @@ from .discord import discord_push -from .general import split, get_parent_name +from .general import split, get_parent_name, get_file_time from .io import load_yml, unzip_file, load_json, save_json from .logger import setup_logger from .regex_matching import get_file_pattern, get_bracketed_file_pattern, get_game_name, get_short_name @@ -17,4 +17,5 @@ "discord_push", "split", "get_parent_name", + "get_file_time", ] diff --git 
a/romsearch/util/general.py b/romsearch/util/general.py index de8af2e..b343ff7 100644 --- a/romsearch/util/general.py +++ b/romsearch/util/general.py @@ -1,4 +1,8 @@ import copy +import os +import time +from datetime import datetime + def split(full_list, chunk_size=10): """Split a list in chunks of size chunk_size @@ -53,3 +57,18 @@ def get_parent_name(game_name, raise ValueError("Could not find a parent name!") return found_parent_name + + +def get_file_time(f, + datetime_format, + ): + """Get the file's last-modified time as a string in datetime_format (1900-01-01 00:00:00 if the file is missing)""" + + if os.path.exists(f): + ti_m = os.path.getmtime(f) + date_ti_m = datetime.strptime(time.ctime(ti_m), "%a %b %d %H:%M:%S %Y") + else: + date_ti_m = datetime(year=1900, month=1, day=1, hour=0, minute=0, second=0) + date_ti_m_str = date_ti_m.strftime(format=datetime_format) + + return date_ti_m_str