Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add non blocker script #25

Open
wants to merge 23 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
02518b1
add non blocker script
tomasdiaztoro Jul 29, 2024
df2a4f3
add logs tracking
tomasdiaztoro Aug 1, 2024
60edb30
Merge branch 'develop' into discover_entries_for_categories_non_blocker
tomasdiaztoro Aug 6, 2024
a2801b1
add product scraper
tomasdiaztoro Aug 6, 2024
cb4f1a2
add dummy scraper and dummy data
tomasdiaztoro Aug 8, 2024
4ad4039
Merge branch 'develop' into discover_entries_for_categories_non_blocker
tomasdiaztoro Aug 8, 2024
8a738bc
fix ES query
tomasdiaztoro Aug 8, 2024
0b3541e
add scrape_products arg and product count summary
tomasdiaztoro Aug 8, 2024
34ece74
Merge branch 'develop' into discover_entries_for_categories_non_blocker
tomasdiaztoro Aug 9, 2024
bb8102b
update
tomasdiaztoro Aug 9, 2024
d81cbcb
Merge branch 'develop' into discover_entries_for_categories_non_blocker
Aug 12, 2024
3460851
add positions and weight to logs
tomasdiaztoro Aug 12, 2024
6995c66
add products_non_blocker.py
tomasdiaztoro Aug 13, 2024
70730e8
update .gitignore
tomasdiaztoro Aug 13, 2024
94c2824
change ES query strategy
tomasdiaztoro Aug 14, 2024
a32d60c
change ES search logic
tomasdiaztoro Aug 14, 2024
3b08c67
Merge branch 'develop' into discover_entries_for_categories_non_blocker
tomasdiaztoro Aug 16, 2024
317a694
Change logs count strategy
tomasdiaztoro Aug 19, 2024
9d98433
fix duplicated log deletion
tomasdiaztoro Aug 19, 2024
a5b13a3
Add chord and with_async param
tomasdiaztoro Aug 20, 2024
9db0669
remove unused code
tomasdiaztoro Aug 20, 2024
5463ce5
add update log id
tomasdiaztoro Sep 3, 2024
8bba428
Add isolated mode
tomasdiaztoro Sep 5, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,8 @@ env
**/__pycache__
.idea
*.crt
.DS_Store
11 changes: 5 additions & 6 deletions storescraper/bin/celeryconfig/defaults.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import sys
sys.path.append('../..')

broker_url = 'amqp://storescraper:storescraper@localhost/storescraper'
result_backend = 'rpc://'
sys.path.append("../..")

imports = (
'storescraper.store'
)
broker_url = "redis://localhost:6379/0"
result_backend = "redis://localhost:6379/0"

imports = "storescraper.store"
50 changes: 50 additions & 0 deletions storescraper/bin/discover_entries_for_categories_non_blocker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import argparse
import json
import logging
import sys

sys.path.append("../..")

from storescraper.utils import get_store_class_by_name # noqa


def main():
    """Command-line entry point for the non-blocker entry discovery.

    Parses the command line, resolves the store class with
    ``get_store_class_by_name`` and calls its
    ``discover_entries_for_categories_non_blocker`` method with the parsed
    values.

    Command-line arguments:
        store: Name of the store, passed to ``get_store_class_by_name``.
        --categories: Zero or more category names; ``None`` when omitted.
        --with_async: Optional boolean. Omitted -> ``False``; given without
            a value -> ``True``; given with a value -> that value parsed
            by ``_parse_bool_flag``.
        --extra_args: A JSON document decoded with ``json.loads``;
            defaults to an empty dict.
    """
    # One basicConfig call is enough: once the root logger has a handler,
    # later calls are ignored, so the former second call (level=WARNING)
    # never took effect.
    logging.basicConfig(level=logging.INFO, stream=sys.stdout)

    parser = argparse.ArgumentParser(
        description="Discovers the URLs of the given store and (optional) categories"
    )
    parser.add_argument("store", type=str, help="The name of the store to be parsed")
    parser.add_argument(
        "--categories", type=str, nargs="*", help="Specific categories to be parsed"
    )
    parser.add_argument(
        "--with_async",
        # ``type=bool`` would turn every non-empty string (including
        # "False" and "0") into True, so an explicit converter is used.
        type=_parse_bool_flag,
        nargs="?",
        default=False,
        const=True,
        help="Use asynchronous tasks (celery)",
    )
    parser.add_argument(
        "--extra_args",
        type=json.loads,
        nargs="?",
        default={},
        help="Optional arguments to pass to the parser "
        "(usually username/password) for private sites)",
    )

    args = parser.parse_args()
    store = get_store_class_by_name(args.store)

    store.discover_entries_for_categories_non_blocker(
        categories=args.categories,
        use_async=args.with_async,
        extra_args=args.extra_args,
    )


def _parse_bool_flag(value):
    """Convert a command-line string into a boolean.

    Accepts (case-insensitively) ``true/t/yes/y/on/1`` as ``True`` and
    ``false/f/no/n/off/0`` or the empty string as ``False``.

    Raises:
        argparse.ArgumentTypeError: If ``value`` is not one of the
            recognised spellings, so argparse reports a usage error.
    """
    normalized = value.strip().lower()
    if normalized in ("true", "t", "yes", "y", "on", "1"):
        return True
    # The empty string stays falsy, matching what ``bool("")`` returned.
    if normalized in ("false", "f", "no", "n", "off", "0", ""):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value (true/false), got %r" % value
    )


if __name__ == "__main__":
main()
25 changes: 25 additions & 0 deletions storescraper/bin/dummy-cell.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"urls": [
"https://dummy.com/producto-A1B2C3D",
"https://dummy.com/producto-4E5F6G7",
"https://dummy.com/producto-H8I9J0K",
"https://dummy.com/producto-L1M2N3O",
"https://dummy.com/producto-P4Q5R6S",
"https://dummy.com/producto-T7U8V9W",
"https://dummy.com/producto-X0Y1Z2A",
"https://dummy.com/producto-B3C4D5E",
"https://dummy.com/producto-F6G7H8I",
"https://dummy.com/producto-J9K0L1M",
"https://dummy.com/producto-N2O3P4Q",
"https://dummy.com/producto-R5S6T7U",
"https://dummy.com/producto-V8W9X0Y",
"https://dummy.com/producto-Z1A2B3C",
"https://dummy.com/producto-D4E5F6G",
"https://dummy.com/producto-H7I8J9K",
"https://dummy.com/producto-L0M1N2O",
"https://dummy.com/producto-P3Q4R5S",
"https://dummy.com/producto-T6U7V8W",
"https://dummy.com/producto-X9Y0Z1A"
]
}

105 changes: 105 additions & 0 deletions storescraper/bin/dummy-groceries.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
{
"urls": [
"https://dummy.com/producto-abcde01",
"https://dummy.com/producto-fghij23",
"https://dummy.com/producto-klmno45",
"https://dummy.com/producto-pqrst67",
"https://dummy.com/producto-uvwxy89",
"https://dummy.com/producto-zabcd12",
"https://dummy.com/producto-efghi34",
"https://dummy.com/producto-jklmn56",
"https://dummy.com/producto-opqrs78",
"https://dummy.com/producto-tuvwx90",
"https://dummy.com/producto-yzabc23",
"https://dummy.com/producto-defgh45",
"https://dummy.com/producto-ijklm67",
"https://dummy.com/producto-nopqr89",
"https://dummy.com/producto-stuvw01",
"https://dummy.com/producto-xyzab23",
"https://dummy.com/producto-cdefg45",
"https://dummy.com/producto-hijkl67",
"https://dummy.com/producto-mnopq89",
"https://dummy.com/producto-rstuv01",
"https://dummy.com/producto-wxyza23",
"https://dummy.com/producto-bcdef45",
"https://dummy.com/producto-ghijk67",
"https://dummy.com/producto-lmnop89",
"https://dummy.com/producto-opqrs01",
"https://dummy.com/producto-tuvwx23",
"https://dummy.com/producto-yzabc45",
"https://dummy.com/producto-defgh67",
"https://dummy.com/producto-ijklm89",
"https://dummy.com/producto-nopqr01",
"https://dummy.com/producto-stuvw23",
"https://dummy.com/producto-xyzab45",
"https://dummy.com/producto-cdefg67",
"https://dummy.com/producto-hijkl89",
"https://dummy.com/producto-mnopq01",
"https://dummy.com/producto-rstuv23",
"https://dummy.com/producto-wxyza45",
"https://dummy.com/producto-bcdef67",
"https://dummy.com/producto-ghijk89",
"https://dummy.com/producto-lmnop01",
"https://dummy.com/producto-opqrs23",
"https://dummy.com/producto-tuvwx45",
"https://dummy.com/producto-yzabc67",
"https://dummy.com/producto-defgh89",
"https://dummy.com/producto-ijklm01",
"https://dummy.com/producto-nopqr23",
"https://dummy.com/producto-stuvw45",
"https://dummy.com/producto-xyzab67",
"https://dummy.com/producto-cdefg89",
"https://dummy.com/producto-hijkl01",
"https://dummy.com/producto-mnopq23",
"https://dummy.com/producto-rstuv45",
"https://dummy.com/producto-wxyza67",
"https://dummy.com/producto-bcdef89",
"https://dummy.com/producto-ghijk01",
"https://dummy.com/producto-lmnop23",
"https://dummy.com/producto-opqrs45",
"https://dummy.com/producto-tuvwx67",
"https://dummy.com/producto-yzabc89",
"https://dummy.com/producto-defgh01",
"https://dummy.com/producto-ijklm23",
"https://dummy.com/producto-nopqr45",
"https://dummy.com/producto-stuvw67",
"https://dummy.com/producto-xyzab89",
"https://dummy.com/producto-cdefg01",
"https://dummy.com/producto-hijkl23",
"https://dummy.com/producto-mnopq45",
"https://dummy.com/producto-rstuv67",
"https://dummy.com/producto-wxyza89",
"https://dummy.com/producto-bcdef01",
"https://dummy.com/producto-ghijk23",
"https://dummy.com/producto-lmnop45",
"https://dummy.com/producto-opqrs67",
"https://dummy.com/producto-tuvwx89",
"https://dummy.com/producto-yzabc01",
"https://dummy.com/producto-defgh23",
"https://dummy.com/producto-ijklm45",
"https://dummy.com/producto-nopqr67",
"https://dummy.com/producto-stuvw89",
"https://dummy.com/producto-xyzab01",
"https://dummy.com/producto-abcde01u",
"https://dummy.com/producto-fghij23u",
"https://dummy.com/producto-klmno45u",
"https://dummy.com/producto-pqrst67u",
"https://dummy.com/producto-uvwxy89u",
"https://dummy.com/producto-zabcd12u",
"https://dummy.com/producto-efghi34u",
"https://dummy.com/producto-jklmn56u",
"https://dummy.com/producto-opqrs78u",
"https://dummy.com/producto-tuvwx90u",
"https://dummy.com/producto-yzabc23u",
"https://dummy.com/producto-defgh45u",
"https://dummy.com/producto-ijklm67u",
"https://dummy.com/producto-nopqr89u",
"https://dummy.com/producto-stuvw01u",
"https://dummy.com/producto-xyzab23u",
"https://dummy.com/producto-cdefg45u",
"https://dummy.com/producto-hijkl67u",
"https://dummy.com/producto-mnopq89u",
"https://dummy.com/producto-rstuv01u"
]
}

24 changes: 24 additions & 0 deletions storescraper/bin/dummy-keyboard.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"urls": [
"https://dummy.com/producto-1234567",
"https://dummy.com/producto-2345678",
"https://dummy.com/producto-3456789",
"https://dummy.com/producto-4567890",
"https://dummy.com/producto-5678901",
"https://dummy.com/producto-6789012",
"https://dummy.com/producto-7890123",
"https://dummy.com/producto-8901234",
"https://dummy.com/producto-9012345",
"https://dummy.com/producto-1123456",
"https://dummy.com/producto-2234567",
"https://dummy.com/producto-3345678",
"https://dummy.com/producto-4456789",
"https://dummy.com/producto-5567890",
"https://dummy.com/producto-6678901",
"https://dummy.com/producto-7789012",
"https://dummy.com/producto-8890123",
"https://dummy.com/producto-9901234",
"https://dummy.com/producto-1012345",
"https://dummy.com/producto-2123456"
]
}
24 changes: 24 additions & 0 deletions storescraper/bin/dummy-microphone.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"urls": [
"https://dummy.com/producto-12345678",
"https://dummy.com/producto-23456789",
"https://dummy.com/producto-34567890",
"https://dummy.com/producto-45678901",
"https://dummy.com/producto-56789012",
"https://dummy.com/producto-67890123",
"https://dummy.com/producto-78901234",
"https://dummy.com/producto-89012345",
"https://dummy.com/producto-90123456",
"https://dummy.com/producto-11234567",
"https://dummy.com/producto-22345678",
"https://dummy.com/producto-33456789",
"https://dummy.com/producto-44567890",
"https://dummy.com/producto-55678901",
"https://dummy.com/producto-66789012",
"https://dummy.com/producto-77890123",
"https://dummy.com/producto-88901234",
"https://dummy.com/producto-99012345",
"https://dummy.com/producto-10123456",
"https://dummy.com/producto-21234567"
]
}
24 changes: 24 additions & 0 deletions storescraper/bin/dummy-monitor.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"urls": [
"https://dummy.com/producto-123456789",
"https://dummy.com/producto-234567890",
"https://dummy.com/producto-345678901",
"https://dummy.com/producto-456789012",
"https://dummy.com/producto-567890123",
"https://dummy.com/producto-678901234",
"https://dummy.com/producto-789012345",
"https://dummy.com/producto-890123456",
"https://dummy.com/producto-901234567",
"https://dummy.com/producto-112345678",
"https://dummy.com/producto-223456789",
"https://dummy.com/producto-334567890",
"https://dummy.com/producto-445678901",
"https://dummy.com/producto-556789012",
"https://dummy.com/producto-667890123",
"https://dummy.com/producto-778901234",
"https://dummy.com/producto-889012345",
"https://dummy.com/producto-990123456",
"https://dummy.com/producto-101234567",
"https://dummy.com/producto-212345678"
]
}
24 changes: 24 additions & 0 deletions storescraper/bin/dummy-motherboard.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"urls": [
"https://dummy.com/producto-19435",
"https://dummy.com/producto-42761",
"https://dummy.com/producto-68593",
"https://dummy.com/producto-78432",
"https://dummy.com/producto-34829",
"https://dummy.com/producto-59614",
"https://dummy.com/producto-12357",
"https://dummy.com/producto-47189",
"https://dummy.com/producto-23974",
"https://dummy.com/producto-65832",
"https://dummy.com/producto-73928",
"https://dummy.com/producto-80147",
"https://dummy.com/producto-96725",
"https://dummy.com/producto-14295",
"https://dummy.com/producto-27394",
"https://dummy.com/producto-48052",
"https://dummy.com/producto-61923",
"https://dummy.com/producto-53789",
"https://dummy.com/producto-81247",
"https://dummy.com/producto-96254"
]
}
24 changes: 24 additions & 0 deletions storescraper/bin/dummy-mouse.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"urls": [
"https://dummy.com/producto-5297",
"https://dummy.com/producto-6841",
"https://dummy.com/producto-3528",
"https://dummy.com/producto-7943",
"https://dummy.com/producto-4182",
"https://dummy.com/producto-6573",
"https://dummy.com/producto-9124",
"https://dummy.com/producto-3608",
"https://dummy.com/producto-7485",
"https://dummy.com/producto-5362",
"https://dummy.com/producto-6719",
"https://dummy.com/producto-8234",
"https://dummy.com/producto-5123",
"https://dummy.com/producto-4058",
"https://dummy.com/producto-7391",
"https://dummy.com/producto-6247",
"https://dummy.com/producto-8153",
"https://dummy.com/producto-4826",
"https://dummy.com/producto-5704",
"https://dummy.com/producto-2398"
]
}
24 changes: 24 additions & 0 deletions storescraper/bin/dummy-oven.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"urls": [
"https://dummy.com/producto-8423",
"https://dummy.com/producto-5831",
"https://dummy.com/producto-4267",
"https://dummy.com/producto-7154",
"https://dummy.com/producto-3942",
"https://dummy.com/producto-6589",
"https://dummy.com/producto-7325",
"https://dummy.com/producto-8196",
"https://dummy.com/producto-5412",
"https://dummy.com/producto-6743",
"https://dummy.com/producto-2587",
"https://dummy.com/producto-9134",
"https://dummy.com/producto-3876",
"https://dummy.com/producto-4719",
"https://dummy.com/producto-6842",
"https://dummy.com/producto-5237",
"https://dummy.com/producto-7394",
"https://dummy.com/producto-8916",
"https://dummy.com/producto-4672",
"https://dummy.com/producto-3185"
]
}
24 changes: 24 additions & 0 deletions storescraper/bin/dummy-printer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"urls": [
"https://dummy.com/producto-7423",
"https://dummy.com/producto-5198",
"https://dummy.com/producto-6347",
"https://dummy.com/producto-2815",
"https://dummy.com/producto-9584",
"https://dummy.com/producto-3721",
"https://dummy.com/producto-8094",
"https://dummy.com/producto-1538",
"https://dummy.com/producto-6792",
"https://dummy.com/producto-8465",
"https://dummy.com/producto-4937",
"https://dummy.com/producto-2764",
"https://dummy.com/producto-9051",
"https://dummy.com/producto-3816",
"https://dummy.com/producto-7109",
"https://dummy.com/producto-5397",
"https://dummy.com/producto-6283",
"https://dummy.com/producto-8142",
"https://dummy.com/producto-4759",
"https://dummy.com/producto-3628"
]
}
Loading