Login

Stuff

Author:
NixonDash
Posted:
February 21, 2023
Language:
Python
Version:
3.2
Score:
0 (after 0 ratings)

Stuff

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# models.py
from django.db import models

class Project(models.Model):
    """A project listing stored in the database.

    In this file, rows are created by the ``scrape_projects`` view, which
    fills the text fields from elements found on a scraped page.
    """

    # Short heading of the listing (the view takes it from an <h2>).
    title = models.CharField(max_length=200)
    # Free-form body text of the listing (the view takes it from a <p>).
    description = models.TextField()
    # Name of the company offering the project.
    company_name = models.CharField(max_length=200)
    # URL associated with the listing; the view stores the scraped page URL.
    website = models.URLField()
    # Set automatically to the current date when the row is first inserted;
    # auto_now_add makes the field non-editable and ignores supplied values.
    date_posted = models.DateField(auto_now_add=True)

    def __str__(self):
        """Return the title so instances are readable in the admin and shell."""
        return self.title

# forms.py
from django import forms

class ScraperForm(forms.Form):
    """Input form for a scrape request.

    Both fields are single text inputs capped at 200 characters. The
    ``scrape_projects`` view splits each value on whitespace, so several
    keywords or site URLs can be entered separated by spaces.
    """

    # Search terms to look for in the scraped pages.
    keywords = forms.CharField(
        max_length=200,
        label='Keywords',
    )
    # Addresses of the pages to scrape.
    websites = forms.CharField(
        max_length=200,
        label='Websites',
    )

# views.py
import logging

import requests
from bs4 import BeautifulSoup
from django.shortcuts import render
from django.utils import timezone

from .forms import ScraperForm
from .models import Project

logger = logging.getLogger(__name__)

# Seconds to wait on a remote site before giving up on it; without a timeout
# a single unresponsive host would hang the request indefinitely.
_REQUEST_TIMEOUT = 10


def _is_project_container(tag):
    """Return True when *tag* holds every element a listing is built from."""
    return (
        tag.find('h2') is not None
        and tag.find('p') is not None
        and tag.find('span', class_='company-name') is not None
    )


def _find_project_containers(soup, keywords):
    """Yield each distinct listing element whose text mentions a keyword.

    ``find_all(string=...)`` returns text nodes, which have no child tags,
    so each match is mapped to its nearest ancestor that contains the title,
    description and company elements. This is a heuristic: a keyword found
    outside any listing resolves to whichever enclosing element first
    contains all three. Each container is yielded once, no matter how many
    keywords or text nodes matched inside it.
    """
    lowered = [keyword.lower() for keyword in keywords]

    def mentions_keyword(text):
        if not text:
            return False
        haystack = text.lower()
        return any(keyword in haystack for keyword in lowered)

    seen = set()
    for text_node in soup.find_all(string=mentions_keyword):
        container = text_node.find_parent(_is_project_container)
        # Identity, not equality: two listings with identical markup are
        # still separate listings.
        if container is None or id(container) in seen:
            continue
        seen.add(id(container))
        yield container


def _extract_project_fields(container):
    """Return the text fields of one listing element as ``Project`` kwargs."""
    return {
        'title': container.find('h2').text.strip(),
        'description': container.find('p').text.strip(),
        'company_name': container.find(
            'span', class_='company-name'
        ).text.strip(),
    }


def scrape_projects(request):
    """Scrape the submitted websites for keyword matches and list projects.

    GET renders ``index.html`` with an empty ``ScraperForm``. An invalid POST
    re-renders ``index.html`` with the bound form so its errors are shown.
    A valid POST fetches every whitespace-separated URL in ``websites``,
    saves one ``Project`` per listing that mentions any of the
    whitespace-separated ``keywords``, and renders ``projects.html`` with all
    stored projects under the ``projects`` context key.

    Websites that cannot be fetched (bad URL, timeout, HTTP error status)
    are logged and skipped so that one bad site does not fail the request.
    """
    if request.method != 'POST':
        return render(request, 'index.html', {'form': ScraperForm()})

    form = ScraperForm(request.POST)
    if not form.is_valid():
        return render(request, 'index.html', {'form': form})

    keywords = form.cleaned_data['keywords'].split()
    for url in form.cleaned_data['websites'].split():
        # NOTE(security): the URL is user-supplied and fetched server-side,
        # which allows requests to internal hosts (SSRF). Restrict this view
        # to trusted users or validate the host against an allow-list.
        try:
            response = requests.get(url, timeout=_REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException:
            logger.warning('Skipping %s: page could not be fetched', url,
                           exc_info=True)
            continue

        soup = BeautifulSoup(response.content, 'html.parser')
        for container in _find_project_containers(soup, keywords):
            # date_posted is not passed: the model field uses auto_now_add,
            # which overwrites any supplied value on insert.
            Project.objects.create(
                website=url, **_extract_project_fields(container)
            )

    return render(
        request, 'projects.html', {'projects': Project.objects.all()}
    )

More like this

  1. Template tag - list punctuation for a list of items by shapiromatron 3 months, 1 week ago
  2. JSONRequestMiddleware adds a .json() method to your HttpRequests by cdcarter 3 months, 2 weeks ago
  3. Serializer factory with Django Rest Framework by julio 10 months, 2 weeks ago
  4. Image compression before saving the new model / work with JPG, PNG by Schleidens 11 months ago
  5. Help text hyperlinks by sa2812 12 months ago

Comments

Please log in before commenting.