Total Blog Views: 67
Blog Status: publish
Created By: swazahmad Created at: 08-14-2021
Tags: ruby on rails data scraping webscraping wikipedia scrape any website data scrapin expert ruby on rails
This Ruby on Rails application reads the links from an uploaded CSV, scrapes each referral page, and counts how many times the given home link occurs on that page.
First of all, we will create a new Rails application and configure the database.
$ rails new scrape_wikipedia
Add the gems below to the Gemfile:
# Front-end styling: Bootstrap and the Popper.js package listed alongside it.
gem 'bootstrap', '4.0.0.alpha6'
gem 'popper_js', '~> 1.11.1'
# Gems declared inside this block are resolved from rails-assets.org
# rather than from the Gemfile's default source.
source 'https://rails-assets.org' do
gem 'rails-assets-chosen'
gem 'rails-assets-tether', '>= 1.3.3'
end
# Form helpers and jQuery-based UI widgets.
gem 'bootstrap_form', '~> 4.0'
gem 'jquery-datatables-rails', '~> 3.4.0'
gem 'jquery-rails'
gem 'jquery-ui-rails'
gem 'select2-rails'
# NOTE(review): this section was originally labelled "Use ActiveRecord Sessions",
# but no session-store gem is listed below — confirm whether one is missing.
# CarrierWave provides the file-upload support for the CSV upload feature.
gem 'carrierwave', '~> 2.0'
# NOTE(review): devise and cloudinary are not referenced by the code shown in
# this tutorial — presumably used elsewhere in the app; verify before removing.
gem 'devise', '~> 4.7', '>= 4.7.2'
gem 'cloudinary'
# HTTP client and HTML parser for the scraping feature.
gem 'httparty'
gem 'nokogiri'
# Pagination and alert dialogs.
gem 'pagy'
gem 'sweetalert-rails'
gem 'will_paginate-bootstrap'
bundle install
Update application.js file
// Sprockets manifest: every `//= require` directive below pulls the named
// asset into the compiled application.js, in the order listed.
// NOTE(review): the order is presumably significant (jQuery ahead of the
// plugins that extend it) — confirm before reordering any line.
//= require jquery
//= require popper
//= require tether
//= require chosen
//= require bootstrap-sprockets
//= require rails-ujs
//= require activestorage
//= require select2
//= require sweetalert
//= require turbolinks
// Finally include the remaining JavaScript files found under this directory.
//= require_tree .
/*
 * Sprockets manifest for the application stylesheet. The `*= require`
 * directives are only recognised inside this leading comment block, so the
 * opening comment marker must be present.
 *
 *= require_tree .
 *= require select2
 *= require sweetalert
 *= require select2-bootstrap
 *= require_self
 */

@import "bootstrap";
@import "rails_bootstrap_forms";

// Collapse the native file input to zero size; the adjacent <label> styled
// below is what the user sees in its place.
[type="file"] {
  height: 0;
  overflow: hidden;
  width: 0;
}

// Button-like appearance for the label that follows a file input.
[type="file"] + label {
  background: #f15d22;
  border: none;
  border-radius: 5px;
  color: #fff;
  cursor: pointer;
  display: inline-block;
  font-family: 'Rubik', sans-serif;
  font-size: inherit;
  font-weight: 500;
  margin-bottom: 1rem;
  outline: none;
  padding: 1rem 50px;
  position: relative;
  transition: all 0.3s;
  vertical-align: middle;

  &:hover {
    background-color: darken(#f15d22, 10%);
  }
}
$ rails g scaffold UploadCsv users:text csv_file:string generated_csv:string
# CRUD endpoints for UploadCsv records, answering in both HTML and JSON.
class UploadCsvsController < ApplicationController
  before_action :set_upload_csv, only: %i[show edit update destroy]

  require 'csv'
  require 'httparty'
  require 'nokogiri'

  # GET /upload_csvs
  # GET /upload_csvs.json
  # Lists uploads five per page.
  def index
    pagination, page_records = pagy(UploadCsv.all, page: params[:page], items: 5)
    @pagy = pagination
    @upload_csvs = page_records
  end

  # GET /upload_csvs/1
  # GET /upload_csvs/1.json
  def show; end

  # GET /upload_csvs/new
  def new
    @upload_csv = UploadCsv.new
  end

  # GET /upload_csvs/1/edit
  def edit; end

  # POST /upload_csvs
  # POST /upload_csvs.json
  # Builds the record from the submitted form; when the :upload_csv key is
  # absent an empty record is built so that validation errors are reported
  # instead of a missing-parameter error.
  def create
    @upload_csv =
      if params[:upload_csv]
        UploadCsv.new(upload_csv_params)
      else
        UploadCsv.new
      end
    saved = @upload_csv.save

    respond_to do |format|
      format.html do
        if saved
          redirect_to root_path, notice: 'Uploaded csv, we will let you know once that is processed and email will be deliver.'
        else
          render :new
        end
      end
      format.json do
        if saved
          render :show, status: :created, location: @upload_csv
        else
          render json: @upload_csv.errors, status: :unprocessable_entity
        end
      end
    end
  end

  # PATCH/PUT /upload_csvs/1
  # PATCH/PUT /upload_csvs/1.json
  def update
    updated = @upload_csv.update(upload_csv_params)

    respond_to do |format|
      format.html do
        if updated
          redirect_to @upload_csv, notice: 'Upload csv was successfully updated.'
        else
          render :edit
        end
      end
      format.json do
        if updated
          render :show, status: :ok, location: @upload_csv
        else
          render json: @upload_csv.errors, status: :unprocessable_entity
        end
      end
    end
  end

  # DELETE /upload_csvs/1
  # DELETE /upload_csvs/1.json
  def destroy
    @upload_csv.destroy

    respond_to do |format|
      format.html { redirect_to upload_csvs_url, notice: 'Upload csv was successfully destroyed.' }
      format.json { head :no_content }
    end
  end

  private

  # Loads the record addressed by the :id route parameter for member actions.
  def set_upload_csv
    @upload_csv = UploadCsv.find(params[:id])
  end

  # Strong parameters: the two file attributes plus an array of user entries.
  def upload_csv_params
    permitted_keys = [:csv_file, :generated_csv, { users: [] }]
    params.require(:upload_csv).permit(*permitted_keys)
  end
end
<%# Upload form: a multi-select of notification e-mail addresses plus the CSV file itself. %>
<div class="row">
  <%= bootstrap_form_with(model: upload_csv, local: true) do |form| %>
    <% if upload_csv.errors.any? %>
      <div id="error_explanation">
        <h2><%= pluralize(upload_csv.errors.count, "error") %> prohibited this upload_csv from being saved:</h2>
        <ul>
          <% upload_csv.errors.full_messages.each do |message| %>
            <li><%= message %></li>
          <% end %>
        </ul>
      </div>
    <% end %>

    <%# Starts empty; entries are created client-side by select2 in tag mode. %>
    <select class="js-searchable" name="upload_csv[users][]" multiple="multiple" style="width: 270px;">
    </select>

    <%= form.file_field :csv_file %>

    <div class="actions">
      <%= form.submit %>
    </div>
  <% end %>
</div>

<script type="text/javascript">
  // Turn the select into a free-text tag box that only accepts e-mail addresses.
  $(document).ready(function () {
    $(".js-searchable").select2({
      tags: true,
      placeholder: "Add multiple email Address only email allowed.",
      tokenSeparators: [','],
      // Returning null makes select2 discard the typed text instead of creating a tag.
      createTag: function (params) {
        var candidate = params.term;
        if (!validateEmail(candidate)) {
          return null;
        }
        return { id: candidate, text: candidate };
      }
    });
  });

  // True when `email` matches the address pattern below.
  function validateEmail(email) {
    var emailPattern = /^(([^<>()[\]\\.,;:\s@"]+(\.[^<>()[\]\\.,;:\s@"]+)*)|(".+"))@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}])|(([a-zA-Z\-0-9]+\.)+[a-zA-Z]{2,}))$/;
    return emailPattern.test(email);
  }
</script>
$ rails generate uploader Avatar
# An uploaded CSV together with the users to notify once it has been processed.
class UploadCsv < ApplicationRecord
  validates :csv_file, presence: true
  validates :users, presence: true

  mount_uploader :csv_file, AvatarUploader

  # Enqueue only after the INSERT has committed. A plain `after_create` runs
  # inside the surrounding transaction, so a worker could pick the job up
  # before the row is visible to it. The commit variant is already specific
  # to creation, which makes the former `on: :create` option unnecessary.
  after_create_commit :processed_csv

  # Hands this record to the background job that generates the result CSV.
  def processed_csv
    GenrateCsvJob.perform_later(self)
  end
end
Let us generate a job that scrapes the data for each row of the provided CSV and emails the result to the list of users.
jobs/genrate_csv_job.rb
require 'csv'

# Scrapes every referral page listed in an uploaded CSV, counts how often the
# row's home link appears among that page's anchors, writes the counts to a
# result CSV and e-mails it to the upload's users.
class GenrateCsvJob < ApplicationJob
  queue_as :default

  # Header row of the generated CSV.
  OUTPUT_HEADERS = %w[referal_link home_link count].freeze

  # Runs the whole pipeline for one upload.
  #
  # @param upload_csv [UploadCsv] record whose csv_file is processed
  #
  # Network or parse errors are deliberately not rescued here, so a failing
  # row aborts the job and surfaces through the queue's error handling.
  def perform(upload_csv)
    rows = processed_csv(upload_csv)

    # One output file per record: a single shared file would make every
    # record's generated_csv point at whichever upload was processed last.
    output_path = Rails.root.join('public', "product_data_#{upload_csv.id}.csv").to_s

    CSV.open(output_path, 'w', write_headers: true, headers: OUTPUT_HEADERS) do |writer|
      rows.each { |row| writer << row }
    end

    # Record the path only once the file has been fully written and closed.
    upload_csv.update(generated_csv: output_path)

    UserMailer.send_csv(upload_csv).deliver_now! unless rows.empty?
  end

  # Builds one [referral_link, home_link, count] row per line of the uploaded
  # CSV. The input column names are :refferal_link and :home_link (spelled as
  # in the uploaded file's header row).
  #
  # @param upload_csv [UploadCsv] record whose csv_file is read
  # @return [Array<Array<String>>] the rows, also kept in @new_array
  def processed_csv(upload_csv)
    @new_array = []

    CSV.foreach(upload_csv.csv_file.path, headers: true, header_converters: :symbol) do |row|
      referral_link = row[:refferal_link]
      home_link = row[:home_link]

      response = HTTParty.get(referral_link)
      hrefs = Nokogiri::HTML(response.body).css('a').map { |anchor| anchor['href'] }

      # Array#count yields 0 when the link never occurs, so the count column
      # always holds a number rather than an empty cell.
      @new_array.push([referral_link, home_link, hrefs.count(home_link).to_s])
    end

    @new_array
  end
end
Generate a UserMailer with a send_csv action that emails the generated CSV, as an attachment, to the list of users.
$ rails generate mailer UserMailer send_csv
Update mailers/user_mailer.rb
# Mailer that delivers the generated CSV to the users stored on an upload.
class UserMailer < ApplicationMailer
  # Builds the message carrying the generated file as "parsed.csv".
  #
  # upload_csv - record providing generated_csv (a readable file path) and
  #              users (a JSON-encoded list, parsed here into the recipients).
  def send_csv(upload_csv)
    @greeting = 'Hi'

    csv_contents = File.read(upload_csv.generated_csv)
    attachments['parsed.csv'] = csv_contents

    recipients = JSON.parse(upload_csv.users)
    mail(to: recipients, subject: 'Csv is parsed succesfully.')
  end
end
We have a “get things done” culture at our workplace. There are no excuses, no ifs or buts in our dictionary. Committed to navigating the ship of creativity to create mobile solutions, we solve the real-life problems of our clients and their customers. Our passion for work has won us many awards, year after year.
© Copyright Shadbox. All Rights Reserved
Rate Blog :
Share on :
Do you have any blog suggestion? please click on the link