I'm trying to get informations from a sci-fi Italian books catalog and I wrote a script 'Urania.it.ifs to retrieve them.
I have problems in loading the source code of the pages:
Code: Select all
(***************************************************
Ant Movie Catalog importation script
www.antp.be/software/moviecatalog/
[Infos]
Authors=
Title=Urania.it.ifs
Description=
Site=
Language=IT
Version=
Requires=3.5.1
Comments=
License=
GetInfo=0
[Options]
***************************************************)
program Urania;
uses
StringUtils1; // Script needs external unit StringUtils1.pas in scripts folder !
var
ComicURL, UrlBase, ImgUrl, ComicSeries, ComicNumber, Collana: string; // Define some script variables
Page, SavePage, Value, saveValue : string;
CharAbNormal, CharNormal : String;
update: string;
Titolo_e_Periodicita: string;
sw_serie, StartDelimiter, endDelimiter : string;
numCollana : integer;
CharCut: integer;
DataPubbl,Mese_Pubblicazione: String;
ComicInit, ComicFine: Integer;
strComicInit, strComicFine: String;
// i, j: integer;
const
crlf = #13#10; // carriage return/line feed
// ***** Analyze Item's Page *****
// era procedure AnalyzePageAlborist(URL: String); // Variable "URL" is handed over (former variable "ComicURL")
procedure AnalyzePageAlborist; // Variable "URL" is handed over (former variable "ComicURL")
begin
Page := GetPage(ComicURL); // Fetch source code from website and store inside "Page"
Value := ''; // Make sure "Value" is empty
Value := TextBetween(Page, '<HTML>', '</HTML>'); // Extract the picture URL from "Page"
if length (Value) < 512 then
begin
showError ('Errore. collana ' + getfield(fieldMedia) + ' n.' + getfield(fieldMediaType) + '. URL errato *' + Page + '*');
exit;
showmessage ('proseguo con errori');
end
CharAbNormal := '<B';
CharNormal := '<b';
StringReplace(Page, CharAbNormal, CharNormal);
CharAbNormal := '</B';
CharNormal := '</b';
StringReplace(Page, CharAbNormal, CharNormal);
SavePage := Page;
// Picture import
Value := ''; // Make sure "Value" is empty
Value := TextBetween(Page, '<img src="', '"'); // Extract the picture URL from "Page"
if Value = '' then // If "Value" is still empty ( = no picture URL ) then..
Showmessage (URL + ' Immagine non trovata');
value := Urlbase + value;
if Value <> '' then // If "Value" now contains picture URL then..
GetPicture(Value); // .. download and save picture
// Serie import
Value := ''; // Make sure "Value" is empty
Value := TextBetween(Page, '<title>Archivio arretrati: scheda dell''albo di ', '</title>');
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value); // Clean title from HTML tags (if some exist)
Value := FullTrim (Value);
SetField(fieldSource, Value); // Save title to field Label
// Titolo tradotto
Value := '';
Value := TextBetween(Page, '<table border=0 cellspacing=0 cellpadding=0>', '</table>');
Titolo_e_Periodicita := Value;
Value := TextBetween(Value, '<b>', '</b>'); // Extract exact title from variable "Value" now
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value); // Clean title from HTML tags (if some exist)
Value := FullTrim (Value);
// Value := StringReplace(Value, '’', '''');
SetField(fieldTranslatedTitle, Value); // Save title to field TranslatedTitle
// Description / Storia
// struttura dei primi numeri
Value := '';
saveValue := '';
// Storia
Value := TextBetween(Page, '<table width=100% cellspacing=0 cellpadding=0 border=0>', '</DIV>'); // Extract description part from variable "Page"
Value := TextBetween(Value, '<font face="Arial" size=2>', 'In questo numero:'); // Extract exact description from variable "Value" now
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim (Value); // Clean up the description
saveValue := Value;
// ***************** inizio periodicità
StartDelimiter := '<font face="Arial" size=2>';
CharCut := Pos(StartDelimiter, Value);
EndDelimiter := '</font>';
Value := TextBetween(Page, StartDelimiter, EndDelimiter);
CharCut := CharCut + length(StartDelimiter) + length(Value) + length(EndDelimiter);
Delete(SaveValue, 1, Charcut); // stringa in cui cercare il prossimo campo
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value); // Clean title from HTML tags (if some exist)
Value := FullTrim (Value);
// DataPubbl := Value + '</font>'; //serie 18: contiene titolo + 'mensile' e ripristino fine delimiter
Mese_Pubblicazione := ''; // pulisco Mese_Pubblicazione
Mese_Pubblicazione := TextBetween(Page, '<font face="Verdana" size="1" color="#FFFFFF">', '</font>');
HTMLDecode(Mese_Pubblicazione); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Mese_Pubblicazione); // Clean title from HTML tags (if some exist)
Mese_Pubblicazione := FullTrim (Mese_Pubblicazione);
Value := Mese_Pubblicazione;
// Inizio Data di pubblicazione e periodicità
if (ComicSeries = '1') or (ComicSeries = '10') or (ComicSeries = '13')
or (ComicSeries = '17') or (ComicSeries = '18')
then
begin
Value := TextBetween(Titolo_e_Periodicita, ',', '<br>');
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value); // Clean title from HTML tags (if some exist)
Value := FullTrim (Value);
if Mese_pubblicazione <> '' then
Value := Mese_pubblicazione + ' - ' + Value;
end
// fine Data di pubblicazione e periodicità
SetField(fieldDirector, Value); // Save data pubblicazione to Field Director
// ***************** fine periodicità
// Comments + In questo numero
Value := TextBetween(page, '<DIV VALIGN=TOP>', ' </td>'); // Extract description part from variable "Page"
Value := TextBetween(value, '<DIV VALIGN=TOP><font face="Arial" size=2>', '</font></DIV>');
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim(Value); // Clean up the description
if length (Value) < 2 then // 2 as there must be crlf
showError ('Errore. collana ' + getfield(fieldMedia) + ' n.' + getfield(fieldMediaType) + '. Trama non presente');
saveValue := Value;
SetField(fieldDescription, saveValue); // Save description to field Description
// <td width="50%" valign="bottom" ><font color="#0080C0" face="Verdana" size=1>
// Comments / In questo numero
Value := TextBetween(Page, '<td width="50%" valign="bottom" ><font color="#0080C0" face="Verdana" size=1>', ' </td>');
Value := TextBetween(Value, '<strong>', '</strong>'); // Extract exact description from variable "Value" now
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim(Value);
if length(Value) > 0 then
SetField(fieldComments, ('In questo numero: ' + Value)); // Save description to field Description
// fieldActors / Autori
EstraiAutori;
SetField(fieldActors, saveValue); // Save description to field Actors
end; // *********************** End of procedure "AnalyzePageAlborist" *****************************************
Procedure EstraiAutori;
begin // fieldActors / Autori
SaveValue := '';
Value := '';
Value := TextBetween(Page, 'Soggetto e sceneggiatura:', '</b>'); // NB: questo deve essere il primo della sequenza!
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim(Value); // Clean up the description
if length(Value) > 0 then
begin
saveValue := 'Soggetto e sceneggiatura: ' + Value + crlf;
end
Value := '';
Value := TextBetween(Page, 'Soggetto e Sceneggiatura:', '</b>'); // Extract part from variable "Page"
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim (Value); // Clean up the description
if length(Value) > 0 then
begin
saveValue := SaveValue + 'Soggetto e sceneggiatura: ' + Value + crlf;
end
Value := '';
Value := TextBetween(Page, 'Testi:', '</b>'); // Extract part from variable "Page"
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim (Value); // Clean up the description
if length(Value) > 0 then
begin
saveValue := SaveValue + 'Testi: ' + Value + crlf;
end
Value := '';
Value := TextBetween(Page, 'Testo, disegni e copertina:', '</b>)<br>'); // Extract part from variable "Page"
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim(Value); // Clean up the description
if length(Value) > 0 then
begin
saveValue := SaveValue + 'Testo, disegni e copertina: ' + Value + crlf;
end
Value := '';
Value := TextBetween(Page, 'Testo, disegni e copertina:', '</b><br>'); // Extract part from variable "Page"
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim(Value); // Clean up the description
if length(Value) > 0 then
begin
saveValue := SaveValue + 'Testo, disegni e copertina: ' + Value + crlf;
end
Value := '';
Value := TextBetween(Page, 'Soggetto, sceneggiatura, disegni e copertina:', '</b>'); // Extract part from variable "Page"
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim(Value); // Clean up the description
if length(Value) > 0 then
begin
saveValue := SaveValue + 'Soggetto, sceneggiatura, disegni e copertina:' + Value + crlf;
end
Value := '';
Value := TextBetween(Page, 'Soggetto, sceneggiatura e copertina:', '</b>'); // Extract part from variable "Page"
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim(Value); // Clean up the description
if length(Value) > 0 then
begin
saveValue := SaveValue + 'Soggetto, sceneggiatura e copertina: ' + Value + crlf;
end
Value := '';
Value := TextBetween(Page, 'Soggetto:', '</b>'); // Extract part from variable "Page"
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim (Value); // Clean up the description
if length(Value) > 0 then
begin
saveValue := SaveValue + 'Soggetto: ' + Value + crlf;
end
Value := '';
Value := TextBetween(Page, 'Soggetto, sceneggiatura e disegni:', '</b>'); // Extract part from variable "Page"
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim (Value); // Clean up the description
if length(Value) > 0 then
begin
saveValue := SaveValue + 'Soggetto, sceneggiatura e disegni: ' + Value + crlf;
end
Value := '';
Value := TextBetween(Page, 'Sceneggiatura:', '</b>'); // Extract part from variable "Page"
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim (Value); // Clean up the description
if length(Value) > 0 then
begin
saveValue := SaveValue + 'Sceneggiatura: ' + Value + crlf;
end
Value := '';
Value := TextBetween(Page, 'Disegni e copertina:', '</b>'); // Extract part from variable "Page"
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim (Value); // Clean up the description
if length(Value) > 0 then
begin
saveValue := saveValue + 'Disegni e copertina: ' + Value + crlf;
end
Value := '';
Value := TextBetween(Page, 'Matite:', '</b><br>'); // Extract part from variable "Page"
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim (Value); // Clean up the description
if length(Value) > 0 then
begin
saveValue := saveValue + 'Matite: ' + Value + crlf;
end
Value := '';
Value := TextBetween(Page, 'Disegni:', '</b>'); // Extract part from variable "Page"
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim (Value); // Clean up the description
if length(Value) > 0 then
begin
saveValue := saveValue + 'Disegni: ' + Value + crlf;
end
Value := '';
Value := TextBetween(Page, 'Copertina:', '</b>'); // Extract part from variable "Page"
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim (Value); // Clean up the description
if length(Value) > 0 then
begin
saveValue := saveValue + 'Copertina: ' + Value + crlf;
end
end; // *********************** End of procedure "EstraiAutori" *****************************************
// ***** Beginning of the script *****
begin
if CheckVersion(3,5,0) then // Checks if Ant Movie Catalog version is 3.5.0 or higher
begin
ComicNumber := GetField(fieldMediaType);
if ComicNumber = '' then
Input('http://www.mondourania.com/urania/', ComicSeries + 'select the number of magazine: ', ComicNumber);
ComicInit := StrToInt(ComicNumber, 0);
ComicInit := (ComicInit div 20) * 20 + 1;
ComicFine := ComicInit + 19;
strComicInit := IntToStr(ComicInit);
strComicFine := IntToStr(ComicFine);
ComicURL := 'http://www.mondourania.com/urania/u' + StrComicInit + '-' + StrComicFine + '/urania' + ComicNumber + '.htm';
UrlBase := 'http://www.mondourania.com/urania/u' + StrComicInit + '-' + StrComicFine + '/';
Setfield(fieldURL, ComicURL); // Save variable URL to field URL
//era AnalyzePageAlborist(ComicURL; // Script hands over item URL and jumps to procedure AnalyzePageAlborist
AnalyzePageAlborist; // Script hands over item URL and jumps to procedure AnalyzePageAlborist
end
else
ShowMessage('This script requires a newer version of Ant Movie Catalog (at least the version 3.5.0)');
// If Checkversion fails end.
end.
You can try selecting any number in the serie (from 1 to 2400).
Would you help me?
Thanks in advance for this wonderful program that helps me in retrieving a lot of the informations I like from the net!
Bye, Fulvio.