Science Fictio 'Urania'
Posted: 2008-11-10 21:22:15
Dear Friends, I'm here with a donkey's question (as usual):
I'm trying to get informations from a sci-fi Italian books catalog and I wrote a script 'Urania.it.ifs to retrieve them.
I have problems in loading the source code of the pages:
One of the first things I do is to write the URL and, looking at the page pointed in the fieldUrl of the record selected it is showed to me... but the script gives an error.
You can try selecting any number in the serie (from 1 to 2400).
Would you help me?
Thanks in advance for this wonderful program that helps me in retrieving a lot of the informations I like from the net!
Bye, Fulvio.
I'm trying to get informations from a sci-fi Italian books catalog and I wrote a script 'Urania.it.ifs to retrieve them.
I have problems in loading the source code of the pages:
Code: Select all
(***************************************************
Ant Movie Catalog importation script
www.antp.be/software/moviecatalog/
[Infos]
Authors=
Title=Urania.it.ifs
Description=
Site=
Language=IT
Version=
Requires=3.5.1
Comments=
License=
GetInfo=0
[Options]
***************************************************)
program Urania;
uses
StringUtils1; // Script needs external unit StringUtils1.pas in scripts folder !
var
ComicURL, UrlBase, ImgUrl, ComicSeries, ComicNumber, Collana: string; // Define some script variables
Page, SavePage, Value, saveValue : string;
CharAbNormal, CharNormal : String;
update: string;
Titolo_e_Periodicita: string;
sw_serie, StartDelimiter, endDelimiter : string;
numCollana : integer;
CharCut: integer;
DataPubbl,Mese_Pubblicazione: String;
ComicInit, ComicFine: Integer;
strComicInit, strComicFine: String;
// i, j: integer;
const
crlf = #13#10; // carriage return/line feed
// ***** Analyze Item's Page *****
// era procedure AnalyzePageAlborist(URL: String); // Variable "URL" is handed over (former variable "ComicURL")
procedure AnalyzePageAlborist; // Variable "URL" is handed over (former variable "ComicURL")
begin
Page := GetPage(ComicURL); // Fetch source code from website and store inside "Page"
Value := ''; // Make sure "Value" is empty
Value := TextBetween(Page, '<HTML>', '</HTML>'); // Extract the picture URL from "Page"
if length (Value) < 512 then
begin
showError ('Errore. collana ' + getfield(fieldMedia) + ' n.' + getfield(fieldMediaType) + '. URL errato *' + Page + '*');
exit;
showmessage ('proseguo con errori');
end
CharAbNormal := '<B';
CharNormal := '<b';
StringReplace(Page, CharAbNormal, CharNormal);
CharAbNormal := '</B';
CharNormal := '</b';
StringReplace(Page, CharAbNormal, CharNormal);
SavePage := Page;
// Picture import
Value := ''; // Make sure "Value" is empty
Value := TextBetween(Page, '<img src="', '"'); // Extract the picture URL from "Page"
if Value = '' then // If "Value" is still empty ( = no picture URL ) then..
Showmessage (URL + ' Immagine non trovata');
value := Urlbase + value;
if Value <> '' then // If "Value" now contains picture URL then..
GetPicture(Value); // .. download and save picture
// Serie import
Value := ''; // Make sure "Value" is empty
Value := TextBetween(Page, '<title>Archivio arretrati: scheda dell''albo di ', '</title>');
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value); // Clean title from HTML tags (if some exist)
Value := FullTrim (Value);
SetField(fieldSource, Value); // Save title to field Label
// Titolo tradotto
Value := '';
Value := TextBetween(Page, '<table border=0 cellspacing=0 cellpadding=0>', '</table>');
Titolo_e_Periodicita := Value;
Value := TextBetween(Value, '<b>', '</b>'); // Extract exact title from variable "Value" now
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value); // Clean title from HTML tags (if some exist)
Value := FullTrim (Value);
// Value := StringReplace(Value, '’', '''');
SetField(fieldTranslatedTitle, Value); // Save title to field TranslatedTitle
// Description / Storia
// struttura dei primi numeri
Value := '';
saveValue := '';
// Storia
Value := TextBetween(Page, '<table width=100% cellspacing=0 cellpadding=0 border=0>', '</DIV>'); // Extract description part from variable "Page"
Value := TextBetween(Value, '<font face="Arial" size=2>', 'In questo numero:'); // Extract exact description from variable "Value" now
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim (Value); // Clean up the description
saveValue := Value;
// ***************** inizio periodicità
StartDelimiter := '<font face="Arial" size=2>';
CharCut := Pos(StartDelimiter, Value);
EndDelimiter := '</font>';
Value := TextBetween(Page, StartDelimiter, EndDelimiter);
CharCut := CharCut + length(StartDelimiter) + length(Value) + length(EndDelimiter);
Delete(SaveValue, 1, Charcut); // stringa in cui cercare il prossimo campo
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value); // Clean title from HTML tags (if some exist)
Value := FullTrim (Value);
// DataPubbl := Value + '</font>'; //serie 18: contiene titolo + 'mensile' e ripristino fine delimiter
Mese_Pubblicazione := ''; // pulisco Mese_Pubblicazione
Mese_Pubblicazione := TextBetween(Page, '<font face="Verdana" size="1" color="#FFFFFF">', '</font>');
HTMLDecode(Mese_Pubblicazione); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Mese_Pubblicazione); // Clean title from HTML tags (if some exist)
Mese_Pubblicazione := FullTrim (Mese_Pubblicazione);
Value := Mese_Pubblicazione;
// Inizio Data di pubblicazione e periodicità
if (ComicSeries = '1') or (ComicSeries = '10') or (ComicSeries = '13')
or (ComicSeries = '17') or (ComicSeries = '18')
then
begin
Value := TextBetween(Titolo_e_Periodicita, ',', '<br>');
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value); // Clean title from HTML tags (if some exist)
Value := FullTrim (Value);
if Mese_pubblicazione <> '' then
Value := Mese_pubblicazione + ' - ' + Value;
end
// fine Data di pubblicazione e periodicità
SetField(fieldDirector, Value); // Save data pubblicazione to Field Director
// ***************** fine periodicità
// Comments + In questo numero
Value := TextBetween(page, '<DIV VALIGN=TOP>', ' </td>'); // Extract description part from variable "Page"
Value := TextBetween(value, '<DIV VALIGN=TOP><font face="Arial" size=2>', '</font></DIV>');
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim(Value); // Clean up the description
if length (Value) < 2 then // 2 as there must be crlf
showError ('Errore. collana ' + getfield(fieldMedia) + ' n.' + getfield(fieldMediaType) + '. Trama non presente');
saveValue := Value;
SetField(fieldDescription, saveValue); // Save description to field Description
// <td width="50%" valign="bottom" ><font color="#0080C0" face="Verdana" size=1>
// Comments / In questo numero
Value := TextBetween(Page, '<td width="50%" valign="bottom" ><font color="#0080C0" face="Verdana" size=1>', ' </td>');
Value := TextBetween(Value, '<strong>', '</strong>'); // Extract exact description from variable "Value" now
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim(Value);
if length(Value) > 0 then
SetField(fieldComments, ('In questo numero: ' + Value)); // Save description to field Description
// fieldActors / Autori
EstraiAutori;
SetField(fieldActors, saveValue); // Save description to field Actors
end; // *********************** End of procedure "AnalyzePageAlborist" *****************************************
Procedure EstraiAutori;
begin // fieldActors / Autori
SaveValue := '';
Value := '';
Value := TextBetween(Page, 'Soggetto e sceneggiatura:', '</b>'); // NB: questo deve essere il primo della sequenza!
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim(Value); // Clean up the description
if length(Value) > 0 then
begin
saveValue := 'Soggetto e sceneggiatura: ' + Value + crlf;
end
Value := '';
Value := TextBetween(Page, 'Soggetto e Sceneggiatura:', '</b>'); // Extract part from variable "Page"
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim (Value); // Clean up the description
if length(Value) > 0 then
begin
saveValue := SaveValue + 'Soggetto e sceneggiatura: ' + Value + crlf;
end
Value := '';
Value := TextBetween(Page, 'Testi:', '</b>'); // Extract part from variable "Page"
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim (Value); // Clean up the description
if length(Value) > 0 then
begin
saveValue := SaveValue + 'Testi: ' + Value + crlf;
end
Value := '';
Value := TextBetween(Page, 'Testo, disegni e copertina:', '</b>)<br>'); // Extract part from variable "Page"
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim(Value); // Clean up the description
if length(Value) > 0 then
begin
saveValue := SaveValue + 'Testo, disegni e copertina: ' + Value + crlf;
end
Value := '';
Value := TextBetween(Page, 'Testo, disegni e copertina:', '</b><br>'); // Extract part from variable "Page"
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim(Value); // Clean up the description
if length(Value) > 0 then
begin
saveValue := SaveValue + 'Testo, disegni e copertina: ' + Value + crlf;
end
Value := '';
Value := TextBetween(Page, 'Soggetto, sceneggiatura, disegni e copertina:', '</b>'); // Extract part from variable "Page"
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim(Value); // Clean up the description
if length(Value) > 0 then
begin
saveValue := SaveValue + 'Soggetto, sceneggiatura, disegni e copertina:' + Value + crlf;
end
Value := '';
Value := TextBetween(Page, 'Soggetto, sceneggiatura e copertina:', '</b>'); // Extract part from variable "Page"
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim(Value); // Clean up the description
if length(Value) > 0 then
begin
saveValue := SaveValue + 'Soggetto, sceneggiatura e copertina: ' + Value + crlf;
end
Value := '';
Value := TextBetween(Page, 'Soggetto:', '</b>'); // Extract part from variable "Page"
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim (Value); // Clean up the description
if length(Value) > 0 then
begin
saveValue := SaveValue + 'Soggetto: ' + Value + crlf;
end
Value := '';
Value := TextBetween(Page, 'Soggetto, sceneggiatura e disegni:', '</b>'); // Extract part from variable "Page"
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim (Value); // Clean up the description
if length(Value) > 0 then
begin
saveValue := SaveValue + 'Soggetto, sceneggiatura e disegni: ' + Value + crlf;
end
Value := '';
Value := TextBetween(Page, 'Sceneggiatura:', '</b>'); // Extract part from variable "Page"
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim (Value); // Clean up the description
if length(Value) > 0 then
begin
saveValue := SaveValue + 'Sceneggiatura: ' + Value + crlf;
end
Value := '';
Value := TextBetween(Page, 'Disegni e copertina:', '</b>'); // Extract part from variable "Page"
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim (Value); // Clean up the description
if length(Value) > 0 then
begin
saveValue := saveValue + 'Disegni e copertina: ' + Value + crlf;
end
Value := '';
Value := TextBetween(Page, 'Matite:', '</b><br>'); // Extract part from variable "Page"
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim (Value); // Clean up the description
if length(Value) > 0 then
begin
saveValue := saveValue + 'Matite: ' + Value + crlf;
end
Value := '';
Value := TextBetween(Page, 'Disegni:', '</b>'); // Extract part from variable "Page"
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim (Value); // Clean up the description
if length(Value) > 0 then
begin
saveValue := saveValue + 'Disegni: ' + Value + crlf;
end
Value := '';
Value := TextBetween(Page, 'Copertina:', '</b>'); // Extract part from variable "Page"
HTMLDecode(Value); // Clean description from HTML codes (if some exist)
HTMLRemoveTags(Value);
Value := FullTrim (Value); // Clean up the description
if length(Value) > 0 then
begin
saveValue := saveValue + 'Copertina: ' + Value + crlf;
end
end; // *********************** End of procedure "EstraiAutori" *****************************************
// ***** Beginning of the script *****
begin
if CheckVersion(3,5,0) then // Checks if Ant Movie Catalog version is 3.5.0 or higher
begin
ComicNumber := GetField(fieldMediaType);
if ComicNumber = '' then
Input('http://www.mondourania.com/urania/', ComicSeries + 'select the number of magazine: ', ComicNumber);
ComicInit := StrToInt(ComicNumber, 0);
ComicInit := (ComicInit div 20) * 20 + 1;
ComicFine := ComicInit + 19;
strComicInit := IntToStr(ComicInit);
strComicFine := IntToStr(ComicFine);
ComicURL := 'http://www.mondourania.com/urania/u' + StrComicInit + '-' + StrComicFine + '/urania' + ComicNumber + '.htm';
UrlBase := 'http://www.mondourania.com/urania/u' + StrComicInit + '-' + StrComicFine + '/';
Setfield(fieldURL, ComicURL); // Save variable URL to field URL
//era AnalyzePageAlborist(ComicURL; // Script hands over item URL and jumps to procedure AnalyzePageAlborist
AnalyzePageAlborist; // Script hands over item URL and jumps to procedure AnalyzePageAlborist
end
else
ShowMessage('This script requires a newer version of Ant Movie Catalog (at least the version 3.5.0)');
// If Checkversion fails end.
end.
You can try selecting any number in the serie (from 1 to 2400).
Would you help me?
Thanks in advance for this wonderful program that helps me in retrieving a lot of the informations I like from the net!
Bye, Fulvio.