%% Facts
%% Directory and external-tool settings consulted by the predicates in this file.
%% Lines starting with a single '%' directly followed by a fact are earlier
%% settings that have been commented out.
%genome_dir('/mnt/home/knw/Robot_Scientist/KEGG/genomes/').
%sequence_dir('/mnt/home/knw/Robot_Scientist/KEGG/sequences/').
% genome_dir(Dir): prefix that is concatenated in front of a genome file name.
genome_dir('/home/condor/enzyme_files/mnt/home/knw/Robot_Scientist/KEGG/genomes/').
% sequence_dir(Dir): prefix under which the per-ORF sequence files are created.
sequence_dir('/home/condor/enzyme_files/mnt/home/knw/Robot_Scientist/KEGG/sequences/').
%% added by maria to output all the likely orfs
%output_dir('find_orfs/').
% output_dir(Dir): prefix for the BLAST output files and the enz_<Enzyme>.txt files.
output_dir('/home/condor/enzyme_files/find_orfs/').
% match_method(Method,CommandPrefix): beginning of the shell command line used
% for each matching tool; callers append the file arguments to it.
match_method(fasta,'fasta34 -q ').
match_method(ssearch,'ssearch34 -q ').
%% Changed by maria to include BLAST: the psiblast match_method/2 fact is defined further down, after done_genes/2.


%% genes already searched before cluster node crashed on 3/12/2006
%
% done_genes(+ThisEnzyme,-EList)
%   EList is the list of gene identifiers recorded as already searched for
%   the EC number ThisEnzyme. The predicate fails when ThisEnzyme is unbound
%   or when there is no entry for it (same as the original chain of ==
%   tests).
%
%   Each list contains every identifier of the original list exactly once,
%   in order of first appearance: the original lists repeated the same run
%   of identifiers several times. Membership is unchanged, so the
%   subtract/3 call in find_enzyme_orfsm/4 produces the same result.
%   The identifier 'rba_RB7087' in the '6.3.2.5' list was split over two
%   source lines inside the quotes and has been rejoined.
done_genes(ThisEnzyme,EList) :-
	nonvar(ThisEnzyme),
	done_genes_table(ThisEnzyme,EList).

% done_genes_table(?Enzyme,?Genes): one fact per enzyme; private to done_genes/2.
done_genes_table('1.1.1.17',
	['bab_bbp516','bas_BUsg551','bha_BH3851','bsu_BG11216','buc_BU571','cac_CAC0157','ecc_c4417','ece_Z5024','ecj_JW3574',
	 'eco_b3600','ecs_ECs4476','efa_EF0413','lla_L33416','lpl_0233','lpl_lp','mmy_0017','mmy_MSC','mpn_MPN652','mpu_7500',
	 'mpu_MYPU','oih_OB2600','pmu_PM1062','sam_MW2085','sau_SA1963','sav_SAV2159','sfl_SF3634','sfx_S4134','smu_SMU.1182',
	 'spn_SP0397','spr_spr0359','stm_STM3686','stt_t3833','sty_STY4110','tte_TTE0342','vch_VCA1046','vpa_VP0369','vvu_VV10639',
	 'vy_VV0504','ype_YPO4067','ypk_y4086']).
%% node 20
done_genes_table('2.7.7.3',
	['pai_PAE0887','pfu_PF1084','pab_PAB0944','pho_PH0624']).
%% node 08
done_genes_table('3.1.3.5',
	['afu_AF0876','mth_MTH1492','dra_DR0505','mpu_MYPU','mpu_0550']).
%% node 11
done_genes_table('4.1.1.36',
	['aae_aq','aae_815','dra_DR0579','pgi_PG1851','bth_BT1362','rba_RB7087','fnu_FN0711','mpa_MAP1125',
	 'mbo_Mb1426','mtc_MT1436','mtu_Rv1391','oih_OB1504','bca_BCE3912','bce_BC3867','ban_BA4007','bha_BH2510',
	 'bsu_BG13388','rpa_RPA0081','bja_bll0759','bms_BRA1064','bme_BMEII0235','atc_AGR','atc_C','atc_552',
	 'atu_Atu0316','mlo_mlr3167','gsu_GSU1124','neu_NE1463','bpa_BPP1982','bpe_BP1751','rso_RSc2461',
	 'nma_NMA1916','nme_NMB1658','pst_PSPTO0085','ppu_PP5285','pae_PA5320','vpa_VP0181','vvy_VV0283',
	 'vvu_VV10828','vch_VC0215','xac_XAC3914','xcc_XCC3859','xft_PD0118','xfa_XF0149','pmu_PM1153',
	 'hdu_HD0733','hin_HI0953','wbr_WGLp402']).
%% node 04
done_genes_table('2.4.2.1',
	['ape_APE0993','dra_DR2166','pgi_PG0558','bth_BT1881','rba_RB5437','rba_RB6588','fnu_FN0435']).
%% node 12
done_genes_table('6.3.2.5',
	['aae_aq','aae_815','dra_DR0579','pgi_PG1851','bth_BT1362','rba_RB7087','fnu_FN0711','mpa_MAP1125',
	 'mbo_Mb1426','mtc_MT1436','mtu_Rv1391','oih_OB1504','bca_BCE3912','bce_BC3867','ban_BA4007','bha_BH2510',
	 'bsu_BG13388','rpa_RPA0081','bja_bll0759','bms_BRA1064','bme_BMEII0235','atc_AGR','atc_C','atc_552',
	 'atu_Atu0316','mlo_mlr3167','bba_Bd3562','gsu_GSU1124','neu_NE1463','bpa_BPP1982','bpe_BP1751',
	 'rso_RSc2461','nma_NMA1916','nme_NMB1658','pst_PSPTO0085','ppu_PP5285','pae_PA5320','vpa_VP0181',
	 'vvy_VV0283','vvu_VV10828','vch_VC0215','xac_XAC3914','xcc_XCC3859','xft_PD0118','xfa_XF0149',
	 'pmu_PM1153','hdu_HD0733','hin_HI0953','wbr_WGLp402','plu_plu4866']).
%% node	22
done_genes_table('2.7.1.35',
	['dra_DRA0184','bth_BT4458','blo_BL0934','poy_PAM434','oih_OB1040','bca_BCE5542','bce_BC5413','ban_BA5663',
	 'bsu_BG10598','rpa_RPA2799','bja_blr4233','bms_BR1830','bme_BMEI0221','atc_AGR','atc_C','atc_4518',
	 'atu_Atu2487','mlo_mlr4132','bpa_BPP2900','bpe_BP1321','pst_PSPTO5521','ppu_PP5357','pae_PA5516',
	 'vpa_VPA1632','vvy_VVA0065','vvu_VV21237','xac_XAC1524','xcc_XCC1476','pmu_PM0290','hin_HI0405',
	 'plu_plu2595','ypk_y1967','ype_YPO2368']).
%% node 05
done_genes_table('2.7.1.56',
	['dra_DRB0074','fnu_FN1440','mmy_MSC','mmy_0647','mmy_0832','mpe_MYPE7750','mpn_MPN079']).
%% node 13
done_genes_table('2.7.7.2',
	['aae_aq','aae_139','dra_DR1008','pgi_PG0957','bth_BT2543','pmt_PMT0364','pmm_PMM1273','pma_Pro1347',
	 'ana_alr4848','gvi_gll0217','fnu_FN0707','blo_BL1619']).
%% node 17
done_genes_table('2.3.1.51',
	['pgi_PG1249','bth_BT0243','pmt_PMT1998','pmm_PMM0138','pma_Pro0160','ana_alr0241','gvi_gll3009','blo_BL1424']).
%% node 35
done_genes_table('2.7.1.21',
	['pai_PAE2453','hal_VNG1515G','dra_DR1984','pgi_PG0925','bth_BT2275','rba_RB8399','poy_PAM104','mmy_MSC',
	 'mmy_0149','mga_MGA','mga_0502','mpe_MYPE10320','mpu_MYPU','mpu_1450','mpn_MPN044']).
%% node 21
done_genes_table('6.3.2.12',[]).
%% node 06
done_genes_table('1.17.4.2',[]).
%% node 23
done_genes_table('2.7.4.16',
	['pai_PAE1022','pai_PAE1024','ape_APE0235','pfu_PF1877','pab_PAB2358','pho_PH1833']).
%% node 19
done_genes_table('2.1.1.13',
	['dra_DR0966','bth_BT0180','pmt_PMT0729','pmm_PMM0877','pma_Pro0959','ana_alr0308','gvi_gll0477','rba_RB9857',
	 'fnu_FN0163','mpa_MAP1859c','mle_ML1307','mbo_Mb2148c','mtc_MT2183','mtu_Rv2124c','bca_BCE4332','bca_BCE4333',
	 'bce_BC4250','bce_BC4251','ban_BA4478','ban_BA4479','bha_BH1630','rpa_RPA3702','bja_bll1418','bms_BR0188',
	 'bme_BMEI1759','atc_AGR','atc_C','atc_3907','atu_Atu2155','gsu_GSU2921','wsu_WS1234','neu_NE1623',
	 'bpa_BPP3983','bpe_BP3594','rso_RSc0294','rso_RSc0295','pst_PSPTO2732','ppu_PP2375','pae_PA1843',
	 'vpa_VP2717','vvy_VV2960','vvu_VV11423','vch_VC0390','xac_XAC1559','xac_XAC1560','xcc_XCC1511',
	 'xcc_XCC1512','ypk_y0020','ype_YPO3722','ddi_DDB0230138']).
%% node 28
done_genes_table('2.3.1.30',
	['hal_VNG1481G','mma_MM2939','mma_MM3269','mac_MA2721','mac_MA3442','pgi_PG0115','bth_BT0607','bth_BT3256',
	 'pmt_PMT0117','pmm_PMM1638','pma_Pro1800','ana_all4037','ana_alr1404','gvi_gll1785','gvi_glr2514','rba_RB5098',
	 'mpa_MAP2124','mle_ML0838','mbo_Mb2363','mtc_MT2398','mtu_Rv2335','oih_OB0098','bca_BCE0088','bce_BC0109',
	 'ban_BA0088','bha_BH0110','bsu_BG10155','rpa_RPA1425','rpa_RPA3429','bja_bll5586','bja_bll7158','bja_blr2371',
	 'bms_BR1262','bme_BMEI0734','atc_AGR','atc_C','atc_2895','atu_Atu1571','mlo_mlr0175','bba_Bd3116',
	 'gsu_GSU2572','wsu_WS0553','hhe_HH1234','hpj_jhp1133','hpy_HP1210','neu_NE0575','neu_NE1695','bpa_BPP2351',
	 'bpe_BP1972','rso_RSc1162','rso_RSp1439','nma_NMA0742','nme_NMB0560','pst_PSPTO1421','pst_PSPTO5178',
	 'ppu_PP0228','ppu_PP0840','pae_PA3816','vpa_VP2833','vvy_VV3088','vvu_VV11276','vch_VC2649','pmu_PM1430',
	 'hdu_HD0659','hin_HI0606','bfl_Bfl603','bab_bbp051','bas_BUsg051','buc_BU054','plu_plu4837','ypk_y0072',
	 'ype_YPO0070']).
%% node 39
done_genes_table('3.5.99.6',
	['dra_DRA0154','pgi_PG0803','pgi_PG1285','bth_BT0258','bth_BT3587','bth_BT4127','pmt_PMT1424','ana_all0727',
	 'rba_RB1355','rba_RB2532','rba_RB3340','rba_RB3556','fnu_FN1143','blo_BL1343','mmy_MSC','mmy_0129',
	 'mmy_0821','mpe_MYPE1760','mpu_MYPU','mpu_3620','oih_OB0611','bca_BCE4121','bce_BC4054','ban_BA4273',
	 'bha_BH0420','bsu_BG12631','bsu_BG12746','bms_BRA0912','bme_BMEII0384','atc_AGR','atc_C','atc_4724',
	 'atu_Atu2607','mlo_mll4767','pae_PA3759','vpa_VPA0038','vvy_VVA0028','vvu_VV21200','vch_VCA1025',
	 'xac_XAC0714','xcc_XCC3411','xft_PD0683','xfa_XF1464','pmu_PM0875','hdu_HD1847','hin_HI0141',
	 'bfl_Bfl323','plu_plu1317','ypk_y1202','ype_YPO2627']).


% match_method/2 entry for psiblast: command prefix that invokes blastpgp with
% its database and threshold options; run_blast_n/8 and run_blast_loop/7 append
% the input (-i / -R), output (-o), iteration (-j) and checkpoint (-C) options.
match_method(psiblast,'/usr/local/blast/bin/blastpgp -d "/data/blast/data/nr /data/blast/data/SCE/s.cerevisiae.pep" -e 1 -F F -h 0.002 -t 1 ').

%% writelist/2, to be added to utilities.pl
% writelist(+Stream,+Items)
%   Writes every element of Items to Stream, one element per line.
writelist(_Out,[]) :- !.

writelist(Out,[Item|Remaining]) :-
    format(Out,'~w~n',[Item]),
    !,
    writelist(Out,Remaining).




% orf_header(+String)
%   Succeeds when the text of String starts with the character '>'.
orf_header(String) :-
    name(String,[0'>|_]).

% correct_orf_field(+Orf,+String)
%   Succeeds when String is a header line (its first character is '>')
%   and find_in_string/2 succeeds for Orf in String.
correct_orf_field(Orf,String) :-
    name(String,[0'>|_]),          % same test as orf_header/1
    find_in_string(Orf,String).

% string_from_file(+Stream,-String,-MoreFile)
%   Reads the next line of Stream. String is the line converted with name/2
%   (so a purely numeric line becomes a number); MoreFile is 'true' when
%   more input follows the line and 'false' otherwise.
%   When Stream is already at its end, String is '' and MoreFile is 'false'.
%
%   The predicate is deterministic: the end-of-stream clause commits with a
%   cut and the MoreFile test is an if-then-else. Without this, backtracking
%   into the predicate either reported MoreFile = 'true' for the last line or
%   tried to read past the end of the stream.
string_from_file(Stream,'',MoreFile) :-
	at_end_of_stream(Stream),
	!,
	MoreFile = 'false'.

string_from_file(Stream,String,MoreFile) :-
	read_line_to_codes(Stream,Codes),
	name(String,Codes),
	(   at_end_of_stream(Stream)
	->  MoreFile = 'false'
	;   MoreFile = 'true'
	).


% all_words_from_file(+Stream,+PrevString,+SoFar,-Strings,+MoreFile)
%   Splits PrevString at ' ' (separated_string/3 followed by chain_list/2),
%   appends the pieces to SoFar with conc/3 and, unless MoreFile is 'false',
%   continues with the next line read from Stream.
all_words_from_file(_Stream,Line,Collected,Words,MoreFile) :-
    MoreFile == 'false',
    separated_string(' ',Line,Chain),
    chain_list(Chain,LineWords),
    conc(Collected,LineWords,Words).

all_words_from_file(Stream,Line,Collected,Words,_MoreFile) :-
    separated_string(' ',Line,Chain),
    chain_list(Chain,LineWords),
    conc(Collected,LineWords,Extended),
    string_from_file(Stream,NextLine,NextMore),
    !,
    all_words_from_file(Stream,NextLine,Extended,Words,NextMore).

% all_strings_from_file(+Stream,+PrevString,+SoFar,-Strings,+MoreFile)
%   Collects the lines of Stream. Each line already read is pushed onto the
%   front of SoFar; when MoreFile is 'false' the current line is appended at
%   the end with conc/3. The result therefore holds the earlier lines in
%   reverse reading order, followed by the last line.
all_strings_from_file(_Stream,Line,Earlier,Lines,MoreFile) :-
    MoreFile == 'false',
    conc(Earlier,[Line],Lines).

all_strings_from_file(Stream,Line,Earlier,Lines,_MoreFile) :-
    string_from_file(Stream,NextLine,NextMore),
    !,
    all_strings_from_file(Stream,NextLine,[Line|Earlier],Lines,NextMore).


% strings_from_file(+File,-Words)
%   Opens File for reading and collects its lines with
%   all_strings_from_file/5, starting from the empty string and an empty
%   accumulator.
%   The stream is closed even when collecting fails or raises an exception
%   (the original left it open in those cases). once/1 keeps the cleanup
%   from being delayed by choice points left inside the collecting goal.
strings_from_file(File,Words) :-
	setup_call_cleanup(
	    open(File,read,Stream),
	    once(all_strings_from_file(Stream,'',[],Words,'true')),
	    close(Stream)).

	
% words_from_file(+File,-Words)
%   Opens File for reading and collects its space-separated words with
%   all_words_from_file/5, starting from the empty string and an empty
%   accumulator.
%   The stream is closed even when collecting fails or raises an exception
%   (the original left it open in those cases). once/1 keeps the cleanup
%   from being delayed by choice points left inside the collecting goal.
words_from_file(File,Words) :-
	setup_call_cleanup(
	    open(File,read,Stream),
	    once(all_words_from_file(Stream,'',[],Words,'true')),
	    close(Stream)).

% sequence_information(+Organism,+File,-SFacts)
%   Reads the lines of File, keeps those for which words_with_string/4
%   finds Organism, reverses that selection, passes it through
%   all_yeast_sequence_info/3 and turns the outcome into ranked facts with
%   make_sequence_rankings/4 (ranks start at 1).
sequence_information(Organism,File,SFacts) :-
    strings_from_file(File,Lines),
    words_with_string(Organism,Lines,[],Matching),
    reverse(Matching,Reversed),
    all_yeast_sequence_info(Reversed,[],Info),
    make_sequence_rankings(1,Info,[],SFacts).

%% Maria's version of sequence_information
% sequence_informationm(+Enzyme,+File)
%   File must start with the output_dir/1 prefix. The part of File between
%   that prefix and the first '.' after it is written (quoted, followed by a
%   space and a newline) at the end of <OutDir>enz_<Enzyme>.txt; then grep
%   appends every line of File matching "(sce:|round)" to the same file.
sequence_informationm(Enzyme,File) :-
    output_dir(OutDir),
    name(File,FileCodes),
    name(OutDir,DirCodes),
    % strip the directory prefix, then keep what precedes the first '.' (code 46)
    append(DirCodes,Remainder,FileCodes),
    append(BaseCodes,[46|_],Remainder),
    !,
    name(Name,BaseCodes),
    multiple_atom_concat([OutDir,'enz','_',Enzyme,'.txt'],OutFile),
    open(OutFile,append,Out),
    format(Out,'~q ~n',[Name]),
    close(Out),
    multiple_atom_concat(['grep -E "(sce:|round)" ',File,'  >> ', OutFile],GrepCommand),
    unix(system(GrepCommand)).
       % read in sceinfo and split the lines into predicates (Round, ORF, E-value)

	


%% Maria's version of sequence_information_from_files
% sequence_information_from_filesm(+Enzyme,+Files)
%   Runs sequence_informationm/2 on every file of Files in order; fails as
%   soon as it fails for one file.
sequence_information_from_filesm(_Enzyme,[]).

sequence_information_from_filesm(Enzyme,[File|Remaining]) :-
    sequence_informationm(Enzyme,File),
    !,
    sequence_information_from_filesm(Enzyme,Remaining).



% sequence_information_from_files(+Organism,+Files,+Done,-SFacts)
%   Calls sequence_information/3 for each file and joins the resulting
%   facts onto Done with conc/3; SFacts is the accumulator once Files is
%   exhausted.
sequence_information_from_files(_Organism,[],Facts,Facts).

sequence_information_from_files(Organism,[File|Remaining],Collected,SFacts) :-
    sequence_information(Organism,File,FileFacts),
    conc(Collected,FileFacts,Extended),
    !,
    sequence_information_from_files(Organism,Remaining,Extended,SFacts).


% process_sequence_info(+Rank,+String,-SFact)
%   Splits String at ' ', drops the entries removed by remove_nulls/3 and
%   builds sequence(First,Last,Rank) from the first remaining field and the
%   last of the fields after it (last_item/2).
process_sequence_info(Rank,String,SFact) :-
    separated_string(' ',String,Chain),
    chain_list(Chain,Fields),
    remove_nulls(Fields,[],[First|Others]),
    last_item(Others,Last),
    SFact = sequence(First,Last,Rank).


% make_sequence_rankings(+I,+Strings,+Done,-Rankings)
%   Converts each string into a sequence/3 fact carrying the current rank I.
%   A string that is converted and accepted by significant_sequence/2 is
%   pushed onto the accumulator and I is incremented; any other string is
%   skipped without changing I. Facts are pushed on the front, so Rankings
%   holds them in reverse input order.
make_sequence_rankings(_Rank,[],Facts,Facts).

make_sequence_rankings(Rank,[Line|Lines],Collected,Rankings) :-
    process_sequence_info(Rank,Line,Raw),
    significant_sequence(Raw,Fact),
    NextRank is Rank + 1,
    !,
    make_sequence_rankings(NextRank,Lines,[Fact|Collected],Rankings).

make_sequence_rankings(Rank,[_Skipped|Lines],Collected,Rankings) :-
    !,
    make_sequence_rankings(Rank,Lines,Collected,Rankings).


% words_with_string(+String,+Words,+Done,-Found)
%   Found is the list of elements of Words for which find_in_string/2
%   succeeds with String, in their original order, preceded by the reverse
%   of the initial accumulator Done.
words_with_string(_Needle,[],Reversed,Found) :-
    reverse(Reversed,Found).

words_with_string(Needle,[Candidate|Others],Kept,Found) :-
    find_in_string(Needle,Candidate),
    !,
    words_with_string(Needle,Others,[Candidate|Kept],Found).

words_with_string(Needle,[_Rejected|Others],Kept,Found) :-
    !,
    words_with_string(Needle,Others,Kept,Found).


% all_yeast_sequence_info(+Strings,+Done,-YInfo)
%   Collects the strings that come before the first one containing '>>'
%   (find_in_string/2); YInfo lists them in input order. There is no clause
%   for the empty list, so the predicate fails when no string contains '>>'.
all_yeast_sequence_info([Line|_Ignored],Reversed,YInfo) :-
    find_in_string('>>',Line),
    reverse(Reversed,YInfo).

all_yeast_sequence_info([Line|Lines],Collected,YInfo) :-
    !,
    all_yeast_sequence_info(Lines,[Line|Collected],YInfo).


% construct_sequence(+Stream,+SoFar,-Sequence,+String,+MoreFile)
%   Builds Sequence by joining successive lines onto SoFar with
%   join_string/3. Stops when MoreFile is 'false' (the current line is still
%   joined) or when the current line is an ORF header (the header is not
%   joined); otherwise joins the line and continues with the next line of
%   Stream.
construct_sequence(_Stream,Joined,Sequence,Line,MoreFile) :-
    join_string(Joined,Line,Sequence),
    MoreFile == 'false'.

construct_sequence(_Stream,Sequence,Sequence,Line,_MoreFile) :-
    orf_header(Line).

construct_sequence(Stream,Joined,Sequence,Line,_MoreFile) :-
    join_string(Joined,Line,Longer),
    string_from_file(Stream,NextLine,NextMore),
    !,
    construct_sequence(Stream,Longer,Sequence,NextLine,NextMore).


% find_and_construct_sequence(+Stream,+Orf,-Sequence,+String,-Header,+MoreFile)
%   Scans Stream line by line for the header line of Orf
%   (correct_orf_field/2). When it is found, Header is that line and
%   Sequence is assembled from the following lines by construct_sequence/5.
%   Fails once MoreFile is 'false'.
find_and_construct_sequence(_Stream,_Orf,_Sequence,_Line,_Header,MoreFile) :-
    MoreFile == 'false',
    !,
    fail.

find_and_construct_sequence(Stream,Orf,Sequence,Line,Line,_MoreFile) :-
    correct_orf_field(Orf,Line),
    string_from_file(Stream,FirstLine,FirstMore),
    construct_sequence(Stream,'',Sequence,FirstLine,FirstMore).

find_and_construct_sequence(Stream,Orf,Sequence,_Line,Header,_MoreFile) :-
    string_from_file(Stream,NextLine,NextMore),
    !,
    find_and_construct_sequence(Stream,Orf,Sequence,NextLine,Header,NextMore).


% sequence_from_file(+Orf,+File,-Header,-Sequence)
%   Looks up Orf in File with find_and_construct_sequence/6 and returns its
%   header line and sequence. When the ORF is not found, a message is
%   printed and the call still succeeds, leaving Header and Sequence
%   unbound.
%   The stream is closed in every case; the original skipped close/1
%   whenever the search failed, leaking one open stream per missing ORF.
sequence_from_file(Orf,File,Header,Sequence) :-
	setup_call_cleanup(
	    open(File,read,Stream),
	    once(find_and_construct_sequence(Stream,Orf,Sequence,'',Header,'true')),
	    close(Stream)),
	!.

sequence_from_file(Orf,_File,_Header,_Sequence) :-
	write('no sequence found for '),write(Orf),nl,nl.


% sequence_from_organism(+Organism,+Orf,+FileType,-Header,-Sequence)
%   Resolves the genome file of Organism for FileType (genome_file_type/3
%   entry prefixed with genome_dir/1) and extracts the header and sequence
%   of Orf from it.
sequence_from_organism(Organism,Orf,FileType,Header,Sequence) :-
    get_genome_file_name(Organism,FileType,GenomePath),
    sequence_from_file(Orf,GenomePath,Header,Sequence).


% get_genome_file_name(+Organism,+Type,-FName)
%   FName is the genome_dir/1 prefix followed by the file name that
%   genome_file_type/3 gives for Organism and Type.
get_genome_file_name(Organism,Type,FName) :-
    genome_file_type(Organism,Type,BaseName),
    genome_dir(Prefix),
    multiple_atom_concat([Prefix,BaseName],FName).


%% Maria's version of create_sequence_file
% create_sequence_filem(+Organism,+Orf,-FName,+FileType,+Header,+Sequence)
%   FName is <sequence_dir><Organism>_<Orf>.<FileType>.
%   If that file already exists nothing is written; otherwise it is created
%   with Header on the first line followed by Sequence.
create_sequence_filem(Organism,Orf,FName,FileType,_Header,_Sequence) :-
    sequence_dir(SeqDir),
    multiple_atom_concat([SeqDir,Organism,'_',Orf,'.',FileType],FName),
    exists_file(FName),
    !.

create_sequence_filem(Organism,Orf,FName,FileType,Header,Sequence) :-
    sequence_dir(SeqDir),
    multiple_atom_concat([SeqDir,Organism,'_',Orf,'.',FileType],FName),
    open(FName,write,Out),
    write(Out,Header),
    nl(Out),
    write(Out,Sequence),
    close(Out).


% create_sequence_file(+Organism,+Orf,-FName,+FileType,+Header,+Sequence)
%   Writes (overwriting any existing file) Header, a newline and Sequence
%   to <sequence_dir><Organism>_<Orf>.<FileType>; FName is that path.
create_sequence_file(Organism,Orf,FName,FileType,Header,Sequence) :-
    sequence_dir(SeqDir),
    multiple_atom_concat([SeqDir,Organism,'_',Orf,'.',FileType],FName),
    open(FName,write,Out),
    format(Out,'~w~n~w',[Header,Sequence]),
    close(Out).



% create_sequence_files_and_commands(+GeneNames,+Type,+Method,+TargetGenome,+Done,-Cmds)
%   For each gene_name(Organism,_,Orf,_) term: extracts the ORF sequence,
%   writes it to a sequence file, builds the match command
%   "<method prefix><sequence file> <target genome file> > <.match file>",
%   prints and runs it, and pushes the .match file name onto the
%   accumulator. A gene for which any of these steps fails is skipped.
create_sequence_files_and_commands([],_Type,_Method,_Target,Outputs,Outputs).

create_sequence_files_and_commands([Gene|Genes],Type,Method,
                                   TargetGenome,Collected,Cmds) :-
    Gene = gene_name(Organism,_EC,Orf,_GeneLabel),
    sequence_from_organism(Organism,Orf,Type,Header,Sequence),
    create_sequence_file(Organism,Orf,SeqFile,Type,Header,Sequence),
    get_genome_file_name(TargetGenome,Type,TargetFile),
    sequence_dir(SeqDir),
    match_method(Method,Prefix),
    multiple_atom_concat([SeqDir,Organism,'_',Orf,'.match'],MatchFile),
    multiple_atom_concat([Prefix,SeqFile,' ',TargetFile,' > ',MatchFile],Command),
    write(Command),
    nl,
    unix(system(Command)),
    !,
    create_sequence_files_and_commands(Genes,Type,Method,
                                       TargetGenome,[MatchFile|Collected],Cmds).

create_sequence_files_and_commands([_Skipped|Genes],Type,Method,
                                   TargetGenome,Collected,Cmds) :-
    !,
    create_sequence_files_and_commands(Genes,Type,Method,
                                       TargetGenome,Collected,Cmds).

%% Maria's version of create_sequence_files_and_commands
% create_sequence_files_and_commandsm(+GeneNames,+Type,+Method,+TargetGenome,+Done,-Cmds)
%   For each gene_name(Organism,_,Orf,_) term: extracts the ORF sequence,
%   makes sure its sequence file exists (create_sequence_filem/6) and runs
%   run_blast_n/8 with 20 iterations, writing into output_dir/1. The
%   resulting output file names are put in front of the accumulator with
%   conc/3. A gene for which any step fails is skipped.
%   TargetGenome is only passed along: no target genome file is looked up
%   here (the original comment notes psi-blast looks at all organisms).
%   run_blast/7 is the alternative runner that was used here before.
create_sequence_files_and_commandsm([],_Type,_Method,_Target,Outputs,Outputs).

create_sequence_files_and_commandsm([Gene|Genes],Type,Method,
                                    TargetGenome,Collected,Cmds) :-
    Gene = gene_name(Organism,_EC,Orf,_GeneLabel),
    sequence_from_organism(Organism,Orf,Type,Header,Sequence),
    create_sequence_filem(Organism,Orf,SeqFile,Type,Header,Sequence),
    output_dir(OutDir),
    % 20 is the full number of iterations (5 was used for shorter runs)
    run_blast_n(20,OutDir,Organism,Orf,SeqFile,Method,[],NewFiles),
    conc(NewFiles,Collected,Extended),
    !,
    create_sequence_files_and_commandsm(Genes,Type,Method,
                                        TargetGenome,Extended,Cmds).

create_sequence_files_and_commandsm([_Skipped|Genes],Type,Method,
                                    TargetGenome,Collected,Cmds) :-
    !,
    create_sequence_files_and_commandsm(Genes,Type,Method,
                                        TargetGenome,Collected,Cmds).

% Blast command that runs for as many iterations as we set it to
% run_blast_n(+N,+OutDir,+Organism,+Orf,+SFName,+Method,[],-Outfiles)
%   Runs the command of Method once with "-j N", reading SFName and writing
%   <OutDir><Organism>_<Orf>_<N>.match plus the checkpoint file
%   <OutDir><Organism>_<Orf>_<N>.check. Outfiles is the one-element list
%   holding the .match file name. The command is echoed before it is run.
run_blast_n(N,OutDir,Organism,Orf,SFName,Method,[],[MatchFile]) :-
    multiple_atom_concat([OutDir,Organism,'_',Orf],Stem),
    multiple_atom_concat([Stem,'_',N,'.match'],MatchFile),   % N = number of iterations
    match_method(Method,Prefix),
    multiple_atom_concat([Stem,'_',N,'.check'],CheckFile),
    multiple_atom_concat([Prefix,'-i ',SFName,' -o ',MatchFile,' -j ',N,' -C ',CheckFile],Command),
    write(Command),
    nl,
    unix(system(Command)),
    write('done blast'),
    nl,
    !.


%% Maria's code
% run_blast(+OutDir,+Organism,+Orf,+SFName,+Method,[],-Outfiles)
%   Builds the file name stem <OutDir><Organism>_<Orf> and the name of the
%   first output file (<stem>2.match, the 2 standing for two iterations),
%   then hands over to run_blast_loop/7 starting at iteration 2.
%   Orf is the ORF from Organism that codes for the enzyme of interest.
run_blast(OutDir,Organism,Orf,SFName,Method,[],Outfiles) :-
    multiple_atom_concat([OutDir,Organism,'_',Orf],Stem),
    multiple_atom_concat([Stem,'2.match'],FirstOutput),
    run_blast_loop(2,SFName,FirstOutput,Method,Stem,[],Outfiles).

% run_blast_loop/7 
% run_blast_loop(+It,+InName,+Outname,+Method,+Lemma,[],-Outfiles)
%   Runs the command of Method repeatedly. The run numbered It writes its
%   result to Outname; Outfiles collects the output names, most recent
%   first. Every command is echoed before it is executed.
%
% 1st run of run_blast_loop/7: two iterations (-j 2) starting from the
% sequence file InName, checkpoint written to <Lemma>1.check.
% The output name handed to run 3 is <Lemma>3.match. The original built
% <Lemma>2.match here again, so run 3 overwrote the result of this run and
% the same name appeared twice in Outfiles.
run_blast_loop(2,InName,Outname,Method,Lemma,[],Outfiles):-!,
      match_method(Method,MCString),
      multiple_atom_concat([Lemma,'1.check'],OutCheck),
      multiple_atom_concat([Lemma,'3.match'],NextOutname),
      multiple_atom_concat([MCString,'-i ',InName,' -o ',Outname,' -j 2',' -C ',OutCheck],Command),
      write(Command),nl,
      unix(system(Command)),!,
      run_blast_loop(3,OutCheck,NextOutname,Method,Lemma,[Outname],Outfiles).
      
% last run of run_blast_loop/7: restarts from checkpoint InName (-R) for one
% more iteration and writes no new checkpoint.
run_blast_loop(20,InName,Outname,Method,_Lemma,Done,[Outname|Done]):-!,
      match_method(Method,MCString),		     
      multiple_atom_concat([MCString,'-R ',InName,' -o ',Outname,' -j 1'],Command),
      write(Command),nl,
      unix(system(Command)).

% intermediate run of run_blast_loop/7: restarts from checkpoint InName (-R)
% for one iteration, writes checkpoint <Lemma><It>.check and continues with
% run It+1, whose output goes to <Lemma><It+1>.match.
run_blast_loop(It,InName,Outname,Method,Lemma,Done,Outfiles):-     
      match_method(Method,MCString),
      NIt is It+1,
      multiple_atom_concat([Lemma,It,'.check'],OutCheck),
      multiple_atom_concat([Lemma,NIt,'.match'],OutnameN),
      multiple_atom_concat([MCString,'-R ',InName,' -o ',Outname,' -j 1',' -C ',OutCheck],Command),
      write(Command),nl,
      unix(system(Command)),!,
      run_blast_loop(NIt,OutCheck,OutnameN,Method,Lemma,[Outname|Done],Outfiles).
		 
		

% genome_files(-Files)
%   Files is what clauses_from_file/2 returns for the fixed genomes.txt path.
genome_files(Files) :-
    GenomeIndex = '/mnt/data/Robot_Scientist/KEGG/genomes/genomes.txt',
    clauses_from_file(GenomeIndex,Files).

% genome_file_type(+FName,-Type)
%   Type is the last of the pieces obtained by splitting FName at '.'.
genome_file_type(FName,Type) :-
    separated_string('.',FName,Chain),
    chain_list(Chain,Pieces),
    last_item(Pieces,Type).


% species_abbreviation(+File,-ABB)
%   ABB is string_fragment(2,4,...) of the first line of File.
%   The stream is closed whether or not the fragment can be extracted; the
%   original left the file open whenever string_fragment/4 failed.
species_abbreviation(File,ABB) :-
	setup_call_cleanup(
	    open(File,read,Stream),
	    once(( string_from_file(Stream,NewString,_MF),
		   string_fragment(2,4,NewString,ABB) )),
	    close(Stream)).

% genome_species_abbreviation(+File,-ABB)
%   ABB is string_fragment(5,7,...) of the first line of File.
%   The stream is closed whether or not the fragment can be extracted; the
%   original left the file open whenever string_fragment/4 failed.
genome_species_abbreviation(File,ABB) :-
	setup_call_cleanup(
	    open(File,read,Stream),
	    once(( string_from_file(Stream,NewString,_MF),
		   string_fragment(5,7,NewString,ABB) )),
	    close(Stream)).


% get_abbreviation(+Type,+File,-ABB)
%   Picks the abbreviation reader by file type: 'pep' and 'nuc' files use
%   species_abbreviation/2, 'genome' files use genome_species_abbreviation/2.
%   Fails for any other Type.
get_abbreviation(Type,File,ABB) :-
    (   Type == 'pep'
    ;   Type == 'nuc'
    ),
    species_abbreviation(File,ABB).

get_abbreviation(Type,File,ABB) :-
    Type == 'genome',
    genome_species_abbreviation(File,ABB).


% genome_file_information(+GFileNames,+Done,-Info)
%   For each genome_file(Name) term builds
%   genome_file_type(Abbreviation,Type,Name), where Type comes from
%   genome_file_type/2 and Abbreviation is read from the file under
%   genome_dir/1. Entries for which this fails are skipped. New terms are
%   pushed on the front of the accumulator.
genome_file_information([],Info,Info).

genome_file_information([genome_file(Name)|Others],Collected,Info) :-
    genome_dir(Prefix),
    multiple_atom_concat([Prefix,Name],Path),
    genome_file_type(Name,Type),
    get_abbreviation(Type,Path,Abbreviation),
    !,
    genome_file_information(Others,
                            [genome_file_type(Abbreviation,Type,Name)|Collected],
                            Info).

genome_file_information([_Skipped|Others],Collected,Info) :-
    !,
    genome_file_information(Others,Collected,Info).


% make_genome_info(-GInfo)
%   Collects the genome file information for every entry returned by
%   genome_files/1 and writes the terms to 'genome_file_info.pl' with
%   write_file_terms/2.
make_genome_info(GInfo) :-
    genome_files(Entries),
    genome_file_information(Entries,[],GInfo),
    open('genome_file_info.pl',write,Out),
    write_file_terms(Out,GInfo),
    close(Out).


% find_enzyme_orfs(+Organism,+Enzyme,+FileType,+Method,-LikelyOrfs)
%   Gets the genes for Enzyme with genes_from_organisms/3, prints them and
%   their count, runs the matching tool for each gene against Organism,
%   extracts sequence facts from the match files and turns them into
%   rankings averaged over the number of match files; LikelyOrfs is the
%   result of quicksort_rankings/2 on those averages.
find_enzyme_orfs(Organism,Enzyme,FileType,Method,LikelyOrfs) :-
    genes_from_organisms(Organism,Enzyme,Genes),
    writelist(Genes),
    length(Genes,GeneCount),
    write('Other Genes':GeneCount),
    create_sequence_files_and_commands(Genes,FileType,Method,Organism,[],MatchFiles),
    sequence_information_from_files(Organism,MatchFiles,[],Facts),
    length(MatchFiles,TestCount),
    create_rankings(TestCount,Facts,[],Totals),
    mean_rankings(TestCount,Totals,[],Means),
    quicksort_rankings(Means,LikelyOrfs).


%%% Maria's code
%% Find all enzymes with unknown orfs and put them in a list.
%% There are unknown ORF names that are not assigned an enzyme class but a
%% reaction number. We should make a link between the reaction number and
%% some enzyme class.
% enzyme_list(-List)
%   List holds the second argument of every orf_fact/6 whose first argument
%   starts with 'U' and whose second argument does not unify with 'none'.
enzyme_list(List) :-
    findall(Enzyme,
            ( orf_fact(Orf,Enzyme,_EC,_G,_GD,_R),
              name(Orf,[0'U|_]),
              Enzyme \= 'none'
            ),
            List).

% for_all_enzymes
%   Writes the list produced by enzyme_list/1 to 'enzyme_list.txt' (one
%   entry per line) and then runs find_enzyme_orf_loop/2 over it.
%   open/3 takes the file name first and returns the stream last; the
%   original call had these two arguments swapped and so raised an error
%   before anything was written.
for_all_enzymes:-
	enzyme_list(EzList),
	open('enzyme_list.txt',write,Stream),
	writelist(Stream,EzList),
        close(Stream),
	find_enzyme_orf_loop(EzList,[]).

% find_enzyme_orf_loop(+Enzymes,+Done)
%   Runs find_enzyme_orfsm/4 (file type 'pep', method psiblast) for every
%   enzyme in turn, accumulating the processed enzymes in Done.
%   The base clause for the empty list was missing, so the loop failed after
%   the last enzyme even when every enzyme had been processed.
%   The loop still fails as soon as find_enzyme_orfsm/4 fails for an enzyme.
find_enzyme_orf_loop([],_Done).

find_enzyme_orf_loop([H|T],Done):-
	find_enzyme_orfsm(_Org,H,'pep',psiblast),!,
	find_enzyme_orf_loop(T,[H|Done]).

% For each enzyme in the list do the following
% find_enzyme_orfsm(?Organism,+Enzyme,+FileType,+Method)
%   Gets the genes for Enzyme, removes those listed by done_genes/2, prints
%   the remaining genes and their count, runs the BLAST pipeline on them and
%   post-processes the resulting files with
%   sequence_information_from_filesm/2.
find_enzyme_orfsm(Organism,Enzyme,FileType,Method) :-
    genes_from_organisms(Organism,Enzyme,AllGenes),
    % drop the genes that were already searched
    done_genes(Enzyme,AlreadyDone),
    subtract(AllGenes,AlreadyDone,Pending),
    writelist(Pending),
    length(Pending,PendingCount),
    write('Other Genes':PendingCount),
    create_sequence_files_and_commandsm(Pending,FileType,Method,Organism,[],MatchFiles),
    sequence_information_from_filesm(Enzyme,MatchFiles).
		  



% significant_sequence(+S,-NewS)
%   S is sequence(Orf,EScore,Rank). NewS is the same term with EScore
%   converted to a number. The fact is accepted when EScore contains 'e-'
%   (find_in_string/2) or when the converted value is below 0.02.
significant_sequence(sequence(Orf,EScore,Rank),NewS) :-
    find_in_string('e-',EScore),
    name(EScore,ScoreCodes),
    number_codes(Value,ScoreCodes),
    NewS = sequence(Orf,Value,Rank).

significant_sequence(sequence(Orf,EScore,Rank),NewS) :-
    name(EScore,ScoreCodes),
    number_codes(Value,ScoreCodes),
    Value < 0.02,
    NewS = sequence(Orf,Value,Rank).


% find_and_update_ranking(+NumberTests,+Orf1,+E,+Rankings,+Past,-NewRankings)
%   Finds the ranking(Orf,Score,N) entry whose Orf is identical to Orf1,
%   adds E to its score and 1 to its count. NewRankings is Past (the entries
%   already passed over, most recent first), then the updated entry, then
%   the entries not yet visited. Fails when no entry matches.
find_and_update_ranking(_NumberTests,Orf1,E,[ranking(Seen,OldScore,Count)|Unvisited],Past,NewRankings) :-
    Orf1 == Seen,
    NewScore is OldScore + E,
    NewCount is Count + 1,
    Updated = ranking(Orf1,NewScore,NewCount),
    conc(Past,[Updated],Front),
    conc(Front,Unvisited,NewRankings).

find_and_update_ranking(NumberTests,Orf1,E,[Other|Unvisited],Past,NewRankings) :-
    !,
    find_and_update_ranking(NumberTests,Orf1,E,Unvisited,[Other|Past],NewRankings).



% mean_rankings(+NumberTests,+Rankings,+Done,-NewRankings)
%   Replaces each ranking(Orf,Total,_) by ranking(Orf,Total/NumberTests).
%   Results are pushed on the front of the accumulator, so NewRankings is in
%   reverse input order.
mean_rankings(_NumberTests,[],Means,Means).

mean_rankings(NumberTests,[ranking(Orf,Total,_Count)|Others],Collected,NewRankings) :-
    Mean is Total/NumberTests,
    !,
    mean_rankings(NumberTests,Others,[ranking(Orf,Mean)|Collected],NewRankings).


% create_rankings(+NumberTests,+Sequences,+Done,-Rankings)
%   Folds sequence(Orf,_,Rank) facts into ranking(Orf,TotalRank,Count)
%   entries: an ORF that already has an entry gets Rank added to its total
%   (find_and_update_ranking/6); otherwise a new entry ranking(Orf,Rank,1)
%   is pushed on the front.
create_rankings(_NumberTests,[],Rankings,Rankings).

create_rankings(NumberTests,[sequence(Orf,_E,Rank)|Others],Current,Rankings) :-
    find_and_update_ranking(NumberTests,Orf,Rank,Current,[],Updated),
    !,
    create_rankings(NumberTests,Others,Updated,Rankings).

create_rankings(NumberTests,[sequence(Orf,_E,Rank)|Others],Current,Rankings) :-
    !,
    create_rankings(NumberTests,Others,[ranking(Orf,Rank,1)|Current],Rankings).


% quicksort_rankings(+Rankings,-Sorted)
%   Quicksort over ranking(Orf,Score) terms using split_ranking/4 with the
%   head of the list as pivot: Sorted is the sorted first partition, then
%   the pivot, then the sorted second partition.
quicksort_rankings([],[]).

quicksort_rankings([Pivot|Others],Sorted) :-
    split_ranking(Pivot,Others,First,Second),
    quicksort_rankings(First,SortedFirst),
    quicksort_rankings(Second,SortedSecond),
    append(SortedFirst,[Pivot|SortedSecond],Sorted).


% split_ranking(+X,+List,-Small,-Big)
%   Partitions List around the pivot X, both being ranking(Orf,Score)
%   terms: an element whose score is greater than the pivot's score goes
%   into Small (the third argument), every other element into Big.
split_ranking(_Pivot,[],[],[]).

split_ranking(Pivot,[Item|Others],[Item|Small],Big) :-
    Pivot = ranking(_PivotOrf,PivotScore),
    Item = ranking(_ItemOrf,ItemScore),
    PivotScore < ItemScore,
    !,
    split_ranking(Pivot,Others,Small,Big).

split_ranking(Pivot,[Item|Others],Small,[Item|Big]) :-
    split_ranking(Pivot,Others,Small,Big).


% possible_orfs_for_orf_info(+NumberFind,+Organism,+OrfInfo,+Type,+Method,-Conjectures)
%   OrfInfo is orf_info(OldOrf,Enzyme,Reaction). Computes the ranked ORFs
%   for Enzyme, saves them to <ExpDir><Organism>.<Enzyme>.result, selects
%   entries with number_from_list(1,NumberFind,...) and turns the selection
%   into conjectures with make_conjectures/6.
possible_orfs_for_orf_info(NumberFind,Organism,orf_info(OldOrf,Enzyme,Reaction),Type,Method,Conjectures) :-
    find_enzyme_orfs(Organism,Enzyme,Type,Method,Ranked),
    exp_generator_directory(ExpDir),
    multiple_atom_concat([ExpDir,Organism,'.',Enzyme,'.result'],ResultFile),
    open(ResultFile,write,Out),
    write_file_terms(Out,Ranked),
    close(Out),
    number_from_list(1,NumberFind,Ranked,Selected),
    make_conjectures(Selected,OldOrf,Enzyme,Reaction,[],Conjectures).


% all_possible_orfs_and_conjectures(+NumberFind,+Organism,+OrfInfo,+EnzymesDone,
%                                   +Type,+Method,+Done,-Conjectures)
%   Builds conjectures for every orf_info(OldOrf,Enzyme,Reaction) entry.
%   An enzyme not yet in EnzymesDone is searched with
%   possible_orfs_for_orf_info/6 and then added to EnzymesDone. Otherwise
%   (or when that search fails) the rankings are read back from
%   <ExpDir><Organism>.<Enzyme>.result. Conjectures of each entry are
%   appended to the accumulator with conc/3.
all_possible_orfs_and_conjectures(_N,_O,[],_ED,_T,_M,Conjectures,Conjectures).

all_possible_orfs_and_conjectures(NumberFind,Organism,[Entry|Entries],EnzymesDone,Type,
                                  Method,Collected,Conjectures) :-
    Entry = orf_info(_OldOrf,Enzyme,_Reaction),
    \+ member(Enzyme,EnzymesDone),
    possible_orfs_for_orf_info(NumberFind,Organism,Entry,Type,Method,EntryConjectures),
    conc(Collected,EntryConjectures,Extended),
    !,
    all_possible_orfs_and_conjectures(NumberFind,Organism,Entries,[Enzyme|EnzymesDone],Type,
                                      Method,Extended,Conjectures).

all_possible_orfs_and_conjectures(NumberFind,Organism,[Entry|Entries],EnzymesDone,Type,
                                  Method,Collected,Conjectures) :-
    Entry = orf_info(OldOrf,Enzyme,Reaction),
    exp_generator_directory(ExpDir),
    multiple_atom_concat([ExpDir,Organism,'.',Enzyme,'.result'],ResultFile),
    clauses_from_file(ResultFile,Ranked),
    number_from_list(1,NumberFind,Ranked,Selected),
    make_conjectures(Selected,OldOrf,Enzyme,Reaction,[],EntryConjectures),
    conc(Collected,EntryConjectures,Extended),
    !,
    all_possible_orfs_and_conjectures(NumberFind,Organism,Entries,EnzymesDone,Type,
                                      Method,Extended,Conjectures).


	
%% make_wet_conjectures(OrfInfos,Done,Conjectures)
%% Maps every orf_info(Orf,Enzyme,Reaction) to conjecture(Orf,Enzyme,Reaction),
%% pushing each onto the accumulator Done.  The accumulator is returned
%% without being reversed, so Conjectures is in the opposite order to OrfInfos.
make_wet_conjectures([],Conjectures,Conjectures).

make_wet_conjectures([orf_info(Orf,Enzyme,Reaction)|Rest],Acc,Conjectures) :-
	!,
	make_wet_conjectures(Rest,[conjecture(Orf,Enzyme,Reaction)|Acc],Conjectures).


%% make_conjectures(Rankings,OldOrf,Enzyme,Reaction,Done,Conjectures)
%% Builds one conjecture(OldOrf,Orf,Enzyme,Reaction,Rank) per
%% ranking(OrfInfo,Rank).  Orf is the second element of the two-element list
%% that chain_list/2 gives for the chain separated_string/3 makes from
%% OrfInfo with ':' (presumably an 'organism:orf' identifier - confirm).
%% The accumulator is reversed at the end, so Conjectures follows the order
%% of Rankings.
make_conjectures([],_OldOrf,_Enzyme,_Reaction,Acc,Conjectures) :-
	reverse(Acc,Conjectures).

make_conjectures([ranking(OrfInfo,Rank)|Rest],OldOrf,Enzyme,Reaction,Acc,Conjectures) :-
	separated_string(':',OrfInfo,Chain),
	chain_list(Chain,[_Organism,Orf]),
	!,
	make_conjectures(Rest,OldOrf,Enzyme,Reaction,
			 [conjecture(OldOrf,Orf,Enzyme,Reaction,Rank)|Acc],Conjectures).
	



%% find_possible_orfs(NumberFind,Organism,Type,Method,Genes,CJFile,ExpFile)
%% Processes the genes one at a time.  Before each gene is handled the
%% outer recovery point is rewritten with the current gene and the genes
%% still to do; the gene is then handed to conjectures_and_experiments/7.
find_possible_orfs(_NumberFind,_Organism,_Type,_Method,[],_CJFile,_ExpFile).

find_possible_orfs(NumberFind,Organism,Type,Method,Genes,CJFile,ExpFile) :-
	Genes = [Gene|Remaining],
	make_outer_recovery_point(Genes),
	conjectures_and_experiments(NumberFind,Organism,Gene,Type,Method,CJFile,ExpFile),
	!,
	find_possible_orfs(NumberFind,Organism,Type,Method,Remaining,CJFile,ExpFile).

%% find wet evidence biology
%%
%% find_wet_conjectures_and_experiments(KnownGenes,CJFile,ExpFile)
%% Processes the known genes one at a time.  Before each gene is handled the
%% wet outer recovery point is rewritten with the current gene and the genes
%% still to do; the gene is then handed to wet_conjectures_and_experiments/3.
%% The last two arguments are passed through untouched; the callers in this
%% file supply the conjecture and experiment file paths.

find_wet_conjectures_and_experiments([],_CJFile,_ExpFile).

find_wet_conjectures_and_experiments(Genes,CJFile,ExpFile) :-
	Genes = [Gene|Remaining],
	make_wet_outer_recovery_point(Genes),
	wet_conjectures_and_experiments(Gene,CJFile,ExpFile),
	!,
	find_wet_conjectures_and_experiments(Remaining,CJFile,ExpFile).


%% make_outer_recovery_point(Items)
%% Overwrites <ExpDir>outer_recovery_point.pl with a single term
%% outer_recovery_point(Current,Remaining), where Current is the head of the
%% non-empty list Items and Remaining is its tail.
make_outer_recovery_point([Current|Remaining]) :-
	exp_generator_directory(Dir),
	multiple_atom_concat([Dir,'outer_recovery_point.pl'],Path),
	open(Path,write,Out),
	write_file_terms(Out,[outer_recovery_point(Current,Remaining)]),
	close(Out).


%% outer recovery point for wet evidence genes
%%
%% make_wet_outer_recovery_point(Genes)
%% Overwrites <ExpDir>wet_outer_recovery_point.pl with a single term
%% outer_recovery_point(Current,Remaining), where Current is the head of the
%% non-empty list Genes and Remaining is its tail.

make_wet_outer_recovery_point([Current|Remaining]) :-
	exp_generator_directory(Dir),
	multiple_atom_concat([Dir,'wet_outer_recovery_point.pl'],Path),
	open(Path,write,Out),
	write_file_terms(Out,[outer_recovery_point(Current,Remaining)]),
	close(Out).




%% make_static_recovery_point(NumberFind,Organism,Type,Method)
%% Records the run parameters as a single term
%% static_recovery_point(NumberFind,Organism,Type,Method) in
%% <ExpDir>static_recovery_point.pl.  Fails, without writing anything, when
%% that file already exists.
make_static_recovery_point(NumberFind,Organism,Type,Method) :-
	exp_generator_directory(Dir),
	multiple_atom_concat([Dir,'static_recovery_point.pl'],Path),
	\+ exists_file(Path),
	open(Path,write,Out),
	write_file_terms(Out,[static_recovery_point(NumberFind,Organism,Type,Method)]),
	close(Out).


%% execute_recovery(CJFile,ExpFile)
%% Resumes an interrupted run from the three recovery files kept in the
%% experiment generator directory:
%%   static_recovery_point.pl - static_recovery_point(NumberFind,Organism,Type,Method)
%%   outer_recovery_point.pl  - outer_recovery_point(UK,UKGenesLeft)
%%   inner_recovery_point.pl  - inner_recovery_point(Met,MetabolitesLeft)
%% Each file must yield exactly one clause.  The gene UK is finished first
%% (recovered growths, conjectures, experiments, appended to CJFile and
%% ExpFile), then the remaining genes are handled by find_possible_orfs/7.
%% NOTE(review): the inner recovery point file is not written anywhere in
%% this part of the file - presumably by the recovered-growth search.
execute_recovery(CJFile,ExpFile) :-
	exp_generator_directory(ExpDir),
	multiple_atom_concat([ExpDir,'static_recovery_point.pl'],SRPFile),
	multiple_atom_concat([ExpDir,'outer_recovery_point.pl'],ORPFile),
	multiple_atom_concat([ExpDir,'inner_recovery_point.pl'],IRPFile),
	% each recovery file is expected to hold a single term
	clauses_from_file(SRPFile,[SRP]),
	clauses_from_file(ORPFile,[ORP]),
	clauses_from_file(IRPFile,[IRP]),
	SRP = static_recovery_point(NumberFind,Organism,Type,Method),
	ORP = outer_recovery_point(UK,UKGenesLeft),
	IRP = inner_recovery_point(Met,MetabolitesLeft),
	multiple_atom_concat([ExpDir,UK,'_recovered_growth.pl'],OutFile),
	write('recovered: about to start recovered_growths'),nl,nl,%trace,
	%open(OutFile,append,Stream),
	% continue the growth search with the recorded metabolite and those after it
	find_recovered_growths(1,OutFile,UK,[Met|MetabolitesLeft]),
	%close(Stream),
	clauses_from_file(OutFile,Recovered),
	load_full_metabolite_graph,%trace,
	all_orf_info(UK,OrfInfo),
	% NOTE(review): _Reactions is unbound here, whereas execute_wet_recovery
	% computes the reactions first - confirm this is intended.
	knocked_out_orf_info(OrfInfo,_Reactions,[],KOOrfInfo),
	all_possible_orfs_and_conjectures(NumberFind,Organism,KOOrfInfo,[],Type,Method,[],Conjectures),
	write('passed possible orfs'),nl,nl,
	construct_experiments(Conjectures,Recovered,[],AllExperiments),
	real_conjectures(Conjectures,[],AllRealConjectures),
	make_set(AllExperiments,[],Experiments),
	make_set(AllRealConjectures,[],RealConjectures),
	writelist(RealConjectures),nl,nl,
	writelist(Experiments),nl,nl,
	% results are appended, so output from before the interruption is kept
	open(CJFile,append,Stream1),
	open(ExpFile,append,Stream2),
	write_file_terms(Stream1,RealConjectures),
	write_file_terms(Stream2,Experiments),
	close(Stream1),
	close(Stream2),
	% carry on with the genes that had not been started
	find_possible_orfs(NumberFind,Organism,Type,Method,
			   UKGenesLeft,CJFile,ExpFile).

	
%% execute_wet_recovery(CJFile,ExpFile)
%% Resumes an interrupted wet-evidence run from two recovery files kept in
%% the experiment generator directory:
%%   wet_outer_recovery_point.pl - outer_recovery_point(Gene,GenesLeft)
%%   wet_inner_recovery_point.pl - inner_recovery_point(Met,MetabolitesLeft)
%% Each file must yield exactly one clause.  The gene Gene is finished first
%% (recovered growths, conjectures, experiments, appended to CJFile and
%% ExpFile), then the remaining genes are handled by
%% find_wet_conjectures_and_experiments/3.
%% NOTE(review): the wet inner recovery point file is not written anywhere
%% in this part of the file - presumably by the recovered-growth search.
execute_wet_recovery(CJFile,ExpFile) :-
	exp_generator_directory(ExpDir),
	multiple_atom_concat([ExpDir,'wet_outer_recovery_point.pl'],ORPFile),
	multiple_atom_concat([ExpDir,'wet_inner_recovery_point.pl'],IRPFile),
	% each recovery file is expected to hold a single term
	clauses_from_file(ORPFile,[ORP]),
	clauses_from_file(IRPFile,[IRP]),
	ORP = outer_recovery_point(Gene,GenesLeft),
	IRP = inner_recovery_point(Met,MetabolitesLeft),
	multiple_atom_concat([ExpDir,Gene,'_recovered_growth.pl'],OutFile),
	write('recovered: about to start recovered_growths'),nl,nl,%trace,
	%open(OutFile,append,Stream),
	% continue the growth search with the recorded metabolite and those after it
	findall_wet_recovered_growths(Gene,[Met|MetabolitesLeft],OutFile),
	%close(Stream),
	clauses_from_file(OutFile,Recovered),
	load_full_metabolite_graph,%trace,
	all_orf_info(Gene,OrfInfo),
	find_knocked_out_reactions_and_enzymes(Gene,Reactions,_Enzymes),
	knocked_out_orf_info(OrfInfo,Reactions,[],KOOrfInfo),
	write('passed possible orfs'),nl,nl,
	make_wet_conjectures(KOOrfInfo,[],Conjectures),
	construct_wet_experiments(Conjectures,Recovered,[],AllExperiments),%trace,
	make_set(AllExperiments,[],Experiments),
	writelist(Conjectures),nl,nl,
	writelist(Experiments),nl,nl,
	% results are appended, so output from before the interruption is kept
	open(CJFile,append,Stream1),
	open(ExpFile,append,Stream2),
	write_file_terms(Stream1,Conjectures),
	write_file_terms(Stream2,Experiments),
	close(Stream1),
	close(Stream2),
	% carry on with the genes that had not been started
	find_wet_conjectures_and_experiments(GenesLeft,CJFile,ExpFile).





%% all_conjectures_and_experiments(NumberFind,Organism,Type,Method)
%% Top-level driver for the unknown genes.
%% Typical call: all_conjectures_and_experiments(3,sce,pep,fasta).
%%
%% Writes two helper files into the experiment generator directory
%% (compounds_from_reactions.pl and single_compound_lists.pl, both derived
%% from the wild-type reactions) and then either starts a fresh run or
%% resumes one: make_static_recovery_point/4 fails when its file already
%% exists, in which case execute_recovery/2 is used.  execute_recovery/2 is
%% also what runs if the fresh-run branch fails for any other reason.

all_conjectures_and_experiments(NumberFind,Organism,Type,Method) :-
	interesting_unknown_genes(UnknownGenes),
	wild_type_reactions(WildTypeReactions),
	all_compounds_from_reaction_set(WildTypeReactions,Compounds),
	exp_generator_directory(Dir),
	% compounds occurring in the wild-type reactions
	multiple_atom_concat([Dir,'compounds_from_reactions.pl'],CompoundsPath),
	open(CompoundsPath,write,CompoundsOut),
	write_file_terms(CompoundsOut,[compounds_from_reactions(Compounds)]),
	close(CompoundsOut),
	% the same compounds as produced by single_lists/2
	single_lists(Compounds,SingleLists),
	multiple_atom_concat([Dir,'single_compound_lists.pl'],SinglesPath),
	open(SinglesPath,write,SinglesOut),
	write_file_terms(SinglesOut,[single_compound_lists(SingleLists)]),
	close(SinglesOut),
	multiple_atom_concat([Dir,'conjectures.pl'],CJFile),
	multiple_atom_concat([Dir,'experiments.pl'],ExpFile),
	(   make_static_recovery_point(NumberFind,Organism,Type,Method),
	    find_possible_orfs(NumberFind,Organism,Type,Method,UnknownGenes,CJFile,ExpFile)
	;   execute_recovery(CJFile,ExpFile)
	).


%% alternative_version for no wet evidence genes
%%
%% all_wet_conjectures_and_experiments
%% Top-level driver for the known genes.  Writes the same two helper files as
%% all_conjectures_and_experiments/4 (compounds_from_reactions.pl and
%% single_compound_lists.pl) and then either starts a fresh run, when
%% wet_outer_recovery_point.pl does not exist yet, or resumes through
%% execute_wet_recovery/2.  execute_wet_recovery/2 is also what runs if the
%% fresh-run branch fails.

all_wet_conjectures_and_experiments :-
	interesting_known_genes(KnownGenes),
	wild_type_reactions(WildTypeReactions),
	all_compounds_from_reaction_set(WildTypeReactions,Compounds),
	exp_generator_directory(Dir),
	% compounds occurring in the wild-type reactions
	multiple_atom_concat([Dir,'compounds_from_reactions.pl'],CompoundsPath),
	open(CompoundsPath,write,CompoundsOut),
	write_file_terms(CompoundsOut,[compounds_from_reactions(Compounds)]),
	close(CompoundsOut),
	% the same compounds as produced by single_lists/2
	single_lists(Compounds,SingleLists),
	multiple_atom_concat([Dir,'single_compound_lists.pl'],SinglesPath),
	open(SinglesPath,write,SinglesOut),
	write_file_terms(SinglesOut,[single_compound_lists(SingleLists)]),
	close(SinglesOut),
	multiple_atom_concat([Dir,'wet_conjectures.pl'],CJFile),
	multiple_atom_concat([Dir,'wet_experiments.pl'],ExpFile),
	multiple_atom_concat([Dir,'wet_outer_recovery_point.pl'],RecoveryPath),
	(   \+ exists_file(RecoveryPath),
	    find_wet_conjectures_and_experiments(KnownGenes,CJFile,ExpFile)
	;   execute_wet_recovery(CJFile,ExpFile)
	).

%% interesting_reactions(ReactionIds)
%% The list of reaction identifiers currently selected; only 412 is active.
%% NOTE(review): the consumer of this fact is not visible in this part of
%% the file.
interesting_reactions([412]).

%% Wider selection, disabled:
%interesting_reactions([226,227,228,229,230,231,251,412]).


%% orf_name_and_rank(I,Max,OldOrf,Enzyme,Rankings,Done,Ranks)
%% Converts at most Max-I+1 leading ranking(Orf,Score) terms into
%% match(OldOrf,NewOrf,Enzyme,Position) terms, where Position counts up
%% from I.  NewOrf is the second element of the two-element list that
%% chain_list/2 gives for the chain separated_string/3 makes from Orf with
%% ':' (presumably an 'organism:orf' identifier - confirm).
%% Done is the accumulator; it is reversed at the end so Ranks follows the
%% order of Rankings.
%%
%% The two terminating clauses commit with a cut.  Without it the clauses
%% overlapped: an empty list with I > Max gave the same answer twice, and on
%% backtracking the I > Max case fell through to the last clause and
%% produced matches beyond Max.

% No rankings left.
orf_name_and_rank(_I,_Max,_OldOrf,_E,[],Done,Ranks) :-
	!,
	reverse(Done,Ranks).

% Requested number of matches reached.
orf_name_and_rank(I,Max,_OldOrf,_E,_Rankings,Done,Ranks) :-
	I > Max,
	!,
	reverse(Done,Ranks).

orf_name_and_rank(I,Max,OldOrf,Enzyme,[Ranking|Rankings],Done,Ranks) :-
	Ranking = ranking(Orf,_Score),
	separated_string(':',Orf,Chain),
	chain_list(Chain,[_Org,NewOrf]),
	NewRanking = match(OldOrf,NewOrf,Enzyme,I),
	I2 is I + 1,!,
	orf_name_and_rank(I2,Max,OldOrf,Enzyme,Rankings,[NewRanking|Done],Ranks).

